diff mbox series

[1/3,v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

Message ID 20240611064901.37222-1-lin1.hu@intel.com
State New
Headers show
Series [1/3,v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float. | expand

Commit Message

Hu, Lin1 June 11, 2024, 6:49 a.m. UTC
I wrapped part of the indirect-conversion handling into a new function. Its API
follows supportable_narrowing_operation/supportable_widening_operation.

BRs,
Lin

gcc/ChangeLog:

	PR target/107432
	* tree-vect-generic.cc
	(expand_vector_conversion): Support convert for int -> int,
	float -> float and int <-> float.
	* tree-vect-stmts.cc (vectorizable_conversion): Wrap the
	indirect convert part.
	(supportable_indirect_convert_operation): New function.
	* tree-vectorizer.h (supportable_indirect_convert_operation):
	Define the new function.

gcc/testsuite/ChangeLog:

	PR target/107432
	* gcc.target/i386/pr107432-1.c: New test.
	* gcc.target/i386/pr107432-2.c: Ditto.
	* gcc.target/i386/pr107432-3.c: Ditto.
	* gcc.target/i386/pr107432-4.c: Ditto.
	* gcc.target/i386/pr107432-5.c: Ditto.
	* gcc.target/i386/pr107432-6.c: Ditto.
	* gcc.target/i386/pr107432-7.c: Ditto.
---
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +++++++++
 gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +++++
 gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +++++
 gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 ++++++
 gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr107432-7.c | 156 +++++++++++++
 gcc/tree-vect-generic.cc                   |  33 ++-
 gcc/tree-vect-stmts.cc                     | 244 +++++++++++++--------
 gcc/tree-vectorizer.h                      |   9 +
 10 files changed, 1011 insertions(+), 92 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c

Comments

Hu, Lin1 June 17, 2024, 1:48 a.m. UTC | #1
Ping this thread.

BRs,
Lin

-----Original Message-----
From: Hu, Lin1 <lin1.hu@intel.com> 
Sent: Tuesday, June 11, 2024 2:49 PM
To: gcc-patches@gcc.gnu.org
Cc: Liu, Hongtao <hongtao.liu@intel.com>; ubizjak@gmail.com; rguenther@suse.de
Subject: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

I wrapped part of the indirect-conversion handling into a new function. Its API follows supportable_narrowing_operation/supportable_widening_operation.

BRs,
Lin

gcc/ChangeLog:

	PR target/107432
	* tree-vect-generic.cc
	(expand_vector_conversion): Support convert for int -> int,
	float -> float and int <-> float.
	* tree-vect-stmts.cc (vectorizable_conversion): Wrap the
	indirect convert part.
	(supportable_indirect_convert_operation): New function.
	* tree-vectorizer.h (supportable_indirect_convert_operation):
	Define the new function.

gcc/testsuite/ChangeLog:

	PR target/107432
	* gcc.target/i386/pr107432-1.c: New test.
	* gcc.target/i386/pr107432-2.c: Ditto.
	* gcc.target/i386/pr107432-3.c: Ditto.
	* gcc.target/i386/pr107432-4.c: Ditto.
	* gcc.target/i386/pr107432-5.c: Ditto.
	* gcc.target/i386/pr107432-6.c: Ditto.
	* gcc.target/i386/pr107432-7.c: Ditto.
---
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +++++++++
 gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +++++
 gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +++++
 gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 ++++++
 gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr107432-7.c | 156 +++++++++++++
 gcc/tree-vect-generic.cc                   |  33 ++-
 gcc/tree-vect-stmts.cc                     | 244 +++++++++++++--------
 gcc/tree-vectorizer.h                      |   9 +
 10 files changed, 1011 insertions(+), 92 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c

diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
new file mode 100644
index 00000000000..a4f37447eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } 
+} */
+/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } 
+} */
+/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4))); typedef 
+char __v2qi __attribute__ ((__vector_size__ (2))); typedef char __v4qi 
+__attribute__ ((__vector_size__ (4))); typedef char __v8qi 
+__attribute__ ((__vector_size__ (8)));
+
+typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4))); 
+typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8))); 
+typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2))); 
+typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4))); 
+typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8))); 
+typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a) {
+  return __builtin_convertvector((__v2di)a, __v2si); }
+
+__m128i	mm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4di)a, __v4si); }
+
+__m256i	mm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8di)a, __v8si); }
+
+__v2hi	mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2hi); }
+
+__v4hi	mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4hi); }
+
+__m128i	mm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8di)a, __v8hi); }
+
+__v2qi	mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2qi); }
+
+__v4qi	mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4qi); }
+
+__v8qi	mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
+{
+  return __builtin_convertvector((__v8di)a, __v8qi); }
+
+__v2hi	mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector((__v2si)a, __v2hi); }
+
+__v4hi	mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4hi); }
+
+__m128i	mm256_cvtepi32_epi16_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v8si)a, __v8hi); }
+
+__m256i	mm512_cvtepi32_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v16si)a, __v16hi); }
+
+__v2qi	mm64_cvtepi32_epi8_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector((__v2si)a, __v2qi); }
+
+__v4qi	mm_cvtepi32_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4qi); }
+
+__v8qi	mm256_cvtepi32_epi8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v8si)a, __v8qi); }
+
+__m128i	mm512_cvtepi32_epi8_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v16si)a, __v16qi); }
+
+__v2qi	mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
+{
+  return __builtin_convertvector((__v2hi)a, __v2qi); }
+
+__v8qi	mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v8hi)a, __v8qi); }
+
+__m128i	mm256_cvtepi16_epi8_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v16hi)a, __v16qi); }
+
+__m256i	mm512_cvtepi16_epi8_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v32hi)a, __v32qi); }
+
+__v2su mm_cvtepu64_epu32_builtin_convertvector(__m128i a) {
+  return __builtin_convertvector((__v2du)a, __v2su); }
+
+__m128i	mm256_cvtepu64_epu32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4du)a, __v4su); }
+
+__m256i	mm512_cvtepu64_epu32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8du)a, __v8su); }
+
+__v2hu	mm_cvtepu64_epu16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2du)a, __v2hu); }
+
+__v4hu	mm256_cvtepu64_epu16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4du)a, __v4hu); }
+
+__m128i	mm512_cvtepu64_epu16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8du)a, __v8hu); }
+
+__v2qu	mm_cvtepu64_epu8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2du)a, __v2qu); }
+
+__v4qu	mm256_cvtepu64_epu8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4du)a, __v4qu); }
+
+__v8qu	mm512_cvtepu64_epu8_builtin_convertvector(__m512i a)
+{
+  return __builtin_convertvector((__v8du)a, __v8qu); }
+
+__v2hu	mm32_cvtepu32_epu16_builtin_convertvector(__v2su a)
+{
+  return __builtin_convertvector((__v2su)a, __v2hu); }
+
+__v4hu	mm_cvtepu32_epu16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4su)a, __v4hu); }
+
+__m128i	mm256_cvtepu32_epu16_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v8su)a, __v8hu); }
+
+__m256i	mm512_cvtepu32_epu16_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v16su)a, __v16hu); }
+
+__v2qu	mm32_cvtepu32_epu8_builtin_convertvector(__v2su a)
+{
+  return __builtin_convertvector((__v2su)a, __v2qu); }
+
+__v4qu	mm_cvtepu32_epu8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4su)a, __v4qu); }
+
+__v8qu	mm256_cvtepu32_epu8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v8su)a, __v8qu); }
+
+__m128i	mm512_cvtepu32_epu8_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v16su)a, __v16qu); }
+
+__v2qu	mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
+{
+  return __builtin_convertvector((__v2hu)a, __v2qu); }
+
+__v8qu	mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v8hu)a, __v8qu); }
+
+__m128i	mm256_cvtepu16_epu8_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v16hu)a, __v16qu); }
+
+__m256i	mm512_cvtepu16_epu8_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v32hu)a, __v32qu); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-2.c b/gcc/testsuite/gcc.target/i386/pr107432-2.c
new file mode 100644
index 00000000000..02ffd811cb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-2.c
@@ -0,0 +1,105 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovsxdq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwd" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw" 3 } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4))); typedef 
+char __v2qi __attribute__ ((__vector_size__ (2))); typedef char __v4qi 
+__attribute__ ((__vector_size__ (4))); typedef char __v8qi 
+__attribute__ ((__vector_size__ (8)));
+
+__m128i mm_cvtepi32_epi64_builtin_convertvector(__v2si a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__m256i	mm256_cvtepi32_epi64_builtin_convertvector(__v4si a)
+{
+  return (__m256i)__builtin_convertvector(a, __v4di); }
+
+__m512i	mm512_cvtepi32_epi64_builtin_convertvector(__v8si a)
+{
+  return (__m512i)__builtin_convertvector(a, __v8di); }
+
+__m128i mm_cvtepi16_epi64_builtin_convertvector(__v2hi a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__m256i	mm256_cvtepi16_epi64_builtin_convertvector(__v4hi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v4di); }
+
+__m512i	mm512_cvtepi16_epi64_builtin_convertvector(__v8hi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v8di); }
+
+__m128i mm_cvtepi8_epi64_builtin_convertvector(__v2qi a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__m256i	mm256_cvtepi8_epi64_builtin_convertvector(__v4qi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v4di); }
+
+__m512i	mm512_cvtepi8_epi64_builtin_convertvector(__v8qi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v8di); }
+
+__m128i mm_cvtepi16_epi32_builtin_convertvector(__v4hi a) {
+  return (__m128i)__builtin_convertvector(a, __v4si); }
+
+__m256i	mm256_cvtepi16_epi32_builtin_convertvector(__v8hi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v8si); }
+
+__m512i	mm512_cvtepi16_epi32_builtin_convertvector(__v16hi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v16si); }
+
+__m128i mm_cvtepi8_epi32_builtin_convertvector(__v4qi a) {
+  return (__m128i)__builtin_convertvector(a, __v4si); }
+
+__m256i	mm256_cvtepi8_epi32_builtin_convertvector(__v8qi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v8si); }
+
+__m512i	mm512_cvtepi8_epi32_builtin_convertvector(__v16qi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v16si); }
+
+__m128i mm_cvtepi8_epi16_builtin_convertvector(__v8qi a) {
+  return (__m128i)__builtin_convertvector(a, __v8hi); }
+
+__m256i	mm256_cvtepi8_epi16_builtin_convertvector(__v16qi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v16hi); }
+
+__v32hi	mm512_cvtepi8_epi16_builtin_convertvector(__v32qi a)
+{
+  return __builtin_convertvector(a, __v32hi); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-3.c b/gcc/testsuite/gcc.target/i386/pr107432-3.c
new file mode 100644
index 00000000000..30dc947b6dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-3.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2ph" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4))); typedef 
+_Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2sf mm_cvtpd_ps_builtin_convertvector(__v2df a) {
+  return __builtin_convertvector(a, __v2sf); }
+
+__v4sf	mm256_cvtpd_ps_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector(a, __v4sf); }
+
+__v8sf	mm512_cvtpd_ps_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector(a, __v8sf); }
+
+__v2hf mm_cvtpd_ph_builtin_convertvector(__v2df a) {
+  return __builtin_convertvector(a, __v2hf); }
+
+__v4hf	mm256_cvtpd_ph_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector(a, __v4hf); }
+
+__v8hf	mm512_cvtpd_ph_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector(a, __v8hf); }
+
+__v4hf mm_cvtps_ph_builtin_convertvector(__v4sf a) {
+  return __builtin_convertvector(a, __v4hf); }
+
+__v8hf	mm256_cvtps_ph_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector(a, __v8hf); }
+
+__v16hf	mm512_cvtps_ph_builtin_convertvector(__v16sf a)
+{
+  return __builtin_convertvector(a, __v16hf); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-4.c b/gcc/testsuite/gcc.target/i386/pr107432-4.c
new file mode 100644
index 00000000000..e537e7349e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-4.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvtps2pd" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtps2pd" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtph2pd" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4))); typedef 
+_Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2df mm_cvtps_pd_builtin_convertvector(__v2sf a) {
+  return __builtin_convertvector(a, __v2df); }
+
+__v4df	mm256_cvtps_pd_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector(a, __v4df); }
+
+__v8df	mm512_cvtps_pd_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector(a, __v8df); }
+
+__v2df mm_cvtph_pd_builtin_convertvector(__v2hf a) {
+  return __builtin_convertvector(a, __v2df); }
+
+__v4df	mm256_cvtph_pd_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector(a, __v4df); }
+
+__v8df	mm512_cvtph_pd_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8df); }
+
+__v4sf mm_cvtph_ps_builtin_convertvector(__v4hf a) {
+  return __builtin_convertvector(a, __v4sf); }
+
+__v8sf	mm256_cvtph_ps_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8sf); }
+
+__v16sf	mm512_cvtph_ps_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector(a, __v16sf); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-5.c b/gcc/testsuite/gcc.target/i386/pr107432-5.c
new file mode 100644
index 00000000000..5a44ef9f3b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-5.c
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512dq -mavx512fp16 -mavx512vl -O3" 
+} */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvttps2qq" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4))); typedef 
+_Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtpd_epi32_builtin_convertvector(__v2df a) {
+  return __builtin_convertvector(a, __v2si); }
+
+__v4si	mm256_cvtpd_epi32_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector(a, __v4si); }
+
+__v8si	mm512_cvtpd_epi32_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector(a, __v8si); }
+
+__v2di mm_cvtps_epi64_builtin_convertvector(__v2sf a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__v4di	mm256_cvtps_epi64_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector(a, __v4di); }
+
+__v8di	mm512_cvtps_epi64_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector(a, __v8di); }
+
+__v4si mm_cvtph_epi32_builtin_convertvector(__v4hf a) {
+  return __builtin_convertvector(a, __v4si); }
+
+__v8si	mm256_cvtph_epi32_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8si); }
+
+__v16si	mm512_cvtph_epi32_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector(a, __v16si); }
+
+__v2di mm_cvtph_epi64_builtin_convertvector(__v2hf a) {
+  return __builtin_convertvector(a, __v2di); }
+
+__v4di	mm256_cvtph_epi64_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector(a, __v4di); }
+
+__v8di	mm512_cvtph_epi64_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8di); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
new file mode 100644
index 00000000000..4a68a10b089
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
@@ -0,0 +1,139 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq 
+-fno-trapping-math" } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 
+} } } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 
+} } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } 
+} */
+/* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include <x86intrin.h>
+
+typedef char __v2qi __attribute__ ((__vector_size__ (2))); typedef char 
+__v4qi __attribute__ ((__vector_size__ (4))); typedef char __v8qi 
+__attribute__ ((__vector_size__ (8))); typedef char __v16qi 
+__attribute__ ((__vector_size__ (16))); typedef unsigned char __v2qu 
+__attribute__ ((vector_size (2))); typedef unsigned char __v4qu 
+__attribute__ ((vector_size (4))); typedef unsigned char __v8qu 
+__attribute__ ((vector_size (8))); typedef unsigned char __v16qu 
+__attribute__ ((vector_size (16))); typedef _Float16 __v2hf 
+__attribute__ ((__vector_size__ (4))); typedef _Float16 __v4hf 
+__attribute__ ((__vector_size__ (8))); typedef _Float16 __v8hf 
+__attribute__ ((__vector_size__ (16)));
+
+__v2qi	mm_cvtpd_epi8_builtin_convertvector(__v2df a)
+{
+  return __builtin_convertvector((__v2df)a, __v2qi); }
+
+__v4qi	mm256_cvtpd_epi8_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector((__v4df)a, __v4qi); }
+
+__v8qi	mm512_cvtpd_epi8_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector((__v8df)a, __v8qi); }
+
+__v2qu	mm_cvtpd_epu8_builtin_convertvector(__v2df a)
+{
+  return __builtin_convertvector((__v2df)a, __v2qu); }
+
+__v4qu	mm256_cvtpd_epu8_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector((__v4df)a, __v4qu); }
+
+__v8qu	mm512_cvtpd_epu8_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector((__v8df)a, __v8qu); }
+
+__v2qi	mm64_cvtps_epi8_builtin_convertvector(__v2sf a)
+{
+  return __builtin_convertvector((__v2sf)a, __v2qi); }
+
+__v4qi	mm128_cvtps_epi8_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector((__v4sf)a, __v4qi); }
+
+__v8qi	mm256_cvtps_epi8_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector((__v8sf)a, __v8qi); }
+
+__v16qi	mm512_cvtps_epi8_builtin_convertvector(__v16sf a)
+{
+  return __builtin_convertvector((__v16sf)a, __v16qi); }
+
+__v2qu	mm64_cvtps_epu8_builtin_convertvector(__v2sf a)
+{
+  return __builtin_convertvector((__v2sf)a, __v2qu); }
+
+__v4qu	mm128_cvtps_epu8_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector((__v4sf)a, __v4qu); }
+
+__v8qu	mm256_cvtps_epu8_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector((__v8sf)a, __v8qu); }
+
+__v16qu	mm512_cvtps_epu8_builtin_convertvector(__v16sf a)
+{
+  return __builtin_convertvector((__v16sf)a, __v16qu); }
+
+__v2qi	mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
+{
+  return __builtin_convertvector((__v2hf)a, __v2qi); }
+
+__v8qi	mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector((__v8hf)a, __v8qi); }
+
+__v16qi	mm256_cvtph_epi8_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector((__v16hf)a, __v16qi); }
+
+__v32qi	mm512_cvtph_epi8_builtin_convertvector(__v32hf a)
+{
+  return __builtin_convertvector((__v32hf)a, __v32qi); }
+
+__v2qu	mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
+{
+  return __builtin_convertvector((__v2hf)a, __v2qu); }
+
+__v8qu	mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector((__v8hf)a, __v8qu); }
+
+__v16qu	mm256_cvtph_epu8_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector((__v16hf)a, __v16qu); }
+
+__v32qu	mm512_cvtph_epu8_builtin_convertvector(__v32hf a)
+{
+  return __builtin_convertvector((__v32hf)a, __v32qu); }
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-7.c b/gcc/testsuite/gcc.target/i386/pr107432-7.c
new file mode 100644
index 00000000000..0ff5a97ed1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-7.c
@@ -0,0 +1,156 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq 
+-fno-trapping-math" } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd" 2 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd" 3 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps" 3 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps" 4 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps" 3 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps" 4 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vcvtw2ph" 4 { target { ia32 } } } 
+} */
+/* { dg-final { scan-assembler-times "vcvtw2ph" 5 { target { ! ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtuw2ph" 4 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vcvtuw2ph" 5 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } 
+} } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } 
+} } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } 
+} } } */
+
+#include <x86intrin.h>
+
+typedef char __v2qi __attribute__ ((__vector_size__ (2))); typedef char 
+__v4qi __attribute__ ((__vector_size__ (4))); typedef char __v8qi 
+__attribute__ ((__vector_size__ (8))); typedef char __v16qi 
+__attribute__ ((__vector_size__ (16))); typedef unsigned char __v2qu 
+__attribute__ ((vector_size (2))); typedef unsigned char __v4qu 
+__attribute__ ((vector_size (4))); typedef unsigned char __v8qu 
+__attribute__ ((vector_size (8))); typedef unsigned char __v16qu 
+__attribute__ ((vector_size (16))); typedef _Float16 __v2hf 
+__attribute__ ((__vector_size__ (4))); typedef _Float16 __v4hf 
+__attribute__ ((__vector_size__ (8))); typedef _Float16 __v8hf 
+__attribute__ ((__vector_size__ (16)));
+
+__v2df	mm_cvtepi8_pd_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector((__v2qi)a, __v2df); }
+
+__v4df	mm256_cvtepi8_pd_builtin_convertvector(__v4qi a)
+{
+  return __builtin_convertvector((__v4qi)a, __v4df); }
+
+__v8df	mm512_cvtepi8_pd_builtin_convertvector(__v8qi a)
+{
+  return __builtin_convertvector((__v8qi)a, __v8df); }
+
+__v2df	mm_cvtepu8_pd_builtin_convertvector(__v2qu a)
+{
+  return __builtin_convertvector((__v2qu)a, __v2df); }
+
+__v4df	mm256_cvtepu8_pd_builtin_convertvector(__v4qu a)
+{
+  return __builtin_convertvector((__v4qu)a, __v4df); }
+
+__v8df	mm512_cvtepu8_pd_builtin_convertvector(__v8qu a)
+{
+  return __builtin_convertvector((__v8qu)a, __v8df); }
+
+__v2sf	mm64_cvtepi8_ps_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector((__v2qi)a, __v2sf); }
+
+__v4sf	mm128_cvtepi8_ps_builtin_convertvector(__v4qi a)
+{
+  return __builtin_convertvector((__v4qi)a, __v4sf); }
+
+__v8sf	mm256_cvtepi8_ps_builtin_convertvector(__v8qi a)
+{
+  return __builtin_convertvector((__v8qi)a, __v8sf); }
+
+__v16sf	mm512_cvtepi8_ps_builtin_convertvector(__v16qi a)
+{
+  return __builtin_convertvector((__v16qi)a, __v16sf); }
+
+__v2sf	mm64_cvtepu8_ps_builtin_convertvector(__v2qu a)
+{
+  return __builtin_convertvector((__v2qu)a, __v2sf); }
+
+__v4sf	mm128_cvtepu8_ps_builtin_convertvector(__v4qu a)
+{
+  return __builtin_convertvector((__v4qu)a, __v4sf); }
+
+__v8sf	mm256_cvtepu8_ps_builtin_convertvector(__v8qu a)
+{
+  return __builtin_convertvector((__v8qu)a, __v8sf); }
+
+__v16sf	mm512_cvtepu8_ps_builtin_convertvector(__v16qu a)
+{
+  return __builtin_convertvector((__v16qu)a, __v16sf); }
+
+__v2hf	mm32_cvtepi8_ph_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector((__v2qi)a, __v2hf); }
+
+__v4hf	mm64_cvtepi8_ph_builtin_convertvector(__v4qi a)
+{
+  return __builtin_convertvector((__v4qi)a, __v4hf); }
+
+__v8hf	mm128_cvtepi8_ph_builtin_convertvector(__v8qi a)
+{
+  return __builtin_convertvector((__v8qi)a, __v8hf); }
+
+__v16hf	mm256_cvtepi8_ph_builtin_convertvector(__v16qi a)
+{
+  return __builtin_convertvector((__v16qi)a, __v16hf); }
+
+__v32hf	mm512_cvtepi8_ph_builtin_convertvector(__v32qi a)
+{
+  return __builtin_convertvector((__v32qi)a, __v32hf); }
+
+__v2hf	mm32_cvtepu8_ph_builtin_convertvector(__v2qu a)
+{
+  return __builtin_convertvector((__v2qu)a, __v2hf); }
+
+__v4hf	mm64_cvtepu8_ph_builtin_convertvector(__v4qu a)
+{
+  return __builtin_convertvector((__v4qu)a, __v4hf); }
+
+__v8hf	mm128_cvtepu8_ph_builtin_convertvector(__v8qu a)
+{
+  return __builtin_convertvector((__v8qu)a, __v8hf); }
+
+__v16hf	mm256_cvtepu8_ph_builtin_convertvector(__v16qu a)
+{
+  return __builtin_convertvector((__v16qu)a, __v16hf); }
+
+__v32hf	mm512_cvtepu8_ph_builtin_convertvector(__v32qu a)
+{
+  return __builtin_convertvector((__v32qu)a, __v32hf); }
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index ea0069f7a67..c38c0b9dda8 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -45,6 +45,8 @@ along with GCC; see the file COPYING3.  If not see  #include "gimple-match.h"
 #include "recog.h"		/* FIXME: for insn_data */
 #include "optabs-libfuncs.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
 
 
 /* Build a ternary operation and gimplify it.  Emit code before GSI.
@@ -1870,14 +1872,33 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
   else if (ret_elt_bits > arg_elt_bits)
     modifier = WIDEN;
 
+  if (supportable_convert_operation (code, ret_type, arg_type, &code1))
+    {
+      g = gimple_build_assign (lhs, code1, arg);
+      gsi_replace (gsi, g, false);
+      return;
+    }
+
+  code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
+  int multi_step_cvt = 0;
+  vec<tree> interm_types = vNULL;
+  if (supportable_indirect_convert_operation (NULL,
+					      code,
+					      ret_type, arg_type,
+					      &code2, &code3,
+					      &multi_step_cvt,
+					      &interm_types, arg))
+    {
+      new_rhs = make_ssa_name (interm_types[0]);
+      g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+      g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
+      gsi_replace (gsi, g, false);
+      return;
+    }
+
   if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
     {
-      if (supportable_convert_operation (code, ret_type, arg_type, &code1))
-	{
-	  g = gimple_build_assign (lhs, code1, arg);
-	  gsi_replace (gsi, g, false);
-	  return;
-	}
       /* Can't use get_compute_type here, as supportable_convert_operation
 	 doesn't necessarily use an optab and needs two arguments.  */
       tree vec_compute_type
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 05a169ecb2d..0aa608202ca 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
   tree scalar_dest;
   tree op0, op1 = NULL_TREE;
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
-  tree_code tc1, tc2;
+  tree_code tc1;
   code_helper code, code1, code2;
   code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
   tree new_temp;
@@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo,
 	break;
       }
 
-      /* For conversions between float and integer types try whether
-	 we can use intermediate signed integer types to support the
-	 conversion.  */
-      if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
-	  && (code == FLOAT_EXPR ||
-	      (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
-	{
-	  bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
-	  bool float_expr_p = code == FLOAT_EXPR;
-	  unsigned short target_size;
-	  scalar_mode intermediate_mode;
-	  if (demotion)
-	    {
-	      intermediate_mode = lhs_mode;
-	      target_size = GET_MODE_SIZE (rhs_mode);
-	    }
-	  else
-	    {
-	      target_size = GET_MODE_SIZE (lhs_mode);
-	      if (!int_mode_for_size
-		  (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
-		goto unsupported;
-	    }
-	  code1 = float_expr_p ? code : NOP_EXPR;
-	  codecvt1 = float_expr_p ? NOP_EXPR : code;
-	  opt_scalar_mode mode_iter;
-	  FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
-	    {
-	      intermediate_mode = mode_iter.require ();
-
-	      if (GET_MODE_SIZE (intermediate_mode) > target_size)
-		break;
-
-	      scalar_mode cvt_mode;
-	      if (!int_mode_for_size
-		  (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
-		break;
-
-	      cvt_type = build_nonstandard_integer_type
-		(GET_MODE_BITSIZE (cvt_mode), 0);
-
-	      /* Check if the intermediate type can hold OP0's range.
-		 When converting from float to integer this is not necessary
-		 because values that do not fit the (smaller) target type are
-		 unspecified anyway.  */
-	      if (demotion && float_expr_p)
-		{
-		  wide_int op_min_value, op_max_value;
-		  if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
-		    break;
-
-		  if (cvt_type == NULL_TREE
-		      || (wi::min_precision (op_max_value, SIGNED)
-			  > TYPE_PRECISION (cvt_type))
-		      || (wi::min_precision (op_min_value, SIGNED)
-			  > TYPE_PRECISION (cvt_type)))
-		    continue;
-		}
-
-	      cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
-	      /* This should only happened for SLP as long as loop vectorizer
-		 only supports same-sized vector.  */
-	      if (cvt_type == NULL_TREE
-		  || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
-		  || !supportable_convert_operation ((tree_code) code1,
-						     vectype_out,
-						     cvt_type, &tc1)
-		  || !supportable_convert_operation ((tree_code) codecvt1,
-						     cvt_type,
-						     vectype_in, &tc2))
-		continue;
-
-	      found_mode = true;
-	      break;
-	    }
+      if (supportable_indirect_convert_operation (vinfo,
+						  code,
+						  vectype_out,
+						  vectype_in,
+						  &code1,
+						  &codecvt1,
+						  &multi_step_cvt,
+						  &interm_types,
+						  op0,slp_node))
+	break;
 
-	  if (found_mode)
-	    {
-	      multi_step_cvt++;
-	      interm_types.safe_push (cvt_type);
-	      cvt_type = NULL_TREE;
-	      code1 = tc1;
-	      codecvt1 = tc2;
-	      break;
-	    }
-	}
       /* FALLTHRU */
     unsupported:
       if (dump_enabled_p ())
@@ -14626,6 +14551,153 @@ supportable_narrowing_operation (code_helper code,
   return false;
 }
 
+/* Function supportable_indirect_convert_operation
+
+   Check whether an operation represented by the code CODE can be
+   implemented as a pair of convert operations that are supported by
+   the target platform in vector form (i.e., when operating on arguments
+   of type VECTYPE_IN producing a result of type VECTYPE_OUT).
+
+   Convert operations we currently support indirectly are FIX_TRUNC
+   and FLOAT.  This function checks whether such operations can be
+   supported via an intermediate type (using vector tree-codes).
+
+   Output:
+   - CODE1 is the code of a vector operation to be used when
+   converting the operation in the first step, if available.
+   - CODE2 is the code of a vector operation to be used when
+   converting the operation in the second step, if available.
+   - MULTI_STEP_CVT determines the number of required intermediate steps in
+   case of multi-step conversion (like int->short->char - in that case
+   MULTI_STEP_CVT will be 1).  Currently this function always sets it to 1.
+   - INTERM_TYPES contains the intermediate type required to perform the
+   convert operation (short in the above example).   */
+bool
+supportable_indirect_convert_operation (vec_info *vinfo,
+					code_helper code,
+					tree vectype_out,
+					tree vectype_in,
+					code_helper *code1,
+					code_helper *code2,
+					int *multi_step_cvt,
+					vec<tree> *interm_types,
+					tree op0,
+					slp_tree slp_node)
+{
+  bool found_mode = false;
+  scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
+  scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
+  opt_scalar_mode mode_iter;
+  tree_code tc1, tc2;
+
+  tree cvt_type = NULL_TREE;
+  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
+
+  (*multi_step_cvt) = 0;
+  /* For conversions between float and integer types try whether
+     we can use intermediate signed integer types to support the
+     conversion.  */
+  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
+      && (code == FLOAT_EXPR
+	  || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
+    {
+      bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
+      bool float_expr_p = code == FLOAT_EXPR;
+      unsigned short target_size;
+      scalar_mode intermediate_mode;
+      if (demotion)
+	{
+	  intermediate_mode = lhs_mode;
+	  target_size = GET_MODE_SIZE (rhs_mode);
+	}
+      else
+	{
+	  target_size = GET_MODE_SIZE (lhs_mode);
+	  if (!int_mode_for_size
+	      (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
+	    return false;
+	}
+      *code1 = float_expr_p ? code : NOP_EXPR;
+      *code2 = float_expr_p ? NOP_EXPR : code;
+      opt_scalar_mode mode_iter;
+      FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
+	{
+	  intermediate_mode = mode_iter.require ();
+
+	  if (GET_MODE_SIZE (intermediate_mode) > target_size)
+	    break;
+
+	  scalar_mode cvt_mode;
+	  if (!int_mode_for_size
+	      (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
+	    break;
+
+	  cvt_type = build_nonstandard_integer_type
+	    (GET_MODE_BITSIZE (cvt_mode), 0);
+
+	  /* Check if the intermediate type can hold OP0's range.
+	     When converting from float to integer this is not necessary
+	     because values that do not fit the (smaller) target type are
+	     unspecified anyway.  */
+	  if (demotion && float_expr_p)
+	    {
+	      wide_int op_min_value, op_max_value;
+	      /* For vector form, it looks like op0 doesn't have RANGE_INFO.
+		 In the future, if it is supported, changes may need to be made
+		 to this part, such as checking the RANGE of each element
+		 in the vector.  */
+	      if (!SSA_NAME_RANGE_INFO (op0)
+		  || !vect_get_range_info (op0, &op_min_value, &op_max_value))
+		break;
+
+	      if (cvt_type == NULL_TREE
+		  || (wi::min_precision (op_max_value, SIGNED)
+		      > TYPE_PRECISION (cvt_type))
+		  || (wi::min_precision (op_min_value, SIGNED)
+		      > TYPE_PRECISION (cvt_type)))
+		continue;
+	    }
+
+	  if (vinfo != NULL && slp_node != NULL)
+	    cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
+	  else
+	    {
+	      bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out))
+			 || TYPE_UNSIGNED (TREE_TYPE (vectype_in));
+	      cvt_type = build_nonstandard_integer_type
+		(GET_MODE_BITSIZE (cvt_mode), uns);
+	      cvt_type = build_vector_type (cvt_type, nelts);
+	    }
+	  /* This should only happen for SLP, as long as the loop vectorizer
+	     only supports same-sized vectors.  */
+	  if (cvt_type == NULL_TREE
+	      || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
+	      || !supportable_convert_operation ((tree_code) *code1,
+						 vectype_out,
+						 cvt_type, &tc1)
+	      || !supportable_convert_operation ((tree_code) *code2,
+						 cvt_type,
+						 vectype_in, &tc2))
+	    continue;
+
+	  found_mode = true;
+	  break;
+	}
+
+      if (found_mode)
+	{
+	  (*multi_step_cvt)++;
+	  interm_types->safe_push (cvt_type);
+	  cvt_type = NULL_TREE;
+	  *code1 = tc1;
+	  *code2 = tc2;
+	  return true;
+	}
+    }
+  interm_types->release ();
+  return false;
+}
+
 /* Generate and return a vector mask of MASK_TYPE such that
    mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
    Add the statements to SEQ.  */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 97ec9c341e7..ad65ce71bb7 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2265,6 +2265,15 @@ extern bool supportable_widening_operation (vec_info*, code_helper,  extern bool supportable_narrowing_operation (code_helper, tree, tree,
 					     code_helper *, int *,
 					     vec<tree> *);
+extern bool supportable_indirect_convert_operation (vec_info *,
+						    code_helper,
+						    tree, tree,
+						    code_helper *,
+						    code_helper *,
+						    int *,
+						    vec<tree> *,
+						    tree = NULL_TREE,
+						    slp_tree = NULL);
 
 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
 				  enum vect_cost_for_stmt, stmt_vec_info,
--
2.31.1
Richard Biener June 18, 2024, 11:44 a.m. UTC | #2
On Tue, 11 Jun 2024, Hu, Lin1 wrote:

> I wrap a part of code about indirect conversion. The API refers to 
> supportable_narrowing/widening_operations.

Sorry for the delay - comments inline.

> BRs,
> Lin
> 
> gcc/ChangeLog:
> 
> 	PR target/107432
> 	* tree-vect-generic.cc
> 	(expand_vector_conversion): Support convert for int -> int,
> 	float -> float and int <-> float.
> 	* tree-vect-stmts.cc (vectorizable_conversion): Wrap the
> 	indirect convert part.
> 	(supportable_indirect_convert_operation): New function.
> 	* tree-vectorizer.h (supportable_indirect_convert_operation):
> 	Define the new function.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR target/107432
> 	* gcc.target/i386/pr107432-1.c: New test.
> 	* gcc.target/i386/pr107432-2.c: Ditto.
> 	* gcc.target/i386/pr107432-3.c: Ditto.
> 	* gcc.target/i386/pr107432-4.c: Ditto.
> 	* gcc.target/i386/pr107432-5.c: Ditto.
> 	* gcc.target/i386/pr107432-6.c: Ditto.
> 	* gcc.target/i386/pr107432-7.c: Ditto.
> ---
>  gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 ++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +++++++++
>  gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +++++
>  gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +++++
>  gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 ++++++
>  gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 ++++++++++++
>  gcc/testsuite/gcc.target/i386/pr107432-7.c | 156 +++++++++++++
>  gcc/tree-vect-generic.cc                   |  33 ++-
>  gcc/tree-vect-stmts.cc                     | 244 +++++++++++++--------
>  gcc/tree-vectorizer.h                      |   9 +
>  10 files changed, 1011 insertions(+), 92 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c
> 
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> new file mode 100644
> index 00000000000..a4f37447eb4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> @@ -0,0 +1,234 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef short __v2hi __attribute__ ((__vector_size__ (4)));
> +typedef char __v2qi __attribute__ ((__vector_size__ (2)));
> +typedef char __v4qi __attribute__ ((__vector_size__ (4)));
> +typedef char __v8qi __attribute__ ((__vector_size__ (8)));
> +
> +typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4)));
> +typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8)));
> +typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
> +typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4)));
> +typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8)));
> +typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
> +
> +__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v2di)a, __v2si);
> +}
> +
> +__m128i	mm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v4di)a, __v4si);
> +}
> +
> +__m256i	mm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
> +{
> +  return (__m256i)__builtin_convertvector((__v8di)a, __v8si);
> +}
> +
> +__v2hi	mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v2di)a, __v2hi);
> +}
> +
> +__v4hi	mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
> +{
> +  return __builtin_convertvector((__v4di)a, __v4hi);
> +}
> +
> +__m128i	mm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v8di)a, __v8hi);
> +}
> +
> +__v2qi	mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v2di)a, __v2qi);
> +}
> +
> +__v4qi	mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
> +{
> +  return __builtin_convertvector((__v4di)a, __v4qi);
> +}
> +
> +__v8qi	mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
> +{
> +  return __builtin_convertvector((__v8di)a, __v8qi);
> +}
> +
> +__v2hi	mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
> +{
> +  return __builtin_convertvector((__v2si)a, __v2hi);
> +}
> +
> +__v4hi	mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v4si)a, __v4hi);
> +}
> +
> +__m128i	mm256_cvtepi32_epi16_builtin_convertvector(__m256i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v8si)a, __v8hi);
> +}
> +
> +__m256i	mm512_cvtepi32_epi16_builtin_convertvector(__m512i a)
> +{
> +  return (__m256i)__builtin_convertvector((__v16si)a, __v16hi);
> +}
> +
> +__v2qi	mm64_cvtepi32_epi8_builtin_convertvector(__v2si a)
> +{
> +  return __builtin_convertvector((__v2si)a, __v2qi);
> +}
> +
> +__v4qi	mm_cvtepi32_epi8_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v4si)a, __v4qi);
> +}
> +
> +__v8qi	mm256_cvtepi32_epi8_builtin_convertvector(__m256i a)
> +{
> +  return __builtin_convertvector((__v8si)a, __v8qi);
> +}
> +
> +__m128i	mm512_cvtepi32_epi8_builtin_convertvector(__m512i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v16si)a, __v16qi);
> +}
> +
> +__v2qi	mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
> +{
> +  return __builtin_convertvector((__v2hi)a, __v2qi);
> +}
> +
> +__v8qi	mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v8hi)a, __v8qi);
> +}
> +
> +__m128i	mm256_cvtepi16_epi8_builtin_convertvector(__m256i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v16hi)a, __v16qi);
> +}
> +
> +__m256i	mm512_cvtepi16_epi8_builtin_convertvector(__m512i a)
> +{
> +  return (__m256i)__builtin_convertvector((__v32hi)a, __v32qi);
> +}
> +
> +__v2su mm_cvtepu64_epu32_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v2du)a, __v2su);
> +}
> +
> +__m128i	mm256_cvtepu64_epu32_builtin_convertvector(__m256i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v4du)a, __v4su);
> +}
> +
> +__m256i	mm512_cvtepu64_epu32_builtin_convertvector(__m512i a)
> +{
> +  return (__m256i)__builtin_convertvector((__v8du)a, __v8su);
> +}
> +
> +__v2hu	mm_cvtepu64_epu16_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v2du)a, __v2hu);
> +}
> +
> +__v4hu	mm256_cvtepu64_epu16_builtin_convertvector(__m256i a)
> +{
> +  return __builtin_convertvector((__v4du)a, __v4hu);
> +}
> +
> +__m128i	mm512_cvtepu64_epu16_builtin_convertvector(__m512i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v8du)a, __v8hu);
> +}
> +
> +__v2qu	mm_cvtepu64_epu8_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v2du)a, __v2qu);
> +}
> +
> +__v4qu	mm256_cvtepu64_epu8_builtin_convertvector(__m256i a)
> +{
> +  return __builtin_convertvector((__v4du)a, __v4qu);
> +}
> +
> +__v8qu	mm512_cvtepu64_epu8_builtin_convertvector(__m512i a)
> +{
> +  return __builtin_convertvector((__v8du)a, __v8qu);
> +}
> +
> +__v2hu	mm32_cvtepu32_epu16_builtin_convertvector(__v2su a)
> +{
> +  return __builtin_convertvector((__v2su)a, __v2hu);
> +}
> +
> +__v4hu	mm_cvtepu32_epu16_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v4su)a, __v4hu);
> +}
> +
> +__m128i	mm256_cvtepu32_epu16_builtin_convertvector(__m256i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v8su)a, __v8hu);
> +}
> +
> +__m256i	mm512_cvtepu32_epu16_builtin_convertvector(__m512i a)
> +{
> +  return (__m256i)__builtin_convertvector((__v16su)a, __v16hu);
> +}
> +
> +__v2qu	mm32_cvtepu32_epu8_builtin_convertvector(__v2su a)
> +{
> +  return __builtin_convertvector((__v2su)a, __v2qu);
> +}
> +
> +__v4qu	mm_cvtepu2_epu8_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v4su)a, __v4qu);
> +}
> +
> +__v8qu	mm256_cvtepu32_epu8_builtin_convertvector(__m256i a)
> +{
> +  return __builtin_convertvector((__v8su)a, __v8qu);
> +}
> +
> +__m128i	mm512_cvtepu32_epu8_builtin_convertvector(__m512i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v16su)a, __v16qu);
> +}
> +
> +__v2qu	mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
> +{
> +  return __builtin_convertvector((__v2hu)a, __v2qu);
> +}
> +
> +__v8qu	mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v8hu)a, __v8qu);
> +}
> +
> +__m128i	mm256_cvtepu16_epu8_builtin_convertvector(__m256i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v16hu)a, __v16qu);
> +}
> +
> +__m256i	mm512_cvtepu16_epu8_builtin_convertvector(__m512i a)
> +{
> +  return (__m256i)__builtin_convertvector((__v32hu)a, __v32qu);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-2.c b/gcc/testsuite/gcc.target/i386/pr107432-2.c
> new file mode 100644
> index 00000000000..02ffd811cb4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-2.c
> @@ -0,0 +1,105 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vpmovsxdq" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxwq" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbq" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxwd" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbw" 3 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef short __v2hi __attribute__ ((__vector_size__ (4)));
> +typedef char __v2qi __attribute__ ((__vector_size__ (2)));
> +typedef char __v4qi __attribute__ ((__vector_size__ (4)));
> +typedef char __v8qi __attribute__ ((__vector_size__ (8)));
> +
> +__m128i mm_cvtepi32_epi64_builtin_convertvector(__v2si a)
> +{
> +  return __builtin_convertvector(a, __v2di);
> +}
> +
> +__m256i	mm256_cvtepi32_epi64_builtin_convertvector(__v4si a)
> +{
> +  return (__m256i)__builtin_convertvector(a, __v4di);
> +}
> +
> +__m512i	mm512_cvtepi32_epi64_builtin_convertvector(__v8si a)
> +{
> +  return (__m512i)__builtin_convertvector(a, __v8di);
> +}
> +
> +__m128i mm_cvtepi16_epi64_builtin_convertvector(__v2hi a)
> +{
> +  return __builtin_convertvector(a, __v2di);
> +}
> +
> +__m256i	mm256_cvtepi16_epi64_builtin_convertvector(__v4hi a)
> +{
> +  return (__m256i)__builtin_convertvector(a, __v4di);
> +}
> +
> +__m512i	mm512_cvtepi16_epi64_builtin_convertvector(__v8hi a)
> +{
> +  return (__m512i)__builtin_convertvector(a, __v8di);
> +}
> +
> +__m128i mm_cvtepi8_epi64_builtin_convertvector(__v2qi a)
> +{
> +  return __builtin_convertvector(a, __v2di);
> +}
> +
> +__m256i	mm256_cvtepi8_epi64_builtin_convertvector(__v4qi a)
> +{
> +  return (__m256i)__builtin_convertvector(a, __v4di);
> +}
> +
> +__m512i	mm512_cvtepi8_epi64_builtin_convertvector(__v8qi a)
> +{
> +  return (__m512i)__builtin_convertvector(a, __v8di);
> +}
> +
> +__m128i mm_cvtepi16_epi32_builtin_convertvector(__v4hi a)
> +{
> +  return (__m128i)__builtin_convertvector(a, __v4si);
> +}
> +
> +__m256i	mm256_cvtepi16_epi32_builtin_convertvector(__v8hi a)
> +{
> +  return (__m256i)__builtin_convertvector(a, __v8si);
> +}
> +
> +__m512i	mm512_cvtepi16_epi32_builtin_convertvector(__v16hi a)
> +{
> +  return (__m512i)__builtin_convertvector(a, __v16si);
> +}
> +
> +__m128i mm_cvtepi8_epi32_builtin_convertvector(__v4qi a)
> +{
> +  return (__m128i)__builtin_convertvector(a, __v4si);
> +}
> +
> +__m256i	mm256_cvtepi8_epi32_builtin_convertvector(__v8qi a)
> +{
> +  return (__m256i)__builtin_convertvector(a, __v8si);
> +}
> +
> +__m512i	mm512_cvtepi8_epi32_builtin_convertvector(__v16qi a)
> +{
> +  return (__m512i)__builtin_convertvector(a, __v16si);
> +}
> +
> +__m128i mm_cvtepi8_epi16_builtin_convertvector(__v8qi a)
> +{
> +  return (__m128i)__builtin_convertvector(a, __v8hi);
> +}
> +
> +__m256i	mm256_cvtepi8_epi16_builtin_convertvector(__v16qi a)
> +{
> +  return (__m256i)__builtin_convertvector(a, __v16hi);
> +}
> +
> +__v32hi	mm512_cvtepi8_epi16_builtin_convertvector(__v32qi a)
> +{
> +  return __builtin_convertvector(a, __v32hi);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-3.c b/gcc/testsuite/gcc.target/i386/pr107432-3.c
> new file mode 100644
> index 00000000000..30dc947b6dd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-3.c
> @@ -0,0 +1,55 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vcvtpd2ps" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvtpd2ph" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph" 3 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +
> +__v2sf mm_cvtpd_ps_builtin_convertvector(__v2df a)
> +{
> +  return __builtin_convertvector(a, __v2sf);
> +}
> +
> +__v4sf	mm256_cvtpd_ps_builtin_convertvector(__v4df a)
> +{
> +  return __builtin_convertvector(a, __v4sf);
> +}
> +
> +__v8sf	mm512_cvtpd_ps_builtin_convertvector(__v8df a)
> +{
> +  return __builtin_convertvector(a, __v8sf);
> +}
> +
> +__v2hf mm_cvtpd_ph_builtin_convertvector(__v2df a)
> +{
> +  return __builtin_convertvector(a, __v2hf);
> +}
> +
> +__v4hf	mm256_cvtpd_ph_builtin_convertvector(__v4df a)
> +{
> +  return __builtin_convertvector(a, __v4hf);
> +}
> +
> +__v8hf	mm512_cvtpd_ph_builtin_convertvector(__v8df a)
> +{
> +  return __builtin_convertvector(a, __v8hf);
> +}
> +
> +__v4hf mm_cvtps_ph_builtin_convertvector(__v4sf a)
> +{
> +  return __builtin_convertvector(a, __v4hf);
> +}
> +
> +__v8hf	mm256_cvtps_ph_builtin_convertvector(__v8sf a)
> +{
> +  return __builtin_convertvector(a, __v8hf);
> +}
> +
> +__v16hf	mm512_cvtps_ph_builtin_convertvector(__v16sf a)
> +{
> +  return __builtin_convertvector(a, __v16hf);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-4.c b/gcc/testsuite/gcc.target/i386/pr107432-4.c
> new file mode 100644
> index 00000000000..e537e7349e4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-4.c
> @@ -0,0 +1,56 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vcvtps2pd" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtps2pd" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtph2pd" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps" 3 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +
> +__v2df mm_cvtps_pd_builtin_convertvector(__v2sf a)
> +{
> +  return __builtin_convertvector(a, __v2df);
> +}
> +
> +__v4df	mm256_cvtps_pd_builtin_convertvector(__v4sf a)
> +{
> +  return __builtin_convertvector(a, __v4df);
> +}
> +
> +__v8df	mm512_cvtps_pd_builtin_convertvector(__v8sf a)
> +{
> +  return __builtin_convertvector(a, __v8df);
> +}
> +
> +__v2df mm_cvtph_pd_builtin_convertvector(__v2hf a)
> +{
> +  return __builtin_convertvector(a, __v2df);
> +}
> +
> +__v4df	mm256_cvtph_pd_builtin_convertvector(__v4hf a)
> +{
> +  return __builtin_convertvector(a, __v4df);
> +}
> +
> +__v8df	mm512_cvtph_pd_builtin_convertvector(__v8hf a)
> +{
> +  return __builtin_convertvector(a, __v8df);
> +}
> +
> +__v4sf mm_cvtph_ps_builtin_convertvector(__v4hf a)
> +{
> +  return __builtin_convertvector(a, __v4sf);
> +}
> +
> +__v8sf	mm256_cvtph_ps_builtin_convertvector(__v8hf a)
> +{
> +  return __builtin_convertvector(a, __v8sf);
> +}
> +
> +__v16sf	mm512_cvtph_ps_builtin_convertvector(__v16hf a)
> +{
> +  return __builtin_convertvector(a, __v16sf);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-5.c b/gcc/testsuite/gcc.target/i386/pr107432-5.c
> new file mode 100644
> index 00000000000..5a44ef9f3b9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-5.c
> @@ -0,0 +1,72 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512dq -mavx512fp16 -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvttps2qq" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2qq" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2dq" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvttph2qq" 3 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +
> +__v2si mm_cvtpd_epi32_builtin_convertvector(__v2df a)
> +{
> +  return __builtin_convertvector(a, __v2si);
> +}
> +
> +__v4si	mm256_cvtpd_epi32_builtin_convertvector(__v4df a)
> +{
> +  return __builtin_convertvector(a, __v4si);
> +}
> +
> +__v8si	mm512_cvtpd_epi32_builtin_convertvector(__v8df a)
> +{
> +  return __builtin_convertvector(a, __v8si);
> +}
> +
> +__v2di mm_cvtps_epi64_builtin_convertvector(__v2sf a)
> +{
> +  return __builtin_convertvector(a, __v2di);
> +}
> +
> +__v4di	mm256_cvtps_epi64_builtin_convertvector(__v4sf a)
> +{
> +  return __builtin_convertvector(a, __v4di);
> +}
> +
> +__v8di	mm512_cvtps_epi64_builtin_convertvector(__v8sf a)
> +{
> +  return __builtin_convertvector(a, __v8di);
> +}
> +
> +__v4si mm_cvtph_epi32_builtin_convertvector(__v4hf a)
> +{
> +  return __builtin_convertvector(a, __v4si);
> +}
> +
> +__v8si	mm256_cvtph_epi32_builtin_convertvector(__v8hf a)
> +{
> +  return __builtin_convertvector(a, __v8si);
> +}
> +
> +__v16si	mm512_cvtph_epi32_builtin_convertvector(__v16hf a)
> +{
> +  return __builtin_convertvector(a, __v16si);
> +}
> +
> +__v2di mm_cvtph_epi64_builtin_convertvector(__v2hf a)
> +{
> +  return __builtin_convertvector(a, __v2di);
> +}
> +
> +__v4di	mm256_cvtph_epi64_builtin_convertvector(__v4hf a)
> +{
> +  return __builtin_convertvector(a, __v4di);
> +}
> +
> +__v8di	mm512_cvtph_epi64_builtin_convertvector(__v8hf a)
> +{
> +  return __builtin_convertvector(a, __v8di);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
> new file mode 100644
> index 00000000000..4a68a10b089
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
> @@ -0,0 +1,139 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */
> +/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
> +/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef char __v2qi __attribute__ ((__vector_size__ (2)));
> +typedef char __v4qi __attribute__ ((__vector_size__ (4)));
> +typedef char __v8qi __attribute__ ((__vector_size__ (8)));
> +typedef char __v16qi __attribute__ ((__vector_size__ (16)));
> +typedef unsigned char __v2qu __attribute__ ((vector_size (2)));
> +typedef unsigned char __v4qu __attribute__ ((vector_size (4)));
> +typedef unsigned char __v8qu __attribute__ ((vector_size (8)));
> +typedef unsigned char __v16qu __attribute__ ((vector_size (16)));
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
> +
> +__v2qi	mm_cvtpd_epi8_builtin_convertvector(__v2df a)
> +{
> +  return __builtin_convertvector((__v2df)a, __v2qi);
> +}
> +
> +__v4qi	mm256_cvtpd_epi8_builtin_convertvector(__v4df a)
> +{
> +  return __builtin_convertvector((__v4df)a, __v4qi);
> +}
> +
> +__v8qi	mm512_cvtpd_epi8_builtin_convertvector(__v8df a)
> +{
> +  return __builtin_convertvector((__v8df)a, __v8qi);
> +}
> +
> +__v2qu	mm_cvtpd_epu8_builtin_convertvector(__v2df a)
> +{
> +  return __builtin_convertvector((__v2df)a, __v2qu);
> +}
> +
> +__v4qu	mm256_cvtpd_epu8_builtin_convertvector(__v4df a)
> +{
> +  return __builtin_convertvector((__v4df)a, __v4qu);
> +}
> +
> +__v8qu	mm512_cvtpd_epu8_builtin_convertvector(__v8df a)
> +{
> +  return __builtin_convertvector((__v8df)a, __v8qu);
> +}
> +
> +__v2qi	mm64_cvtps_epi8_builtin_convertvector(__v2sf a)
> +{
> +  return __builtin_convertvector((__v2sf)a, __v2qi);
> +}
> +
> +__v4qi	mm128_cvtps_epi8_builtin_convertvector(__v4sf a)
> +{
> +  return __builtin_convertvector((__v4sf)a, __v4qi);
> +}
> +
> +__v8qi	mm256_cvtps_epi8_builtin_convertvector(__v8sf a)
> +{
> +  return __builtin_convertvector((__v8sf)a, __v8qi);
> +}
> +
> +__v16qi	mm512_cvtps_epi8_builtin_convertvector(__v16sf a)
> +{
> +  return __builtin_convertvector((__v16sf)a, __v16qi);
> +}
> +
> +__v2qu	mm64_cvtps_epu8_builtin_convertvector(__v2sf a)
> +{
> +  return __builtin_convertvector((__v2sf)a, __v2qu);
> +}
> +
> +__v4qu	mm128_cvtps_epu8_builtin_convertvector(__v4sf a)
> +{
> +  return __builtin_convertvector((__v4sf)a, __v4qu);
> +}
> +
> +__v8qu	mm256_cvtps_epu8_builtin_convertvector(__v8sf a)
> +{
> +  return __builtin_convertvector((__v8sf)a, __v8qu);
> +}
> +
> +__v16qu	mm512_cvtps_epu8_builtin_convertvector(__v16sf a)
> +{
> +  return __builtin_convertvector((__v16sf)a, __v16qu);
> +}
> +
> +__v2qi	mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
> +{
> +  return __builtin_convertvector((__v2hf)a, __v2qi);
> +}
> +
> +__v8qi	mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
> +{
> +  return __builtin_convertvector((__v8hf)a, __v8qi);
> +}
> +
> +__v16qi	mm256_cvtph_epi8_builtin_convertvector(__v16hf a)
> +{
> +  return __builtin_convertvector((__v16hf)a, __v16qi);
> +}
> +
> +__v32qi	mm512_cvtph_epi8_builtin_convertvector(__v32hf a)
> +{
> +  return __builtin_convertvector((__v32hf)a, __v32qi);
> +}
> +
> +__v2qu	mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
> +{
> +  return __builtin_convertvector((__v2hf)a, __v2qu);
> +}
> +
> +__v8qu	mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
> +{
> +  return __builtin_convertvector((__v8hf)a, __v8qu);
> +}
> +
> +__v16qu	mm256_cvtph_epu8_builtin_convertvector(__v16hf a)
> +{
> +  return __builtin_convertvector((__v16hf)a, __v16qu);
> +}
> +
> +__v32qu	mm512_cvtph_epu8_builtin_convertvector(__v32hf a)
> +{
> +  return __builtin_convertvector((__v32hf)a, __v32qu);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-7.c b/gcc/testsuite/gcc.target/i386/pr107432-7.c
> new file mode 100644
> index 00000000000..0ff5a97ed1a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-7.c
> @@ -0,0 +1,156 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */
> +/* { dg-final { scan-assembler-times "vcvtdq2pd" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtdq2pd" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtudq2pd" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtudq2pd" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtdq2ps" 3 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtdq2ps" 4 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtudq2ps" 3 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtudq2ps" 4 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtw2ph" 4 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtw2ph" 5 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtuw2ph" 4 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtuw2ph" 5 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } } } } */
> +
> +#include <x86intrin.h>
> +
> +typedef char __v2qi __attribute__ ((__vector_size__ (2)));
> +typedef char __v4qi __attribute__ ((__vector_size__ (4)));
> +typedef char __v8qi __attribute__ ((__vector_size__ (8)));
> +typedef char __v16qi __attribute__ ((__vector_size__ (16)));
> +typedef unsigned char __v2qu __attribute__ ((vector_size (2)));
> +typedef unsigned char __v4qu __attribute__ ((vector_size (4)));
> +typedef unsigned char __v8qu __attribute__ ((vector_size (8)));
> +typedef unsigned char __v16qu __attribute__ ((vector_size (16)));
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
> +
> +__v2df	mm_cvtepi8_pd_builtin_convertvector(__v2qi a)
> +{
> +  return __builtin_convertvector((__v2qi)a, __v2df);
> +}
> +
> +__v4df	mm256_cvtepi8_pd_builtin_convertvector(__v4qi a)
> +{
> +  return __builtin_convertvector((__v4qi)a, __v4df);
> +}
> +
> +__v8df	mm512_cvtepi8_pd_builtin_convertvector(__v8qi a)
> +{
> +  return __builtin_convertvector((__v8qi)a, __v8df);
> +}
> +
> +__v2df	mm_cvtepu8_pd_builtin_convertvector(__v2qu a)
> +{
> +  return __builtin_convertvector((__v2qu)a, __v2df);
> +}
> +
> +__v4df	mm256_cvtepu8_pd_builtin_convertvector(__v4qu a)
> +{
> +  return __builtin_convertvector((__v4qu)a, __v4df);
> +}
> +
> +__v8df	mm512_cvtepu8_pd_builtin_convertvector(__v8qu a)
> +{
> +  return __builtin_convertvector((__v8qu)a, __v8df);
> +}
> +
> +__v2sf	mm64_cvtepi8_ps_builtin_convertvector(__v2qi a)
> +{
> +  return __builtin_convertvector((__v2qi)a, __v2sf);
> +}
> +
> +__v4sf	mm128_cvtepi8_ps_builtin_convertvector(__v4qi a)
> +{
> +  return __builtin_convertvector((__v4qi)a, __v4sf);
> +}
> +
> +__v8sf	mm256_cvtepi8_ps_builtin_convertvector(__v8qi a)
> +{
> +  return __builtin_convertvector((__v8qi)a, __v8sf);
> +}
> +
> +__v16sf	mm512_cvtepi8_ps_builtin_convertvector(__v16qi a)
> +{
> +  return __builtin_convertvector((__v16qi)a, __v16sf);
> +}
> +
> +__v2sf	mm64_cvtepu8_ps_builtin_convertvector(__v2qu a)
> +{
> +  return __builtin_convertvector((__v2qu)a, __v2sf);
> +}
> +
> +__v4sf	mm128_cvtepu8_ps_builtin_convertvector(__v4qu a)
> +{
> +  return __builtin_convertvector((__v4qu)a, __v4sf);
> +}
> +
> +__v8sf	mm256_cvtepu8_ps_builtin_convertvector(__v8qu a)
> +{
> +  return __builtin_convertvector((__v8qu)a, __v8sf);
> +}
> +
> +__v16sf	mm512_cvtepu8_ps_builtin_convertvector(__v16qu a)
> +{
> +  return __builtin_convertvector((__v16qu)a, __v16sf);
> +}
> +
> +__v2hf	mm32_cvtepi8_ph_builtin_convertvector(__v2qi a)
> +{
> +  return __builtin_convertvector((__v2qi)a, __v2hf);
> +}
> +
> +__v4hf	mm64_cvtepi8_ph_builtin_convertvector(__v4qi a)
> +{
> +  return __builtin_convertvector((__v4qi)a, __v4hf);
> +}
> +
> +__v8hf	mm128_cvtepi8_ph_builtin_convertvector(__v8qi a)
> +{
> +  return __builtin_convertvector((__v8qi)a, __v8hf);
> +}
> +
> +__v16hf	mm256_cvtepi8_ph_builtin_convertvector(__v16qi a)
> +{
> +  return __builtin_convertvector((__v16qi)a, __v16hf);
> +}
> +
> +__v32hf	mm512_cvtepi8_ph_builtin_convertvector(__v32qi a)
> +{
> +  return __builtin_convertvector((__v32qi)a, __v32hf);
> +}
> +
> +__v2hf	mm32_cvtepu8_ph_builtin_convertvector(__v2qu a)
> +{
> +  return __builtin_convertvector((__v2qu)a, __v2hf);
> +}
> +
> +__v4hf	mm64_cvtepu8_ph_builtin_convertvector(__v4qu a)
> +{
> +  return __builtin_convertvector((__v4qu)a, __v4hf);
> +}
> +
> +__v8hf	mm128_cvtepu8_ph_builtin_convertvector(__v8qu a)
> +{
> +  return __builtin_convertvector((__v8qu)a, __v8hf);
> +}
> +
> +__v16hf	mm256_cvtepu8_ph_builtin_convertvector(__v16qu a)
> +{
> +  return __builtin_convertvector((__v16qu)a, __v16hf);
> +}
> +
> +__v32hf	mm512_cvtepu8_ph_builtin_convertvector(__v32qu a)
> +{
> +  return __builtin_convertvector((__v32qu)a, __v32hf);
> +}
> diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> index ea0069f7a67..c38c0b9dda8 100644
> --- a/gcc/tree-vect-generic.cc
> +++ b/gcc/tree-vect-generic.cc
> @@ -45,6 +45,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimple-match.h"
>  #include "recog.h"		/* FIXME: for insn_data */
>  #include "optabs-libfuncs.h"
> +#include "cfgloop.h"
> +#include "tree-vectorizer.h"
>  
>  
>  /* Build a ternary operation and gimplify it.  Emit code before GSI.
> @@ -1870,14 +1872,33 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
>    else if (ret_elt_bits > arg_elt_bits)
>      modifier = WIDEN;
>  
> +  if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> +    {
> +      g = gimple_build_assign (lhs, code1, arg);
> +      gsi_replace (gsi, g, false);
> +      return;
> +    }

Given the API change I suggest below it might make sense to have
supportable_indirect_convert_operation do the above and represent
it as a single-step conversion?

> +  code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
> +  int multi_step_cvt = 0;
> +  vec<tree> interm_types = vNULL;
> +  if (supportable_indirect_convert_operation (NULL,
> +					      code,
> +					      ret_type, arg_type,
> +					      &code2, &code3,
> +					      &multi_step_cvt,
> +					      &interm_types, arg))
> +    {
> +      new_rhs = make_ssa_name (interm_types[0]);
> +      g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
> +      gsi_replace (gsi, g, false);
> +      return;
> +    }
> +
>    if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
>      {
> -      if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> -	{
> -	  g = gimple_build_assign (lhs, code1, arg);
> -	  gsi_replace (gsi, g, false);
> -	  return;
> -	}
>        /* Can't use get_compute_type here, as supportable_convert_operation
>  	 doesn't necessarily use an optab and needs two arguments.  */
>        tree vec_compute_type
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 05a169ecb2d..0aa608202ca 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
>    tree scalar_dest;
>    tree op0, op1 = NULL_TREE;
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> -  tree_code tc1, tc2;
> +  tree_code tc1;
>    code_helper code, code1, code2;
>    code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
>    tree new_temp;
> @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo,
>  	break;
>        }
>  
> -      /* For conversions between float and integer types try whether
> -	 we can use intermediate signed integer types to support the
> -	 conversion.  */
> -      if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> -	  && (code == FLOAT_EXPR ||
> -	      (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> -	{
> -	  bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
> -	  bool float_expr_p = code == FLOAT_EXPR;
> -	  unsigned short target_size;
> -	  scalar_mode intermediate_mode;
> -	  if (demotion)
> -	    {
> -	      intermediate_mode = lhs_mode;
> -	      target_size = GET_MODE_SIZE (rhs_mode);
> -	    }
> -	  else
> -	    {
> -	      target_size = GET_MODE_SIZE (lhs_mode);
> -	      if (!int_mode_for_size
> -		  (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
> -		goto unsupported;
> -	    }
> -	  code1 = float_expr_p ? code : NOP_EXPR;
> -	  codecvt1 = float_expr_p ? NOP_EXPR : code;
> -	  opt_scalar_mode mode_iter;
> -	  FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> -	    {
> -	      intermediate_mode = mode_iter.require ();
> -
> -	      if (GET_MODE_SIZE (intermediate_mode) > target_size)
> -		break;
> -
> -	      scalar_mode cvt_mode;
> -	      if (!int_mode_for_size
> -		  (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
> -		break;
> -
> -	      cvt_type = build_nonstandard_integer_type
> -		(GET_MODE_BITSIZE (cvt_mode), 0);
> -
> -	      /* Check if the intermediate type can hold OP0's range.
> -		 When converting from float to integer this is not necessary
> -		 because values that do not fit the (smaller) target type are
> -		 unspecified anyway.  */
> -	      if (demotion && float_expr_p)
> -		{
> -		  wide_int op_min_value, op_max_value;
> -		  if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
> -		    break;
> -
> -		  if (cvt_type == NULL_TREE
> -		      || (wi::min_precision (op_max_value, SIGNED)
> -			  > TYPE_PRECISION (cvt_type))
> -		      || (wi::min_precision (op_min_value, SIGNED)
> -			  > TYPE_PRECISION (cvt_type)))
> -		    continue;
> -		}
> -
> -	      cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
> -	      /* This should only happened for SLP as long as loop vectorizer
> -		 only supports same-sized vector.  */
> -	      if (cvt_type == NULL_TREE
> -		  || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
> -		  || !supportable_convert_operation ((tree_code) code1,
> -						     vectype_out,
> -						     cvt_type, &tc1)
> -		  || !supportable_convert_operation ((tree_code) codecvt1,
> -						     cvt_type,
> -						     vectype_in, &tc2))
> -		continue;
> -
> -	      found_mode = true;
> -	      break;
> -	    }
> +      if (supportable_indirect_convert_operation (vinfo,
> +						  code,
> +						  vectype_out,
> +						  vectype_in,
> +						  &code1,
> +						  &codecvt1,
> +						  &multi_step_cvt,
> +						  &interm_types,
> +						  op0,slp_node))
> +	break;
>  
> -	  if (found_mode)
> -	    {
> -	      multi_step_cvt++;
> -	      interm_types.safe_push (cvt_type);
> -	      cvt_type = NULL_TREE;
> -	      code1 = tc1;
> -	      codecvt1 = tc2;
> -	      break;
> -	    }
> -	}
>        /* FALLTHRU */
>      unsupported:
>        if (dump_enabled_p ())
> @@ -14626,6 +14551,153 @@ supportable_narrowing_operation (code_helper code,
>    return false;
>  }
>  
> +/* Function supportable_indirect_convert_operation
> +
> +   Check whether an operation represented by the code CODE is two
> +   convert operations that are supported by the target platform in
> +   vector form (i.e., when operating on arguments of type VECTYPE_IN
> +   producing a result of type VECTYPE_OUT).
> +
> +   Convert operations we currently support directly are FIX_TRUNC and FLOAT.
> +   This function checks if these operations are supported
> +   by the target platform directly (via vector tree-codes).
> +
> +   Output:
> +   - CODE1 is the code of a vector operation to be used when
> +   converting the operation in the first step, if available.
> +   - CODE2 is the code of a vector operation to be used when
> +   converting the operation in the second step, if available.
> +   - MULTI_STEP_CVT determines the number of required intermediate steps in
> +   case of multi-step conversion (like int->short->char - in that case
> +   MULTI_STEP_CVT will be 1). In the function, it should be 1.
> +   - INTERM_TYPES contains the intermediate type required to perform the
> +   convert operation (short in the above example).   */
> +bool
> +supportable_indirect_convert_operation (vec_info *vinfo,
> +					code_helper code,
> +					tree vectype_out,
> +					tree vectype_in,
> +					code_helper *code1,
> +					code_helper *code2,
> +					int *multi_step_cvt,
> +					vec<tree> *interm_types,

This API is somewhat awkward, as we're inventing a new one I guess
we can do better.  I think we want

      vec<std::pair<tree, tree_code> > *converts,

covering all code1, code2, multi_step_cvt and interm_types with
the conversion sequence being

      converts[0].first tem0 = converts[0].second op0;
      converts[1].first tem1 = converts[1].second tem;

... while converts.length () determines the length of the chain,
one being a direct conversion where then converts[0].first
is vectype_out.  That would allow double -> char to go
double -> float -> int -> short -> char for example.

> +					tree op0,
> +					slp_tree slp_node)

I would like to avoid passing VINFO and SLP_NODE here, see below.
The same is true for OP0 where the existing use is wrong for SLP already,
but I guess that can stay for now (I opened PR115538 about the
wrong-code issue).

> +{
> +  bool found_mode = false;
> +  scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
> +  scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
> +  opt_scalar_mode mode_iter;
> +  tree_code tc1, tc2;
> +
> +  tree cvt_type = NULL_TREE;
> +  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
> +
> +  (*multi_step_cvt) = 0;
> +  /* For conversions between float and integer types try whether
> +     we can use intermediate signed integer types to support the
> +     conversion.  */
> +  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> +      && (code == FLOAT_EXPR
> +	  || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> +    {
> +      bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
> +      bool float_expr_p = code == FLOAT_EXPR;
> +      unsigned short target_size;
> +      scalar_mode intermediate_mode;
> +      if (demotion)
> +	{
> +	  intermediate_mode = lhs_mode;
> +	  target_size = GET_MODE_SIZE (rhs_mode);
> +	}
> +      else
> +	{
> +	  target_size = GET_MODE_SIZE (lhs_mode);
> +	  if (!int_mode_for_size
> +	      (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
> +	    return false;
> +	}
> +      *code1 = float_expr_p ? code : NOP_EXPR;
> +      *code2 = float_expr_p ? NOP_EXPR : code;
> +      opt_scalar_mode mode_iter;
> +      FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> +	{
> +	  intermediate_mode = mode_iter.require ();
> +
> +	  if (GET_MODE_SIZE (intermediate_mode) > target_size)
> +	    break;
> +
> +	  scalar_mode cvt_mode;
> +	  if (!int_mode_for_size
> +	      (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
> +	    break;
> +
> +	  cvt_type = build_nonstandard_integer_type
> +	    (GET_MODE_BITSIZE (cvt_mode), 0);
> +
> +	  /* Check if the intermediate type can hold OP0's range.
> +	     When converting from float to integer this is not necessary
> +	     because values that do not fit the (smaller) target type are
> +	     unspecified anyway.  */
> +	  if (demotion && float_expr_p)
> +	    {
> +	      wide_int op_min_value, op_max_value;
> +	      /* For vector form, it looks like op0 doesn't have RANGE_INFO.
> +		 In the future, if it is supported, changes may need to be made
> +		 to this part, such as checking the RANGE of each element
> +		 in the vector.  */
> +	      if (!SSA_NAME_RANGE_INFO (op0)
> +		  || !vect_get_range_info (op0, &op_min_value, &op_max_value))
> +		break;
> +
> +	      if (cvt_type == NULL_TREE
> +		  || (wi::min_precision (op_max_value, SIGNED)
> +		      > TYPE_PRECISION (cvt_type))
> +		  || (wi::min_precision (op_min_value, SIGNED)
> +		      > TYPE_PRECISION (cvt_type)))
> +		continue;
> +	    }
> +
> +	  if (vinfo != NULL && slp_node != NULL)
> +	    cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
> +	  else
> +	    {
> +	      bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out))
> +			 || TYPE_UNSIGNED (TREE_TYPE (vectype_in));
> +	      cvt_type = build_nonstandard_integer_type
> +		(GET_MODE_BITSIZE (cvt_mode), uns);
> +	      cvt_type = build_vector_type (cvt_type, nelts);
> +	    }

So this would then become

          cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE 
(vectype_in), cvt_type, TYPE_VECTOR_SUBPARTS (vectype_in));

> +	  /* This should only happened for SLP as long as loop vectorizer
> +	     only supports same-sized vector.  */
> +	  if (cvt_type == NULL_TREE
> +	      || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
> +	      || !supportable_convert_operation ((tree_code) *code1,
> +						 vectype_out,
> +						 cvt_type, &tc1)
> +	      || !supportable_convert_operation ((tree_code) *code2,
> +						 cvt_type,
> +						 vectype_in, &tc2))
> +	    continue;
> +
> +	  found_mode = true;
> +	  break;
> +	}
> +
> +      if (found_mode)
> +	{
> +	  (*multi_step_cvt)++;
> +	  interm_types->safe_push (cvt_type);
> +	  cvt_type = NULL_TREE;
> +	  *code1 = tc1;
> +	  *code2 = tc2;
> +	  return true;
> +	}
> +    }
> +  interm_types->release ();

Hmm, ownership of interm_types is somewhat unclear here - the caller
should release it, or is the situation that the caller is confused
by stray elements in it?  In that case I'd suggest to instead
do interm_types->truncate (0).

> +  return false;
> +}
> +
>  /* Generate and return a vector mask of MASK_TYPE such that
>     mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
>     Add the statements to SEQ.  */
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 97ec9c341e7..ad65ce71bb7 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2265,6 +2265,15 @@ extern bool supportable_widening_operation (vec_info*, code_helper,
>  extern bool supportable_narrowing_operation (code_helper, tree, tree,
>  					     code_helper *, int *,
>  					     vec<tree> *);
> +extern bool supportable_indirect_convert_operation (vec_info *,
> +						    code_helper,
> +						    tree, tree,
> +						    code_helper *,
> +						    code_helper *,
> +						    int *,
> +						    vec<tree> *,
> +						    tree = NULL_TREE,
> +						    slp_tree = NULL);
>  
>  extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
>  				  enum vect_cost_for_stmt, stmt_vec_info,
>
Hu, Lin1 June 20, 2024, 11:26 a.m. UTC | #3
> >    else if (ret_elt_bits > arg_elt_bits)
> >      modifier = WIDEN;
> >
> > +  if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> > +    {
> > +      g = gimple_build_assign (lhs, code1, arg);
> > +      gsi_replace (gsi, g, false);
> > +      return;
> > +    }
> 
> Given the API change I suggest below it might make sense to have
> supportable_indirect_convert_operation do the above and represent it as single-
> step conversion?
>

OK, if you want supportable_indirect_convert_operation to do something like supportable_convert_operation, I'll give it a try. This functionality is really the part that this function can cover. But this would require some changes beyond the API change, because supportable_indirect_convert_operation originally only supported Float -> Int or Int -> Float.
 
>
> > +  code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
> > +  int multi_step_cvt = 0;
> > +  vec<tree> interm_types = vNULL;
> > +  if (supportable_indirect_convert_operation (NULL,
> > +					      code,
> > +					      ret_type, arg_type,
> > +					      &code2, &code3,
> > +					      &multi_step_cvt,
> > +					      &interm_types, arg))
> > +    {
> > +      new_rhs = make_ssa_name (interm_types[0]);
> > +      g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
> > +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> > +      g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
> > +      gsi_replace (gsi, g, false);
> > +      return;
> > +    }
> > +
> >    if (modifier == NONE && (code == FIX_TRUNC_EXPR || code ==
> FLOAT_EXPR))
> >      {
> > -      if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> > -	{
> > -	  g = gimple_build_assign (lhs, code1, arg);
> > -	  gsi_replace (gsi, g, false);
> > -	  return;
> > -	}
> >        /* Can't use get_compute_type here, as supportable_convert_operation
> >  	 doesn't necessarily use an optab and needs two arguments.  */
> >        tree vec_compute_type
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index
> > 05a169ecb2d..0aa608202ca 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
> >    tree scalar_dest;
> >    tree op0, op1 = NULL_TREE;
> >    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> > -  tree_code tc1, tc2;
> > +  tree_code tc1;
> >    code_helper code, code1, code2;
> >    code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
> >    tree new_temp;
> > @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo,
> >  	break;
> >        }
> >
> > -      /* For conversions between float and integer types try whether
> > -	 we can use intermediate signed integer types to support the
> > -	 conversion.  */
> > -      if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> > -	  && (code == FLOAT_EXPR ||
> > -	      (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> > -	{
> > -	  bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE
> (lhs_mode);
> > -	  bool float_expr_p = code == FLOAT_EXPR;
> > -	  unsigned short target_size;
> > -	  scalar_mode intermediate_mode;
> > -	  if (demotion)
> > -	    {
> > -	      intermediate_mode = lhs_mode;
> > -	      target_size = GET_MODE_SIZE (rhs_mode);
> > -	    }
> > -	  else
> > -	    {
> > -	      target_size = GET_MODE_SIZE (lhs_mode);
> > -	      if (!int_mode_for_size
> > -		  (GET_MODE_BITSIZE (rhs_mode), 0).exists
> (&intermediate_mode))
> > -		goto unsupported;
> > -	    }
> > -	  code1 = float_expr_p ? code : NOP_EXPR;
> > -	  codecvt1 = float_expr_p ? NOP_EXPR : code;
> > -	  opt_scalar_mode mode_iter;
> > -	  FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> > -	    {
> > -	      intermediate_mode = mode_iter.require ();
> > -
> > -	      if (GET_MODE_SIZE (intermediate_mode) > target_size)
> > -		break;
> > -
> > -	      scalar_mode cvt_mode;
> > -	      if (!int_mode_for_size
> > -		  (GET_MODE_BITSIZE (intermediate_mode), 0).exists
> (&cvt_mode))
> > -		break;
> > -
> > -	      cvt_type = build_nonstandard_integer_type
> > -		(GET_MODE_BITSIZE (cvt_mode), 0);
> > -
> > -	      /* Check if the intermediate type can hold OP0's range.
> > -		 When converting from float to integer this is not necessary
> > -		 because values that do not fit the (smaller) target type are
> > -		 unspecified anyway.  */
> > -	      if (demotion && float_expr_p)
> > -		{
> > -		  wide_int op_min_value, op_max_value;
> > -		  if (!vect_get_range_info (op0, &op_min_value,
> &op_max_value))
> > -		    break;
> > -
> > -		  if (cvt_type == NULL_TREE
> > -		      || (wi::min_precision (op_max_value, SIGNED)
> > -			  > TYPE_PRECISION (cvt_type))
> > -		      || (wi::min_precision (op_min_value, SIGNED)
> > -			  > TYPE_PRECISION (cvt_type)))
> > -		    continue;
> > -		}
> > -
> > -	      cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
> > -	      /* This should only happened for SLP as long as loop vectorizer
> > -		 only supports same-sized vector.  */
> > -	      if (cvt_type == NULL_TREE
> > -		  || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
> > -		  || !supportable_convert_operation ((tree_code) code1,
> > -						     vectype_out,
> > -						     cvt_type, &tc1)
> > -		  || !supportable_convert_operation ((tree_code) codecvt1,
> > -						     cvt_type,
> > -						     vectype_in, &tc2))
> > -		continue;
> > -
> > -	      found_mode = true;
> > -	      break;
> > -	    }
> > +      if (supportable_indirect_convert_operation (vinfo,
> > +						  code,
> > +						  vectype_out,
> > +						  vectype_in,
> > +						  &code1,
> > +						  &codecvt1,
> > +						  &multi_step_cvt,
> > +						  &interm_types,
> > +						  op0,slp_node))
> > +	break;
> >
> > -	  if (found_mode)
> > -	    {
> > -	      multi_step_cvt++;
> > -	      interm_types.safe_push (cvt_type);
> > -	      cvt_type = NULL_TREE;
> > -	      code1 = tc1;
> > -	      codecvt1 = tc2;
> > -	      break;
> > -	    }
> > -	}
> >        /* FALLTHRU */
> >      unsupported:
> >        if (dump_enabled_p ())
> > @@ -14626,6 +14551,153 @@ supportable_narrowing_operation
> (code_helper code,
> >    return false;
> >  }
> >
> > +/* Function supportable_indirect_convert_operation
> > +
> > +   Check whether an operation represented by the code CODE is two
> > +   convert operations that are supported by the target platform in
> > +   vector form (i.e., when operating on arguments of type VECTYPE_IN
> > +   producing a result of type VECTYPE_OUT).
> > +
> > +   Convert operations we currently support directly are FIX_TRUNC and FLOAT.
> > +   This function checks if these operations are supported
> > +   by the target platform directly (via vector tree-codes).
> > +
> > +   Output:
> > +   - CODE1 is the code of a vector operation to be used when
> > +   converting the operation in the first step, if available.
> > +   - CODE2 is the code of a vector operation to be used when
> > +   converting the operation in the second step, if available.
> > +   - MULTI_STEP_CVT determines the number of required intermediate steps
> in
> > +   case of multi-step conversion (like int->short->char - in that case
> > +   MULTI_STEP_CVT will be 1). In the function, it should be 1.
> > +   - INTERM_TYPES contains the intermediate type required to perform the
> > +   convert operation (short in the above example).   */
> > +bool
> > +supportable_indirect_convert_operation (vec_info *vinfo,
> > +					code_helper code,
> > +					tree vectype_out,
> > +					tree vectype_in,
> > +					code_helper *code1,
> > +					code_helper *code2,
> > +					int *multi_step_cvt,
> > +					vec<tree> *interm_types,
> 
> This API is somewhat awkward, as we're inventing a new one I guess we can do
> better.  I think we want
> 
>       vec<std::pair<tree, tree_code> > *converts,
> 
> covering all code1, code2, multi_step_cvt and interm_types with the conversion
> sequence being
> 
>       converts[0].first tem0 = converts[0].second op0;
>       converts[1].first tem1 = converts[1].second tem;
> 

That's great, this really makes the function work better.

>
> ... while converts.length () determines the length of the chain, one being a direct
> conversion where then converts[0].first is vectype_out.  That would allow
> double -> char to go double -> float -> int -> short -> char for example.
>

I'm trying to determine the requirements: do you want this function to support multiple conversions (the current implementation just does a two-step conversion, like double -> char, which becomes double -> int -> char)? Actually, we should be able to do all conversions in two steps if we have suitable instructions. I can't think of a scenario where more than two conversion steps are needed yet. Could you give me some examples? Of course, I could tweak this feature in advance if it is for future consideration.

>
> > +					tree op0,
> > +					slp_tree slp_node)
> 
> I would like to avoid passing VINFO and SLP_NODE here, see below.
> The same is true for OP0 where the existing use is wrong for SLP already, but I
> guess that can stay for now (I opened PR115538 about the wrong-code issue).
>

Thanks, I have removed them. 
 
>
> > +{
> > +  bool found_mode = false;
> > +  scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
> > +  scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
> > +  opt_scalar_mode mode_iter;
> > +  tree_code tc1, tc2;
> > +
> > +  tree cvt_type = NULL_TREE;
> > +  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
> > +
> > +  (*multi_step_cvt) = 0;
> > +  /* For conversions between float and integer types try whether
> > +     we can use intermediate signed integer types to support the
> > +     conversion.  */
> > +  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> > +      && (code == FLOAT_EXPR
> > +	  || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> > +    {
> > +      bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE
> (lhs_mode);
> > +      bool float_expr_p = code == FLOAT_EXPR;
> > +      unsigned short target_size;
> > +      scalar_mode intermediate_mode;
> > +      if (demotion)
> > +	{
> > +	  intermediate_mode = lhs_mode;
> > +	  target_size = GET_MODE_SIZE (rhs_mode);
> > +	}
> > +      else
> > +	{
> > +	  target_size = GET_MODE_SIZE (lhs_mode);
> > +	  if (!int_mode_for_size
> > +	      (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
> > +	    return false;
> > +	}
> > +      *code1 = float_expr_p ? code : NOP_EXPR;
> > +      *code2 = float_expr_p ? NOP_EXPR : code;
> > +      opt_scalar_mode mode_iter;
> > +      FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> > +	{
> > +	  intermediate_mode = mode_iter.require ();
> > +
> > +	  if (GET_MODE_SIZE (intermediate_mode) > target_size)
> > +	    break;
> > +
> > +	  scalar_mode cvt_mode;
> > +	  if (!int_mode_for_size
> > +	      (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
> > +	    break;
> > +
> > +	  cvt_type = build_nonstandard_integer_type
> > +	    (GET_MODE_BITSIZE (cvt_mode), 0);
> > +
> > +	  /* Check if the intermediate type can hold OP0's range.
> > +	     When converting from float to integer this is not necessary
> > +	     because values that do not fit the (smaller) target type are
> > +	     unspecified anyway.  */
> > +	  if (demotion && float_expr_p)
> > +	    {
> > +	      wide_int op_min_value, op_max_value;
> > +	      /* For vector form, it looks like op0 doesn't have RANGE_INFO.
> > +		 In the future, if it is supported, changes may need to be made
> > +		 to this part, such as checking the RANGE of each element
> > +		 in the vector.  */
> > +	      if (!SSA_NAME_RANGE_INFO (op0)
> > +		  || !vect_get_range_info (op0, &op_min_value,
> &op_max_value))
> > +		break;
> > +
> > +	      if (cvt_type == NULL_TREE
> > +		  || (wi::min_precision (op_max_value, SIGNED)
> > +		      > TYPE_PRECISION (cvt_type))
> > +		  || (wi::min_precision (op_min_value, SIGNED)
> > +		      > TYPE_PRECISION (cvt_type)))
> > +		continue;
> > +	    }
> > +
> > +	  if (vinfo != NULL && slp_node != NULL)
> > +	    cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
> > +	  else
> > +	    {
> > +	      bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out))
> > +			 || TYPE_UNSIGNED (TREE_TYPE (vectype_in));
> > +	      cvt_type = build_nonstandard_integer_type
> > +		(GET_MODE_BITSIZE (cvt_mode), uns);
> > +	      cvt_type = build_vector_type (cvt_type, nelts);
> > +	    }
> 
> So this would then become
> 
>           cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE
> (vectype_in), cvt_type, TYPE_VECTOR_SUBPARTS (vectype_in));
> 
> > +	  /* This should only happened for SLP as long as loop vectorizer
> > +	     only supports same-sized vector.  */
> > +	  if (cvt_type == NULL_TREE
> > +	      || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
> > +	      || !supportable_convert_operation ((tree_code) *code1,
> > +						 vectype_out,
> > +						 cvt_type, &tc1)
> > +	      || !supportable_convert_operation ((tree_code) *code2,
> > +						 cvt_type,
> > +						 vectype_in, &tc2))
> > +	    continue;
> > +
> > +	  found_mode = true;
> > +	  break;
> > +	}
> > +
> > +      if (found_mode)
> > +	{
> > +	  (*multi_step_cvt)++;
> > +	  interm_types->safe_push (cvt_type);
> > +	  cvt_type = NULL_TREE;
> > +	  *code1 = tc1;
> > +	  *code2 = tc2;
> > +	  return true;
> > +	}
> > +    }
> > +  interm_types->release ();
> 
> Hmm, ownership of interm_types is somewhat unclear here - the caller should
> release it, or is the situation that the caller is confused by stray elements in it?  In
> that case I'd suggest to instead do interm_types->truncate (0).
>

It's my fault; I just imitated supportable_narrowing/widening_operation. I think that for this function, interm_types->release() is not needed.
 
BRs,
Lin
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
new file mode 100644
index 00000000000..a4f37447eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -0,0 +1,234 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4)));
+typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8)));
+typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
+typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4)));
+typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8)));
+typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2si);
+}
+
+__m128i	mm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4di)a, __v4si);
+}
+
+__m256i	mm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8di)a, __v8si);
+}
+
+__v2hi	mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2hi);
+}
+
+__v4hi	mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4hi);
+}
+
+__m128i	mm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8di)a, __v8hi);
+}
+
+__v2qi	mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2qi);
+}
+
+__v4qi	mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4qi);
+}
+
+__v8qi	mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
+{
+  return __builtin_convertvector((__v8di)a, __v8qi);
+}
+
+__v2hi	mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector((__v2si)a, __v2hi);
+}
+
+__v4hi	mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4hi);
+}
+
+__m128i	mm256_cvtepi32_epi16_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v8si)a, __v8hi);
+}
+
+__m256i	mm512_cvtepi32_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v16si)a, __v16hi);
+}
+
+__v2qi	mm64_cvtepi32_epi8_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector((__v2si)a, __v2qi);
+}
+
+__v4qi	mm_cvtepi32_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4qi);
+}
+
+__v8qi	mm256_cvtepi32_epi8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v8si)a, __v8qi);
+}
+
+__m128i	mm512_cvtepi32_epi8_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v16si)a, __v16qi);
+}
+
+__v2qi	mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
+{
+  return __builtin_convertvector((__v2hi)a, __v2qi);
+}
+
+__v8qi	mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v8hi)a, __v8qi);
+}
+
+__m128i	mm256_cvtepi16_epi8_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v16hi)a, __v16qi);
+}
+
+__m256i	mm512_cvtepi16_epi8_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v32hi)a, __v32qi);
+}
+
+__v2su mm_cvtepu64_epu32_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2du)a, __v2su);
+}
+
+__m128i	mm256_cvtepu64_epu32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4du)a, __v4su);
+}
+
+__m256i	mm512_cvtepu64_epu32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8du)a, __v8su);
+}
+
+__v2hu	mm_cvtepu64_epu16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2du)a, __v2hu);
+}
+
+__v4hu	mm256_cvtepu64_epu16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4du)a, __v4hu);
+}
+
+__m128i	mm512_cvtepu64_epu16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8du)a, __v8hu);
+}
+
+__v2qu	mm_cvtepu64_epu8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2du)a, __v2qu);
+}
+
+__v4qu	mm256_cvtepu64_epu8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4du)a, __v4qu);
+}
+
+__v8qu	mm512_cvtepu64_epu8_builtin_convertvector(__m512i a)
+{
+  return __builtin_convertvector((__v8du)a, __v8qu);
+}
+
+__v2hu	mm32_cvtepu32_epu16_builtin_convertvector(__v2su a)
+{
+  return __builtin_convertvector((__v2su)a, __v2hu);
+}
+
+__v4hu	mm_cvtepu32_epu16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4su)a, __v4hu);
+}
+
+__m128i	mm256_cvtepu32_epu16_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v8su)a, __v8hu);
+}
+
+__m256i	mm512_cvtepu32_epu16_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v16su)a, __v16hu);
+}
+
+__v2qu	mm32_cvtepu32_epu8_builtin_convertvector(__v2su a)
+{
+  return __builtin_convertvector((__v2su)a, __v2qu);
+}
+
+__v4qu	mm_cvtepu2_epu8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4su)a, __v4qu);
+}
+
+__v8qu	mm256_cvtepu32_epu8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v8su)a, __v8qu);
+}
+
+__m128i	mm512_cvtepu32_epu8_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v16su)a, __v16qu);
+}
+
+__v2qu	mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
+{
+  return __builtin_convertvector((__v2hu)a, __v2qu);
+}
+
+__v8qu	mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v8hu)a, __v8qu);
+}
+
+__m128i	mm256_cvtepu16_epu8_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v16hu)a, __v16qu);
+}
+
+__m256i	mm512_cvtepu16_epu8_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v32hu)a, __v32qu);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-2.c b/gcc/testsuite/gcc.target/i386/pr107432-2.c
new file mode 100644
index 00000000000..02ffd811cb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-2.c
@@ -0,0 +1,105 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovsxdq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwd" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw" 3 } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+__m128i mm_cvtepi32_epi64_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector(a, __v2di);
+}
+
+__m256i	mm256_cvtepi32_epi64_builtin_convertvector(__v4si a)
+{
+  return (__m256i)__builtin_convertvector(a, __v4di);
+}
+
+__m512i	mm512_cvtepi32_epi64_builtin_convertvector(__v8si a)
+{
+  return (__m512i)__builtin_convertvector(a, __v8di);
+}
+
+__m128i mm_cvtepi16_epi64_builtin_convertvector(__v2hi a)
+{
+  return __builtin_convertvector(a, __v2di);
+}
+
+__m256i	mm256_cvtepi16_epi64_builtin_convertvector(__v4hi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v4di);
+}
+
+__m512i	mm512_cvtepi16_epi64_builtin_convertvector(__v8hi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v8di);
+}
+
+__m128i mm_cvtepi8_epi64_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector(a, __v2di);
+}
+
+__m256i	mm256_cvtepi8_epi64_builtin_convertvector(__v4qi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v4di);
+}
+
+__m512i	mm512_cvtepi8_epi64_builtin_convertvector(__v8qi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v8di);
+}
+
+__m128i mm_cvtepi16_epi32_builtin_convertvector(__v4hi a)
+{
+  return (__m128i)__builtin_convertvector(a, __v4si);
+}
+
+__m256i	mm256_cvtepi16_epi32_builtin_convertvector(__v8hi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v8si);
+}
+
+__m512i	mm512_cvtepi16_epi32_builtin_convertvector(__v16hi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v16si);
+}
+
+__m128i mm_cvtepi8_epi32_builtin_convertvector(__v4qi a)
+{
+  return (__m128i)__builtin_convertvector(a, __v4si);
+}
+
+__m256i	mm256_cvtepi8_epi32_builtin_convertvector(__v8qi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v8si);
+}
+
+__m512i	mm512_cvtepi8_epi32_builtin_convertvector(__v16qi a)
+{
+  return (__m512i)__builtin_convertvector(a, __v16si);
+}
+
+__m128i mm_cvtepi8_epi16_builtin_convertvector(__v8qi a)
+{
+  return (__m128i)__builtin_convertvector(a, __v8hi);
+}
+
+__m256i	mm256_cvtepi8_epi16_builtin_convertvector(__v16qi a)
+{
+  return (__m256i)__builtin_convertvector(a, __v16hi);
+}
+
+__v32hi	mm512_cvtepi8_epi16_builtin_convertvector(__v32qi a)
+{
+  return __builtin_convertvector(a, __v32hi);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-3.c b/gcc/testsuite/gcc.target/i386/pr107432-3.c
new file mode 100644
index 00000000000..30dc947b6dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-3.c
@@ -0,0 +1,55 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2ph" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2sf mm_cvtpd_ps_builtin_convertvector(__v2df a)
+{
+  return __builtin_convertvector(a, __v2sf);
+}
+
+__v4sf	mm256_cvtpd_ps_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector(a, __v4sf);
+}
+
+__v8sf	mm512_cvtpd_ps_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector(a, __v8sf);
+}
+
+__v2hf mm_cvtpd_ph_builtin_convertvector(__v2df a)
+{
+  return __builtin_convertvector(a, __v2hf);
+}
+
+__v4hf	mm256_cvtpd_ph_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector(a, __v4hf);
+}
+
+__v8hf	mm512_cvtpd_ph_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector(a, __v8hf);
+}
+
+__v4hf mm_cvtps_ph_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector(a, __v4hf);
+}
+
+__v8hf	mm256_cvtps_ph_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector(a, __v8hf);
+}
+
+__v16hf	mm512_cvtps_ph_builtin_convertvector(__v16sf a)
+{
+  return __builtin_convertvector(a, __v16hf);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-4.c b/gcc/testsuite/gcc.target/i386/pr107432-4.c
new file mode 100644
index 00000000000..e537e7349e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-4.c
@@ -0,0 +1,56 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvtps2pd" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtph2pd" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2df mm_cvtps_pd_builtin_convertvector(__v2sf a)
+{
+  return __builtin_convertvector(a, __v2df);
+}
+
+__v4df	mm256_cvtps_pd_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector(a, __v4df);
+}
+
+__v8df	mm512_cvtps_pd_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector(a, __v8df);
+}
+
+__v2df mm_cvtph_pd_builtin_convertvector(__v2hf a)
+{
+  return __builtin_convertvector(a, __v2df);
+}
+
+__v4df	mm256_cvtph_pd_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector(a, __v4df);
+}
+
+__v8df	mm512_cvtph_pd_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8df);
+}
+
+__v4sf mm_cvtph_ps_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector(a, __v4sf);
+}
+
+__v8sf	mm256_cvtph_ps_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8sf);
+}
+
+__v16sf	mm512_cvtph_ps_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector(a, __v16sf);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-5.c b/gcc/testsuite/gcc.target/i386/pr107432-5.c
new file mode 100644
index 00000000000..5a44ef9f3b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-5.c
@@ -0,0 +1,72 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512dq -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtpd_epi32_builtin_convertvector(__v2df a)
+{
+  return __builtin_convertvector(a, __v2si);
+}
+
+__v4si	mm256_cvtpd_epi32_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector(a, __v4si);
+}
+
+__v8si	mm512_cvtpd_epi32_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector(a, __v8si);
+}
+
+__v2di mm_cvtps_epi64_builtin_convertvector(__v2sf a)
+{
+  return __builtin_convertvector(a, __v2di);
+}
+
+__v4di	mm256_cvtps_epi64_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector(a, __v4di);
+}
+
+__v8di	mm512_cvtps_epi64_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector(a, __v8di);
+}
+
+__v4si mm_cvtph_epi32_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector(a, __v4si);
+}
+
+__v8si	mm256_cvtph_epi32_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8si);
+}
+
+__v16si	mm512_cvtph_epi32_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector(a, __v16si);
+}
+
+__v2di mm_cvtph_epi64_builtin_convertvector(__v2hf a)
+{
+  return __builtin_convertvector(a, __v2di);
+}
+
+__v4di	mm256_cvtph_epi64_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector(a, __v4di);
+}
+
+__v8di	mm512_cvtph_epi64_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector(a, __v8di);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
new file mode 100644
index 00000000000..4a68a10b089
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
@@ -0,0 +1,139 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include <x86intrin.h>
+
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+typedef unsigned char __v2qu __attribute__ ((vector_size (2)));
+typedef unsigned char __v4qu __attribute__ ((vector_size (4)));
+typedef unsigned char __v8qu __attribute__ ((vector_size (8)));
+typedef unsigned char __v16qu __attribute__ ((vector_size (16)));
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
+
+__v2qi	mm_cvtpd_epi8_builtin_convertvector(__v2df a)
+{
+  return __builtin_convertvector((__v2df)a, __v2qi);
+}
+
+__v4qi	mm256_cvtpd_epi8_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector((__v4df)a, __v4qi);
+}
+
+__v8qi	mm512_cvtpd_epi8_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector((__v8df)a, __v8qi);
+}
+
+__v2qu	mm_cvtpd_epu8_builtin_convertvector(__v2df a)
+{
+  return __builtin_convertvector((__v2df)a, __v2qu);
+}
+
+__v4qu	mm256_cvtpd_epu8_builtin_convertvector(__v4df a)
+{
+  return __builtin_convertvector((__v4df)a, __v4qu);
+}
+
+__v8qu	mm512_cvtpd_epu8_builtin_convertvector(__v8df a)
+{
+  return __builtin_convertvector((__v8df)a, __v8qu);
+}
+
+__v2qi	mm64_cvtps_epi8_builtin_convertvector(__v2sf a)
+{
+  return __builtin_convertvector((__v2sf)a, __v2qi);
+}
+
+__v4qi	mm128_cvtps_epi8_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector((__v4sf)a, __v4qi);
+}
+
+__v8qi	mm256_cvtps_epi8_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector((__v8sf)a, __v8qi);
+}
+
+__v16qi	mm512_cvtps_epi8_builtin_convertvector(__v16sf a)
+{
+  return __builtin_convertvector((__v16sf)a, __v16qi);
+}
+
+__v2qu	mm64_cvtps_epu8_builtin_convertvector(__v2sf a)
+{
+  return __builtin_convertvector((__v2sf)a, __v2qu);
+}
+
+__v4qu	mm128_cvtps_epu8_builtin_convertvector(__v4sf a)
+{
+  return __builtin_convertvector((__v4sf)a, __v4qu);
+}
+
+__v8qu	mm256_cvtps_epu8_builtin_convertvector(__v8sf a)
+{
+  return __builtin_convertvector((__v8sf)a, __v8qu);
+}
+
+__v16qu	mm512_cvtps_epu8_builtin_convertvector(__v16sf a)
+{
+  return __builtin_convertvector((__v16sf)a, __v16qu);
+}
+
+__v2qi	mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
+{
+  return __builtin_convertvector((__v2hf)a, __v2qi);
+}
+
+__v8qi	mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector((__v8hf)a, __v8qi);
+}
+
+__v16qi	mm256_cvtph_epi8_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector((__v16hf)a, __v16qi);
+}
+
+__v32qi	mm512_cvtph_epi8_builtin_convertvector(__v32hf a)
+{
+  return __builtin_convertvector((__v32hf)a, __v32qi);
+}
+
+__v2qu	mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
+{
+  return __builtin_convertvector((__v2hf)a, __v2qu);
+}
+
+__v8qu	mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
+{
+  return __builtin_convertvector((__v8hf)a, __v8qu);
+}
+
+__v16qu	mm256_cvtph_epu8_builtin_convertvector(__v16hf a)
+{
+  return __builtin_convertvector((__v16hf)a, __v16qu);
+}
+
+__v32qu	mm512_cvtph_epu8_builtin_convertvector(__v32hf a)
+{
+  return __builtin_convertvector((__v32hf)a, __v32qu);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-7.c b/gcc/testsuite/gcc.target/i386/pr107432-7.c
new file mode 100644
index 00000000000..0ff5a97ed1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-7.c
@@ -0,0 +1,156 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps" 3 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps" 3 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtw2ph" 4 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtw2ph" 5 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtuw2ph" 4 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtuw2ph" 5 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } } } } */
+
+#include <x86intrin.h>
+
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+typedef unsigned char __v2qu __attribute__ ((vector_size (2)));
+typedef unsigned char __v4qu __attribute__ ((vector_size (4)));
+typedef unsigned char __v8qu __attribute__ ((vector_size (8)));
+typedef unsigned char __v16qu __attribute__ ((vector_size (16)));
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
+
+__v2df	mm_cvtepi8_pd_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector((__v2qi)a, __v2df);
+}
+
+__v4df	mm256_cvtepi8_pd_builtin_convertvector(__v4qi a)
+{
+  return __builtin_convertvector((__v4qi)a, __v4df);
+}
+
+__v8df	mm512_cvtepi8_pd_builtin_convertvector(__v8qi a)
+{
+  return __builtin_convertvector((__v8qi)a, __v8df);
+}
+
+__v2df	mm_cvtepu8_pd_builtin_convertvector(__v2qu a)
+{
+  return __builtin_convertvector((__v2qu)a, __v2df);
+}
+
+__v4df	mm256_cvtepu8_pd_builtin_convertvector(__v4qu a)
+{
+  return __builtin_convertvector((__v4qu)a, __v4df);
+}
+
+__v8df	mm512_cvtepu8_pd_builtin_convertvector(__v8qu a)
+{
+  return __builtin_convertvector((__v8qu)a, __v8df);
+}
+
+__v2sf	mm64_cvtepi8_ps_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector((__v2qi)a, __v2sf);
+}
+
+__v4sf	mm128_cvtepi8_ps_builtin_convertvector(__v4qi a)
+{
+  return __builtin_convertvector((__v4qi)a, __v4sf);
+}
+
+__v8sf	mm256_cvtepi8_ps_builtin_convertvector(__v8qi a)
+{
+  return __builtin_convertvector((__v8qi)a, __v8sf);
+}
+
+__v16sf	mm512_cvtepi8_ps_builtin_convertvector(__v16qi a)
+{
+  return __builtin_convertvector((__v16qi)a, __v16sf);
+}
+
+__v2sf	mm64_cvtepu8_ps_builtin_convertvector(__v2qu a)
+{
+  return __builtin_convertvector((__v2qu)a, __v2sf);
+}
+
+__v4sf	mm128_cvtepu8_ps_builtin_convertvector(__v4qu a)
+{
+  return __builtin_convertvector((__v4qu)a, __v4sf);
+}
+
+__v8sf	mm256_cvtepu8_ps_builtin_convertvector(__v8qu a)
+{
+  return __builtin_convertvector((__v8qu)a, __v8sf);
+}
+
+__v16sf	mm512_cvtepu8_ps_builtin_convertvector(__v16qu a)
+{
+  return __builtin_convertvector((__v16qu)a, __v16sf);
+}
+
+__v2hf	mm32_cvtepi8_ph_builtin_convertvector(__v2qi a)
+{
+  return __builtin_convertvector((__v2qi)a, __v2hf);
+}
+
+__v4hf	mm64_cvtepi8_ph_builtin_convertvector(__v4qi a)
+{
+  return __builtin_convertvector((__v4qi)a, __v4hf);
+}
+
+__v8hf	mm128_cvtepi8_ph_builtin_convertvector(__v8qi a)
+{
+  return __builtin_convertvector((__v8qi)a, __v8hf);
+}
+
+__v16hf	mm256_cvtepi8_ph_builtin_convertvector(__v16qi a)
+{
+  return __builtin_convertvector((__v16qi)a, __v16hf);
+}
+
+__v32hf	mm512_cvtepi8_ph_builtin_convertvector(__v32qi a)
+{
+  return __builtin_convertvector((__v32qi)a, __v32hf);
+}
+
+__v2hf	mm32_cvtepu8_ph_builtin_convertvector(__v2qu a)
+{
+  return __builtin_convertvector((__v2qu)a, __v2hf);
+}
+
+__v4hf	mm64_cvtepu8_ph_builtin_convertvector(__v4qu a)
+{
+  return __builtin_convertvector((__v4qu)a, __v4hf);
+}
+
+__v8hf	mm128_cvtepu8_ph_builtin_convertvector(__v8qu a)
+{
+  return __builtin_convertvector((__v8qu)a, __v8hf);
+}
+
+__v16hf	mm256_cvtepu8_ph_builtin_convertvector(__v16qu a)
+{
+  return __builtin_convertvector((__v16qu)a, __v16hf);
+}
+
+__v32hf	mm512_cvtepu8_ph_builtin_convertvector(__v32qu a)
+{
+  return __builtin_convertvector((__v32qu)a, __v32hf);
+}
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index ea0069f7a67..c38c0b9dda8 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -45,6 +45,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "gimple-match.h"
 #include "recog.h"		/* FIXME: for insn_data */
 #include "optabs-libfuncs.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
 
 
 /* Build a ternary operation and gimplify it.  Emit code before GSI.
@@ -1870,14 +1872,33 @@  expand_vector_conversion (gimple_stmt_iterator *gsi)
   else if (ret_elt_bits > arg_elt_bits)
     modifier = WIDEN;
 
+  if (supportable_convert_operation (code, ret_type, arg_type, &code1))
+    {
+      g = gimple_build_assign (lhs, code1, arg);
+      gsi_replace (gsi, g, false);
+      return;
+    }
+
+  code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
+  int multi_step_cvt = 0;
+  vec<tree> interm_types = vNULL;
+  if (supportable_indirect_convert_operation (NULL,
+					      code,
+					      ret_type, arg_type,
+					      &code2, &code3,
+					      &multi_step_cvt,
+					      &interm_types, arg))
+    {
+      new_rhs = make_ssa_name (interm_types[0]);
+      g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+      g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
+      gsi_replace (gsi, g, false);
+      return;
+    }
+
   if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
     {
-      if (supportable_convert_operation (code, ret_type, arg_type, &code1))
-	{
-	  g = gimple_build_assign (lhs, code1, arg);
-	  gsi_replace (gsi, g, false);
-	  return;
-	}
       /* Can't use get_compute_type here, as supportable_convert_operation
 	 doesn't necessarily use an optab and needs two arguments.  */
       tree vec_compute_type
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 05a169ecb2d..0aa608202ca 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5175,7 +5175,7 @@  vectorizable_conversion (vec_info *vinfo,
   tree scalar_dest;
   tree op0, op1 = NULL_TREE;
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
-  tree_code tc1, tc2;
+  tree_code tc1;
   code_helper code, code1, code2;
   code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
   tree new_temp;
@@ -5384,92 +5384,17 @@  vectorizable_conversion (vec_info *vinfo,
 	break;
       }
 
-      /* For conversions between float and integer types try whether
-	 we can use intermediate signed integer types to support the
-	 conversion.  */
-      if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
-	  && (code == FLOAT_EXPR ||
-	      (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
-	{
-	  bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
-	  bool float_expr_p = code == FLOAT_EXPR;
-	  unsigned short target_size;
-	  scalar_mode intermediate_mode;
-	  if (demotion)
-	    {
-	      intermediate_mode = lhs_mode;
-	      target_size = GET_MODE_SIZE (rhs_mode);
-	    }
-	  else
-	    {
-	      target_size = GET_MODE_SIZE (lhs_mode);
-	      if (!int_mode_for_size
-		  (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
-		goto unsupported;
-	    }
-	  code1 = float_expr_p ? code : NOP_EXPR;
-	  codecvt1 = float_expr_p ? NOP_EXPR : code;
-	  opt_scalar_mode mode_iter;
-	  FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
-	    {
-	      intermediate_mode = mode_iter.require ();
-
-	      if (GET_MODE_SIZE (intermediate_mode) > target_size)
-		break;
-
-	      scalar_mode cvt_mode;
-	      if (!int_mode_for_size
-		  (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
-		break;
-
-	      cvt_type = build_nonstandard_integer_type
-		(GET_MODE_BITSIZE (cvt_mode), 0);
-
-	      /* Check if the intermediate type can hold OP0's range.
-		 When converting from float to integer this is not necessary
-		 because values that do not fit the (smaller) target type are
-		 unspecified anyway.  */
-	      if (demotion && float_expr_p)
-		{
-		  wide_int op_min_value, op_max_value;
-		  if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
-		    break;
-
-		  if (cvt_type == NULL_TREE
-		      || (wi::min_precision (op_max_value, SIGNED)
-			  > TYPE_PRECISION (cvt_type))
-		      || (wi::min_precision (op_min_value, SIGNED)
-			  > TYPE_PRECISION (cvt_type)))
-		    continue;
-		}
-
-	      cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
-	      /* This should only happened for SLP as long as loop vectorizer
-		 only supports same-sized vector.  */
-	      if (cvt_type == NULL_TREE
-		  || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
-		  || !supportable_convert_operation ((tree_code) code1,
-						     vectype_out,
-						     cvt_type, &tc1)
-		  || !supportable_convert_operation ((tree_code) codecvt1,
-						     cvt_type,
-						     vectype_in, &tc2))
-		continue;
-
-	      found_mode = true;
-	      break;
-	    }
+      if (supportable_indirect_convert_operation (vinfo,
+						  code,
+						  vectype_out,
+						  vectype_in,
+						  &code1,
+						  &codecvt1,
+						  &multi_step_cvt,
+						  &interm_types,
+						  op0,slp_node))
+	break;
 
-	  if (found_mode)
-	    {
-	      multi_step_cvt++;
-	      interm_types.safe_push (cvt_type);
-	      cvt_type = NULL_TREE;
-	      code1 = tc1;
-	      codecvt1 = tc2;
-	      break;
-	    }
-	}
       /* FALLTHRU */
     unsupported:
       if (dump_enabled_p ())
@@ -14626,6 +14551,153 @@  supportable_narrowing_operation (code_helper code,
   return false;
 }
 
+/* Function supportable_indirect_convert_operation
+
+   Check whether an operation represented by the code CODE can be
+   performed as two chained convert operations, each of which is
+   supported by the target platform in vector form (i.e., when
+   operating on arguments of type VECTYPE_IN producing a result of
+   type VECTYPE_OUT).
+
+   The codes currently handled are FLOAT_EXPR and FIX_TRUNC_EXPR;
+   the intermediate step always goes through an integer type.
+
+   Output:
+   - CODE1 is the code of the vector operation converting from the
+   intermediate type to VECTYPE_OUT, if available.
+   - CODE2 is the code of the vector operation converting from
+   VECTYPE_IN to the intermediate type, if available.
+   - MULTI_STEP_CVT determines the number of required intermediate
+   steps; on success this function always sets it to 1, since exactly
+   one intermediate type is used.
+   - INTERM_TYPES contains the single intermediate vector type
+   required to perform the conversion.  */
+bool
+supportable_indirect_convert_operation (vec_info *vinfo,
+					code_helper code,
+					tree vectype_out,
+					tree vectype_in,
+					code_helper *code1,
+					code_helper *code2,
+					int *multi_step_cvt,
+					vec<tree> *interm_types,
+					tree op0,
+					slp_tree slp_node)
+{
+  bool found_mode = false;
+  scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
+  scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
+  opt_scalar_mode mode_iter;
+  tree_code tc1, tc2;
+
+  tree cvt_type = NULL_TREE;
+  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
+
+  (*multi_step_cvt) = 0;
+  /* For conversions between float and integer types try whether
+     we can use intermediate signed integer types to support the
+     conversion.  */
+  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
+      && (code == FLOAT_EXPR
+	  || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
+    {
+      bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
+      bool float_expr_p = code == FLOAT_EXPR;
+      unsigned short target_size;
+      scalar_mode intermediate_mode;
+      if (demotion)
+	{
+	  intermediate_mode = lhs_mode;
+	  target_size = GET_MODE_SIZE (rhs_mode);
+	}
+      else
+	{
+	  target_size = GET_MODE_SIZE (lhs_mode);
+	  if (!int_mode_for_size
+	      (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
+	    return false;
+	}
+      *code1 = float_expr_p ? code : NOP_EXPR;
+      *code2 = float_expr_p ? NOP_EXPR : code;
+      opt_scalar_mode mode_iter;
+      FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
+	{
+	  intermediate_mode = mode_iter.require ();
+
+	  if (GET_MODE_SIZE (intermediate_mode) > target_size)
+	    break;
+
+	  scalar_mode cvt_mode;
+	  if (!int_mode_for_size
+	      (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
+	    break;
+
+	  cvt_type = build_nonstandard_integer_type
+	    (GET_MODE_BITSIZE (cvt_mode), 0);
+
+	  /* Check if the intermediate type can hold OP0's range.
+	     When converting from float to integer this is not necessary
+	     because values that do not fit the (smaller) target type are
+	     unspecified anyway.  */
+	  if (demotion && float_expr_p)
+	    {
+	      wide_int op_min_value, op_max_value;
+	      /* OP0 in vector form has no RANGE_INFO, so the check
+		 below makes us give up.  If range information becomes
+		 available for vectors in the future, this should be
+		 extended to check the range of each element.  */
+	      if (!SSA_NAME_RANGE_INFO (op0)
+		  || !vect_get_range_info (op0, &op_min_value, &op_max_value))
+		break;
+
+	      if (cvt_type == NULL_TREE
+		  || (wi::min_precision (op_max_value, SIGNED)
+		      > TYPE_PRECISION (cvt_type))
+		  || (wi::min_precision (op_min_value, SIGNED)
+		      > TYPE_PRECISION (cvt_type)))
+		continue;
+	    }
+
+	  if (vinfo != NULL && slp_node != NULL)
+	    cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
+	  else
+	    {
+	      bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out))
+			 || TYPE_UNSIGNED (TREE_TYPE (vectype_in));
+	      cvt_type = build_nonstandard_integer_type
+		(GET_MODE_BITSIZE (cvt_mode), uns);
+	      cvt_type = build_vector_type (cvt_type, nelts);
+	    }
+	  /* This can only happen for SLP as long as the loop
+	     vectorizer only supports same-sized vectors.  */
+	  if (cvt_type == NULL_TREE
+	      || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
+	      || !supportable_convert_operation ((tree_code) *code1,
+						 vectype_out,
+						 cvt_type, &tc1)
+	      || !supportable_convert_operation ((tree_code) *code2,
+						 cvt_type,
+						 vectype_in, &tc2))
+	    continue;
+
+	  found_mode = true;
+	  break;
+	}
+
+      if (found_mode)
+	{
+	  (*multi_step_cvt)++;
+	  interm_types->safe_push (cvt_type);
+	  cvt_type = NULL_TREE;
+	  *code1 = tc1;
+	  *code2 = tc2;
+	  return true;
+	}
+    }
+  interm_types->release ();
+  return false;
+}
+
 /* Generate and return a vector mask of MASK_TYPE such that
    mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
    Add the statements to SEQ.  */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 97ec9c341e7..ad65ce71bb7 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2265,6 +2265,15 @@  extern bool supportable_widening_operation (vec_info*, code_helper,
 extern bool supportable_narrowing_operation (code_helper, tree, tree,
 					     code_helper *, int *,
 					     vec<tree> *);
+extern bool supportable_indirect_convert_operation (vec_info *,
+						    code_helper,
+						    tree, tree,
+						    code_helper *,
+						    code_helper *,
+						    int *,
+						    vec<tree> *,
+						    tree = NULL_TREE,
+						    slp_tree = NULL);
 
 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
 				  enum vect_cost_for_stmt, stmt_vec_info,