Message ID | 20230606043121.24843-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE. | expand |
On Mon, Jun 5, 2023 at 9:34 PM liuhongt via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for > TYPE_MIN, but PABSB will store unsigned result into dst. The patch > uses ABSU_EXPR + VCE instead of ABS_EXPR. > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit > vector absm2 is guarded with TARGET_MMX_WITH_SSE. > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > Ok for trunk? > > > gcc/ChangeLog: > > PR target/110108 > * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold > _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple > ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o > TARGET_64BIT. > * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with > real codename for __builtin_ia32_pabs{b,w,d}. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr110108.c: New test. > --- > gcc/config/i386/i386-builtin.def | 6 ++-- > gcc/config/i386/i386.cc | 44 ++++++++++++++++++++---- > gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++ > 3 files changed, 56 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c > > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def > index 383b68a9bb8..7ba5b6a9d11 100644 > --- a/gcc/config/i386/i386-builtin.def > +++ b/gcc/config/i386/i386-builtin.def > @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd" > > /* SSSE3 */ > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI) > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI) > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI) > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI) > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index d4ff56ee8dd..b09b3c79e99 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -18433,6 +18433,7 @@ bool > ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > { > gimple *stmt = gsi_stmt (*gsi), *g; > + gimple_seq stmts = NULL; > tree fndecl = gimple_call_fndecl (stmt); > gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); > int n_args = gimple_call_num_args (stmt); > @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > { > loc = gimple_location (stmt); > tree type = TREE_TYPE (arg2); > - gimple_seq stmts = NULL; > if (VECTOR_FLOAT_TYPE_P (type)) > { > tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode > @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > tree zero_vec = build_zero_cst (type); > tree minus_one_vec = build_minus_one_cst (type); > tree cmp_type = truth_type_for (type); > - gimple_seq stmts = NULL; > tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1); > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > g = gimple_build_assign (gimple_call_lhs (stmt), > @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > break; > > case IX86_BUILTIN_PABSB: > + case IX86_BUILTIN_PABSW: > + case IX86_BUILTIN_PABSD: > + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ > + if (!TARGET_64BIT) > + break; > + /* FALLTHRU. */ > case IX86_BUILTIN_PABSB128: > case IX86_BUILTIN_PABSB256: > case IX86_BUILTIN_PABSB512: > - case IX86_BUILTIN_PABSW: > case IX86_BUILTIN_PABSW128: > case IX86_BUILTIN_PABSW256: > case IX86_BUILTIN_PABSW512: > - case IX86_BUILTIN_PABSD: > case IX86_BUILTIN_PABSD128: > case IX86_BUILTIN_PABSD256: > case IX86_BUILTIN_PABSD512: > @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > if (n_args > 1 > && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1))) > break; > - loc = gimple_location (stmt); > - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0); > - gsi_replace (gsi, g, false); > + { > + tree utype, ures, vce; > + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)))) > + { > + case E_QImode: > + utype = unsigned_intQI_type_node; > + break; > + case E_HImode: > + utype = unsigned_intHI_type_node; > + break; > + case E_SImode: > + utype = unsigned_intSI_type_node; > + break; > + case E_DImode: > + utype = long_long_unsigned_type_node; > + break; > + default: > + gcc_unreachable (); > + } > + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0)); The above switch can be replaced with just simply utype = unsigned_type_for (TREE_TYPE (arg0)); > + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR > + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */ > + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0); > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > + loc = gimple_location (stmt); > + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); > + g = gimple_build_assign (gimple_call_lhs (stmt), > + VIEW_CONVERT_EXPR, vce); > + gsi_replace (gsi, g, false); > + } > return true; > > default: > diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c > new file mode 100644 > index 00000000000..cd05763b9bf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr110108.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx2 -O2" } */ > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ > +#include <immintrin.h> > + > +__m128i do_stuff_128(__m128i X0, __m128i X1) { > + __m128i AbsX0 = _mm_abs_epi8(X0); > + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0); > + return Result; > +} > + > +__m256i do_stuff_256(__m256i X0, __m256i X1) { > + __m256i AbsX0 = _mm256_abs_epi8(X0); > + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0); > + return Result; > +} > -- > 2.39.1.388.g2fc9e9ca3c >
On Tue, Jun 6, 2023 at 12:49 PM Andrew Pinski <pinskia@gmail.com> wrote: > > On Mon, Jun 5, 2023 at 9:34 PM liuhongt via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch > > uses ABSU_EXPR + VCE instead of ABS_EXPR. > > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit > > vector absm2 is guarded with TARGET_MMX_WITH_SSE. > > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > > Ok for trunk? > > > > > > gcc/ChangeLog: > > > > PR target/110108 > > * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold > > _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple > > ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o > > TARGET_64BIT. > > * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with > > real codename for __builtin_ia32_pabs{b,w,d}. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/pr110108.c: New test. > > --- > > gcc/config/i386/i386-builtin.def | 6 ++-- > > gcc/config/i386/i386.cc | 44 ++++++++++++++++++++---- > > gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++ > > 3 files changed, 56 insertions(+), 10 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c > > > > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def > > index 383b68a9bb8..7ba5b6a9d11 100644 > > --- a/gcc/config/i386/i386-builtin.def > > +++ b/gcc/config/i386/i386-builtin.def > > @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd" > > > > /* SSSE3 */ > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI) > > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI) > > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI) > > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > > > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI) > > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > > index d4ff56ee8dd..b09b3c79e99 100644 > > --- a/gcc/config/i386/i386.cc > > +++ b/gcc/config/i386/i386.cc > > @@ -18433,6 +18433,7 @@ bool > > ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > { > > gimple *stmt = gsi_stmt (*gsi), *g; > > + gimple_seq stmts = NULL; > > tree fndecl = gimple_call_fndecl (stmt); > > gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); > > int n_args = gimple_call_num_args (stmt); > > @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > { > > loc = gimple_location (stmt); > > tree type = TREE_TYPE (arg2); > > - gimple_seq stmts = NULL; > > if (VECTOR_FLOAT_TYPE_P (type)) > > { > > tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode > > @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > tree zero_vec = build_zero_cst (type); > > tree minus_one_vec = build_minus_one_cst (type); > > tree cmp_type = truth_type_for (type); > > - gimple_seq stmts = NULL; > > tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1); > > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > > g = gimple_build_assign (gimple_call_lhs (stmt), > > @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > break; > > > > case IX86_BUILTIN_PABSB: > > + case IX86_BUILTIN_PABSW: > > + case IX86_BUILTIN_PABSD: > > + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ > > + if (!TARGET_64BIT) > > + break; > > + /* FALLTHRU. */ > > case IX86_BUILTIN_PABSB128: > > case IX86_BUILTIN_PABSB256: > > case IX86_BUILTIN_PABSB512: > > - case IX86_BUILTIN_PABSW: > > case IX86_BUILTIN_PABSW128: > > case IX86_BUILTIN_PABSW256: > > case IX86_BUILTIN_PABSW512: > > - case IX86_BUILTIN_PABSD: > > case IX86_BUILTIN_PABSD128: > > case IX86_BUILTIN_PABSD256: > > case IX86_BUILTIN_PABSD512: > > @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > if (n_args > 1 > > && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1))) > > break; > > - loc = gimple_location (stmt); > > - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0); > > - gsi_replace (gsi, g, false); > > + { > > + tree utype, ures, vce; > > + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)))) > > + { > > + case E_QImode: > > + utype = unsigned_intQI_type_node; > > + break; > > + case E_HImode: > > + utype = unsigned_intHI_type_node; > > + break; > > + case E_SImode: > > + utype = unsigned_intSI_type_node; > > + break; > > + case E_DImode: > > + utype = long_long_unsigned_type_node; > > + break; > > + default: > > + gcc_unreachable (); > > + } > > + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0)); > > The above switch can be replaced with just simply > utype = unsigned_type_for (TREE_TYPE (arg0)); Yes, thanks. > > > + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR > > + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */ > > + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0); > > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > > + loc = gimple_location (stmt); > > + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); > > + g = gimple_build_assign (gimple_call_lhs (stmt), > > + VIEW_CONVERT_EXPR, vce); > > + gsi_replace (gsi, g, false); > > + } > > return true; > > > > default: > > diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c > > new file mode 100644 > > index 00000000000..cd05763b9bf > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr110108.c > > @@ -0,0 +1,16 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-mavx2 -O2" } */ > > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ > > +#include <immintrin.h> > > + > > +__m128i do_stuff_128(__m128i X0, __m128i X1) { > > + __m128i AbsX0 = _mm_abs_epi8(X0); > > + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0); > > + return Result; > > +} > > + > > +__m256i do_stuff_256(__m256i X0, __m256i X1) { > > + __m256i AbsX0 = _mm256_abs_epi8(X0); > > + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0); > > + return Result; > > +} > > -- > > 2.39.1.388.g2fc9e9ca3c > >
On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for > TYPE_MIN, but PABSB will store unsigned result into dst. The patch > uses ABSU_EXPR + VCE instead of ABS_EXPR. > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit > vector absm2 is guarded with TARGET_MMX_WITH_SSE. > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > Ok for trunk? > > > gcc/ChangeLog: > > PR target/110108 > * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold > _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple > ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o > TARGET_64BIT. > * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with > real codename for __builtin_ia32_pabs{b,w,d}. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr110108.c: New test. > --- > gcc/config/i386/i386-builtin.def | 6 ++-- > gcc/config/i386/i386.cc | 44 ++++++++++++++++++++---- > gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++ > 3 files changed, 56 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c > > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def > index 383b68a9bb8..7ba5b6a9d11 100644 > --- a/gcc/config/i386/i386-builtin.def > +++ b/gcc/config/i386/i386-builtin.def > @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd" > > /* SSSE3 */ > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI) > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI) > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI) > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI) > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index d4ff56ee8dd..b09b3c79e99 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -18433,6 +18433,7 @@ bool > ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > { > gimple *stmt = gsi_stmt (*gsi), *g; > + gimple_seq stmts = NULL; > tree fndecl = gimple_call_fndecl (stmt); > gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); > int n_args = gimple_call_num_args (stmt); > @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > { > loc = gimple_location (stmt); > tree type = TREE_TYPE (arg2); > - gimple_seq stmts = NULL; > if (VECTOR_FLOAT_TYPE_P (type)) > { > tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode > @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > tree zero_vec = build_zero_cst (type); > tree minus_one_vec = build_minus_one_cst (type); > tree cmp_type = truth_type_for (type); > - gimple_seq stmts = NULL; > tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1); > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > g = gimple_build_assign (gimple_call_lhs (stmt), > @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > break; > > case IX86_BUILTIN_PABSB: > + case IX86_BUILTIN_PABSW: > + case IX86_BUILTIN_PABSD: > + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ > + if (!TARGET_64BIT) This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see the definition of T_M_W_S in i386.h. OTOH, these builtins are available for TARGET_MMX, so I'm not sure if the above check is needed at all. Uros. > + break; > + /* FALLTHRU. */ > case IX86_BUILTIN_PABSB128: > case IX86_BUILTIN_PABSB256: > case IX86_BUILTIN_PABSB512: > - case IX86_BUILTIN_PABSW: > case IX86_BUILTIN_PABSW128: > case IX86_BUILTIN_PABSW256: > case IX86_BUILTIN_PABSW512: > - case IX86_BUILTIN_PABSD: > case IX86_BUILTIN_PABSD128: > case IX86_BUILTIN_PABSD256: > case IX86_BUILTIN_PABSD512: > @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > if (n_args > 1 > && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1))) > break; > - loc = gimple_location (stmt); > - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0); > - gsi_replace (gsi, g, false); > + { > + tree utype, ures, vce; > + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)))) > + { > + case E_QImode: > + utype = unsigned_intQI_type_node; > + break; > + case E_HImode: > + utype = unsigned_intHI_type_node; > + break; > + case E_SImode: > + utype = unsigned_intSI_type_node; > + break; > + case E_DImode: > + utype = long_long_unsigned_type_node; > + break; > + default: > + gcc_unreachable (); > + } > + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0)); > + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR > + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */ > + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0); > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > + loc = gimple_location (stmt); > + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); > + g = gimple_build_assign (gimple_call_lhs (stmt), > + VIEW_CONVERT_EXPR, vce); > + gsi_replace (gsi, g, false); > + } > return true; > > default: > diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c > new file mode 100644 > index 00000000000..cd05763b9bf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr110108.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx2 -O2" } */ > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ > +#include <immintrin.h> > + > +__m128i do_stuff_128(__m128i X0, __m128i X1) { > + __m128i AbsX0 = _mm_abs_epi8(X0); > + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0); > + return Result; > +} > + > +__m256i do_stuff_256(__m256i X0, __m256i X1) { > + __m256i AbsX0 = _mm256_abs_epi8(X0); > + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0); > + return Result; > +} > -- > 2.39.1.388.g2fc9e9ca3c >
On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for > TYPE_MIN, but PABSB will store unsigned result into dst. The patch > uses ABSU_EXPR + VCE instead of ABS_EXPR. > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit > vector absm2 is guarded with TARGET_MMX_WITH_SSE. Please note that we are using builtins here, so we should not fold to absm2, but to ssse3_absm2, which is also available with TARGET_MMX. Uros. > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > Ok for trunk? > > > gcc/ChangeLog: > > PR target/110108 > * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold > _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple > ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o > TARGET_64BIT. > * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with > real codename for __builtin_ia32_pabs{b,w,d}. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr110108.c: New test. > --- > gcc/config/i386/i386-builtin.def | 6 ++-- > gcc/config/i386/i386.cc | 44 ++++++++++++++++++++---- > gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++ > 3 files changed, 56 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c > > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def > index 383b68a9bb8..7ba5b6a9d11 100644 > --- a/gcc/config/i386/i386-builtin.def > +++ b/gcc/config/i386/i386-builtin.def > @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd" > > /* SSSE3 */ > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI) > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI) > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI) > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI) > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index d4ff56ee8dd..b09b3c79e99 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -18433,6 +18433,7 @@ bool > ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > { > gimple *stmt = gsi_stmt (*gsi), *g; > + gimple_seq stmts = NULL; > tree fndecl = gimple_call_fndecl (stmt); > gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); > int n_args = gimple_call_num_args (stmt); > @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > { > loc = gimple_location (stmt); > tree type = TREE_TYPE (arg2); > - gimple_seq stmts = NULL; > if (VECTOR_FLOAT_TYPE_P (type)) > { > tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode > @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > tree zero_vec = build_zero_cst (type); > tree minus_one_vec = build_minus_one_cst (type); > tree cmp_type = truth_type_for (type); > - gimple_seq stmts = NULL; > tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1); > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > g = gimple_build_assign (gimple_call_lhs (stmt), > @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > break; > > case IX86_BUILTIN_PABSB: > + case IX86_BUILTIN_PABSW: > + case IX86_BUILTIN_PABSD: > + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ > + if (!TARGET_64BIT) > + break; > + /* FALLTHRU. */ > case IX86_BUILTIN_PABSB128: > case IX86_BUILTIN_PABSB256: > case IX86_BUILTIN_PABSB512: > - case IX86_BUILTIN_PABSW: > case IX86_BUILTIN_PABSW128: > case IX86_BUILTIN_PABSW256: > case IX86_BUILTIN_PABSW512: > - case IX86_BUILTIN_PABSD: > case IX86_BUILTIN_PABSD128: > case IX86_BUILTIN_PABSD256: > case IX86_BUILTIN_PABSD512: > @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > if (n_args > 1 > && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1))) > break; > - loc = gimple_location (stmt); > - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0); > - gsi_replace (gsi, g, false); > + { > + tree utype, ures, vce; > + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)))) > + { > + case E_QImode: > + utype = unsigned_intQI_type_node; > + break; > + case E_HImode: > + utype = unsigned_intHI_type_node; > + break; > + case E_SImode: > + utype = unsigned_intSI_type_node; > + break; > + case E_DImode: > + utype = long_long_unsigned_type_node; > + break; > + default: > + gcc_unreachable (); > + } > + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0)); > + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR > + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */ > + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0); > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > + loc = gimple_location (stmt); > + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); > + g = gimple_build_assign (gimple_call_lhs (stmt), > + VIEW_CONVERT_EXPR, vce); > + gsi_replace (gsi, g, false); > + } > return true; > > default: > diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c > new file mode 100644 > index 00000000000..cd05763b9bf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr110108.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx2 -O2" } */ > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ > +#include <immintrin.h> > + > +__m128i do_stuff_128(__m128i X0, __m128i X1) { > + __m128i AbsX0 = _mm_abs_epi8(X0); > + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0); > + return Result; > +} > + > +__m256i do_stuff_256(__m256i X0, __m256i X1) { > + __m256i AbsX0 = _mm256_abs_epi8(X0); > + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0); > + return Result; > +} > -- > 2.39.1.388.g2fc9e9ca3c >
On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch > > uses ABSU_EXPR + VCE instead of ABS_EXPR. > > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit > > vector absm2 is guarded with TARGET_MMX_WITH_SSE. > >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see >the definition of T_M_W_S in i386.h. OTOH, these builtins are >available for TARGET_MMX, so I'm not sure if the above check is needed >at all. BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be checked by ix86_check_builtin_isa_match which is at the beginning of ix86_gimple_fold_builtin. Here, we're folding those builtin into gimple ABSU_EXPR, and ABSU_EXPR<vector> will be lowered by vec_lower pass when backend doesn't support corressponding absm2_optab, that's why i only check TARGET_64BIT here. > Please note that we are using builtins here, so we should not fold to > absm2, but to ssse3_absm2, which is also available with TARGET_MMX. Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT, backend suppport absm2_optab which exactly matches ssse3_absm2. w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR, but let backend expanded to ssse3_absm2. > > Uros. > > > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > > Ok for trunk? > > > > > > gcc/ChangeLog: > > > > PR target/110108 > > * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold > > _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple > > ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o > > TARGET_64BIT. > > * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with > > real codename for __builtin_ia32_pabs{b,w,d}. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/pr110108.c: New test. > > --- > > gcc/config/i386/i386-builtin.def | 6 ++-- > > gcc/config/i386/i386.cc | 44 ++++++++++++++++++++---- > > gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++ > > 3 files changed, 56 insertions(+), 10 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c > > > > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def > > index 383b68a9bb8..7ba5b6a9d11 100644 > > --- a/gcc/config/i386/i386-builtin.def > > +++ b/gcc/config/i386/i386-builtin.def > > @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd" > > > > /* SSSE3 */ > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI) > > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI) > > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI) > > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) > > > > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI) > > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > > index d4ff56ee8dd..b09b3c79e99 100644 > > --- a/gcc/config/i386/i386.cc > > +++ b/gcc/config/i386/i386.cc > > @@ -18433,6 +18433,7 @@ bool > > ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > { > > gimple *stmt = gsi_stmt (*gsi), *g; > > + gimple_seq stmts = NULL; > > tree fndecl = gimple_call_fndecl (stmt); > > gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); > > int n_args = gimple_call_num_args (stmt); > > @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > { > > loc = gimple_location (stmt); > > tree type = TREE_TYPE (arg2); > > - gimple_seq stmts = NULL; > > if (VECTOR_FLOAT_TYPE_P (type)) > > { > > tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode > > @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > tree zero_vec = build_zero_cst (type); > > tree minus_one_vec = build_minus_one_cst (type); > > tree cmp_type = truth_type_for (type); > > - gimple_seq stmts = NULL; > > tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1); > > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > > g = gimple_build_assign (gimple_call_lhs (stmt), > > @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > break; > > > > case IX86_BUILTIN_PABSB: > > + case IX86_BUILTIN_PABSW: > > + case IX86_BUILTIN_PABSD: > > + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ > > + if (!TARGET_64BIT) > > + break; > > + /* FALLTHRU. */ > > case IX86_BUILTIN_PABSB128: > > case IX86_BUILTIN_PABSB256: > > case IX86_BUILTIN_PABSB512: > > - case IX86_BUILTIN_PABSW: > > case IX86_BUILTIN_PABSW128: > > case IX86_BUILTIN_PABSW256: > > case IX86_BUILTIN_PABSW512: > > - case IX86_BUILTIN_PABSD: > > case IX86_BUILTIN_PABSD128: > > case IX86_BUILTIN_PABSD256: > > case IX86_BUILTIN_PABSD512: > > @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > if (n_args > 1 > > && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1))) > > break; > > - loc = gimple_location (stmt); > > - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0); > > - gsi_replace (gsi, g, false); > > + { > > + tree utype, ures, vce; > > + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)))) > > + { > > + case E_QImode: > > + utype = unsigned_intQI_type_node; > > + break; > > + case E_HImode: > > + utype = unsigned_intHI_type_node; > > + break; > > + case E_SImode: > > + utype = unsigned_intSI_type_node; > > + break; > > + case E_DImode: > > + utype = long_long_unsigned_type_node; > > + break; > > + default: > > + gcc_unreachable (); > > + } > > + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0)); > > + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR > > + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */ > > + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0); > > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > > + loc = gimple_location (stmt); > > + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); > > + g = gimple_build_assign (gimple_call_lhs (stmt), > > + VIEW_CONVERT_EXPR, vce); > > + gsi_replace (gsi, g, false); > > + } > > return true; > > > > default: > > diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c > > new file mode 100644 > > index 00000000000..cd05763b9bf > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr110108.c > > @@ -0,0 +1,16 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-mavx2 -O2" } */ > > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ > > +#include <immintrin.h> > > + > > +__m128i do_stuff_128(__m128i X0, __m128i X1) { > > + __m128i AbsX0 = _mm_abs_epi8(X0); > > + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0); > > + return Result; > > +} > > + > > +__m256i do_stuff_256(__m256i X0, __m256i X1) { > > + __m256i AbsX0 = _mm256_abs_epi8(X0); > > + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0); > > + return Result; > > +} > > -- > > 2.39.1.388.g2fc9e9ca3c > >
On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu <crazylht@gmail.com> wrote: > > On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for > > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch > > > uses ABSU_EXPR + VCE instead of ABS_EXPR. > > > > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit > > > vector absm2 is guarded with TARGET_MMX_WITH_SSE. > > > >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see > >the definition of T_M_W_S in i386.h. OTOH, these builtins are > >available for TARGET_MMX, so I'm not sure if the above check is needed > >at all. > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, > CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, > UNKNOWN, (int) V8QI_FTYPE_V8QI) > > ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be > checked by ix86_check_builtin_isa_match which is at the beginning of > ix86_gimple_fold_builtin. > Here, we're folding those builtin into gimple ABSU_EXPR, and > ABSU_EXPR<vector> will be lowered by vec_lower pass when backend > doesn't support corressponding absm2_optab, that's why i only check > TARGET_64BIT here. > > > Please note that we are using builtins here, so we should not fold to > > absm2, but to ssse3_absm2, which is also available with TARGET_MMX. > Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT, > backend suppport absm2_optab which exactly matches ssse3_absm2. > w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR, > but let backend expanded to ssse3_absm2. Thanks for the explanation, but for consistency, I'd recommend checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The macro is self-explanatory, while the usage of TARGET_64BIT is not that descriptive. Uros.
On Tue, Jun 6, 2023 at 10:36 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu <crazylht@gmail.com> wrote: > > > > On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for > > > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch > > > > uses ABSU_EXPR + VCE instead of ABS_EXPR. > > > > > > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit > > > > vector absm2 is guarded with TARGET_MMX_WITH_SSE. > > > > > >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see > > >the definition of T_M_W_S in i386.h. OTOH, these builtins are > > >available for TARGET_MMX, so I'm not sure if the above check is needed > > >at all. > > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, > > CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, > > UNKNOWN, (int) V8QI_FTYPE_V8QI) > > > > ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be > > checked by ix86_check_builtin_isa_match which is at the beginning of > > ix86_gimple_fold_builtin. > > Here, we're folding those builtin into gimple ABSU_EXPR, and > > ABSU_EXPR<vector> will be lowered by vec_lower pass when backend > > doesn't support corressponding absm2_optab, that's why i only check > > TARGET_64BIT here. > > > > > Please note that we are using builtins here, so we should not fold to > > > absm2, but to ssse3_absm2, which is also available with TARGET_MMX. > > Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT, > > backend suppport absm2_optab which exactly matches ssse3_absm2. > > w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR, > > but let backend expanded to ssse3_absm2. > > Thanks for the explanation, but for consistency, I'd recommend > checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The > macro is self-explanatory, while the usage of TARGET_64BIT is not that > descriptive. Sure. > > Uros.
On Wed, Jun 7, 2023 at 8:31 AM Hongtao Liu <crazylht@gmail.com> wrote: > > On Tue, Jun 6, 2023 at 10:36 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu <crazylht@gmail.com> wrote: > > > > > > On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > > > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches > > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for > > > > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch > > > > > uses ABSU_EXPR + VCE instead of ABS_EXPR. > > > > > > > > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit > > > > > vector absm2 is guarded with TARGET_MMX_WITH_SSE. > > > > > > > >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see > > > >the definition of T_M_W_S in i386.h. OTOH, these builtins are > > > >available for TARGET_MMX, so I'm not sure if the above check is needed > > > >at all. > > > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, > > > CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, > > > UNKNOWN, (int) V8QI_FTYPE_V8QI) > > > > > > ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be > > > checked by ix86_check_builtin_isa_match which is at the beginning of > > > ix86_gimple_fold_builtin. > > > Here, we're folding those builtin into gimple ABSU_EXPR, and > > > ABSU_EXPR<vector> will be lowered by vec_lower pass when backend > > > doesn't support corressponding absm2_optab, that's why i only check > > > TARGET_64BIT here. > > > > > > > Please note that we are using builtins here, so we should not fold to > > > > absm2, but to ssse3_absm2, which is also available with TARGET_MMX. > > > Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT, > > > backend suppport absm2_optab which exactly matches ssse3_absm2. > > > w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR, > > > but let backend expanded to ssse3_absm2. > > > > Thanks for the explanation, but for consistency, I'd recommend > > checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The > > macro is self-explanatory, while the usage of TARGET_64BIT is not that > > descriptive. > Sure. Pushed to trunk. > > > > Uros. > > > > -- > BR, > Hongtao
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 383b68a9bb8..7ba5b6a9d11 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd" /* SSSE3 */ BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI) -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI) BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI) -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI) BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI) -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI) BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI) BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index d4ff56ee8dd..b09b3c79e99 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -18433,6 +18433,7 @@ bool ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) { gimple *stmt = gsi_stmt (*gsi), *g; + gimple_seq stmts = NULL; tree fndecl = gimple_call_fndecl (stmt); gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); int n_args = gimple_call_num_args (stmt); @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) { loc = gimple_location (stmt); tree type = TREE_TYPE (arg2); - gimple_seq stmts = NULL; if (VECTOR_FLOAT_TYPE_P (type)) { tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) tree zero_vec = build_zero_cst (type); tree minus_one_vec = build_minus_one_cst (type); tree cmp_type = truth_type_for (type); - gimple_seq stmts = NULL; tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1); gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); g = gimple_build_assign (gimple_call_lhs (stmt), @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) break; case IX86_BUILTIN_PABSB: + case IX86_BUILTIN_PABSW: + case IX86_BUILTIN_PABSD: + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ + if (!TARGET_64BIT) + break; + /* FALLTHRU. */ case IX86_BUILTIN_PABSB128: case IX86_BUILTIN_PABSB256: case IX86_BUILTIN_PABSB512: - case IX86_BUILTIN_PABSW: case IX86_BUILTIN_PABSW128: case IX86_BUILTIN_PABSW256: case IX86_BUILTIN_PABSW512: - case IX86_BUILTIN_PABSD: case IX86_BUILTIN_PABSD128: case IX86_BUILTIN_PABSD256: case IX86_BUILTIN_PABSD512: @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) if (n_args > 1 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1))) break; - loc = gimple_location (stmt); - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0); - gsi_replace (gsi, g, false); + { + tree utype, ures, vce; + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)))) + { + case E_QImode: + utype = unsigned_intQI_type_node; + break; + case E_HImode: + utype = unsigned_intHI_type_node; + break; + case E_SImode: + utype = unsigned_intSI_type_node; + break; + case E_DImode: + utype = long_long_unsigned_type_node; + break; + default: + gcc_unreachable (); + } + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0)); + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */ + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + loc = gimple_location (stmt); + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); + g = gimple_build_assign (gimple_call_lhs (stmt), + VIEW_CONVERT_EXPR, vce); + gsi_replace (gsi, g, false); + } return true; default: diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c new file mode 100644 index 00000000000..cd05763b9bf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110108.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O2" } */ +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ +#include <immintrin.h> + +__m128i do_stuff_128(__m128i X0, __m128i X1) { + __m128i AbsX0 = _mm_abs_epi8(X0); + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0); + return Result; +} + +__m256i do_stuff_256(__m256i X0, __m256i X1) { + __m256i AbsX0 = _mm256_abs_epi8(X0); + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0); + return Result; +}