Message ID | 20230606043121.24843-2-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Don't fold _mm{, 256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char. | expand |
On Mon, Jun 5, 2023 at 9:34 PM liuhongt via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Since mask < 0 will be always false when -funsigned-char, but > vpblendvb needs to check the most significant bit. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk and backport to GCC12/GCC13 release branch? I think this is a better patch and will always be correct and still get folded at the gimple level (correctly): diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index d4ff56ee8dd..02bf5ba93a5 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -18561,8 +18561,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode ? intSI_type_node : intDI_type_node; type = get_same_sized_vectype (itype, type); - arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2); } + else + type = signed_type_for (type); + arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2); tree zero_vec = build_zero_cst (type); tree cmp_type = truth_type_for (type); tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec); Thanks, Andrew Pinski > > gcc/ChangeLog: > > PR target/110108 > * config/i386/i386-builtin.def (BDESC): Replace > CODE_FOR_nothing with real code name for blendvb builtins. > * config/i386/i386.cc (ix86_gimple_fold_builtin): Don't fold > _mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when > -funsigned-char. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr110108-2.c: New test. > --- > gcc/config/i386/i386-builtin.def | 4 ++-- > gcc/config/i386/i386.cc | 7 +++++++ > gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++ > 3 files changed, 23 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c > > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def > index 7ba5b6a9d11..b4c99ff62a2 100644 > --- a/gcc/config/i386/i386-builtin.def > +++ b/gcc/config/i386/i386-builtin.def > @@ -944,7 +944,7 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", I > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps_v4sf, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT) > -BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI) > +BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI) > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT) > > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI) > @@ -1198,7 +1198,7 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX > BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI) > BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) > BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI) > -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI) > +BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI) > BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT) > BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) > BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI) > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index b09b3c79e99..f8f6c26c8eb 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -18548,6 +18548,13 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) > /* FALLTHRU. */ > case IX86_BUILTIN_PBLENDVB128: > case IX86_BUILTIN_BLENDVPS: > + /* Don't fold PBLENDVB when funsigned-char since mask < 0 > + will always be false in the gimple level. */ > + if ((fn_code == IX86_BUILTIN_PBLENDVB128 > + || fn_code == IX86_BUILTIN_PBLENDVB256) > + && !flag_signed_char) > + break; > + > gcc_assert (n_args == 3); > arg0 = gimple_call_arg (stmt, 0); > arg1 = gimple_call_arg (stmt, 1); > diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c > new file mode 100644 > index 00000000000..2d1d2fd4991 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx2 -O2 -funsigned-char" } */ > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ > + > +#include <immintrin.h> > +__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) { > + __m128i Result = _mm_blendv_epi8(X0, X1, X2); > + return Result; > +} > + > +__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) { > + __m256i Result = _mm256_blendv_epi8(X0, X1, X2); > + return Result; > +} > -- > 2.39.1.388.g2fc9e9ca3c >
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 7ba5b6a9d11..b4c99ff62a2 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -944,7 +944,7 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", I BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps_v4sf, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT) -BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI) +BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI) @@ -1198,7 +1198,7 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI) BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI) -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI) +BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI) BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index b09b3c79e99..f8f6c26c8eb 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -18548,6 +18548,13 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) /* FALLTHRU. */ case IX86_BUILTIN_PBLENDVB128: case IX86_BUILTIN_BLENDVPS: + /* Don't fold PBLENDVB when funsigned-char since mask < 0 + will always be false in the gimple level. */ + if ((fn_code == IX86_BUILTIN_PBLENDVB128 + || fn_code == IX86_BUILTIN_PBLENDVB256) + && !flag_signed_char) + break; + gcc_assert (n_args == 3); arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c new file mode 100644 index 00000000000..2d1d2fd4991 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O2 -funsigned-char" } */ +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ + +#include <immintrin.h> +__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) { + __m128i Result = _mm_blendv_epi8(X0, X1, X2); + return Result; +} + +__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) { + __m256i Result = _mm256_blendv_epi8(X0, X1, X2); + return Result; +}