diff mbox series

Check the type of mask while generating cond_op in gimple simplication.

Message ID 20210827065259.5764-1-hongtao.liu@intel.com
State New
Headers show
Series Check the type of mask while generating cond_op in gimple simplication. | expand

Commit Message

liuhongt Aug. 27, 2021, 6:52 a.m. UTC
When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
it doesn't check if mask type matches. It causes an ICE when expand cond_op
with mismatched mode.
  This patch add a function named cond_vectorized_internal_fn_supported_p
 to additionally check mask type than vectorized_internal_fn_supported_p.

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
  Ok for trunk?

gcc/ChangeLog:

	PR middle-end/102080
	* internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
	* internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
	* match.pd: Check the type of mask while generating cond_op in
	gimple simplication.

gcc/testsuite/ChangeLog:

	PR middle-end/102080
	* gcc.target/i386/pr102080.c: New test.
---
 gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
 gcc/internal-fn.h                        |  1 +
 gcc/match.pd                             | 24 ++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
 4 files changed, 55 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c

Comments

Richard Biener Aug. 30, 2021, 12:24 p.m. UTC | #1
On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
>
>   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> it doesn't check if mask type matches. It causes an ICE when expand cond_op
> with mismatched mode.
>   This patch add a function named cond_vectorized_internal_fn_supported_p
>  to additionally check mask type than vectorized_internal_fn_supported_p.
>
>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>   Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR middle-end/102080
>         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
>         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
>         * match.pd: Check the type of mask while generating cond_op in
>         gimple simplication.
>
> gcc/testsuite/ChangeLog:
>
>         PR middle-end/102080
>         * gcc.target/i386/pr102080.c: New test.
> ---
>  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
>  gcc/internal-fn.h                        |  1 +
>  gcc/match.pd                             | 24 ++++++++++++++++--------
>  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
>  4 files changed, 55 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 1360a00f0b9..8b2b65db1a7 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
>    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
>  }
>
> +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> +   doesn't check if mask type matches.  */
> +bool
> +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> +                                        tree mask_type)
> +{
> +  if (!vectorized_internal_fn_supported_p (ifn, type))
> +    return false;
> +
> +  machine_mode mask_mode;
> +  machine_mode vmode = TYPE_MODE (type);
> +  int size1, size2;
> +  if (VECTOR_MODE_P (vmode)
> +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> +      && size1 != size2)

Why do we check for equal size rather than just mode equality which
I think would work for non-constant sized modes as well?  And when
using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
GET_MODE_SIZE (TYPE_MODE (mask_type)))

Thanks,
Richard.

> +    return false;
> +
> +  return true;
> +}
> +
>  /* If TYPE is a vector type, return true if IFN is a direct internal
>     function that is supported for that type.  If TYPE is a scalar type,
>     return true if IFN is a direct internal function that is supported for
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 19d0f849a5a..f0aea00103c 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
>  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
>
>  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
>
>  #endif
> diff --git a/gcc/match.pd b/gcc/match.pd
> index e5bbb123a6a..72b1bc674db 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>       cond_op (COND_BINARY)
>   (simplify
>    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> -  (with { tree op_type = TREE_TYPE (@4); }
> -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +  (with { tree op_type = TREE_TYPE (@4);
> +         tree mask_type = TREE_TYPE (@0); }
> +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> +                                                op_type, mask_type)
>         && element_precision (type) == element_precision (op_type))
>      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> -  (with { tree op_type = TREE_TYPE (@4); }
> -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +  (with { tree op_type = TREE_TYPE (@4);
> +         tree mask_type = TREE_TYPE (@0); }
> +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> +                                                op_type, mask_type)
>         && element_precision (type) == element_precision (op_type))
>      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
>
> @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>       cond_op (COND_TERNARY)
>   (simplify
>    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> -  (with { tree op_type = TREE_TYPE (@5); }
> -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +  (with { tree op_type = TREE_TYPE (@5);
> +         tree mask_type = TREE_TYPE (@0); }
> +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> +                                                op_type, mask_type)
>         && element_precision (type) == element_precision (op_type))
>      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> -  (with { tree op_type = TREE_TYPE (@5); }
> -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +  (with { tree op_type = TREE_TYPE (@5);
> +         tree mask_type = TREE_TYPE (@0); }
> +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> +                                                op_type, mask_type)
>         && element_precision (type) == element_precision (op_type))
>      (view_convert (cond_op (bit_not @0) @2 @3 @4
>                   (view_convert:op_type @1)))))))
> diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> new file mode 100644
> index 00000000000..6a40a75e1c5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> @@ -0,0 +1,16 @@
> +#include<immintrin.h>
> +typedef float __m256 __attribute__((__vector_size__(32)));
> +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> +
> +void
> +__attribute__ ((target("avx")))
> +IfThenElse (__m256 no) {
> +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> +}
> +void
> +__attribute__ ((target("avx512vl")))
> +EncodedFromDisplay() {
> +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> +  IfThenElse(__trans_tmp_11);
> +}
> --
> 2.18.1
>
Hongtao Liu Aug. 31, 2021, 10:24 a.m. UTC | #2
On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > with mismatched mode.
> >   This patch add a function named cond_vectorized_internal_fn_supported_p
> >  to additionally check mask type than vectorized_internal_fn_supported_p.
> >
> >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >   Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> >         PR middle-end/102080
> >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> >         * match.pd: Check the type of mask while generating cond_op in
> >         gimple simplication.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         PR middle-end/102080
> >         * gcc.target/i386/pr102080.c: New test.
> > ---
> >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> >  gcc/internal-fn.h                        |  1 +
> >  gcc/match.pd                             | 24 ++++++++++++++++--------
> >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> >  4 files changed, 55 insertions(+), 8 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >
> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > index 1360a00f0b9..8b2b65db1a7 100644
> > --- a/gcc/internal-fn.c
> > +++ b/gcc/internal-fn.c
> > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> >  }
> >
> > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > +   doesn't check if mask type matches.  */
> > +bool
> > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > +                                        tree mask_type)
> > +{
> > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > +    return false;
> > +
> > +  machine_mode mask_mode;
> > +  machine_mode vmode = TYPE_MODE (type);
> > +  int size1, size2;
> > +  if (VECTOR_MODE_P (vmode)
> > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > +      && size1 != size2)
>
> Why do we check for equal size rather than just mode equality which
I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
not QImode, Changed the patch to check mode equality.
Update patch.

> I think would work for non-constant sized modes as well?  And when
> using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
> GET_MODE_SIZE (TYPE_MODE (mask_type)))
>
> Thanks,
> Richard.
>
> > +    return false;
> > +
> > +  return true;
> > +}
> > +
> >  /* If TYPE is a vector type, return true if IFN is a direct internal
> >     function that is supported for that type.  If TYPE is a scalar type,
> >     return true if IFN is a direct internal function that is supported for
> > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> > index 19d0f849a5a..f0aea00103c 100644
> > --- a/gcc/internal-fn.h
> > +++ b/gcc/internal-fn.h
> > @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
> >  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
> >
> >  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> > +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
> >
> >  #endif
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index e5bbb123a6a..72b1bc674db 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >       cond_op (COND_BINARY)
> >   (simplify
> >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > -  (with { tree op_type = TREE_TYPE (@4); }
> > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +  (with { tree op_type = TREE_TYPE (@4);
> > +         tree mask_type = TREE_TYPE (@0); }
> > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > +                                                op_type, mask_type)
> >         && element_precision (type) == element_precision (op_type))
> >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> >   (simplify
> >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > -  (with { tree op_type = TREE_TYPE (@4); }
> > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +  (with { tree op_type = TREE_TYPE (@4);
> > +         tree mask_type = TREE_TYPE (@0); }
> > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > +                                                op_type, mask_type)
> >         && element_precision (type) == element_precision (op_type))
> >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> >
> > @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >       cond_op (COND_TERNARY)
> >   (simplify
> >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > -  (with { tree op_type = TREE_TYPE (@5); }
> > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +  (with { tree op_type = TREE_TYPE (@5);
> > +         tree mask_type = TREE_TYPE (@0); }
> > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > +                                                op_type, mask_type)
> >         && element_precision (type) == element_precision (op_type))
> >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> >   (simplify
> >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > -  (with { tree op_type = TREE_TYPE (@5); }
> > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +  (with { tree op_type = TREE_TYPE (@5);
> > +         tree mask_type = TREE_TYPE (@0); }
> > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > +                                                op_type, mask_type)
> >         && element_precision (type) == element_precision (op_type))
> >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> >                   (view_convert:op_type @1)))))))
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > new file mode 100644
> > index 00000000000..6a40a75e1c5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > @@ -0,0 +1,16 @@
> > +#include<immintrin.h>
> > +typedef float __m256 __attribute__((__vector_size__(32)));
> > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > +
> > +void
> > +__attribute__ ((target("avx")))
> > +IfThenElse (__m256 no) {
> > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > +}
> > +void
> > +__attribute__ ((target("avx512vl")))
> > +EncodedFromDisplay() {
> > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > +  IfThenElse(__trans_tmp_11);
> > +}
> > --
> > 2.18.1
> >
Richard Biener Aug. 31, 2021, 11:56 a.m. UTC | #3
On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > > with mismatched mode.
> > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> > >
> > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > >   Ok for trunk?
> > >
> > > gcc/ChangeLog:
> > >
> > >         PR middle-end/102080
> > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> > >         * match.pd: Check the type of mask while generating cond_op in
> > >         gimple simplication.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         PR middle-end/102080
> > >         * gcc.target/i386/pr102080.c: New test.
> > > ---
> > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> > >  gcc/internal-fn.h                        |  1 +
> > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> > >  4 files changed, 55 insertions(+), 8 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > >
> > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > > index 1360a00f0b9..8b2b65db1a7 100644
> > > --- a/gcc/internal-fn.c
> > > +++ b/gcc/internal-fn.c
> > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> > >  }
> > >
> > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > > +   doesn't check if mask type matches.  */
> > > +bool
> > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > > +                                        tree mask_type)
> > > +{
> > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > > +    return false;
> > > +
> > > +  machine_mode mask_mode;
> > > +  machine_mode vmode = TYPE_MODE (type);
> > > +  int size1, size2;
> > > +  if (VECTOR_MODE_P (vmode)
> > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > > +      && size1 != size2)
> >
> > Why do we check for equal size rather than just mode equality which
> I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> not QImode, Changed the patch to check mode equality.
> Update patch.

Looking at all this it seems the match.pd patterns should have not
used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
which is equivalent here because we're always working with vector modes?

And then shouldn't we look at the actual optab whether the mask mode matches
the expectation rather than going around via the target hook which may not have
enough context to decide which mask mode to use?

In any case if the approach of the patch is correct shouldn't it do

  if (VECTOR_MODE_P (vmode)
      && (!targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
             || mask_mode != TYPE_MODE (mask_type)))
    return false;

that is, not return true if there's no mask mode for the data mode?

Given the first observation should we call the function
direct_cond_internal_fn_supported_p () instead and as to the second
observation, look at the optab operands mode?

Richard.

> > I think would work for non-constant sized modes as well?  And when
> > using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
> > GET_MODE_SIZE (TYPE_MODE (mask_type)))
> >
> > Thanks,
> > Richard.
> >
> > > +    return false;
> > > +
> > > +  return true;
> > > +}
> > > +
> > >  /* If TYPE is a vector type, return true if IFN is a direct internal
> > >     function that is supported for that type.  If TYPE is a scalar type,
> > >     return true if IFN is a direct internal function that is supported for
> > > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> > > index 19d0f849a5a..f0aea00103c 100644
> > > --- a/gcc/internal-fn.h
> > > +++ b/gcc/internal-fn.h
> > > @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
> > >  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
> > >
> > >  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> > > +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
> > >
> > >  #endif
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index e5bbb123a6a..72b1bc674db 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >       cond_op (COND_BINARY)
> > >   (simplify
> > >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > +  (with { tree op_type = TREE_TYPE (@4);
> > > +         tree mask_type = TREE_TYPE (@0); }
> > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > +                                                op_type, mask_type)
> > >         && element_precision (type) == element_precision (op_type))
> > >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> > >   (simplify
> > >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > +  (with { tree op_type = TREE_TYPE (@4);
> > > +         tree mask_type = TREE_TYPE (@0); }
> > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > +                                                op_type, mask_type)
> > >         && element_precision (type) == element_precision (op_type))
> > >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> > >
> > > @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >       cond_op (COND_TERNARY)
> > >   (simplify
> > >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > +  (with { tree op_type = TREE_TYPE (@5);
> > > +         tree mask_type = TREE_TYPE (@0); }
> > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > +                                                op_type, mask_type)
> > >         && element_precision (type) == element_precision (op_type))
> > >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> > >   (simplify
> > >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > +  (with { tree op_type = TREE_TYPE (@5);
> > > +         tree mask_type = TREE_TYPE (@0); }
> > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > +                                                op_type, mask_type)
> > >         && element_precision (type) == element_precision (op_type))
> > >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> > >                   (view_convert:op_type @1)))))))
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > new file mode 100644
> > > index 00000000000..6a40a75e1c5
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > @@ -0,0 +1,16 @@
> > > +#include<immintrin.h>
> > > +typedef float __m256 __attribute__((__vector_size__(32)));
> > > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > > +
> > > +void
> > > +__attribute__ ((target("avx")))
> > > +IfThenElse (__m256 no) {
> > > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > > +}
> > > +void
> > > +__attribute__ ((target("avx512vl")))
> > > +EncodedFromDisplay() {
> > > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > > +  IfThenElse(__trans_tmp_11);
> > > +}
> > > --
> > > 2.18.1
> > >
>
>
>
> --
> BR,
> Hongtao
Hongtao Liu Sept. 1, 2021, 6:33 a.m. UTC | #4
On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> > > >
> > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > > > with mismatched mode.
> > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> > > >
> > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > > >   Ok for trunk?
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >         PR middle-end/102080
> > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> > > >         * match.pd: Check the type of mask while generating cond_op in
> > > >         gimple simplication.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >         PR middle-end/102080
> > > >         * gcc.target/i386/pr102080.c: New test.
> > > > ---
> > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> > > >  gcc/internal-fn.h                        |  1 +
> > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > > >
> > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > > > index 1360a00f0b9..8b2b65db1a7 100644
> > > > --- a/gcc/internal-fn.c
> > > > +++ b/gcc/internal-fn.c
> > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> > > >  }
> > > >
> > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > > > +   doesn't check if mask type matches.  */
> > > > +bool
> > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > > > +                                        tree mask_type)
> > > > +{
> > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > > > +    return false;
> > > > +
> > > > +  machine_mode mask_mode;
> > > > +  machine_mode vmode = TYPE_MODE (type);
> > > > +  int size1, size2;
> > > > +  if (VECTOR_MODE_P (vmode)
> > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > > > +      && size1 != size2)
> > >
> > > Why do we check for equal size rather than just mode equality which
> > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> > not QImode, Changed the patch to check mode equality.
> > Update patch.
>
> Looking at all this it seems the match.pd patterns should have not
> used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> which is equivalent here because we're always working with vector modes?
>
> And then shouldn't we look at the actual optab whether the mask mode matches
> the expectation rather than going around via the target hook which may not have
> enough context to decide which mask mode to use?
How about this?

+/* Return true if target supports cond_op with data TYPE and
+   mask MASK_TYPE.  */
+bool
+cond_internal_fn_supported_p (internal_fn ifn, tree type,
+       tree mask_type)
+{
+  tree_pair types = tree_pair (type, type);
+  optab tmp = direct_internal_fn_optab (ifn, types);
+  machine_mode vmode = TYPE_MODE (type);
+  insn_code icode = direct_optab_handler (tmp, vmode);
+  if (icode == CODE_FOR_nothing)
+    return false;
+
+  machine_mode mask_mode = TYPE_MODE (mask_type);
+  /* Can't create rtx and use insn_operand_matches here.  */
+  return insn_data[icode].operand[0].mode == vmode
+    && insn_data[icode].operand[1].mode == mask_mode;
+}
+

Update patch

>
> In any case if the approach of the patch is correct shouldn't it do
>
>   if (VECTOR_MODE_P (vmode)
>       && (!targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
>              || mask_mode != TYPE_MODE (mask_type)))
>     return false;
>
> that is, not return true if there's no mask mode for the data mode?
>
> Given the first observation should we call the function
> direct_cond_internal_fn_supported_p () instead and as to the second
> observation, look at the optab operands mode?
>
> Richard.
>
> > > I think would work for non-constant sized modes as well?  And when
> > > using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
> > > GET_MODE_SIZE (TYPE_MODE (mask_type)))
> > >
> > > Thanks,
> > > Richard.
> > >
> > > > +    return false;
> > > > +
> > > > +  return true;
> > > > +}
> > > > +
> > > >  /* If TYPE is a vector type, return true if IFN is a direct internal
> > > >     function that is supported for that type.  If TYPE is a scalar type,
> > > >     return true if IFN is a direct internal function that is supported for
> > > > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> > > > index 19d0f849a5a..f0aea00103c 100644
> > > > --- a/gcc/internal-fn.h
> > > > +++ b/gcc/internal-fn.h
> > > > @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
> > > >  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
> > > >
> > > >  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> > > > +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
> > > >
> > > >  #endif
> > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > index e5bbb123a6a..72b1bc674db 100644
> > > > --- a/gcc/match.pd
> > > > +++ b/gcc/match.pd
> > > > @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > >       cond_op (COND_BINARY)
> > > >   (simplify
> > > >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > +  (with { tree op_type = TREE_TYPE (@4);
> > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > +                                                op_type, mask_type)
> > > >         && element_precision (type) == element_precision (op_type))
> > > >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> > > >   (simplify
> > > >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > +  (with { tree op_type = TREE_TYPE (@4);
> > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > +                                                op_type, mask_type)
> > > >         && element_precision (type) == element_precision (op_type))
> > > >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> > > >
> > > > @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > >       cond_op (COND_TERNARY)
> > > >   (simplify
> > > >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > +  (with { tree op_type = TREE_TYPE (@5);
> > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > +                                                op_type, mask_type)
> > > >         && element_precision (type) == element_precision (op_type))
> > > >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> > > >   (simplify
> > > >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > +  (with { tree op_type = TREE_TYPE (@5);
> > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > +                                                op_type, mask_type)
> > > >         && element_precision (type) == element_precision (op_type))
> > > >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> > > >                   (view_convert:op_type @1)))))))
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > > new file mode 100644
> > > > index 00000000000..6a40a75e1c5
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > > @@ -0,0 +1,16 @@
> > > > +#include<immintrin.h>
> > > > +typedef float __m256 __attribute__((__vector_size__(32)));
> > > > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > > > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > > > +
> > > > +void
> > > > +__attribute__ ((target("avx")))
> > > > +IfThenElse (__m256 no) {
> > > > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > > > +}
> > > > +void
> > > > +__attribute__ ((target("avx512vl")))
> > > > +EncodedFromDisplay() {
> > > > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > > > +  IfThenElse(__trans_tmp_11);
> > > > +}
> > > > --
> > > > 2.18.1
> > > >
> >
> >
> >
> > --
> > BR,
> > Hongtao
Richard Biener Sept. 1, 2021, 11:14 a.m. UTC | #5
On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> > > > >
> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > > > > with mismatched mode.
> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> > > > >
> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > > > >   Ok for trunk?
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > >         PR middle-end/102080
> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> > > > >         * match.pd: Check the type of mask while generating cond_op in
> > > > >         gimple simplication.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > >         PR middle-end/102080
> > > > >         * gcc.target/i386/pr102080.c: New test.
> > > > > ---
> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> > > > >  gcc/internal-fn.h                        |  1 +
> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > > > >
> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> > > > > --- a/gcc/internal-fn.c
> > > > > +++ b/gcc/internal-fn.c
> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> > > > >  }
> > > > >
> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > > > > +   doesn't check if mask type matches.  */
> > > > > +bool
> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > > > > +                                        tree mask_type)
> > > > > +{
> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > > > > +    return false;
> > > > > +
> > > > > +  machine_mode mask_mode;
> > > > > +  machine_mode vmode = TYPE_MODE (type);
> > > > > +  int size1, size2;
> > > > > +  if (VECTOR_MODE_P (vmode)
> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > > > > +      && size1 != size2)
> > > >
> > > > Why do we check for equal size rather than just mode equality which
> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> > > not QImode, Changed the patch to check mode equality.
> > > Update patch.
> >
> > Looking at all this it seems the match.pd patterns should have not
> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> > which is equivalent here because we're always working with vector modes?
> >
> > And then shouldn't we look at the actual optab whether the mask mode matches
> > the expectation rather than going around via the target hook which may not have
> > enough context to decide which mask mode to use?
> How about this?
>
> +/* Return true if target supports cond_op with data TYPE and
> +   mask MASK_TYPE.  */
> +bool
> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> +       tree mask_type)
> +{
> +  tree_pair types = tree_pair (type, type);
> +  optab tmp = direct_internal_fn_optab (ifn, types);
> +  machine_mode vmode = TYPE_MODE (type);
> +  insn_code icode = direct_optab_handler (tmp, vmode);
> +  if (icode == CODE_FOR_nothing)
> +    return false;
> +
> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> +  /* Can't create rtx and use insn_operand_matches here.  */
> +  return insn_data[icode].operand[0].mode == vmode
> +    && insn_data[icode].operand[1].mode == mask_mode;
> +}
> +

Yeah, sth like that, though the operand[0].mode test should be
redudnant.  I think we should assert or have a whiltelist
for the internal function we support to be queried this way.
Not sure if we can directly access the 'cond_binary/cond_ternary'
classification used in internal-fn.def, that would be best.

Richard, what are your thoughts about all this?

Thanks,
Richard.

> Update patch
>
> >
> > In any case if the approach of the patch is correct shouldn't it do
> >
> >   if (VECTOR_MODE_P (vmode)
> >       && (!targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> >              || mask_mode != TYPE_MODE (mask_type)))
> >     return false;
> >
> > that is, not return true if there's no mask mode for the data mode?
> >
> > Given the first observation should we call the function
> > direct_cond_internal_fn_supported_p () instead and as to the second
> > observation, look at the optab operands mode?
> >
> > Richard.
> >
> > > > I think would work for non-constant sized modes as well?  And when
> > > > using sizes you'd instead use maybe_ne (GET_MODE_SIZE (mask_mode),
> > > > GET_MODE_SIZE (TYPE_MODE (mask_type)))
> > > >
> > > > Thanks,
> > > > Richard.
> > > >
> > > > > +    return false;
> > > > > +
> > > > > +  return true;
> > > > > +}
> > > > > +
> > > > >  /* If TYPE is a vector type, return true if IFN is a direct internal
> > > > >     function that is supported for that type.  If TYPE is a scalar type,
> > > > >     return true if IFN is a direct internal function that is supported for
> > > > > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> > > > > index 19d0f849a5a..f0aea00103c 100644
> > > > > --- a/gcc/internal-fn.h
> > > > > +++ b/gcc/internal-fn.h
> > > > > @@ -236,5 +236,6 @@ extern void expand_PHI (internal_fn, gcall *);
> > > > >  extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
> > > > >
> > > > >  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> > > > > +extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
> > > > >
> > > > >  #endif
> > > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > > index e5bbb123a6a..72b1bc674db 100644
> > > > > --- a/gcc/match.pd
> > > > > +++ b/gcc/match.pd
> > > > > @@ -6987,14 +6987,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > > >       cond_op (COND_BINARY)
> > > > >   (simplify
> > > > >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > > > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > > +  (with { tree op_type = TREE_TYPE (@4);
> > > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > > +                                                op_type, mask_type)
> > > > >         && element_precision (type) == element_precision (op_type))
> > > > >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> > > > >   (simplify
> > > > >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > > > > -  (with { tree op_type = TREE_TYPE (@4); }
> > > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > > +  (with { tree op_type = TREE_TYPE (@4);
> > > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > > +                                                op_type, mask_type)
> > > > >         && element_precision (type) == element_precision (op_type))
> > > > >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> > > > >
> > > > > @@ -7003,14 +7007,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > > >       cond_op (COND_TERNARY)
> > > > >   (simplify
> > > > >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > > > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > > +  (with { tree op_type = TREE_TYPE (@5);
> > > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > > +                                                op_type, mask_type)
> > > > >         && element_precision (type) == element_precision (op_type))
> > > > >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> > > > >   (simplify
> > > > >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > > > > -  (with { tree op_type = TREE_TYPE (@5); }
> > > > > -   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > > > +  (with { tree op_type = TREE_TYPE (@5);
> > > > > +         tree mask_type = TREE_TYPE (@0); }
> > > > > +   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
> > > > > +                                                op_type, mask_type)
> > > > >         && element_precision (type) == element_precision (op_type))
> > > > >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> > > > >                   (view_convert:op_type @1)))))))
> > > > > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > > > new file mode 100644
> > > > > index 00000000000..6a40a75e1c5
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > > > @@ -0,0 +1,16 @@
> > > > > +#include<immintrin.h>
> > > > > +typedef float __m256 __attribute__((__vector_size__(32)));
> > > > > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > > > > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > > > > +
> > > > > +void
> > > > > +__attribute__ ((target("avx")))
> > > > > +IfThenElse (__m256 no) {
> > > > > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > > > > +}
> > > > > +void
> > > > > +__attribute__ ((target("avx512vl")))
> > > > > +EncodedFromDisplay() {
> > > > > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > > > > +  IfThenElse(__trans_tmp_11);
> > > > > +}
> > > > > --
> > > > > 2.18.1
> > > > >
> > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
>
>
>
> --
> BR,
> Hongtao
Richard Sandiford Sept. 1, 2021, 12:52 p.m. UTC | #6
Richard Biener <richard.guenther@gmail.com> writes:
> On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
>>
>> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
>> <richard.guenther@gmail.com> wrote:
>> >
>> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
>> > >
>> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
>> > > <gcc-patches@gcc.gnu.org> wrote:
>> > > >
>> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
>> > > > >
>> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
>> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
>> > > > > with mismatched mode.
>> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
>> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
>> > > > >
>> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>> > > > >   Ok for trunk?
>> > > > >
>> > > > > gcc/ChangeLog:
>> > > > >
>> > > > >         PR middle-end/102080
>> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
>> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
>> > > > >         * match.pd: Check the type of mask while generating cond_op in
>> > > > >         gimple simplication.
>> > > > >
>> > > > > gcc/testsuite/ChangeLog:
>> > > > >
>> > > > >         PR middle-end/102080
>> > > > >         * gcc.target/i386/pr102080.c: New test.
>> > > > > ---
>> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
>> > > > >  gcc/internal-fn.h                        |  1 +
>> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
>> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
>> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
>> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
>> > > > >
>> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> > > > > index 1360a00f0b9..8b2b65db1a7 100644
>> > > > > --- a/gcc/internal-fn.c
>> > > > > +++ b/gcc/internal-fn.c
>> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
>> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
>> > > > >  }
>> > > > >
>> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
>> > > > > +   doesn't check if mask type matches.  */
>> > > > > +bool
>> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
>> > > > > +                                        tree mask_type)
>> > > > > +{
>> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
>> > > > > +    return false;
>> > > > > +
>> > > > > +  machine_mode mask_mode;
>> > > > > +  machine_mode vmode = TYPE_MODE (type);
>> > > > > +  int size1, size2;
>> > > > > +  if (VECTOR_MODE_P (vmode)
>> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
>> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
>> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
>> > > > > +      && size1 != size2)
>> > > >
>> > > > Why do we check for equal size rather than just mode equality which
>> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
>> > > not QImode, Changed the patch to check mode equality.
>> > > Update patch.
>> >
>> > Looking at all this it seems the match.pd patterns should have not
>> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
>> > which is equivalent here because we're always working with vector modes?

Yeah, looks like it.

>> > And then shouldn't we look at the actual optab whether the mask mode matches
>> > the expectation rather than going around via the target hook which may not have
>> > enough context to decide which mask mode to use?
>> How about this?
>>
>> +/* Return true if target supports cond_op with data TYPE and
>> +   mask MASK_TYPE.  */
>> +bool
>> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
>> +       tree mask_type)
>> +{
>> +  tree_pair types = tree_pair (type, type);
>> +  optab tmp = direct_internal_fn_optab (ifn, types);
>> +  machine_mode vmode = TYPE_MODE (type);
>> +  insn_code icode = direct_optab_handler (tmp, vmode);
>> +  if (icode == CODE_FOR_nothing)
>> +    return false;
>> +
>> +  machine_mode mask_mode = TYPE_MODE (mask_type);
>> +  /* Can't create rtx and use insn_operand_matches here.  */
>> +  return insn_data[icode].operand[0].mode == vmode
>> +    && insn_data[icode].operand[1].mode == mask_mode;
>> +}
>> +
>
> Yeah, sth like that, though the operand[0].mode test should be
> redudnant.  I think we should assert or have a whiltelist
> for the internal function we support to be queried this way.
> Not sure if we can directly access the 'cond_binary/cond_ternary'
> classification used in internal-fn.def, that would be best.
>
> Richard, what are your thoughts about all this?

IMO using get_mask_mode was right.  The optab documentation says:

  Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
  integer if @var{m} is scalar, otherwise it has the mode returned by
  @code{TARGET_VECTORIZE_GET_MASK_MODE}.

Allowing targets to use optabs to enforce different mask modes for
different operations would open up a mess of combinations.

In other words, I think cond_vectorized_internal_fn_supported_p
is really testing two things:

(1) is the mask type/vector type combination well-formed?
(2) is the internal function supported for the vector type?

where (1) is a gimple question and (2) is a target question.

I guess there's an argument that (1) should be part of the match.pd
condition instead, alongside the element_precision check.  That would
add to the cut-&-paste though. :-(

Alternatively, I guess we would add:

  bool is_truth_type_for (tree type, tree truth_type);

to return true if truth_type is equal to truth_type_for (type)
(but without having to call truth_type_for).  We could then use:

  is_truth_type_for (op_type, TREE_TYPE (@0))

instead of:

  element_precision (type) == element_precision (op_type)

since it should be a strictly stronger condition.

Thanks,
Richard
Richard Biener Sept. 1, 2021, 1:33 p.m. UTC | #7
On Wed, Sep 1, 2021 at 2:52 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Richard Biener <richard.guenther@gmail.com> writes:
> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >>
> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> >> <richard.guenther@gmail.com> wrote:
> >> >
> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >> > >
> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> >> > > <gcc-patches@gcc.gnu.org> wrote:
> >> > > >
> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> >> > > > >
> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> >> > > > > with mismatched mode.
> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> >> > > > >
> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >> > > > >   Ok for trunk?
> >> > > > >
> >> > > > > gcc/ChangeLog:
> >> > > > >
> >> > > > >         PR middle-end/102080
> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
> >> > > > >         gimple simplication.
> >> > > > >
> >> > > > > gcc/testsuite/ChangeLog:
> >> > > > >
> >> > > > >         PR middle-end/102080
> >> > > > >         * gcc.target/i386/pr102080.c: New test.
> >> > > > > ---
> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> >> > > > >  gcc/internal-fn.h                        |  1 +
> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >> > > > >
> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> >> > > > > --- a/gcc/internal-fn.c
> >> > > > > +++ b/gcc/internal-fn.c
> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> >> > > > >  }
> >> > > > >
> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> >> > > > > +   doesn't check if mask type matches.  */
> >> > > > > +bool
> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> >> > > > > +                                        tree mask_type)
> >> > > > > +{
> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> >> > > > > +    return false;
> >> > > > > +
> >> > > > > +  machine_mode mask_mode;
> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
> >> > > > > +  int size1, size2;
> >> > > > > +  if (VECTOR_MODE_P (vmode)
> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> >> > > > > +      && size1 != size2)
> >> > > >
> >> > > > Why do we check for equal size rather than just mode equality which
> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> >> > > not QImode, Changed the patch to check mode equality.
> >> > > Update patch.
> >> >
> >> > Looking at all this it seems the match.pd patterns should have not
> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> >> > which is equivalent here because we're always working with vector modes?
>
> Yeah, looks like it.
>
> >> > And then shouldn't we look at the actual optab whether the mask mode matches
> >> > the expectation rather than going around via the target hook which may not have
> >> > enough context to decide which mask mode to use?
> >> How about this?
> >>
> >> +/* Return true if target supports cond_op with data TYPE and
> >> +   mask MASK_TYPE.  */
> >> +bool
> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> >> +       tree mask_type)
> >> +{
> >> +  tree_pair types = tree_pair (type, type);
> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
> >> +  machine_mode vmode = TYPE_MODE (type);
> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
> >> +  if (icode == CODE_FOR_nothing)
> >> +    return false;
> >> +
> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> >> +  /* Can't create rtx and use insn_operand_matches here.  */
> >> +  return insn_data[icode].operand[0].mode == vmode
> >> +    && insn_data[icode].operand[1].mode == mask_mode;
> >> +}
> >> +
> >
> > Yeah, sth like that, though the operand[0].mode test should be
> > redudnant.  I think we should assert or have a whiltelist
> > for the internal function we support to be queried this way.
> > Not sure if we can directly access the 'cond_binary/cond_ternary'
> > classification used in internal-fn.def, that would be best.
> >
> > Richard, what are your thoughts about all this?
>
> IMO using get_mask_mode was right.  The optab documentation says:
>
>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
>   integer if @var{m} is scalar, otherwise it has the mode returned by
>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
>
> Allowing targets to use optabs to enforce different mask modes for
> different operations would open up a mess of combinations.

Meh!  And I thought this was the way out of my AVX512 vs AVX2 mask
mess with mask_gather_load ...

> In other words, I think cond_vectorized_internal_fn_supported_p
> is really testing two things:
>
> (1) is the mask type/vector type combination well-formed?

But can we always determine this without looking at the actual
operation?  That is, do we force targets to be consistent here
when writing their machine description and supporting more
than one mask variant?

> (2) is the internal function supported for the vector type?
>
> where (1) is a gimple question and (2) is a target question.
>
> I guess there's an argument that (1) should be part of the match.pd
> condition instead, alongside the element_precision check.  That would
> add to the cut-&-paste though. :-(
>
> Alternatively, I guess we would add:
>
>   bool is_truth_type_for (tree type, tree truth_type);
>
> to return true if truth_type is equal to truth_type_for (type)
> (but without having to call truth_type_for).  We could then use:
>
>   is_truth_type_for (op_type, TREE_TYPE (@0))
>
> instead of:
>
>   element_precision (type) == element_precision (op_type)
>
> since it should be a strictly stronger condition.

I guess that would work, yes.  Won't exactly help me with
mask_gather_load but for the cond_* IFNs it's certainly good.

Thanks,
Richard.

>
> Thanks,
> Richard
Hongtao Liu Sept. 2, 2021, 6:42 a.m. UTC | #8
On Wed, Sep 1, 2021 at 8:52 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Richard Biener <richard.guenther@gmail.com> writes:
> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >>
> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> >> <richard.guenther@gmail.com> wrote:
> >> >
> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >> > >
> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> >> > > <gcc-patches@gcc.gnu.org> wrote:
> >> > > >
> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> >> > > > >
> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> >> > > > > with mismatched mode.
> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> >> > > > >
> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >> > > > >   Ok for trunk?
> >> > > > >
> >> > > > > gcc/ChangeLog:
> >> > > > >
> >> > > > >         PR middle-end/102080
> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
> >> > > > >         gimple simplication.
> >> > > > >
> >> > > > > gcc/testsuite/ChangeLog:
> >> > > > >
> >> > > > >         PR middle-end/102080
> >> > > > >         * gcc.target/i386/pr102080.c: New test.
> >> > > > > ---
> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> >> > > > >  gcc/internal-fn.h                        |  1 +
> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >> > > > >
> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> >> > > > > --- a/gcc/internal-fn.c
> >> > > > > +++ b/gcc/internal-fn.c
> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> >> > > > >  }
> >> > > > >
> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> >> > > > > +   doesn't check if mask type matches.  */
> >> > > > > +bool
> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> >> > > > > +                                        tree mask_type)
> >> > > > > +{
> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> >> > > > > +    return false;
> >> > > > > +
> >> > > > > +  machine_mode mask_mode;
> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
> >> > > > > +  int size1, size2;
> >> > > > > +  if (VECTOR_MODE_P (vmode)
> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> >> > > > > +      && size1 != size2)
> >> > > >
> >> > > > Why do we check for equal size rather than just mode equality which
> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> >> > > not QImode, Changed the patch to check mode equality.
> >> > > Update patch.
> >> >
> >> > Looking at all this it seems the match.pd patterns should have not
> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> >> > which is equivalent here because we're always working with vector modes?
>
> Yeah, looks like it.
>
> >> > And then shouldn't we look at the actual optab whether the mask mode matches
> >> > the expectation rather than going around via the target hook which may not have
> >> > enough context to decide which mask mode to use?
> >> How about this?
> >>
> >> +/* Return true if target supports cond_op with data TYPE and
> >> +   mask MASK_TYPE.  */
> >> +bool
> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> >> +       tree mask_type)
> >> +{
> >> +  tree_pair types = tree_pair (type, type);
> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
> >> +  machine_mode vmode = TYPE_MODE (type);
> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
> >> +  if (icode == CODE_FOR_nothing)
> >> +    return false;
> >> +
> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> >> +  /* Can't create rtx and use insn_operand_matches here.  */
> >> +  return insn_data[icode].operand[0].mode == vmode
> >> +    && insn_data[icode].operand[1].mode == mask_mode;
> >> +}
> >> +
> >
> > Yeah, sth like that, though the operand[0].mode test should be
> > redudnant.  I think we should assert or have a whiltelist
> > for the internal function we support to be queried this way.
> > Not sure if we can directly access the 'cond_binary/cond_ternary'
> > classification used in internal-fn.def, that would be best.
> >
> > Richard, what are your thoughts about all this?
>
> IMO using get_mask_mode was right.  The optab documentation says:
>
>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
>   integer if @var{m} is scalar, otherwise it has the mode returned by
>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
>
> Allowing targets to use optabs to enforce different mask modes for
> different operations would open up a mess of combinations.
>
> In other words, I think cond_vectorized_internal_fn_supported_p
> is really testing two things:
>
> (1) is the mask type/vector type combination well-formed?
> (2) is the internal function supported for the vector type?
>
> where (1) is a gimple question and (2) is a target question.
>
> I guess there's an argument that (1) should be part of the match.pd
> condition instead, alongside the element_precision check.  That would
> add to the cut-&-paste though. :-(
>
> Alternatively, I guess we would add:
>
>   bool is_truth_type_for (tree type, tree truth_type);
>
> to return true if truth_type is equal to truth_type_for (type)
> (but without having to call truth_type_for).  We could then use:
>
>   is_truth_type_for (op_type, TREE_TYPE (@0))
>
> instead of:
>
>   element_precision (type) == element_precision (op_type)
>
> since it should be a strictly stronger condition.
Thanks for your advice, it sounds more reasonable.
Here is the updated patch.
>
> Thanks,
> Richard
Richard Sandiford Sept. 2, 2021, 5:54 p.m. UTC | #9
Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> On Wed, Sep 1, 2021 at 8:52 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Richard Biener <richard.guenther@gmail.com> writes:
>> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
>> >>
>> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
>> >> <richard.guenther@gmail.com> wrote:
>> >> >
>> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
>> >> > >
>> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
>> >> > > <gcc-patches@gcc.gnu.org> wrote:
>> >> > > >
>> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
>> >> > > > >
>> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
>> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
>> >> > > > > with mismatched mode.
>> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
>> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
>> >> > > > >
>> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>> >> > > > >   Ok for trunk?
>> >> > > > >
>> >> > > > > gcc/ChangeLog:
>> >> > > > >
>> >> > > > >         PR middle-end/102080
>> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
>> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
>> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
>> >> > > > >         gimple simplication.
>> >> > > > >
>> >> > > > > gcc/testsuite/ChangeLog:
>> >> > > > >
>> >> > > > >         PR middle-end/102080
>> >> > > > >         * gcc.target/i386/pr102080.c: New test.
>> >> > > > > ---
>> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
>> >> > > > >  gcc/internal-fn.h                        |  1 +
>> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
>> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
>> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
>> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
>> >> > > > >
>> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
>> >> > > > > --- a/gcc/internal-fn.c
>> >> > > > > +++ b/gcc/internal-fn.c
>> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
>> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
>> >> > > > >  }
>> >> > > > >
>> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
>> >> > > > > +   doesn't check if mask type matches.  */
>> >> > > > > +bool
>> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
>> >> > > > > +                                        tree mask_type)
>> >> > > > > +{
>> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
>> >> > > > > +    return false;
>> >> > > > > +
>> >> > > > > +  machine_mode mask_mode;
>> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
>> >> > > > > +  int size1, size2;
>> >> > > > > +  if (VECTOR_MODE_P (vmode)
>> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
>> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
>> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
>> >> > > > > +      && size1 != size2)
>> >> > > >
>> >> > > > Why do we check for equal size rather than just mode equality which
>> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
>> >> > > not QImode, Changed the patch to check mode equality.
>> >> > > Update patch.
>> >> >
>> >> > Looking at all this it seems the match.pd patterns should have not
>> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
>> >> > which is equivalent here because we're always working with vector modes?
>>
>> Yeah, looks like it.
>>
>> >> > And then shouldn't we look at the actual optab whether the mask mode matches
>> >> > the expectation rather than going around via the target hook which may not have
>> >> > enough context to decide which mask mode to use?
>> >> How about this?
>> >>
>> >> +/* Return true if target supports cond_op with data TYPE and
>> >> +   mask MASK_TYPE.  */
>> >> +bool
>> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
>> >> +       tree mask_type)
>> >> +{
>> >> +  tree_pair types = tree_pair (type, type);
>> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
>> >> +  machine_mode vmode = TYPE_MODE (type);
>> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
>> >> +  if (icode == CODE_FOR_nothing)
>> >> +    return false;
>> >> +
>> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
>> >> +  /* Can't create rtx and use insn_operand_matches here.  */
>> >> +  return insn_data[icode].operand[0].mode == vmode
>> >> +    && insn_data[icode].operand[1].mode == mask_mode;
>> >> +}
>> >> +
>> >
>> > Yeah, sth like that, though the operand[0].mode test should be
>> > redudnant.  I think we should assert or have a whiltelist
>> > for the internal function we support to be queried this way.
>> > Not sure if we can directly access the 'cond_binary/cond_ternary'
>> > classification used in internal-fn.def, that would be best.
>> >
>> > Richard, what are your thoughts about all this?
>>
>> IMO using get_mask_mode was right.  The optab documentation says:
>>
>>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
>>   integer if @var{m} is scalar, otherwise it has the mode returned by
>>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
>>
>> Allowing targets to use optabs to enforce different mask modes for
>> different operations would open up a mess of combinations.
>>
>> In other words, I think cond_vectorized_internal_fn_supported_p
>> is really testing two things:
>>
>> (1) is the mask type/vector type combination well-formed?
>> (2) is the internal function supported for the vector type?
>>
>> where (1) is a gimple question and (2) is a target question.
>>
>> I guess there's an argument that (1) should be part of the match.pd
>> condition instead, alongside the element_precision check.  That would
>> add to the cut-&-paste though. :-(
>>
>> Alternatively, I guess we would add:
>>
>>   bool is_truth_type_for (tree type, tree truth_type);
>>
>> to return true if truth_type is equal to truth_type_for (type)
>> (but without having to call truth_type_for).  We could then use:
>>
>>   is_truth_type_for (op_type, TREE_TYPE (@0))
>>
>> instead of:
>>
>>   element_precision (type) == element_precision (op_type)
>>
>> since it should be a strictly stronger condition.
> Thanks for your advice, it sounds more reasonable.
> Here is the updated patch.
>>
>> Thanks,
>> Richard
>
>
>
> -- 
> BR,
> Hongtao
>
> From ae192d4a164b8d73adb06d6e28864f717741158c Mon Sep 17 00:00:00 2001
> From: liuhongt <hongtao.liu@intel.com>
> Date: Thu, 2 Sep 2021 13:05:54 +0800
> Subject: [PATCH v4] Check mask type when doing cond_op related gimple
>  simplification.
>
> gcc/ChangeLog:
>
> 	PR middle-end/102080
> 	* match.pd: Check mask type when doing cond_op related gimple
> 	simplification.
> 	* tree.c (is_truth_type_for): New function.
> 	* tree.h (is_truth_type_for): New declaration.
>
> gcc/testsuite/ChangeLog:
>
> 	PR middle-end/102080
> 	* gcc.target/i386/pr102080.c: New test.
> ---
>  gcc/match.pd                             |  8 ++++----
>  gcc/testsuite/gcc.target/i386/pr102080.c | 19 +++++++++++++++++++
>  gcc/tree.c                               |  7 +++++++
>  gcc/tree.h                               |  1 +
>  4 files changed, 31 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index f421c74b62c..c9a27f46ed2 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6988,13 +6988,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
>    (with { tree op_type = TREE_TYPE (@4); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -	&& element_precision (type) == element_precision (op_type))
> +	&& is_truth_type_for (type, TREE_TYPE (@0)))

Why pass “type” rather than “op_type”?  “type” is the type of the
original expression, and if the original vec_cond is well-formed,
@0 should already have the right truth type for “type”.  Here we're
trying to convert the expression into a conditional operation on
“op_type”, so we need to test whether that's valid.

>      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
>    (with { tree op_type = TREE_TYPE (@4); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -	&& element_precision (type) == element_precision (op_type))
> +	&& is_truth_type_for (type, TREE_TYPE (@0)))
>      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
>  
>  /* Same for ternary operations.  */
> @@ -7004,13 +7004,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
>    (with { tree op_type = TREE_TYPE (@5); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -	&& element_precision (type) == element_precision (op_type))
> +	&& is_truth_type_for (type, TREE_TYPE (@0)))
>      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
>   (simplify
>    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
>    (with { tree op_type = TREE_TYPE (@5); }
>     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> -	&& element_precision (type) == element_precision (op_type))
> +	&& is_truth_type_for (type, TREE_TYPE (@0)))
>      (view_convert (cond_op (bit_not @0) @2 @3 @4
>  		  (view_convert:op_type @1)))))))
>  #endif
> diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> new file mode 100644
> index 00000000000..4c5ee32ee63
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include<immintrin.h>
> +typedef float __m256 __attribute__((__vector_size__(32)));
> +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> +
> +void
> +__attribute__ ((target("avx")))
> +IfThenElse (__m256 no) {
> +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> +}
> +void
> +__attribute__ ((target("avx512vl")))
> +EncodedFromDisplay() {
> +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> +  IfThenElse(__trans_tmp_11);
> +}
> diff --git a/gcc/tree.c b/gcc/tree.c
> index cba3bca41b3..88c2221eabb 100644
> --- a/gcc/tree.c
> +++ b/gcc/tree.c
> @@ -10723,6 +10723,13 @@ signed_type_for (tree type)
>    return signed_or_unsigned_type_for (0, type);
>  }
>  
> +bool
> +is_truth_type_for (tree type, tree truth_type)
> +{
> +  tree tmp = truth_type_for (type);
> +  return tmp == truth_type;
> +}

The idea was to try to avoid calling truth_type to create a type.
Instead we can use similar logic to truth_type to tell whether type
has the right form.  I think the rules are:

- For VECTOR_TYPEs:
  - The truth type must be a VECTOR_BOOLEAN_TYPE.
  - The number of elements must match (known_eq).
  - Also:
    - If !VECTOR_BOOLEAN_TYPE_P and targetm.vectorize.get_mask_mode
      exists, the truth type must have that mode.
    - Otherwise, the types must have the same size.
- Otherwise, the truth type must be a BOOLEAN_TYPE.

(Richi please correct me if I'm wrong.)

Thanks,
Richard


> +
>  /* If TYPE is a vector type, return a signed integer vector type with the
>     same width and number of subparts. Otherwise return boolean_type_node.  */
>  
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 060a41f6991..c8542bfd476 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4556,6 +4556,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
>  extern tree signed_or_unsigned_type_for (int, tree);
>  extern tree signed_type_for (tree);
>  extern tree unsigned_type_for (tree);
> +bool is_truth_type_for (tree, tree);
>  extern tree truth_type_for (tree);
>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>  extern tree build_pointer_type (tree);
Richard Biener Sept. 6, 2021, 6:54 a.m. UTC | #10
On Thu, Sep 2, 2021 at 7:54 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > On Wed, Sep 1, 2021 at 8:52 PM Richard Sandiford
> > <richard.sandiford@arm.com> wrote:
> >>
> >> Richard Biener <richard.guenther@gmail.com> writes:
> >> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >> >>
> >> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> >> >> <richard.guenther@gmail.com> wrote:
> >> >> >
> >> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >> >> > >
> >> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> >> >> > > <gcc-patches@gcc.gnu.org> wrote:
> >> >> > > >
> >> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> >> >> > > > >
> >> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> >> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> >> >> > > > > with mismatched mode.
> >> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> >> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> >> >> > > > >
> >> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >> >> > > > >   Ok for trunk?
> >> >> > > > >
> >> >> > > > > gcc/ChangeLog:
> >> >> > > > >
> >> >> > > > >         PR middle-end/102080
> >> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> >> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> >> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
> >> >> > > > >         gimple simplication.
> >> >> > > > >
> >> >> > > > > gcc/testsuite/ChangeLog:
> >> >> > > > >
> >> >> > > > >         PR middle-end/102080
> >> >> > > > >         * gcc.target/i386/pr102080.c: New test.
> >> >> > > > > ---
> >> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> >> >> > > > >  gcc/internal-fn.h                        |  1 +
> >> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> >> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> >> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> >> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >> >> > > > >
> >> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> >> >> > > > > --- a/gcc/internal-fn.c
> >> >> > > > > +++ b/gcc/internal-fn.c
> >> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> >> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> >> >> > > > >  }
> >> >> > > > >
> >> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> >> >> > > > > +   doesn't check if mask type matches.  */
> >> >> > > > > +bool
> >> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> >> >> > > > > +                                        tree mask_type)
> >> >> > > > > +{
> >> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> >> >> > > > > +    return false;
> >> >> > > > > +
> >> >> > > > > +  machine_mode mask_mode;
> >> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
> >> >> > > > > +  int size1, size2;
> >> >> > > > > +  if (VECTOR_MODE_P (vmode)
> >> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> >> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> >> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> >> >> > > > > +      && size1 != size2)
> >> >> > > >
> >> >> > > > Why do we check for equal size rather than just mode equality which
> >> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> >> >> > > not QImode, Changed the patch to check mode equality.
> >> >> > > Update patch.
> >> >> >
> >> >> > Looking at all this it seems the match.pd patterns should have not
> >> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> >> >> > which is equivalent here because we're always working with vector modes?
> >>
> >> Yeah, looks like it.
> >>
> >> >> > And then shouldn't we look at the actual optab whether the mask mode matches
> >> >> > the expectation rather than going around via the target hook which may not have
> >> >> > enough context to decide which mask mode to use?
> >> >> How about this?
> >> >>
> >> >> +/* Return true if target supports cond_op with data TYPE and
> >> >> +   mask MASK_TYPE.  */
> >> >> +bool
> >> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> >> >> +       tree mask_type)
> >> >> +{
> >> >> +  tree_pair types = tree_pair (type, type);
> >> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
> >> >> +  machine_mode vmode = TYPE_MODE (type);
> >> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
> >> >> +  if (icode == CODE_FOR_nothing)
> >> >> +    return false;
> >> >> +
> >> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> >> >> +  /* Can't create rtx and use insn_operand_matches here.  */
> >> >> +  return insn_data[icode].operand[0].mode == vmode
> >> >> +    && insn_data[icode].operand[1].mode == mask_mode;
> >> >> +}
> >> >> +
> >> >
> >> > Yeah, sth like that, though the operand[0].mode test should be
> >> > redudnant.  I think we should assert or have a whiltelist
> >> > for the internal function we support to be queried this way.
> >> > Not sure if we can directly access the 'cond_binary/cond_ternary'
> >> > classification used in internal-fn.def, that would be best.
> >> >
> >> > Richard, what are your thoughts about all this?
> >>
> >> IMO using get_mask_mode was right.  The optab documentation says:
> >>
> >>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
> >>   integer if @var{m} is scalar, otherwise it has the mode returned by
> >>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
> >>
> >> Allowing targets to use optabs to enforce different mask modes for
> >> different operations would open up a mess of combinations.
> >>
> >> In other words, I think cond_vectorized_internal_fn_supported_p
> >> is really testing two things:
> >>
> >> (1) is the mask type/vector type combination well-formed?
> >> (2) is the internal function supported for the vector type?
> >>
> >> where (1) is a gimple question and (2) is a target question.
> >>
> >> I guess there's an argument that (1) should be part of the match.pd
> >> condition instead, alongside the element_precision check.  That would
> >> add to the cut-&-paste though. :-(
> >>
> >> Alternatively, I guess we would add:
> >>
> >>   bool is_truth_type_for (tree type, tree truth_type);
> >>
> >> to return true if truth_type is equal to truth_type_for (type)
> >> (but without having to call truth_type_for).  We could then use:
> >>
> >>   is_truth_type_for (op_type, TREE_TYPE (@0))
> >>
> >> instead of:
> >>
> >>   element_precision (type) == element_precision (op_type)
> >>
> >> since it should be a strictly stronger condition.
> > Thanks for your advice, it sounds more reasonable.
> > Here is the updated patch.
> >>
> >> Thanks,
> >> Richard
> >
> >
> >
> > --
> > BR,
> > Hongtao
> >
> > From ae192d4a164b8d73adb06d6e28864f717741158c Mon Sep 17 00:00:00 2001
> > From: liuhongt <hongtao.liu@intel.com>
> > Date: Thu, 2 Sep 2021 13:05:54 +0800
> > Subject: [PATCH v4] Check mask type when doing cond_op related gimple
> >  simplification.
> >
> > gcc/ChangeLog:
> >
> >       PR middle-end/102080
> >       * match.pd: Check mask type when doing cond_op related gimple
> >       simplification.
> >       * tree.c (is_truth_type_for): New function.
> >       * tree.h (is_truth_type_for): New declaration.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       PR middle-end/102080
> >       * gcc.target/i386/pr102080.c: New test.
> > ---
> >  gcc/match.pd                             |  8 ++++----
> >  gcc/testsuite/gcc.target/i386/pr102080.c | 19 +++++++++++++++++++
> >  gcc/tree.c                               |  7 +++++++
> >  gcc/tree.h                               |  1 +
> >  4 files changed, 31 insertions(+), 4 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index f421c74b62c..c9a27f46ed2 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -6988,13 +6988,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> >    (with { tree op_type = TREE_TYPE (@4); }
> >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > -     && element_precision (type) == element_precision (op_type))
> > +     && is_truth_type_for (type, TREE_TYPE (@0)))
>
> Why pass “type” rather than “op_type”?  “type” is the type of the
> original expression, and if the original vec_cond is well-formed,
> @0 should already have the right truth type for “type”.  Here we're
> trying to convert the expression into a conditional operation on
> “op_type”, so we need to test whether that's valid.
>
> >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> >   (simplify
> >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> >    (with { tree op_type = TREE_TYPE (@4); }
> >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > -     && element_precision (type) == element_precision (op_type))
> > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> >
> >  /* Same for ternary operations.  */
> > @@ -7004,13 +7004,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> >    (with { tree op_type = TREE_TYPE (@5); }
> >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > -     && element_precision (type) == element_precision (op_type))
> > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> >   (simplify
> >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> >    (with { tree op_type = TREE_TYPE (@5); }
> >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > -     && element_precision (type) == element_precision (op_type))
> > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> >                 (view_convert:op_type @1)))))))
> >  #endif
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > new file mode 100644
> > index 00000000000..4c5ee32ee63
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include<immintrin.h>
> > +typedef float __m256 __attribute__((__vector_size__(32)));
> > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > +
> > +void
> > +__attribute__ ((target("avx")))
> > +IfThenElse (__m256 no) {
> > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > +}
> > +void
> > +__attribute__ ((target("avx512vl")))
> > +EncodedFromDisplay() {
> > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > +  IfThenElse(__trans_tmp_11);
> > +}
> > diff --git a/gcc/tree.c b/gcc/tree.c
> > index cba3bca41b3..88c2221eabb 100644
> > --- a/gcc/tree.c
> > +++ b/gcc/tree.c
> > @@ -10723,6 +10723,13 @@ signed_type_for (tree type)
> >    return signed_or_unsigned_type_for (0, type);
> >  }
> >
> > +bool
> > +is_truth_type_for (tree type, tree truth_type)
> > +{
> > +  tree tmp = truth_type_for (type);
> > +  return tmp == truth_type;
> > +}
>
> The idea was to try to avoid calling truth_type to create a type.
> Instead we can use similar logic to truth_type to tell whether type
> has the right form.  I think the rules are:
>
> - For VECTOR_TYPEs:
>   - The truth type must be a VECTOR_BOOLEAN_TYPE.
>   - The number of elements must match (known_eq).
>   - Also:
>     - If !VECTOR_BOOLEAN_TYPE_P and targetm.vectorize.get_mask_mode
>       exists, the truth type must have that mode.
>     - Otherwise, the types must have the same size.

For vector types it's

  VECTOR_BOOLEAN_TYPE_P (truth_type)
  && known_eq (number of elements)
  && targetm.vectorize.get_mask_mode (TYPE_MODE (type)) == TYPE_MODE
(truth_type)

I think we're only interested in the case that TYPE_MODE (type) is a
vector mode given
we're doing optab checks.  If we want to cover !VECTOR_MODE vector type 'type'
then it would be known_eq (component sizes) I think.

Not sure if we need to test for !TYPE_UNSIGNED (truth_type).

> - Otherwise, the truth type must be a BOOLEAN_TYPE.

I'm not sure which case you're refering to here - when 'type' is a scalar type?
I suppose we'd want to allow all types that trivially convert to
boolean_type_node
then which means useless_type_conversion_p (boolean_type_node, truth_type).

> (Richi please correct me if I'm wrong.)
>
> Thanks,
> Richard
>
>
> > +
> >  /* If TYPE is a vector type, return a signed integer vector type with the
> >     same width and number of subparts. Otherwise return boolean_type_node.  */
> >
> > diff --git a/gcc/tree.h b/gcc/tree.h
> > index 060a41f6991..c8542bfd476 100644
> > --- a/gcc/tree.h
> > +++ b/gcc/tree.h
> > @@ -4556,6 +4556,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
> >  extern tree signed_or_unsigned_type_for (int, tree);
> >  extern tree signed_type_for (tree);
> >  extern tree unsigned_type_for (tree);
> > +bool is_truth_type_for (tree, tree);
> >  extern tree truth_type_for (tree);
> >  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
> >  extern tree build_pointer_type (tree);
Hongtao Liu Sept. 6, 2021, 9:01 a.m. UTC | #11
On Mon, Sep 6, 2021 at 2:54 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Thu, Sep 2, 2021 at 7:54 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
> >
> > Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > > On Wed, Sep 1, 2021 at 8:52 PM Richard Sandiford
> > > <richard.sandiford@arm.com> wrote:
> > >>
> > >> Richard Biener <richard.guenther@gmail.com> writes:
> > >> > On Wed, Sep 1, 2021 at 8:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > >> >>
> > >> >> On Tue, Aug 31, 2021 at 7:56 PM Richard Biener
> > >> >> <richard.guenther@gmail.com> wrote:
> > >> >> >
> > >> >> > On Tue, Aug 31, 2021 at 12:18 PM Hongtao Liu <crazylht@gmail.com> wrote:
> > >> >> > >
> > >> >> > > On Mon, Aug 30, 2021 at 8:25 PM Richard Biener via Gcc-patches
> > >> >> > > <gcc-patches@gcc.gnu.org> wrote:
> > >> >> > > >
> > >> >> > > > On Fri, Aug 27, 2021 at 8:53 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >> >> > > > >
> > >> >> > > > >   When gimple simplifcation try to combine op and vec_cond_expr to cond_op,
> > >> >> > > > > it doesn't check if mask type matches. It causes an ICE when expand cond_op
> > >> >> > > > > with mismatched mode.
> > >> >> > > > >   This patch add a function named cond_vectorized_internal_fn_supported_p
> > >> >> > > > >  to additionally check mask type than vectorized_internal_fn_supported_p.
> > >> >> > > > >
> > >> >> > > > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > >> >> > > > >   Ok for trunk?
> > >> >> > > > >
> > >> >> > > > > gcc/ChangeLog:
> > >> >> > > > >
> > >> >> > > > >         PR middle-end/102080
> > >> >> > > > >         * internal-fn.c (cond_vectorized_internal_fn_supported_p): New functions.
> > >> >> > > > >         * internal-fn.h (cond_vectorized_internal_fn_supported_p): New declaration.
> > >> >> > > > >         * match.pd: Check the type of mask while generating cond_op in
> > >> >> > > > >         gimple simplication.
> > >> >> > > > >
> > >> >> > > > > gcc/testsuite/ChangeLog:
> > >> >> > > > >
> > >> >> > > > >         PR middle-end/102080
> > >> >> > > > >         * gcc.target/i386/pr102080.c: New test.
> > >> >> > > > > ---
> > >> >> > > > >  gcc/internal-fn.c                        | 22 ++++++++++++++++++++++
> > >> >> > > > >  gcc/internal-fn.h                        |  1 +
> > >> >> > > > >  gcc/match.pd                             | 24 ++++++++++++++++--------
> > >> >> > > > >  gcc/testsuite/gcc.target/i386/pr102080.c | 16 ++++++++++++++++
> > >> >> > > > >  4 files changed, 55 insertions(+), 8 deletions(-)
> > >> >> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > >> >> > > > >
> > >> >> > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > >> >> > > > > index 1360a00f0b9..8b2b65db1a7 100644
> > >> >> > > > > --- a/gcc/internal-fn.c
> > >> >> > > > > +++ b/gcc/internal-fn.c
> > >> >> > > > > @@ -4102,6 +4102,28 @@ expand_internal_call (gcall *stmt)
> > >> >> > > > >    expand_internal_call (gimple_call_internal_fn (stmt), stmt);
> > >> >> > > > >  }
> > >> >> > > > >
> > >> >> > > > > +/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
> > >> >> > > > > +   doesn't check if mask type matches.  */
> > >> >> > > > > +bool
> > >> >> > > > > +cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
> > >> >> > > > > +                                        tree mask_type)
> > >> >> > > > > +{
> > >> >> > > > > +  if (!vectorized_internal_fn_supported_p (ifn, type))
> > >> >> > > > > +    return false;
> > >> >> > > > > +
> > >> >> > > > > +  machine_mode mask_mode;
> > >> >> > > > > +  machine_mode vmode = TYPE_MODE (type);
> > >> >> > > > > +  int size1, size2;
> > >> >> > > > > +  if (VECTOR_MODE_P (vmode)
> > >> >> > > > > +      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
> > >> >> > > > > +      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
> > >> >> > > > > +      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
> > >> >> > > > > +      && size1 != size2)
> > >> >> > > >
> > >> >> > > > Why do we check for equal size rather than just mode equality which
> > >> >> > > I originally thought  TYPE_MODE of vector(8) <signed-boolean:1> was
> > >> >> > > not QImode, Changed the patch to check mode equality.
> > >> >> > > Update patch.
> > >> >> >
> > >> >> > Looking at all this it seems the match.pd patterns should have not
> > >> >> > used vectorized_internal_fn_supported_p but direct_internal_fn_supported_p
> > >> >> > which is equivalent here because we're always working with vector modes?
> > >>
> > >> Yeah, looks like it.
> > >>
> > >> >> > And then shouldn't we look at the actual optab whether the mask mode matches
> > >> >> > the expectation rather than going around via the target hook which may not have
> > >> >> > enough context to decide which mask mode to use?
> > >> >> How about this?
> > >> >>
> > >> >> +/* Return true if target supports cond_op with data TYPE and
> > >> >> +   mask MASK_TYPE.  */
> > >> >> +bool
> > >> >> +cond_internal_fn_supported_p (internal_fn ifn, tree type,
> > >> >> +       tree mask_type)
> > >> >> +{
> > >> >> +  tree_pair types = tree_pair (type, type);
> > >> >> +  optab tmp = direct_internal_fn_optab (ifn, types);
> > >> >> +  machine_mode vmode = TYPE_MODE (type);
> > >> >> +  insn_code icode = direct_optab_handler (tmp, vmode);
> > >> >> +  if (icode == CODE_FOR_nothing)
> > >> >> +    return false;
> > >> >> +
> > >> >> +  machine_mode mask_mode = TYPE_MODE (mask_type);
> > >> >> +  /* Can't create rtx and use insn_operand_matches here.  */
> > >> >> +  return insn_data[icode].operand[0].mode == vmode
> > >> >> +    && insn_data[icode].operand[1].mode == mask_mode;
> > >> >> +}
> > >> >> +
> > >> >
> > >> > Yeah, sth like that, though the operand[0].mode test should be
> > >> > redudnant.  I think we should assert or have a whiltelist
> > >> > for the internal function we support to be queried this way.
> > >> > Not sure if we can directly access the 'cond_binary/cond_ternary'
> > >> > classification used in internal-fn.def, that would be best.
> > >> >
> > >> > Richard, what are your thoughts about all this?
> > >>
> > >> IMO using get_mask_mode was right.  The optab documentation says:
> > >>
> > >>   Operands 0, 2, 3 and 4 all have mode @var{m}.  Operand 1 is a scalar
> > >>   integer if @var{m} is scalar, otherwise it has the mode returned by
> > >>   @code{TARGET_VECTORIZE_GET_MASK_MODE}.
> > >>
> > >> Allowing targets to use optabs to enforce different mask modes for
> > >> different operations would open up a mess of combinations.
> > >>
> > >> In other words, I think cond_vectorized_internal_fn_supported_p
> > >> is really testing two things:
> > >>
> > >> (1) is the mask type/vector type combination well-formed?
> > >> (2) is the internal function supported for the vector type?
> > >>
> > >> where (1) is a gimple question and (2) is a target question.
> > >>
> > >> I guess there's an argument that (1) should be part of the match.pd
> > >> condition instead, alongside the element_precision check.  That would
> > >> add to the cut-&-paste though. :-(
> > >>
> > >> Alternatively, I guess we would add:
> > >>
> > >>   bool is_truth_type_for (tree type, tree truth_type);
> > >>
> > >> to return true if truth_type is equal to truth_type_for (type)
> > >> (but without having to call truth_type_for).  We could then use:
> > >>
> > >>   is_truth_type_for (op_type, TREE_TYPE (@0))
> > >>
> > >> instead of:
> > >>
> > >>   element_precision (type) == element_precision (op_type)
> > >>
> > >> since it should be a strictly stronger condition.
> > > Thanks for your advice, it sounds more reasonable.
> > > Here is the updated patch.
> > >>
> > >> Thanks,
> > >> Richard
> > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
> > >
> > > From ae192d4a164b8d73adb06d6e28864f717741158c Mon Sep 17 00:00:00 2001
> > > From: liuhongt <hongtao.liu@intel.com>
> > > Date: Thu, 2 Sep 2021 13:05:54 +0800
> > > Subject: [PATCH v4] Check mask type when doing cond_op related gimple
> > >  simplification.
> > >
> > > gcc/ChangeLog:
> > >
> > >       PR middle-end/102080
> > >       * match.pd: Check mask type when doing cond_op related gimple
> > >       simplification.
> > >       * tree.c (is_truth_type_for): New function.
> > >       * tree.h (is_truth_type_for): New declaration.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >       PR middle-end/102080
> > >       * gcc.target/i386/pr102080.c: New test.
> > > ---
> > >  gcc/match.pd                             |  8 ++++----
> > >  gcc/testsuite/gcc.target/i386/pr102080.c | 19 +++++++++++++++++++
> > >  gcc/tree.c                               |  7 +++++++
> > >  gcc/tree.h                               |  1 +
> > >  4 files changed, 31 insertions(+), 4 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102080.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index f421c74b62c..c9a27f46ed2 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -6988,13 +6988,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >    (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> > >    (with { tree op_type = TREE_TYPE (@4); }
> > >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > -     && element_precision (type) == element_precision (op_type))
> > > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> >
> > Why pass “type” rather than “op_type”?  “type” is the type of the
Sorry for the typo, changed.
> > original expression, and if the original vec_cond is well-formed,
> > @0 should already have the right truth type for “type”.  Here we're
> > trying to convert the expression into a conditional operation on
> > “op_type”, so we need to test whether that's valid.
> >
> > >      (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> > >   (simplify
> > >    (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> > >    (with { tree op_type = TREE_TYPE (@4); }
> > >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > -     && element_precision (type) == element_precision (op_type))
> > > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> > >      (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> > >
> > >  /* Same for ternary operations.  */
> > > @@ -7004,13 +7004,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >    (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
> > >    (with { tree op_type = TREE_TYPE (@5); }
> > >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > -     && element_precision (type) == element_precision (op_type))
> > > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> > >      (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
> > >   (simplify
> > >    (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
> > >    (with { tree op_type = TREE_TYPE (@5); }
> > >     (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > > -     && element_precision (type) == element_precision (op_type))
> > > +     && is_truth_type_for (type, TREE_TYPE (@0)))
> > >      (view_convert (cond_op (bit_not @0) @2 @3 @4
> > >                 (view_convert:op_type @1)))))))
> > >  #endif
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > new file mode 100644
> > > index 00000000000..4c5ee32ee63
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102080.c
> > > @@ -0,0 +1,19 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include<immintrin.h>
> > > +typedef float __m256 __attribute__((__vector_size__(32)));
> > > +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
> > > +  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
> > > +
> > > +void
> > > +__attribute__ ((target("avx")))
> > > +IfThenElse (__m256 no) {
> > > +  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
> > > +}
> > > +void
> > > +__attribute__ ((target("avx512vl")))
> > > +EncodedFromDisplay() {
> > > +  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
> > > +  IfThenElse(__trans_tmp_11);
> > > +}
> > > diff --git a/gcc/tree.c b/gcc/tree.c
> > > index cba3bca41b3..88c2221eabb 100644
> > > --- a/gcc/tree.c
> > > +++ b/gcc/tree.c
> > > @@ -10723,6 +10723,13 @@ signed_type_for (tree type)
> > >    return signed_or_unsigned_type_for (0, type);
> > >  }
> > >
> > > +bool
> > > +is_truth_type_for (tree type, tree truth_type)
> > > +{
> > > +  tree tmp = truth_type_for (type);
> > > +  return tmp == truth_type;
> > > +}
> >
> > The idea was to try to avoid calling truth_type to create a type.
> > Instead we can use similar logic to truth_type to tell whether type
> > has the right form.  I think the rules are:
> >
> > - For VECTOR_TYPEs:
> >   - The truth type must be a VECTOR_BOOLEAN_TYPE.
> >   - The number of elements must match (known_eq).
> >   - Also:
> >     - If !VECTOR_BOOLEAN_TYPE_P and targetm.vectorize.get_mask_mode
> >       exists, the truth type must have that mode.
> >     - Otherwise, the types must have the same size.
>
> For vector types it's
>
>   VECTOR_BOOLEAN_TYPE_P (truth_type)
>   && known_eq (number of elements)
>   && targetm.vectorize.get_mask_mode (TYPE_MODE (type)) == TYPE_MODE
> (truth_type)
>
> I think we're only interested in the case that TYPE_MODE (type) is a
> vector mode given
> we're doing optab checks.  If we want to cover !VECTOR_MODE vector type 'type'
> then it would be known_eq (component sizes) I think.
>
> Not sure if we need to test for !TYPE_UNSIGNED (truth_type).
>
> > - Otherwise, the truth type must be a BOOLEAN_TYPE.
>
> I'm not sure which case you're refering to here - when 'type' is a scalar type?
> I suppose we'd want to allow all types that trivially convert to
> boolean_type_node
Update patch.
> then which means useless_type_conversion_p (boolean_type_node, truth_type).
>
> > (Richi please correct me if I'm wrong.)
> >
> > Thanks,
> > Richard
> >
> >
> > > +
> > >  /* If TYPE is a vector type, return a signed integer vector type with the
> > >     same width and number of subparts. Otherwise return boolean_type_node.  */
> > >
> > > diff --git a/gcc/tree.h b/gcc/tree.h
> > > index 060a41f6991..c8542bfd476 100644
> > > --- a/gcc/tree.h
> > > +++ b/gcc/tree.h
> > > @@ -4556,6 +4556,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
> > >  extern tree signed_or_unsigned_type_for (int, tree);
> > >  extern tree signed_type_for (tree);
> > >  extern tree unsigned_type_for (tree);
> > > +bool is_truth_type_for (tree, tree);
> > >  extern tree truth_type_for (tree);
> > >  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
> > >  extern tree build_pointer_type (tree);
diff mbox series

Patch

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 1360a00f0b9..8b2b65db1a7 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -4102,6 +4102,28 @@  expand_internal_call (gcall *stmt)
   expand_internal_call (gimple_call_internal_fn (stmt), stmt);
 }
 
+/* Check cond_op for vector modes since vectorized_internal_fn_supported_p
+   doesn't check if mask type matches.  */
+bool
+cond_vectorized_internal_fn_supported_p (internal_fn ifn, tree type,
+					 tree mask_type)
+{
+  if (!vectorized_internal_fn_supported_p (ifn, type))
+    return false;
+
+  machine_mode mask_mode;
+  machine_mode vmode = TYPE_MODE (type);
+  int size1, size2;
+  if (VECTOR_MODE_P (vmode)
+      && targetm.vectorize.get_mask_mode (vmode).exists(&mask_mode)
+      && GET_MODE_SIZE (mask_mode).is_constant (&size1)
+      && GET_MODE_SIZE (TYPE_MODE (mask_type)).is_constant (&size2)
+      && size1 != size2)
+    return false;
+
+  return true;
+}
+
 /* If TYPE is a vector type, return true if IFN is a direct internal
    function that is supported for that type.  If TYPE is a scalar type,
    return true if IFN is a direct internal function that is supported for
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 19d0f849a5a..f0aea00103c 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -236,5 +236,6 @@  extern void expand_PHI (internal_fn, gcall *);
 extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
+extern bool cond_vectorized_internal_fn_supported_p (internal_fn, tree, tree);
 
 #endif
diff --git a/gcc/match.pd b/gcc/match.pd
index e5bbb123a6a..72b1bc674db 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6987,14 +6987,18 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      cond_op (COND_BINARY)
  (simplify
   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
-  (with { tree op_type = TREE_TYPE (@4); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@4);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
-  (with { tree op_type = TREE_TYPE (@4); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@4);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
 
@@ -7003,14 +7007,18 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      cond_op (COND_TERNARY)
  (simplify
   (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4)
-  (with { tree op_type = TREE_TYPE (@5); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@5);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4))))))
  (simplify
   (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4)))
-  (with { tree op_type = TREE_TYPE (@5); }
-   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+  (with { tree op_type = TREE_TYPE (@5);
+	  tree mask_type = TREE_TYPE (@0); }
+   (if (cond_vectorized_internal_fn_supported_p (as_internal_fn (cond_op),
+						 op_type, mask_type)
 	&& element_precision (type) == element_precision (op_type))
     (view_convert (cond_op (bit_not @0) @2 @3 @4
 		  (view_convert:op_type @1)))))))
diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c
new file mode 100644
index 00000000000..6a40a75e1c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102080.c
@@ -0,0 +1,16 @@ 
+#include<immintrin.h>
+typedef float __m256 __attribute__((__vector_size__(32)));
+__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A,
+  _mm256_mul_ps___B, IfThenElse___trans_tmp_9;
+
+void
+__attribute__ ((target("avx")))
+IfThenElse (__m256 no) {
+  IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M);
+}
+void
+__attribute__ ((target("avx512vl")))
+EncodedFromDisplay() {
+  __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B;
+  IfThenElse(__trans_tmp_11);
+}