diff mbox series

Fix 3 generic vector lowering issues with VECTOR_BOOLEAN_TYPE_P SSA_NAMEs with scalar modes (PR tree-optimization/91157)

Message ID 20190717070136.GZ2125@tucnak
State New
Headers show
Series Fix 3 generic vector lowering issues with VECTOR_BOOLEAN_TYPE_P SSA_NAMEs with scalar modes (PR tree-optimization/91157) | expand

Commit Message

Jakub Jelinek July 17, 2019, 7:01 a.m. UTC
Hi!

On the following testcase we end up with a comparison (EQ_EXPR in this case)
with unsupported vector operands, but supported result (vector boolean
type with scalar mode, i.e. the AVX512F-ish integer bitmask) and later
a VEC_COND_EXPR which is also not supported by the optab and has the vector
boolean type with scalar mode as the first operand.

The last hunk makes sure that we don't just ignore lowering of the comparison
when it has an integer bitmask result but unsupported vector operands.
The expand_vector_comparison changes make sure we lower the comparison
properly into the integer bitmask, and finally the expand_vector_condition
changes make sure we properly lower the VEC_COND_EXPR.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-07-17  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/91157
	* tree-vect-generic.c (expand_vector_comparison): Handle lhs being
	a vector boolean with scalar mode.
	(expand_vector_condition): Handle first operand being a vector boolean
	with scalar mode.
	(expand_vector_operations_1): For comparisons, don't bail out early
	if the return type is vector boolean with scalar mode, but comparison
	operand type is not.

	* gcc.target/i386/avx512f-pr91157.c: New test.
	* gcc.target/i386/avx512bw-pr91157.c: New test.


	Jakub

Comments

Richard Biener July 17, 2019, 7:06 a.m. UTC | #1
On Wed, 17 Jul 2019, Jakub Jelinek wrote:

> Hi!
> 
> On the following testcase we end up with a comparison (EQ_EXPR in this case)
> with unsupported vector operands, but supported result (vector boolean
> type with scalar mode, i.e. the AVX512F-ish integer bitmask) and later
> a VEC_COND_EXPR which is also not supported by the optab and has the vector
> boolean type with scalar mode as the first operand.
> 
> The last hunk makes sure that we don't just ignore lowering of the comparison
> when it has an integer bitmask result but unsupported vector operands.
> The expand_vector_comparison changes makes sure we lower the comparison
> properly into the integer bitmask and finally the expand_vector_condition
> changes makes sure we lower properly the VEC_COND_EXPR.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2019-07-17  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/91157
> 	* tree-vect-generic.c (expand_vector_comparison): Handle lhs being
> 	a vector boolean with scalar mode.
> 	(expand_vector_condition): Handle first operand being a vector boolean
> 	with scalar mode.
> 	(expand_vector_operations_1): For comparisons, don't bail out early
> 	if the return type is vector boolean with scalar mode, but comparison
> 	operand type is not.
> 
> 	* gcc.target/i386/avx512f-pr91157.c: New test.
> 	* gcc.target/i386/avx512bw-pr91157.c: New test.
> 
> --- gcc/tree-vect-generic.c.jj	2019-07-04 00:18:37.063010439 +0200
> +++ gcc/tree-vect-generic.c	2019-07-16 12:40:41.343059690 +0200
> @@ -382,8 +382,48 @@ expand_vector_comparison (gimple_stmt_it
>    tree t;
>    if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
>        && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
> -    t = expand_vector_piecewise (gsi, do_compare, type,
> -				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
> +    {
> +      if (VECTOR_BOOLEAN_TYPE_P (type)
> +	  && VECTOR_BOOLEAN_TYPE_P (type)
> +	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
> +	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
> +		       TYPE_VECTOR_SUBPARTS (type)
> +		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
> +						(TREE_TYPE (type)))))
> +	{
> +	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
> +	  tree part_width = TYPE_SIZE (inner_type);
> +	  tree index = bitsize_int (0);
> +	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
> +	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
> +	  tree ret_type = build_nonstandard_integer_type (prec, 1);
> +	  tree ret_inner_type = boolean_type_node;
> +	  int i;
> +	  location_t loc = gimple_location (gsi_stmt (*gsi));
> +	  t = build_zero_cst (ret_type);
> +
> +	  if (TYPE_PRECISION (ret_inner_type) != 1)
> +	    ret_inner_type = build_nonstandard_integer_type (1, 1);
> +	  warning_at (loc, OPT_Wvector_operation_performance,
> +		      "vector operation will be expanded piecewise");
> +	  for (i = 0; i < nunits;
> +	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
> +	    {
> +	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
> +					 index);
> +	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
> +					 index);
> +	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
> +	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
> +				   bitsize_int (i));
> +	    }
> +	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
> +	}
> +      else
> +	t = expand_vector_piecewise (gsi, do_compare, type,
> +				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
> +				     code);
> +    }
>    else
>      t = NULL_TREE;
>  
> @@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite
>    tree a1 = a;
>    tree a2 = NULL_TREE;
>    bool a_is_comparison = false;
> +  bool a_is_scalar_bitmask = false;
>    tree b = gimple_assign_rhs2 (stmt);
>    tree c = gimple_assign_rhs3 (stmt);
>    vec<constructor_elt, va_gc> *v;
> @@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite
>    warning_at (loc, OPT_Wvector_operation_performance,
>  	      "vector condition will be expanded piecewise");
>  
> +  if (!a_is_comparison
> +      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
> +      && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
> +      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
> +		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
> +		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
> +						(TREE_TYPE (TREE_TYPE (a))))))
> +    {
> +      a_is_scalar_bitmask = true;
> +      int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
> +      tree atype = build_nonstandard_integer_type (prec, 1);
> +      a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
> +    }
> +
>    int nunits = nunits_for_known_piecewise_op (type);
>    vec_alloc (v, nunits);
>    for (i = 0; i < nunits; i++)
> @@ -957,6 +1012,14 @@ expand_vector_condition (gimple_stmt_ite
>  				       comp_width, comp_index);
>  	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
>  	}
> +      else if (a_is_scalar_bitmask)
> +	{
> +	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
> +	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
> +				    a, wide_int_to_tree (TREE_TYPE (a), w));
> +	  aa = fold_build2 (NE_EXPR, boolean_type_node, result,
> +			    build_zero_cst (TREE_TYPE (a)));
> +	}
>        else
>  	aa = tree_vec_extract (gsi, cond_type, a, width, index);
>        result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
> @@ -1941,7 +2004,11 @@ expand_vector_operations_1 (gimple_stmt_
>    /* A scalar operation pretending to be a vector one.  */
>    if (VECTOR_BOOLEAN_TYPE_P (type)
>        && !VECTOR_MODE_P (TYPE_MODE (type))
> -      && TYPE_MODE (type) != BLKmode)
> +      && TYPE_MODE (type) != BLKmode
> +      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
> +	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
> +	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
> +	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
>      return;
>  
>    /* If the vector operation is operating on all same vector elements
> --- gcc/testsuite/gcc.target/i386/avx512f-pr91157.c.jj	2019-07-16 12:54:55.928900526 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-pr91157.c	2019-07-16 13:01:39.217714434 +0200
> @@ -0,0 +1,29 @@
> +/* PR tree-optimization/91157 */
> +/* { dg-do run { target { avx512f && lp64 } } } */
> +/* { dg-options "-O2 -mavx512f -fexceptions -fnon-call-exceptions -fsignaling-nans" } */
> +
> +#include "avx512f-helper.h"
> +
> +typedef long double V __attribute__ ((vector_size (4 * sizeof (long double))));
> +typedef __int128 W __attribute__ ((vector_size (4 * sizeof (__int128))));
> +
> +__attribute__((noipa)) W
> +foo (V x)
> +{
> +  return x == 0;
> +}
> +
> +static void
> +test_512 (void)
> +{
> +  V a = { 5.0L, 0.0L, -0.0L, -17.0L };
> +  V b = { -0.0L, 16.0L, 0.0L, 18.0L };
> +  V c = { 6.0L, 7.0L, 8.0L, 0.0L };
> +  W ar = foo (a);
> +  W br = foo (b);
> +  W cr = foo (c);
> +  if (ar[0] != 0 || ar[1] != -1 || ar[2] != -1 || ar[3] != 0
> +      || br[0] != -1 || br[1] != 0 || br[2] != -1 || br[3] != 0
> +      || cr[0] != 0 || cr[1] != 0 || cr[2] != 0 || cr[3] != -1)
> +    __builtin_abort ();
> +}
> --- gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c.jj	2019-07-16 12:55:11.609659992 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c	2019-07-16 13:01:10.438155882 +0200
> @@ -0,0 +1,6 @@
> +/* PR tree-optimization/91157 */
> +/* { dg-do run { target { avx512bw && lp64 } } } */
> +/* { dg-options "-O2 -mavx512bw -fexceptions -fnon-call-exceptions -fsignaling-nans" } */
> +
> +#define AVX512BW
> +#include "avx512f-pr91157.c"
> 
> 	Jakub
>
Bernhard Reutner-Fischer July 18, 2019, 6:28 a.m. UTC | #2
On 17 July 2019 09:01:36 CEST, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>On the following testcase we end up with a comparison (EQ_EXPR in this
>case)
>with unsupported vector operands, but supported result (vector boolean
>type with scalar mode, i.e. the AVX512F-ish integer bitmask) and later
>a VEC_COND_EXPR which is also not supported by the optab and has the
>vector
>boolean type with scalar mode as the first operand.
>
>The last hunk makes sure that we don't just ignore lowering of the
>comparison
>when it has an integer bitmask result but unsupported vector operands.
>The expand_vector_comparison changes makes sure we lower the comparison
>properly into the integer bitmask and finally the
>expand_vector_condition
>changes makes sure we lower properly the VEC_COND_EXPR.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
>2019-07-17  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/91157
>	* tree-vect-generic.c (expand_vector_comparison): Handle lhs being
>	a vector boolean with scalar mode.
>	(expand_vector_condition): Handle first operand being a vector boolean
>	with scalar mode.
>	(expand_vector_operations_1): For comparisons, don't bail out early
>	if the return type is vector boolean with scalar mode, but comparison
>	operand type is not.
>
>	* gcc.target/i386/avx512f-pr91157.c: New test.
>	* gcc.target/i386/avx512bw-pr91157.c: New test.
>
>--- gcc/tree-vect-generic.c.jj	2019-07-04 00:18:37.063010439 +0200
>+++ gcc/tree-vect-generic.c	2019-07-16 12:40:41.343059690 +0200
>@@ -382,8 +382,48 @@ expand_vector_comparison (gimple_stmt_it
>   tree t;
>   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
>       && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
>-    t = expand_vector_piecewise (gsi, do_compare, type,
>-				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
>+    {
>+      if (VECTOR_BOOLEAN_TYPE_P (type)
>+	  && VECTOR_BOOLEAN_TYPE_P (type)

The above condition looks redundant, fwiw.
Did you mean to check op0?

thanks,

>+	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
>+	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
>+		       TYPE_VECTOR_SUBPARTS (type)
>+		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
>+						(TREE_TYPE (type)))))
>+	{
>+	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
>+	  tree part_width = TYPE_SIZE (inner_type);
>+	  tree index = bitsize_int (0);
>+	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
>+	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
>+	  tree ret_type = build_nonstandard_integer_type (prec, 1);
>+	  tree ret_inner_type = boolean_type_node;
>+	  int i;
>+	  location_t loc = gimple_location (gsi_stmt (*gsi));
>+	  t = build_zero_cst (ret_type);
>+
>+	  if (TYPE_PRECISION (ret_inner_type) != 1)
>+	    ret_inner_type = build_nonstandard_integer_type (1, 1);
>+	  warning_at (loc, OPT_Wvector_operation_performance,
>+		      "vector operation will be expanded piecewise");
>+	  for (i = 0; i < nunits;
>+	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
>+	    {
>+	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
>+					 index);
>+	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
>+					 index);
>+	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a,
>b);
>+	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
>+				   bitsize_int (i));
>+	    }
>+	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
>+	}
>+      else
>+	t = expand_vector_piecewise (gsi, do_compare, type,
>+				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
>+				     code);
>+    }
>   else
>     t = NULL_TREE;
> 
>@@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite
>   tree a1 = a;
>   tree a2 = NULL_TREE;
>   bool a_is_comparison = false;
>+  bool a_is_scalar_bitmask = false;
>   tree b = gimple_assign_rhs2 (stmt);
>   tree c = gimple_assign_rhs3 (stmt);
>   vec<constructor_elt, va_gc> *v;
>@@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite
>   warning_at (loc, OPT_Wvector_operation_performance,
> 	      "vector condition will be expanded piecewise");
> 
>+  if (!a_is_comparison
>+      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
>+      && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
>+      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
>+		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
>+		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
>+						(TREE_TYPE (TREE_TYPE (a))))))
>+    {
>+      a_is_scalar_bitmask = true;
>+      int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE
>(a)));
>+      tree atype = build_nonstandard_integer_type (prec, 1);
>+      a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
>+    }
>+
>   int nunits = nunits_for_known_piecewise_op (type);
>   vec_alloc (v, nunits);
>   for (i = 0; i < nunits; i++)
>@@ -957,6 +1012,14 @@ expand_vector_condition (gimple_stmt_ite
> 				       comp_width, comp_index);
> 	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
> 	}
>+      else if (a_is_scalar_bitmask)
>+	{
>+	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE
>(a)));
>+	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
>+				    a, wide_int_to_tree (TREE_TYPE (a), w));
>+	  aa = fold_build2 (NE_EXPR, boolean_type_node, result,
>+			    build_zero_cst (TREE_TYPE (a)));
>+	}
>       else
> 	aa = tree_vec_extract (gsi, cond_type, a, width, index);
>     result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
>@@ -1941,7 +2004,11 @@ expand_vector_operations_1 (gimple_stmt_
>   /* A scalar operation pretending to be a vector one.  */
>   if (VECTOR_BOOLEAN_TYPE_P (type)
>       && !VECTOR_MODE_P (TYPE_MODE (type))
>-      && TYPE_MODE (type) != BLKmode)
>+      && TYPE_MODE (type) != BLKmode
>+      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) !=
>tcc_comparison
>+	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
>+	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
>+	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
>     return;
> 
>   /* If the vector operation is operating on all same vector elements
>--- gcc/testsuite/gcc.target/i386/avx512f-pr91157.c.jj	2019-07-16
>12:54:55.928900526 +0200
>+++ gcc/testsuite/gcc.target/i386/avx512f-pr91157.c	2019-07-16
>13:01:39.217714434 +0200
>@@ -0,0 +1,29 @@
>+/* PR tree-optimization/91157 */
>+/* { dg-do run { target { avx512f && lp64 } } } */
>+/* { dg-options "-O2 -mavx512f -fexceptions -fnon-call-exceptions
>-fsignaling-nans" } */
>+
>+#include "avx512f-helper.h"
>+
>+typedef long double V __attribute__ ((vector_size (4 * sizeof (long
>double))));
>+typedef __int128 W __attribute__ ((vector_size (4 * sizeof
>(__int128))));
>+
>+__attribute__((noipa)) W
>+foo (V x)
>+{
>+  return x == 0;
>+}
>+
>+static void
>+test_512 (void)
>+{
>+  V a = { 5.0L, 0.0L, -0.0L, -17.0L };
>+  V b = { -0.0L, 16.0L, 0.0L, 18.0L };
>+  V c = { 6.0L, 7.0L, 8.0L, 0.0L };
>+  W ar = foo (a);
>+  W br = foo (b);
>+  W cr = foo (c);
>+  if (ar[0] != 0 || ar[1] != -1 || ar[2] != -1 || ar[3] != 0
>+      || br[0] != -1 || br[1] != 0 || br[2] != -1 || br[3] != 0
>+      || cr[0] != 0 || cr[1] != 0 || cr[2] != 0 || cr[3] != -1)
>+    __builtin_abort ();
>+}
>--- gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c.jj	2019-07-16
>12:55:11.609659992 +0200
>+++ gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c	2019-07-16
>13:01:10.438155882 +0200
>@@ -0,0 +1,6 @@
>+/* PR tree-optimization/91157 */
>+/* { dg-do run { target { avx512bw && lp64 } } } */
>+/* { dg-options "-O2 -mavx512bw -fexceptions -fnon-call-exceptions
>-fsignaling-nans" } */
>+
>+#define AVX512BW
>+#include "avx512f-pr91157.c"
>
>	Jakub
Jakub Jelinek July 18, 2019, 6:32 a.m. UTC | #3
On Thu, Jul 18, 2019 at 08:28:30AM +0200, Bernhard Reutner-Fischer wrote:
> >+      if (VECTOR_BOOLEAN_TYPE_P (type)
> >+	  && VECTOR_BOOLEAN_TYPE_P (type)
> 
> The above condition looks redundant, fwiw.
> Did you mean to check op0?

It is redundant, and I already removed the second line yesterday.

	Jakub
diff mbox series

Patch

--- gcc/tree-vect-generic.c.jj	2019-07-04 00:18:37.063010439 +0200
+++ gcc/tree-vect-generic.c	2019-07-16 12:40:41.343059690 +0200
@@ -382,8 +382,48 @@  expand_vector_comparison (gimple_stmt_it
   tree t;
   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
       && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
-    t = expand_vector_piecewise (gsi, do_compare, type,
-				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
+    {
+      if (VECTOR_BOOLEAN_TYPE_P (type)
+	  && VECTOR_BOOLEAN_TYPE_P (type)
+	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
+	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
+		       TYPE_VECTOR_SUBPARTS (type)
+		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
+						(TREE_TYPE (type)))))
+	{
+	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
+	  tree part_width = TYPE_SIZE (inner_type);
+	  tree index = bitsize_int (0);
+	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
+	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
+	  tree ret_type = build_nonstandard_integer_type (prec, 1);
+	  tree ret_inner_type = boolean_type_node;
+	  int i;
+	  location_t loc = gimple_location (gsi_stmt (*gsi));
+	  t = build_zero_cst (ret_type);
+
+	  if (TYPE_PRECISION (ret_inner_type) != 1)
+	    ret_inner_type = build_nonstandard_integer_type (1, 1);
+	  warning_at (loc, OPT_Wvector_operation_performance,
+		      "vector operation will be expanded piecewise");
+	  for (i = 0; i < nunits;
+	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
+	    {
+	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
+					 index);
+	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
+					 index);
+	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
+	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
+				   bitsize_int (i));
+	    }
+	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
+	}
+      else
+	t = expand_vector_piecewise (gsi, do_compare, type,
+				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
+				     code);
+    }
   else
     t = NULL_TREE;
 
@@ -879,6 +919,7 @@  expand_vector_condition (gimple_stmt_ite
   tree a1 = a;
   tree a2 = NULL_TREE;
   bool a_is_comparison = false;
+  bool a_is_scalar_bitmask = false;
   tree b = gimple_assign_rhs2 (stmt);
   tree c = gimple_assign_rhs3 (stmt);
   vec<constructor_elt, va_gc> *v;
@@ -942,6 +983,20 @@  expand_vector_condition (gimple_stmt_ite
   warning_at (loc, OPT_Wvector_operation_performance,
 	      "vector condition will be expanded piecewise");
 
+  if (!a_is_comparison
+      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
+      && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
+      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
+		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
+		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
+						(TREE_TYPE (TREE_TYPE (a))))))
+    {
+      a_is_scalar_bitmask = true;
+      int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
+      tree atype = build_nonstandard_integer_type (prec, 1);
+      a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
+    }
+
   int nunits = nunits_for_known_piecewise_op (type);
   vec_alloc (v, nunits);
   for (i = 0; i < nunits; i++)
@@ -957,6 +1012,14 @@  expand_vector_condition (gimple_stmt_ite
 				       comp_width, comp_index);
 	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
 	}
+      else if (a_is_scalar_bitmask)
+	{
+	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
+	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
+				    a, wide_int_to_tree (TREE_TYPE (a), w));
+	  aa = fold_build2 (NE_EXPR, boolean_type_node, result,
+			    build_zero_cst (TREE_TYPE (a)));
+	}
       else
 	aa = tree_vec_extract (gsi, cond_type, a, width, index);
       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
@@ -1941,7 +2004,11 @@  expand_vector_operations_1 (gimple_stmt_
   /* A scalar operation pretending to be a vector one.  */
   if (VECTOR_BOOLEAN_TYPE_P (type)
       && !VECTOR_MODE_P (TYPE_MODE (type))
-      && TYPE_MODE (type) != BLKmode)
+      && TYPE_MODE (type) != BLKmode
+      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
+	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
+	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
+	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
     return;
 
   /* If the vector operation is operating on all same vector elements
--- gcc/testsuite/gcc.target/i386/avx512f-pr91157.c.jj	2019-07-16 12:54:55.928900526 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-pr91157.c	2019-07-16 13:01:39.217714434 +0200
@@ -0,0 +1,29 @@ 
+/* PR tree-optimization/91157 */
+/* { dg-do run { target { avx512f && lp64 } } } */
+/* { dg-options "-O2 -mavx512f -fexceptions -fnon-call-exceptions -fsignaling-nans" } */
+
+#include "avx512f-helper.h"
+
+typedef long double V __attribute__ ((vector_size (4 * sizeof (long double))));
+typedef __int128 W __attribute__ ((vector_size (4 * sizeof (__int128))));
+
+__attribute__((noipa)) W
+foo (V x)
+{
+  return x == 0;
+}
+
+static void
+test_512 (void)
+{
+  V a = { 5.0L, 0.0L, -0.0L, -17.0L };
+  V b = { -0.0L, 16.0L, 0.0L, 18.0L };
+  V c = { 6.0L, 7.0L, 8.0L, 0.0L };
+  W ar = foo (a);
+  W br = foo (b);
+  W cr = foo (c);
+  if (ar[0] != 0 || ar[1] != -1 || ar[2] != -1 || ar[3] != 0
+      || br[0] != -1 || br[1] != 0 || br[2] != -1 || br[3] != 0
+      || cr[0] != 0 || cr[1] != 0 || cr[2] != 0 || cr[3] != -1)
+    __builtin_abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c.jj	2019-07-16 12:55:11.609659992 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c	2019-07-16 13:01:10.438155882 +0200
@@ -0,0 +1,6 @@ 
+/* PR tree-optimization/91157 */
+/* { dg-do run { target { avx512bw && lp64 } } } */
+/* { dg-options "-O2 -mavx512bw -fexceptions -fnon-call-exceptions -fsignaling-nans" } */
+
+#define AVX512BW
+#include "avx512f-pr91157.c"