diff mbox series

Enable mask operation for 128/256-bit vector VCOND_EXPR under avx512f (PR92686)

Message ID CAMZc-bzdjRFDHHYN74_AkQduE4s26kEx+MstfVKJ8LDcehVa-A@mail.gmail.com
State New
Headers show
Series Enable mask operation for 128/256-bit vector VCOND_EXPR under avx512f (PR92686) | expand

Commit Message

Hongtao Liu Dec. 4, 2019, 2:07 a.m. UTC
Hi:
  Currently for VCOND_EXPR, integer mask operations are only available
for 512-bit vectors, but since mask registers are tied to the ISA rather
than to the vector size, under avx512f we can also have 128/256-bit
vector conditional moves. My local tests show there's no boost frequency
penalty for using integer mask registers, and it also reduces sse
register pressure and saves 1 instruction (vpblendvb).

  Bootstrap is ok, regression test on i386/x86_64 backend is ok.
  Ok for trunk?

Changelog
gcc/
        PR target/92686
        * config/i386/sse.md
        (*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>,
        *<avx512>_cmp<mode>3<mask_scalar_merge_name>,
        *<avx512>_ucmp<mode>3<mask_scalar_merge_name>,
        *<avx512>_ucmp<mode>3<mask_scalar_merge_name>): New.
        * config/i386/i386.c (ix86_print_operand): New operand substitution.
        * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode):
        New function.
        (ix86_expand_sse_cmp): Relax condition for integer mask from
        512-bit vector to all 128/256/512-bit vector. Delete code gen
        for avx512f compare patterns since we have generic pattern now.
        (ix86_expand_sse_movcc): Adjust condition and codegen for
        maskcmp.
        (ix86_expand_int_sse_cmp): Don't canonicalize the comparison
        when corresponding vector compare is available.

gcc/testsuite/
        * gcc.target/i386/pr92686.inc: New file.
        * gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: New test.
        * gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto.
        * gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto.
        * gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto.
        * gcc.target/i386/avx512bw-pr92686-movcc-1.c: Ditto.
        * gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto.
        * gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto.
        * gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto.
        * gcc.target/i386/avx512vl-pr88547-1.c: Adjust testcase.
        * gcc.target/i386/pr88547-1.c: Ditto.

Comments

Jakub Jelinek Dec. 4, 2019, 8:22 a.m. UTC | #1
On Wed, Dec 04, 2019 at 10:07:05AM +0800, Hongtao Liu wrote:
> Changelog
> gcc/
> 	PR target/92686
> 	* config/i386/sse.md
> 	(*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>,
> 	*<avx512>_cmp<mode>3<mask_scalar_merge_name>,
> 	*<avx512>_ucmp<mode>3<mask_scalar_merge_name>,
> 	*<avx512>_ucmp<mode>3<mask_scalar_merge_name>): New.
> 	* config/i386/i386.c (ix86_print_operand): New operand substitution.
> 	* config/i386/i386-expand.c (ix86_valid_mask_cmp_mode):
> 	New function.
> 	(ix86_expand_sse_cmp): Relax condition for integer mask from
> 	512-bit vector to all 128/256/512-bit vector. Delete code gen
> 	for avx512f compare patterns since we have generic pattern now.
> 	(ix86_expand_sse_movcc): Adjust condition and codegen for
> 	maskcmp.
> 	(ix86_expand_int_sse_cmp): Don't canonicalize the comparison
> 	when corresponding vector compare is available.
> 
> gcc/testsuite/
> 	* gcc.target/i386/pr92686.inc: New file.
> 	* gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: New test.
> 	* gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto.
> 	* gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto.
> 	* gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto.
> 	* gcc.target/i386/avx512bw-pr92686-movcc-1.c: Ditto.
> 	* gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto.
> 	* gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto.
> 	* gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto.
> 	* gcc.target/i386/avx512vl-pr88547-1.c: Adjust testcase.
> 	* gcc.target/i386/pr88547-1.c: Ditto.

See comments below.

> +  /* AVX512BW is needed for vector QI/HImode,
> +     AVX512VL is needed for 128/256-bit vector.  */
> +  machine_mode inner_mode = GET_MODE_INNER (mode);
> +  int vector_size = GET_MODE_SIZE (mode);
> +  if ((inner_mode == QImode || inner_mode == HImode)
> +      && !TARGET_AVX512BW)

There is no reason not to keep && !TARGET_AVX512BW) on the previous line.

> +  if (ix86_valid_mask_cmp_mode (cmp_ops_mode))
>      {
>        unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
> -      cmp_mode = int_mode_for_size (nbits, 0).require ();
>        maskcmp = true;
> +      cmp_mode = nbits > 8 ?
> +	int_mode_for_size (nbits, 0).require ()
> +	: E_QImode;

Formatting.  ? never goes at the end of line, similarly :.
So, you want either
      cmp_mode
	= nbits > 8 ? int_mode_for_size (nbits, 0).require () : E_QImode;
or
      cmp_mode = (nbits > 8 ? int_mode_for_size (nbits, 0).require ()
		  : E_QImode);
or
      cmp_mode = (nbits > 8
		  ? int_mode_for_size (nbits, 0).require () : E_QImode);
or
      cmp_mode = (nbits > 8
		  ? int_mode_for_size (nbits, 0).require ()
		  : E_QImode);
- the parens around to help emacs.

> @@ -3515,7 +3510,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
>    machine_mode cmpmode = GET_MODE (cmp);
>  
>    /* In AVX512F the result of comparison is an integer mask.  */
> -  bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
> +  bool maskcmp = ((mode != cmpmode) && ix86_valid_mask_cmp_mode (mode));

No reason for either pair of ()s here, of course except the function call
argument list.

> +      /* Using vector move with mask register.  */
> +      cmp = force_reg (cmpmode, cmp);
> +      /* Optimize for mask zero.  */
> +      op_true = op_true != CONST0_RTX (mode)
> +	? force_reg (mode, op_true)
> +	: op_true;

Same thing as above, just in this case ? wasn't incorrectly at the end.

> +      op_false = op_false != CONST0_RTX (mode)
> +	? force_reg (mode, op_false)
> +	: op_false;

And again.

> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12468,6 +12468,38 @@ ix86_print_operand (FILE *file, rtx x, int code)
>  	    }
>  	  return;
>  
> +	case 'I':
> +	  switch (GET_CODE (x))
> +	    {
> +	    case EQ:
> +	      fputs ("$0", file);
> +	      break;
> +	    case NE:
> +	      fputs ("$4", file);
> +	      break;
> +	    case GE:
> +	    case GEU:
> +	      fputs ("$5", file);
> +	      break;
> +	    case GT:
> +	    case GTU:
> +	      fputs ("$6", file);
> +	      break;
> +	    case LE:
> +	    case LEU:
> +	      fputs ("$2", file);
> +	      break;
> +	    case LT:
> +	    case LTU:
> +	      fputs ("$1", file);
> +	      break;

Does this work with -masm=intel?  I'd guess that $1 etc. isn't valid.
If not, either 'I' should print just the number without $ and $ should be
added only for the AT&T syntax, or the above should print or not print the
$ depending on the asm syntax.

> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -3050,6 +3050,18 @@
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>  
> +(define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
> +  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> +	(match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
> +	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> +	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")]))]
> +  "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
> +  "vpcmp<ssemodesuffix>\t{%I3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %I3}"
> +  [(set_attr "type" "ssecmp")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
>  (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
>    [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
>  	(unspec:<avx512fmaskmode>

> +      abort();
> +    }
> +  abort();
> +}
> \ No newline at end of file

Please make sure all files are newline terminated.

	Jakub
Hongtao Liu Dec. 5, 2019, 1:56 a.m. UTC | #2
On Wed, Dec 4, 2019 at 4:22 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Wed, Dec 04, 2019 at 10:07:05AM +0800, Hongtao Liu wrote:
> > Changelog
> > gcc/
> >       PR target/92686
> >       * config/i386/sse.md
> >       (*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>,
> >       *<avx512>_cmp<mode>3<mask_scalar_merge_name>,
> >       *<avx512>_ucmp<mode>3<mask_scalar_merge_name>,
> >       *<avx512>_ucmp<mode>3<mask_scalar_merge_name>): New.
> >       * config/i386/i386.c (ix86_print_operand): New operand substitution.
> >       * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode):
> >       New function.
> >       (ix86_expand_sse_cmp): Relax condition for integer mask from
> >       512-bit vector to all 128/256/512-bit vector. Delete code gen
> >       for avx512f compare patterns since we have generic pattern now.
> >       (ix86_expand_sse_movcc): Adjust condition and codegen for
> >       maskcmp.
> >       (ix86_expand_int_sse_cmp): Don't canonicalize the comparison
> >       when corresponding vector compare is available.
> >
> > gcc/testsuite/
> >       * gcc.target/i386/pr92686.inc: New file.
> >       * gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: New test.
> >       * gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto.
> >       * gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto.
> >       * gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto.
> >       * gcc.target/i386/avx512bw-pr92686-movcc-1.c: Ditto.
> >       * gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto.
> >       * gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto.
> >       * gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto.
> >       * gcc.target/i386/avx512vl-pr88547-1.c: Adjust testcase.
> >       * gcc.target/i386/pr88547-1.c: Ditto.
>
> See comments below.
>
> > +  /* AVX512BW is needed for vector QI/HImode,
> > +     AVX512VL is needed for 128/256-bit vector.  */
> > +  machine_mode inner_mode = GET_MODE_INNER (mode);
> > +  int vector_size = GET_MODE_SIZE (mode);
> > +  if ((inner_mode == QImode || inner_mode == HImode)
> > +      && !TARGET_AVX512BW)
>
> There is no reason not to keep && !TARGET_AVX512BW) on the previous line.
>
> > +  if (ix86_valid_mask_cmp_mode (cmp_ops_mode))
> >      {
> >        unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
> > -      cmp_mode = int_mode_for_size (nbits, 0).require ();
> >        maskcmp = true;
> > +      cmp_mode = nbits > 8 ?
> > +     int_mode_for_size (nbits, 0).require ()
> > +     : E_QImode;
>
> Formatting.  ? never goes at the end of line, similarly :.
> So, you want either
>       cmp_mode
>         = nbits > 8 ? int_mode_for_size (nbits, 0).require () : E_QImode;
> or
>       cmp_mode = (nbits > 8 ? int_mode_for_size (nbits, 0).require ()
>                   : E_QImode);
> or
>       cmp_mode = (nbits > 8
>                   ? int_mode_for_size (nbits, 0).require () : E_QImode);
> or
>       cmp_mode = (nbits > 8
>                   ? int_mode_for_size (nbits, 0).require ()
>                   : E_QImode);
> - the parens around to help emacs.
>
> > @@ -3515,7 +3510,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> >    machine_mode cmpmode = GET_MODE (cmp);
> >
> >    /* In AVX512F the result of comparison is an integer mask.  */
> > -  bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
> > +  bool maskcmp = ((mode != cmpmode) && ix86_valid_mask_cmp_mode (mode));
>
> No reason for either pair of ()s here, of course except the function call
> argument list.
>
> > +      /* Using vector move with mask register.  */
> > +      cmp = force_reg (cmpmode, cmp);
> > +      /* Optimize for mask zero.  */
> > +      op_true = op_true != CONST0_RTX (mode)
> > +     ? force_reg (mode, op_true)
> > +     : op_true;
>
> Same thing as above, just in this case ? wasn't incorrectly at the end.
>
> > +      op_false = op_false != CONST0_RTX (mode)
> > +     ? force_reg (mode, op_false)
> > +     : op_false;
>
> And again.
>
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -12468,6 +12468,38 @@ ix86_print_operand (FILE *file, rtx x, int code)
> >           }
> >         return;
> >
> > +     case 'I':
> > +       switch (GET_CODE (x))
> > +         {
> > +         case EQ:
> > +           fputs ("$0", file);
> > +           break;
> > +         case NE:
> > +           fputs ("$4", file);
> > +           break;
> > +         case GE:
> > +         case GEU:
> > +           fputs ("$5", file);
> > +           break;
> > +         case GT:
> > +         case GTU:
> > +           fputs ("$6", file);
> > +           break;
> > +         case LE:
> > +         case LEU:
> > +           fputs ("$2", file);
> > +           break;
> > +         case LT:
> > +         case LTU:
> > +           fputs ("$1", file);
> > +           break;
>
> Does this work with -masm=intel?  I'd guess that $1 etc. isn't valid.
> If not, either I should print just the number without $ and $ should be
> added only for the AT&T syntax, or the above should print or not print the
> $ depending on the asm syntax.
Confirmed
Change to
+       case 'I':
+         if (ASSEMBLER_DIALECT == ASM_ATT)
+           putc ('$', file);
+         switch (GET_CODE (x))
+           {
+           case EQ:
+             putc ('0', file);
+             break;
+           case NE:
+             putc ('4', file);
+             break;
+           case GE:
+           case GEU:
+             putc ('5', file);
+             break;
+           case GT:
+           case GTU:
+             putc ('6', file);
+             break;
+           case LE:
+           case LEU:
+             putc ('2', file);
+             break;
+           case LT:
+           case LTU:
+             putc ('1', file);

> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -3050,6 +3050,18 @@
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> >
> > +(define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
> > +  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> > +     (match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
> > +       [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> > +        (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")]))]
> > +  "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
> > +  "vpcmp<ssemodesuffix>\t{%I3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %I3}"
> > +  [(set_attr "type" "ssecmp")
> > +   (set_attr "length_immediate" "1")
> > +   (set_attr "prefix" "evex")
> > +   (set_attr "mode" "<sseinsnmode>")])
> > +
> >  (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
> >    [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> >       (unspec:<avx512fmaskmode>
>
> > +      abort();
> > +    }
> > +  abort();
> > +}
> > \ No newline at end of file
>
> Please make sure all files are newline terminated.
>
>         Jakub
>
Thanks for pointing out these format issues, that helps a lot.
Update patch
Jakub Jelinek Dec. 5, 2019, 8:03 a.m. UTC | #3
On Thu, Dec 05, 2019 at 09:56:46AM +0800, Hongtao Liu wrote:
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> +      /* Using vector move with mask register.  */
> +      cmp = force_reg (cmpmode, cmp);
> +      /* Optimize for mask zero.  */
> +      op_true =
> +	op_true != CONST0_RTX (mode) ? force_reg (mode, op_true) : op_true;
> +      op_false =
> +	op_false != CONST0_RTX (mode) ? force_reg (mode, op_false) : op_false;

The above two still aren't correct, = doesn't belong at the end of line
either.

      op_true
	= op_true != CONST0_RTX (mode) ? force_reg (mode, op_true) : op_true;

would be ok,

      op_false
	= op_false != CONST0_RTX (mode) ? force_reg (mode, op_false) : op_false;

is too long, so e.g.

      op_false = (op_false != CONST0_RTX (mode)
		  ? force_reg (mode, op_false) : op_false);

> +	  /* Reverse op_true op_false.  */
> +	  n = op_true;
> +	  op_true = op_false;
> +	  op_false = n;

Please use
	  std::swap (op_true, op_false);
instead of the above 3 lines.

Also, can you please add at least one testcase for this with -masm=intel,
effective target masm_intel and dg-do assemble to make sure it assembles?
Perhaps just one -mavx512vl -mavx512bw avx512vl/avx512bw effective target that 
tests all the patterns?

Ok with those changes.

	Jakub
Hongtao Liu Dec. 9, 2019, 4:21 a.m. UTC | #4
On Thu, Dec 5, 2019 at 4:03 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Thu, Dec 05, 2019 at 09:56:46AM +0800, Hongtao Liu wrote:
> > --- a/gcc/config/i386/i386-expand.c
> > +++ b/gcc/config/i386/i386-expand.c
> > +      /* Using vector move with mask register.  */
> > +      cmp = force_reg (cmpmode, cmp);
> > +      /* Optimize for mask zero.  */
> > +      op_true =
> > +     op_true != CONST0_RTX (mode) ? force_reg (mode, op_true) : op_true;
> > +      op_false =
> > +     op_false != CONST0_RTX (mode) ? force_reg (mode, op_false) : op_false;
>
> The above two still aren't correct, = doesn't belong at the end of line
> either.
>
>       op_true
>         = op_true != CONST0_RTX (mode) ? force_reg (mode, op_true) : op_true;
>
> would be ok,
>
>       op_false
>         = op_false != CONST0_RTX (mode) ? force_reg (mode, op_false) : op_false;
>
> is too long, so e.g.
>
>       op_false = (op_false != CONST0_RTX (mode)
>                   ? force_reg (mode, op_false) : op_false);
>
> > +       /* Reverse op_true op_false.  */
> > +       n = op_true;
> > +       op_true = op_false;
> > +       op_false = n;
>
> Please use
>           std::swap (op_true, op_false);
> instead of the above 3 lines.
>
> Also, can you please add at least one testcase for this with -masm=intel,
> effective target masm_intel and dg-do assemble to make sure it assembles?
> Perhaps just one -mavx512vl -mavx512bw avx512vl/avx512bw effective target that
> tests all the patterns?
>
Yes, avx512vl-pr92686-vpcmp-intelasm-1.c,
avx512bw-pr92686-vpcmp-intelasm-1.c are added.
> Ok with those changes.
>
>         Jakub
>
Committed, thanks.
diff mbox series

Patch

From 103d31db47dacc5bba9c85389c61f69293f83695 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Thu, 28 Nov 2019 14:12:31 +0800
Subject: [PATCH] Enable mask movement for VCOND_EXPR under avx512f for
 128/256-bit vector when integer mask is available.

Changelog
gcc/
	PR target/92686
	* config/i386/sse.md
	(*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>,
	*<avx512>_cmp<mode>3<mask_scalar_merge_name>,
	*<avx512>_ucmp<mode>3<mask_scalar_merge_name>,
	*<avx512>_ucmp<mode>3<mask_scalar_merge_name>): New.
	* config/i386/i386.c (ix86_print_operand): New operand substitution.
	* config/i386/i386-expand.c (ix86_valid_mask_cmp_mode):
	New function.
	(ix86_expand_sse_cmp): Relax condition for integer mask from
	512-bit vector to all 128/256/512-bit vector. Delete code gen
	for avx512f compare patterns since we have generic pattern now.
	(ix86_expand_sse_movcc): Adjust condition and codegen for
	maskcmp.
	(ix86_expand_int_sse_cmp): Don't canonicalize the comparison
	when corresponding vector compare is available.

gcc/testsuite/
	* gcc.target/i386/pr92686.inc: New file.
	* gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: New test.
	* gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto.
	* gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto.
	* gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto.
	* gcc.target/i386/avx512bw-pr92686-movcc-1.c: Ditto.
	* gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto.
	* gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto.
	* gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto.
	* gcc.target/i386/avx512vl-pr88547-1.c: Adjust testcase.
	* gcc.target/i386/pr88547-1.c: Ditto.
---
 gcc/config/i386/i386-expand.c                 | 172 ++++++----------
 gcc/config/i386/i386.c                        |  32 +++
 gcc/config/i386/sse.md                        |  48 +++++
 .../i386/avx512bw-pr92686-movcc-1.c           | 133 ++++++++++++
 .../i386/avx512bw-pr92686-movcc-2.c           | 102 ++++++++++
 .../i386/avx512bw-pr92686-vpcmp-1.c           | 112 +++++++++++
 .../i386/avx512bw-pr92686-vpcmp-2.c           |  90 +++++++++
 .../gcc.target/i386/avx512vl-pr88547-1.c      |   8 +-
 .../i386/avx512vl-pr92686-movcc-1.c           | 133 ++++++++++++
 .../i386/avx512vl-pr92686-movcc-2.c           | 102 ++++++++++
 .../i386/avx512vl-pr92686-vpcmp-1.c           | 112 +++++++++++
 .../i386/avx512vl-pr92686-vpcmp-2.c           |  91 +++++++++
 gcc/testsuite/gcc.target/i386/pr88547-1.c     |  16 +-
 gcc/testsuite/gcc.target/i386/pr92686.inc     | 189 ++++++++++++++++++
 14 files changed, 1212 insertions(+), 128 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92686.inc

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 1ff1153e105..92dfaeaeb72 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3422,6 +3422,30 @@  ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
   return true;
 }
 
+/* Return true if MODE is valid for vector compare to mask register,
+   Same result for conditional vector move with mask register.  */
+static bool
+ix86_valid_mask_cmp_mode (machine_mode mode)
+{
+  /* XOP has its own vector conditional movement.  */
+  if (TARGET_XOP)
+    return false;
+
+  /* AVX512F is needed for mask operation.  */
+  if (!(TARGET_AVX512F && VECTOR_MODE_P (mode)))
+    return false;
+
+  /* AVX512BW is needed for vector QI/HImode,
+     AVX512VL is needed for 128/256-bit vector.  */
+  machine_mode inner_mode = GET_MODE_INNER (mode);
+  int vector_size = GET_MODE_SIZE (mode);
+  if ((inner_mode == QImode || inner_mode == HImode)
+      && !TARGET_AVX512BW)
+    return false;
+
+  return vector_size == 64 || TARGET_AVX512VL;
+}
+
 /* Expand an SSE comparison.  Return the register with the result.  */
 
 static rtx
@@ -3438,11 +3462,13 @@  ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
   bool maskcmp = false;
   rtx x;
 
-  if (GET_MODE_SIZE (cmp_ops_mode) == 64)
+  if (ix86_valid_mask_cmp_mode (cmp_ops_mode))
     {
       unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
-      cmp_mode = int_mode_for_size (nbits, 0).require ();
       maskcmp = true;
+      cmp_mode = nbits > 8 ?
+	int_mode_for_size (nbits, 0).require ()
+	: E_QImode;
     }
   else
     cmp_mode = cmp_ops_mode;
@@ -3461,37 +3487,6 @@  ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
       || (op_false && reg_overlap_mentioned_p (dest, op_false)))
     dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
 
-  /* Compare patterns for int modes are unspec in AVX512F only.  */
-  if (maskcmp && (code == GT || code == EQ))
-    {
-      rtx (*gen)(rtx, rtx, rtx);
-
-      switch (cmp_ops_mode)
-	{
-	case E_V64QImode:
-	  gcc_assert (TARGET_AVX512BW);
-	  gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
-	  break;
-	case E_V32HImode:
-	  gcc_assert (TARGET_AVX512BW);
-	  gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
-	  break;
-	case E_V16SImode:
-	  gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
-	  break;
-	case E_V8DImode:
-	  gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
-	  break;
-	default:
-	  gen = NULL;
-	}
-
-      if (gen)
-	{
-	  emit_insn (gen (dest, cmp_op0, cmp_op1));
-	  return dest;
-	}
-    }
   x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
 
   if (cmp_mode != mode && !maskcmp)
@@ -3515,7 +3510,7 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
   machine_mode cmpmode = GET_MODE (cmp);
 
   /* In AVX512F the result of comparison is an integer mask.  */
-  bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
+  bool maskcmp = ((mode != cmpmode) && ix86_valid_mask_cmp_mode (mode));
 
   rtx t2, t3, x;
 
@@ -3529,85 +3524,38 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 
   if (maskcmp)
     {
-      rtx (*gen) (rtx, rtx) = NULL;
-      if ((op_true == CONST0_RTX (mode)
-	   && vector_all_ones_operand (op_false, mode))
-	  || (op_false == CONST0_RTX (mode)
-	      && vector_all_ones_operand (op_true, mode)))
-	switch (mode)
-	  {
-	  case E_V64QImode:
-	    if (TARGET_AVX512BW)
-	      gen = gen_avx512bw_cvtmask2bv64qi;
-	    break;
-	  case E_V32QImode:
-	    if (TARGET_AVX512VL && TARGET_AVX512BW)
-	      gen = gen_avx512vl_cvtmask2bv32qi;
-	    break;
-	  case E_V16QImode:
-	    if (TARGET_AVX512VL && TARGET_AVX512BW)
-	      gen = gen_avx512vl_cvtmask2bv16qi;
-	    break;
-	  case E_V32HImode:
-	    if (TARGET_AVX512BW)
-	      gen = gen_avx512bw_cvtmask2wv32hi;
-	    break;
-	  case E_V16HImode:
-	    if (TARGET_AVX512VL && TARGET_AVX512BW)
-	      gen = gen_avx512vl_cvtmask2wv16hi;
-	    break;
-	  case E_V8HImode:
-	    if (TARGET_AVX512VL && TARGET_AVX512BW)
-	      gen = gen_avx512vl_cvtmask2wv8hi;
-	    break;
-	  case E_V16SImode:
-	    if (TARGET_AVX512DQ)
-	      gen = gen_avx512f_cvtmask2dv16si;
-	    break;
-	  case E_V8SImode:
-	    if (TARGET_AVX512VL && TARGET_AVX512DQ)
-	      gen = gen_avx512vl_cvtmask2dv8si;
-	    break;
-	  case E_V4SImode:
-	    if (TARGET_AVX512VL && TARGET_AVX512DQ)
-	      gen = gen_avx512vl_cvtmask2dv4si;
-	    break;
-	  case E_V8DImode:
-	    if (TARGET_AVX512DQ)
-	      gen = gen_avx512f_cvtmask2qv8di;
-	    break;
-	  case E_V4DImode:
-	    if (TARGET_AVX512VL && TARGET_AVX512DQ)
-	      gen = gen_avx512vl_cvtmask2qv4di;
-	    break;
-	  case E_V2DImode:
-	    if (TARGET_AVX512VL && TARGET_AVX512DQ)
-	      gen = gen_avx512vl_cvtmask2qv2di;
-	    break;
-	  default:
-	    break;
-	  }
-      if (gen && SCALAR_INT_MODE_P (cmpmode))
-	{
-	  cmp = force_reg (cmpmode, cmp);
-	  if (op_true == CONST0_RTX (mode))
+      /* Using vector move with mask register.  */
+      cmp = force_reg (cmpmode, cmp);
+      /* Optimize for mask zero.  */
+      op_true = op_true != CONST0_RTX (mode)
+	? force_reg (mode, op_true)
+	: op_true;
+      op_false = op_false != CONST0_RTX (mode)
+	? force_reg (mode, op_false)
+	: op_false;
+      if (op_true == CONST0_RTX (mode))
+	{
+	  rtx (*gen_not) (rtx, rtx);
+	  switch (cmpmode)
 	    {
-	      rtx (*gen_not) (rtx, rtx);
-	      switch (cmpmode)
-		{
-		case E_QImode: gen_not = gen_knotqi; break;
-		case E_HImode: gen_not = gen_knothi; break;
-		case E_SImode: gen_not = gen_knotsi; break;
-		case E_DImode: gen_not = gen_knotdi; break;
-		default: gcc_unreachable ();
-		}
-	      rtx n = gen_reg_rtx (cmpmode);
-	      emit_insn (gen_not (n, cmp));
-	      cmp = n;
+	    case E_QImode: gen_not = gen_knotqi; break;
+	    case E_HImode: gen_not = gen_knothi; break;
+	    case E_SImode: gen_not = gen_knotsi; break;
+	    case E_DImode: gen_not = gen_knotdi; break;
+	    default: gcc_unreachable ();
 	    }
-	  emit_insn (gen (dest, cmp));
-	  return;
+	  rtx n = gen_reg_rtx (cmpmode);
+	  emit_insn (gen_not (n, cmp));
+	  cmp = n;
+	  /* Reverse op_true op_false.  */
+	  n = op_true;
+	  op_true = op_false;
+	  op_false = n;
 	}
+
+      rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
+      emit_insn (gen_rtx_SET (dest, vec_merge));
+      return;
     }
   else if (vector_all_ones_operand (op_true, mode)
 	   && op_false == CONST0_RTX (mode))
@@ -4068,6 +4016,10 @@  ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
       && (mode == V16QImode || mode == V8HImode
 	  || mode == V4SImode || mode == V2DImode))
     ;
+  /* AVX512F supports all of the comparisons
+     on all 128/256/512-bit vector int types.  */
+  else if (ix86_valid_mask_cmp_mode (mode))
+    ;
   else
     {
       /* Canonicalize the comparison to EQ, GT, GTU.  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 04cbbd532c0..3f1ac79f31a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12468,6 +12468,38 @@  ix86_print_operand (FILE *file, rtx x, int code)
 	    }
 	  return;
 
+	case 'I':
+	  switch (GET_CODE (x))
+	    {
+	    case EQ:
+	      fputs ("$0", file);
+	      break;
+	    case NE:
+	      fputs ("$4", file);
+	      break;
+	    case GE:
+	    case GEU:
+	      fputs ("$5", file);
+	      break;
+	    case GT:
+	    case GTU:
+	      fputs ("$6", file);
+	      break;
+	    case LE:
+	    case LEU:
+	      fputs ("$2", file);
+	      break;
+	    case LT:
+	    case LTU:
+	      fputs ("$1", file);
+	      break;
+	    default:
+	      output_operand_lossage ("operand is not a condition code, "
+				      "invalid operand code 'I'");
+	      return;
+	    }
+	  return;
+
 	case 'Y':
 	  switch (GET_CODE (x))
 	    {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fb43cafaad0..bbceb8b83ad 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3050,6 +3050,18 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
+	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")]))]
+  "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
+  "vpcmp<ssemodesuffix>\t{%I3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %I3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
 	(unspec:<avx512fmaskmode>
@@ -3064,6 +3076,18 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name>"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
+	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))]
+  "TARGET_AVX512BW"
+  "vpcmp<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
 	(unspec:<avx512fmaskmode>
@@ -3078,6 +3102,18 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "*<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(match_operator:<avx512fmaskmode> 3 "ix86_comparison_uns_operator"
+	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))]
+  "TARGET_AVX512BW"
+  "vpcmpu<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
 	(unspec:<avx512fmaskmode>
@@ -3092,6 +3128,18 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "*<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(match_operator:<avx512fmaskmode> 3 "ix86_comparison_uns_operator"
+	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]))]
+  "TARGET_AVX512F"
+  "vpcmpu<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
 	(and:<avx512fmaskmode>
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c
new file mode 100644
index 00000000000..2a89077ed03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c
@@ -0,0 +1,133 @@ 
+/* PR target/92686 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+
+__attribute__((noipa)) void
+f1 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
+{
+  for (int i = 0; i != 64; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f2 (unsigned char *__restrict dst, unsigned char *__restrict src1,
+    unsigned char *__restrict src2)
+{
+  for (int i = 0; i != 64; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f3 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
+{
+  for (int i = 0; i != 64; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f4 (unsigned char *__restrict dst, unsigned char *__restrict src1,
+    unsigned char *__restrict src2)
+{
+  for (int i = 0; i != 64; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f5 (short *__restrict dst, short *__restrict src1, short *__restrict src2)
+{
+  for (int i = 0; i != 32; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f6 (unsigned short *__restrict dst, unsigned short *__restrict src1,
+    unsigned short *__restrict src2)
+{
+  for (int i = 0; i != 32; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f7 (short *__restrict dst, short *__restrict src1, short *__restrict src2)
+{
+  for (int i = 0; i != 32; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f8 (unsigned short *__restrict dst, unsigned short *__restrict src1,
+    unsigned short *__restrict src2)
+{
+  for (int i = 0; i != 32; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f9 (int *__restrict dst, int *__restrict src1, int *__restrict src2)
+{
+  for (int i = 0; i != 16; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f10 (unsigned int *__restrict dst, unsigned int *__restrict src1,
+     unsigned int *__restrict src2)
+{
+  for (int i = 0; i != 16; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f11 (int *__restrict dst, int *__restrict src1, int *__restrict src2)
+{
+  for (int i = 0; i != 16; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f12 (unsigned int *__restrict dst, unsigned int *__restrict src1,
+     unsigned int *__restrict src2)
+{
+  for (int i = 0; i != 16; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f13 (long long int *__restrict dst, long long int *__restrict src1,
+     long long int *__restrict src2)
+{
+  for (int i = 0; i != 8; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f14 (unsigned long long int *__restrict dst,
+     unsigned long long int *__restrict src1,
+     unsigned long long int *__restrict src2)
+{
+  for (int i = 0; i != 8; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f15 (long long int *__restrict dst, long long int *__restrict src1,
+     long long int *__restrict src2)
+{
+  for (int i = 0; i != 8; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f16 (unsigned long long int *__restrict dst,
+     unsigned long long int *__restrict src1,
+     unsigned long long int *__restrict src2)
+{
+  for (int i = 0; i != 8; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c
new file mode 100644
index 00000000000..53a7da12329
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c
@@ -0,0 +1,102 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512bw_test
+#endif
+
+#include "avx512bw-pr92686-movcc-1.c"
+#include "pr92686.inc"
+
+#define NUM 512
+
+
+#define TEST_SIGNED(vtype, type, N, fn, fn2, op)		\
+do								\
+  {								\
+    type dst[NUM], src1[NUM], src2[NUM];			\
+    int i, j,  sign = 1;					\
+    type res[N];						\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	src1[i] = i * i * sign;					\
+	src2[i] = (i + 20) * sign;				\
+	dst[i] = i * i * i + 100;				\
+	sign = -sign;						\
+      }								\
+    for (i = 0; i < NUM; i += N)				\
+      {								\
+	for (j = 0; j < N; j++)					\
+	  res[j] = dst[i + j];					\
+	fn (&dst[i], &src1[i], &src2[i]);			\
+	for (j = 0; j < N; j++)					\
+	  {							\
+	    res[j] = fn2 (res[j], src1[i + j],			\
+			  src2[i+ j], op);			\
+	    if (res[j] != dst[i+ j])				\
+	      abort();						\
+	  }							\
+      }								\
+  }								\
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, fn2, op)		\
+do								\
+  {								\
+    type dst[NUM], src1[NUM], src2[NUM];			\
+    int i,j;							\
+    type res[N];						\
+								\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	src1[i] = i * i;					\
+	src2[i] = i + 20;					\
+	dst[i] = i * i * i + 100;				\
+	if ((i % 4))						\
+	  src2[i] |= (1ULL << (sizeof (type)			\
+				 * __CHAR_BIT__ - 1));		\
+      }								\
+    for (i = 0; i < NUM; i += N)				\
+      {								\
+	for (j = 0; j < N; j++)					\
+	  res[j] = dst[i + j];					\
+	fn (&dst[i], &src1[i], &src2[i]);			\
+	for (j = 0; j < N; j++)					\
+	  {							\
+	    res[j] = fn2 (res[j], src1[i + j],			\
+			  src2[i + j], op);			\
+	    if (res[j] != dst[i + j])				\
+	      abort();						\
+	  }							\
+      }								\
+  }								\
+while (0)
+
+static void
+TEST (void)
+{
+  TEST_SIGNED (v64qi, signed char, 64, f1, cmpb, 5);
+  TEST_UNSIGNED (v64uqi, unsigned char, 64, f2, cmpub, 5);
+  TEST_SIGNED (v64qi, signed char, 64, f3, cmpb, 2);
+  TEST_UNSIGNED (v64uqi, unsigned char, 64, f4, cmpub, 2);
+  TEST_SIGNED (v32hi, short int, 32, f5, cmpw, 5);
+  TEST_UNSIGNED (v32uhi, unsigned short int, 32, f6, cmpuw, 5);
+  TEST_SIGNED (v32hi, short int, 32, f7, cmpw, 2);
+  TEST_UNSIGNED (v32uhi, unsigned short int, 32, f8, cmpuw, 2);
+  TEST_SIGNED (v16si, int, 16, f9, cmpd, 5);
+  TEST_UNSIGNED (v16usi, unsigned int, 16, f10, cmpud, 5);
+  TEST_SIGNED (v16si, int, 16, f11, cmpd, 2);
+  TEST_UNSIGNED (v16usi, unsigned int, 16, f12, cmpud, 2);
+  TEST_SIGNED (v8di, long long int, 8, f13, cmpq, 5);
+  TEST_UNSIGNED (v8udi, unsigned long long int, 8, f14, cmpuq, 5);
+  TEST_SIGNED (v8di, long long int, 8, f15, cmpq, 2);
+  TEST_UNSIGNED (v8udi, unsigned long long int, 8, f16, cmpuq, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c
new file mode 100644
index 00000000000..4fd3b369d0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c
@@ -0,0 +1,112 @@ 
+/* PR target/92686 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop" } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[\t ]" 8 } } */
+
+typedef char v64qi __attribute__((vector_size(64)));
+typedef unsigned char v64uqi __attribute__((vector_size(64)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef unsigned short v32uhi __attribute__((vector_size(64)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef unsigned v16usi __attribute__((vector_size(64)));
+typedef long long v8di __attribute__((vector_size(64)));
+typedef unsigned long long v8udi __attribute__((vector_size(64)));
+
+__attribute__((noipa)) v64qi
+f1 (v64qi x, v64qi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v64uqi
+f2 (v64uqi x, v64uqi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v64qi
+f3 (v64qi x, v64qi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v64uqi
+f4 (v64uqi x, v64uqi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v32hi
+f5 (v32hi x, v32hi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v32uhi
+f6 (v32uhi x, v32uhi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v32hi
+f7 (v32hi x, v32hi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v32uhi
+f8 (v32uhi x, v32uhi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v16si
+f9 (v16si x, v16si y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v16usi
+f10 (v16usi x, v16usi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v16si
+f11 (v16si x, v16si y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v16usi
+f12 (v16usi x, v16usi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v8di
+f13 (v8di x, v8di y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v8udi
+f14 (v8udi x, v8udi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v8di
+f15 (v8di x, v8di y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v8udi
+f16 (v8udi x, v8udi y)
+{
+  return x <= y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c
new file mode 100644
index 00000000000..0ea5b56eccf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c
@@ -0,0 +1,90 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-options "-O2 -mavx512bw" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512bw_test
+#endif
+
+#include "avx512bw-pr92686-vpcmp-1.c"
+
+#define NUM 512
+
+#define TEST_SIGNED(vtype, type, N, fn, op) \
+do								\
+  {								\
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;	\
+    int i, sign = 1;						\
+    type res;							\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	src1.i[i] = i * i * sign;				\
+	src2.i[i] = (i + 20) * sign;				\
+	sign = -sign;						\
+      }								\
+    for (i = 0; i < NUM; i += N)				\
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);		\
+								\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	res = src1.i[i] op src2.i[i] ? -1 : 0;			\
+	if (res != dst.i[i])					\
+	  abort ();						\
+      }								\
+  }								\
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, op) \
+do								\
+  {								\
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;	\
+    int i;							\
+    type res;							\
+								\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	src1.i[i] = i * i;					\
+	src2.i[i] = i + 20;					\
+	if ((i % 4))						\
+	  src2.i[i] |= (1ULL << (sizeof (type)			\
+				 * __CHAR_BIT__ - 1));		\
+      }								\
+								\
+    for (i = 0; i < NUM; i += N)				\
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);		\
+								\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	res = src1.i[i] op src2.i[i] ? -1 : 0;			\
+	if (res != dst.i[i])					\
+	  abort ();						\
+      }								\
+  }								\
+while (0)
+
+static void
+TEST (void)
+{
+  TEST_SIGNED (v64qi, signed char, 64, f1, >=);
+  TEST_UNSIGNED (v64uqi, unsigned char, 64, f2, >=);
+  TEST_SIGNED (v64qi, signed char, 64, f3, <=);
+  TEST_UNSIGNED (v64uqi, unsigned char, 64, f4, <=);
+  TEST_SIGNED (v32hi, short int, 32, f5, >=);
+  TEST_UNSIGNED (v32uhi, unsigned short int, 32, f6, >=);
+  TEST_SIGNED (v32hi, short int, 32, f7, <=);
+  TEST_UNSIGNED (v32uhi, unsigned short int, 32, f8, <=);
+  TEST_SIGNED (v16si, int, 16, f9, >=);
+  TEST_UNSIGNED (v16usi, unsigned int, 16, f10, >=);
+  TEST_SIGNED (v16si, int, 16, f11, <=);
+  TEST_UNSIGNED (v16usi, unsigned int, 16, f12, <=);
+  TEST_SIGNED (v8di, long long int, 8, f13, >=);
+  TEST_UNSIGNED (v8udi, unsigned long long int, 8, f14, >=);
+  TEST_SIGNED (v8di, long long int, 8, f15, <=);
+  TEST_UNSIGNED (v8udi, unsigned long long int, 8, f16, <=);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c
index aa64dc2f06e..a3ffeca4354 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c
@@ -6,9 +6,7 @@ 
 /* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */
 /* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */
 /* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminsq\[\t ]" 2 } } */
-
+/* { dg-final { scan-assembler-times "vpcmp\[dq\]\[\t ]" 4 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[dq\]\[\t ]" 4 } } */
+/* { dg-final { scan-assembler-times "vpternlog\[qd\]\[\t ]" 8 } } */
 #include "avx2-pr88547-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c
new file mode 100644
index 00000000000..1b9644a3790
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c
@@ -0,0 +1,133 @@ 
+/* PR target/92686 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512bw -mavx512vl -mno-xop -mprefer-vector-width=256" } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+
+__attribute__((noipa)) void
+f1 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
+{
+  for (int i = 0; i != 32; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f2 (unsigned char *__restrict dst, unsigned char *__restrict src1,
+    unsigned char *__restrict src2)
+{
+  for (int i = 0; i != 32; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f3 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
+{
+  for (int i = 0; i != 32; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f4 (unsigned char *__restrict dst, unsigned char *__restrict src1,
+    unsigned char *__restrict src2)
+{
+  for (int i = 0; i != 32; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f5 (short *__restrict dst, short *__restrict src1, short *__restrict src2)
+{
+  for (int i = 0; i != 16; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f6 (unsigned short *__restrict dst, unsigned short *__restrict src1,
+    unsigned short *__restrict src2)
+{
+  for (int i = 0; i != 16; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f7 (short *__restrict dst, short *__restrict src1, short *__restrict src2)
+{
+  for (int i = 0; i != 16; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f8 (unsigned short *__restrict dst, unsigned short *__restrict src1,
+    unsigned short *__restrict src2)
+{
+  for (int i = 0; i != 16; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f9 (int *__restrict dst, int *__restrict src1, int *__restrict src2)
+{
+  for (int i = 0; i != 8; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f10 (unsigned int *__restrict dst, unsigned int *__restrict src1,
+     unsigned int *__restrict src2)
+{
+  for (int i = 0; i != 8; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f11 (int *__restrict dst, int *__restrict src1, int *__restrict src2)
+{
+  for (int i = 0; i != 8; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f12 (unsigned int *__restrict dst, unsigned int *__restrict src1,
+     unsigned int *__restrict src2)
+{
+  for (int i = 0; i != 8; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f13 (long long int *__restrict dst, long long int *__restrict src1,
+     long long int *__restrict src2)
+{
+  for (int i = 0; i != 4; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f14 (unsigned long long int *__restrict dst,
+     unsigned long long int *__restrict src1,
+     unsigned long long int *__restrict src2)
+{
+  for (int i = 0; i != 4; i++)
+    dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f15 (long long int *__restrict dst, long long int *__restrict src1,
+     long long int *__restrict src2)
+{
+  for (int i = 0; i != 4; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f16 (unsigned long long int *__restrict dst,
+     unsigned long long int *__restrict src1,
+     unsigned long long int *__restrict src2)
+{
+  for (int i = 0; i != 4; i++)
+    dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c
new file mode 100644
index 00000000000..5f5562b98f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c
@@ -0,0 +1,102 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512vl_test
+#endif
+
+#include "avx512vl-pr92686-movcc-1.c"
+#include "pr92686.inc"
+
+#define NUM 256
+
+
+#define TEST_SIGNED(vtype, type, N, fn, fn2, op)		\
+do								\
+  {								\
+    type dst[NUM], src1[NUM], src2[NUM];			\
+    int i, j,  sign = 1;					\
+    type res[N];						\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	src1[i] = i * i * sign;					\
+	src2[i] = (i + 20) * sign;				\
+	dst[i] = i * i * i + 100;				\
+	sign = -sign;						\
+      }								\
+    for (i = 0; i < NUM; i += N)				\
+      {								\
+	for (j = 0; j < N; j++)					\
+	  res[j] = dst[i + j];					\
+	fn (&dst[i], &src1[i], &src2[i]);			\
+	for (j = 0; j < N; j++)					\
+	  {							\
+	    res[j] = fn2 (res[j], src1[i + j],			\
+			  src2[i+ j], op);			\
+	    if (res[j] != dst[i+ j])				\
+	      abort();						\
+	  }							\
+      }								\
+  }								\
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, fn2, op)		\
+do								\
+  {								\
+    type dst[NUM], src1[NUM], src2[NUM];			\
+    int i,j;							\
+    type res[N];						\
+								\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	src1[i] = i * i;					\
+	src2[i] = i + 20;					\
+	dst[i] = i * i * i + 100;				\
+	if ((i % 4))						\
+	  src2[i] |= (1ULL << (sizeof (type)			\
+				 * __CHAR_BIT__ - 1));		\
+      }								\
+    for (i = 0; i < NUM; i += N)				\
+      {								\
+	for (j = 0; j < N; j++)					\
+	  res[j] = dst[i + j];					\
+	fn (&dst[i], &src1[i], &src2[i]);			\
+	for (j = 0; j < N; j++)					\
+	  {							\
+	    res[j] = fn2 (res[j], src1[i + j],			\
+			  src2[i + j], op);			\
+	    if (res[j] != dst[i + j])				\
+	      abort();						\
+	  }							\
+      }								\
+  }								\
+while (0)
+
+static void
+TEST (void)
+{
+  TEST_SIGNED (v32qi, signed char, 32, f1, cmpb, 5);
+  TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, cmpub, 5);
+  TEST_SIGNED (v32qi, signed char, 32, f3, cmpb, 2);
+  TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, cmpub, 2);
+  TEST_SIGNED (v16hi, short int, 16, f5, cmpw, 5);
+  TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, cmpuw, 5);
+  TEST_SIGNED (v16hi, short int, 16, f7, cmpw, 2);
+  TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, cmpuw, 2);
+  TEST_SIGNED (v8si, int, 8, f9, cmpd, 5);
+  TEST_UNSIGNED (v8usi, unsigned int, 8, f10, cmpud, 5);
+  TEST_SIGNED (v8si, int, 8, f11, cmpd, 2);
+  TEST_UNSIGNED (v8usi, unsigned int, 8, f12, cmpud, 2);
+  TEST_SIGNED (v4di, long long int, 4, f13, cmpq, 5);
+  TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, cmpuq, 5);
+  TEST_SIGNED (v4di, long long int, 4, f15, cmpq, 2);
+  TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, cmpuq, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c
new file mode 100644
index 00000000000..5b79d4d36f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c
@@ -0,0 +1,112 @@ 
+/* PR target/92686 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop" } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[\t ]" 8 } } */
+
+typedef signed char v32qi __attribute__((vector_size(32)));
+typedef unsigned char v32uqi __attribute__((vector_size(32)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef unsigned short v16uhi __attribute__((vector_size(32)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef unsigned v8usi __attribute__((vector_size(32)));
+typedef long long v4di __attribute__((vector_size(32)));
+typedef unsigned long long v4udi __attribute__((vector_size(32)));
+
+__attribute__((noipa)) v32qi
+f1 (v32qi x, v32qi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v32uqi
+f2 (v32uqi x, v32uqi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v32qi
+f3 (v32qi x, v32qi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v32uqi
+f4 (v32uqi x, v32uqi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v16hi
+f5 (v16hi x, v16hi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v16uhi
+f6 (v16uhi x, v16uhi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v16hi
+f7 (v16hi x, v16hi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v16uhi
+f8 (v16uhi x, v16uhi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v8si
+f9 (v8si x, v8si y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v8usi
+f10 (v8usi x, v8usi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v8si
+f11 (v8si x, v8si y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v8usi
+f12 (v8usi x, v8usi y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v4di
+f13 (v4di x, v4di y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v4udi
+f14 (v4udi x, v4udi y)
+{
+  return x >= y;
+}
+
+__attribute__((noipa)) v4di
+f15 (v4di x, v4di y)
+{
+  return x <= y;
+}
+
+__attribute__((noipa)) v4udi
+f16 (v4udi x, v4udi y)
+{
+  return x <= y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c
new file mode 100644
index 00000000000..6be24ff30f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c
@@ -0,0 +1,91 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512vl_test
+#endif
+
+#include "avx512vl-pr92686-vpcmp-1.c"
+
+#define NUM 256
+
+#define TEST_SIGNED(vtype, type, N, fn, op) \
+do								\
+  {								\
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;	\
+    int i, sign = 1;						\
+    type res;							\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	src1.i[i] = i * i * sign;				\
+	src2.i[i] = (i + 20) * sign;				\
+	sign = -sign;						\
+      }								\
+    for (i = 0; i < NUM; i += N)				\
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);		\
+								\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	res = src1.i[i] op src2.i[i] ? -1 : 0;			\
+	if (res != dst.i[i])					\
+	  abort ();						\
+      }								\
+  }								\
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, op) \
+do								\
+  {								\
+    union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2;	\
+    int i;							\
+    type res;							\
+								\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	src1.i[i] = i * i;					\
+	src2.i[i] = i + 20;					\
+	if ((i % 4))						\
+	  src2.i[i] |= (1ULL << (sizeof (type)			\
+				 * __CHAR_BIT__ - 1));		\
+      }								\
+								\
+    for (i = 0; i < NUM; i += N)				\
+      dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]);		\
+								\
+    for (i = 0; i < NUM; i++)					\
+      {								\
+	res = src1.i[i] op src2.i[i] ? -1 : 0;			\
+	if (res != dst.i[i])					\
+	  abort ();						\
+      }								\
+  }								\
+while (0)
+
+static void
+TEST (void)
+{
+  TEST_SIGNED (v32qi, signed char, 32, f1, >=);
+  TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, >=);
+  TEST_SIGNED (v32qi, signed char, 32, f3, <=);
+  TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, <=);
+  TEST_SIGNED (v16hi, short int, 16, f5, >=);
+  TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, >=);
+  TEST_SIGNED (v16hi, short int, 16, f7, <=);
+  TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, <=);
+  TEST_SIGNED (v8si, int, 8, f9, >=);
+  TEST_UNSIGNED (v8usi, unsigned int, 8, f10, >=);
+  TEST_SIGNED (v8si, int, 8, f11, <=);
+  TEST_UNSIGNED (v8usi, unsigned int, 8, f12, <=);
+  TEST_SIGNED (v4di, long long int, 4, f13, >=);
+  TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, >=);
+  TEST_SIGNED (v4di, long long int, 4, f15, <=);
+  TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, <=);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88547-1.c b/gcc/testsuite/gcc.target/i386/pr88547-1.c
index b6c82157bba..fa6832c7fab 100644
--- a/gcc/testsuite/gcc.target/i386/pr88547-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr88547-1.c
@@ -1,19 +1,9 @@ 
 /* PR target/88547 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512dq" } */
-/* { dg-final { scan-assembler-not "vpternlog" } } */
-/* { dg-final { scan-assembler-times "vpmovm2b\[\t  ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpmovm2w\[\t  ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpmovm2d\[\t  ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpmovm2q\[\t  ]" 4 } } */
-/* { dg-final { scan-assembler-times "knotb\[\t  ]" 2 } } */
-/* { dg-final { scan-assembler-times "knotw\[\t  ]" 2 } } */
-/* { dg-final { scan-assembler-times "knotd\[\t  ]" 2 } } */
-/* { dg-final { scan-assembler-times "knotq\[\t  ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminud\[\t  ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminuq\[\t  ]" 2 } } */
-/* { dg-final { scan-assembler-not "vpsubd\[\t  ]" } } */
-/* { dg-final { scan-assembler-not "vpsubq\[\t  ]" } } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovm2\[bwdq\]\[\t ]" 16 } } */
 
 typedef signed char v64qi __attribute__((vector_size(64)));
 typedef unsigned char v64uqi __attribute__((vector_size(64)));
diff --git a/gcc/testsuite/gcc.target/i386/pr92686.inc b/gcc/testsuite/gcc.target/i386/pr92686.inc
new file mode 100644
index 00000000000..18db02083cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92686.inc
@@ -0,0 +1,189 @@ 
+/* Included by avx512bw-pr92686-movcc-2.c, avx512vl-pr92686-movcc-2.c.  */
+__attribute__((noipa)) char /* noipa: GCC does no interprocedural optimization (e.g. inlining) across calls.  */
+cmpb (char dst, char src1, char src2, int num) /* Return SRC1 if the comparison selected by NUM holds for SRC1 and SRC2, else DST.  */
+{
+  switch(num) /* NUM: 0 ==, 1 <, 2 <=, 4 !=, 5 >=, 6 >.  */
+    {
+    case 0:
+      return src1 == src2 ? src1 : dst;
+    case 1:
+      return src1 < src2 ? src1 : dst;
+    case 2:
+      return src1 <= src2 ? src1 : dst;
+    case 4:
+      return src1 != src2 ? src1 : dst;
+    case 5:
+      return src1 >= src2 ? src1 : dst;
+    case 6:
+      return src1 > src2 ? src1 : dst;
+    default:
+      abort(); /* Any other NUM (3 and 7 included) is rejected.  */
+    }
+  abort(); /* Not reached: every switch path above returns or aborts.  */
+}
+
+__attribute__((noipa)) unsigned char /* noipa: GCC does no interprocedural optimization (e.g. inlining) across calls.  */
+cmpub (unsigned char dst, unsigned char src1,
+		       unsigned char src2, int num) /* Return SRC1 if the comparison selected by NUM holds for SRC1 and SRC2, else DST.  */
+{
+  switch(num) /* NUM: 0 ==, 1 <, 2 <=, 4 !=, 5 >=, 6 >.  */
+    {
+    case 0:
+      return src1 == src2 ? src1 : dst;
+    case 1:
+      return src1 < src2 ? src1 : dst;
+    case 2:
+      return src1 <= src2 ? src1 : dst;
+    case 4:
+      return src1 != src2 ? src1 : dst;
+    case 5:
+      return src1 >= src2 ? src1 : dst;
+    case 6:
+      return src1 > src2 ? src1 : dst;
+    default:
+      abort(); /* Any other NUM (3 and 7 included) is rejected.  */
+    }
+  abort(); /* Not reached: every switch path above returns or aborts.  */
+}
+
+__attribute__((noipa)) short /* noipa: GCC does no interprocedural optimization (e.g. inlining) across calls.  */
+cmpw (short dst, short src1, short src2, int num) /* Return SRC1 if the comparison selected by NUM holds for SRC1 and SRC2, else DST.  */
+{
+  switch(num) /* NUM: 0 ==, 1 <, 2 <=, 4 !=, 5 >=, 6 >.  */
+    {
+    case 0:
+      return src1 == src2 ? src1 : dst;
+    case 1:
+      return src1 < src2 ? src1 : dst;
+    case 2:
+      return src1 <= src2 ? src1 : dst;
+    case 4:
+      return src1 != src2 ? src1 : dst;
+    case 5:
+      return src1 >= src2 ? src1 : dst;
+    case 6:
+      return src1 > src2 ? src1 : dst;
+    default:
+      abort(); /* Any other NUM (3 and 7 included) is rejected.  */
+    }
+  abort(); /* Not reached: every switch path above returns or aborts.  */
+}
+
+__attribute__((noipa)) unsigned short /* noipa: GCC does no interprocedural optimization (e.g. inlining) across calls.  */
+cmpuw (unsigned short dst, unsigned short src1,
+		       unsigned short src2, int num) /* Return SRC1 if the comparison selected by NUM holds for SRC1 and SRC2, else DST.  */
+{
+  switch(num) /* NUM: 0 ==, 1 <, 2 <=, 4 !=, 5 >=, 6 >.  */
+    {
+    case 0:
+      return src1 == src2 ? src1 : dst;
+    case 1:
+      return src1 < src2 ? src1 : dst;
+    case 2:
+      return src1 <= src2 ? src1 : dst;
+    case 4:
+      return src1 != src2 ? src1 : dst;
+    case 5:
+      return src1 >= src2 ? src1 : dst;
+    case 6:
+      return src1 > src2 ? src1 : dst;
+    default:
+      abort(); /* Any other NUM (3 and 7 included) is rejected.  */
+    }
+  abort(); /* Not reached: every switch path above returns or aborts.  */
+}
+
+__attribute__((noipa)) int /* noipa: GCC does no interprocedural optimization (e.g. inlining) across calls.  */
+cmpd (int dst, int src1, int src2, int num) /* Return SRC1 if the comparison selected by NUM holds for SRC1 and SRC2, else DST.  */
+{
+  switch(num) /* NUM: 0 ==, 1 <, 2 <=, 4 !=, 5 >=, 6 >.  */
+    {
+    case 0:
+      return src1 == src2 ? src1 : dst;
+    case 1:
+      return src1 < src2 ? src1 : dst;
+    case 2:
+      return src1 <= src2 ? src1 : dst;
+    case 4:
+      return src1 != src2 ? src1 : dst;
+    case 5:
+      return src1 >= src2 ? src1 : dst;
+    case 6:
+      return src1 > src2 ? src1 : dst;
+    default:
+      abort(); /* Any other NUM (3 and 7 included) is rejected.  */
+    }
+  abort(); /* Not reached: every switch path above returns or aborts.  */
+}
+
+__attribute__((noipa)) unsigned int /* noipa: GCC does no interprocedural optimization (e.g. inlining) across calls.  */
+cmpud (unsigned int dst, unsigned int src1,
+		       unsigned int src2, int num) /* Return SRC1 if the comparison selected by NUM holds for SRC1 and SRC2, else DST.  */
+{
+  switch(num) /* NUM: 0 ==, 1 <, 2 <=, 4 !=, 5 >=, 6 >.  */
+    {
+    case 0:
+      return src1 == src2 ? src1 : dst;
+    case 1:
+      return src1 < src2 ? src1 : dst;
+    case 2:
+      return src1 <= src2 ? src1 : dst;
+    case 4:
+      return src1 != src2 ? src1 : dst;
+    case 5:
+      return src1 >= src2 ? src1 : dst;
+    case 6:
+      return src1 > src2 ? src1 : dst;
+    default:
+      abort(); /* Any other NUM (3 and 7 included) is rejected.  */
+    }
+  abort(); /* Not reached: every switch path above returns or aborts.  */
+}
+
+__attribute__((noipa)) long long int /* noipa: GCC does no interprocedural optimization (e.g. inlining) across calls.  */
+cmpq (long long int dst, long long int src1,
+		       long long int src2, int num) /* Return SRC1 if the comparison selected by NUM holds for SRC1 and SRC2, else DST.  */
+{
+  switch(num) /* NUM: 0 ==, 1 <, 2 <=, 4 !=, 5 >=, 6 >.  */
+    {
+    case 0:
+      return src1 == src2 ? src1 : dst;
+    case 1:
+      return src1 < src2 ? src1 : dst;
+    case 2:
+      return src1 <= src2 ? src1 : dst;
+    case 4:
+      return src1 != src2 ? src1 : dst;
+    case 5:
+      return src1 >= src2 ? src1 : dst;
+    case 6:
+      return src1 > src2 ? src1 : dst;
+    default:
+      abort(); /* Any other NUM (3 and 7 included) is rejected.  */
+    }
+  abort(); /* Not reached: every switch path above returns or aborts.  */
+}
+
+__attribute__((noipa)) unsigned long long int /* noipa: GCC does no interprocedural optimization (e.g. inlining) across calls.  */
+cmpuq (unsigned long long int dst, unsigned long long int src1,
+       unsigned long long int src2, int num) /* Return SRC1 if the comparison selected by NUM holds for SRC1 and SRC2, else DST.  */
+{
+  switch(num) /* NUM: 0 ==, 1 <, 2 <=, 4 !=, 5 >=, 6 >.  */
+    {
+    case 0:
+      return src1 == src2 ? src1 : dst;
+    case 1:
+      return src1 < src2 ? src1 : dst;
+    case 2:
+      return src1 <= src2 ? src1 : dst;
+    case 4:
+      return src1 != src2 ? src1 : dst;
+    case 5:
+      return src1 >= src2 ? src1 : dst;
+    case 6:
+      return src1 > src2 ? src1 : dst;
+    default:
+      abort(); /* Any other NUM (3 and 7 included) is rejected.  */
+    }
+  abort(); /* Not reached: every switch path above returns or aborts.  */
+}
\ No newline at end of file
-- 
2.19.1