From 103d31db47dacc5bba9c85389c61f69293f83695 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Thu, 28 Nov 2019 14:12:31 +0800
Subject: [PATCH] Enable mask movement for VCOND_EXPR under avx512f for
128/256-bit vector when integer mask is available.
Changelog
gcc/
PR target/92686
* config/i386/sse.md
(*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>,
*<avx512>_cmp<mode>3<mask_scalar_merge_name>,
*<avx512>_ucmp<mode>3<mask_scalar_merge_name>,
*<avx512>_ucmp<mode>3<mask_scalar_merge_name>): New.
* config/i386/i386.c (ix86_print_operand): New operand substitution.
* config/i386/i386-expand.c (ix86_valid_mask_cmp_mode):
New function.
(ix86_expand_sse_cmp): Relax condition for integer mask from
512-bit vector to all 128/256/512-bit vector. Delete code gen
for avx512f compare patterns since we have generic pattern now.
(ix86_expand_sse_movcc): Adjust condition and codegen for
maskcmp.
(ix86_expand_int_sse_cmp): Don't canonicalize the comparison
when corresponding vector compare is available.
gcc/testsuite/
* gcc.target/i386/pr92686.inc: New file.
* gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: New test.
* gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto.
* gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto.
* gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto.
* gcc.target/i386/avx512bw-pr92686-movcc-1.c: Ditto.
* gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto.
* gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto.
* gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto.
* gcc.target/i386/avx512vl-pr88547-1.c: Adjust testcase.
* gcc.target/i386/pr88547-1.c: Ditto.
---
gcc/config/i386/i386-expand.c | 172 ++++++----------
gcc/config/i386/i386.c | 32 +++
gcc/config/i386/sse.md | 48 +++++
.../i386/avx512bw-pr92686-movcc-1.c | 133 ++++++++++++
.../i386/avx512bw-pr92686-movcc-2.c | 102 ++++++++++
.../i386/avx512bw-pr92686-vpcmp-1.c | 112 +++++++++++
.../i386/avx512bw-pr92686-vpcmp-2.c | 90 +++++++++
.../gcc.target/i386/avx512vl-pr88547-1.c | 8 +-
.../i386/avx512vl-pr92686-movcc-1.c | 133 ++++++++++++
.../i386/avx512vl-pr92686-movcc-2.c | 102 ++++++++++
.../i386/avx512vl-pr92686-vpcmp-1.c | 112 +++++++++++
.../i386/avx512vl-pr92686-vpcmp-2.c | 91 +++++++++
gcc/testsuite/gcc.target/i386/pr88547-1.c | 16 +-
gcc/testsuite/gcc.target/i386/pr92686.inc | 189 ++++++++++++++++++
14 files changed, 1212 insertions(+), 128 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr92686.inc
@@ -3422,6 +3422,30 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
return true;
}
+/* Return true if MODE is valid for vector compare to mask register,
+ i.e. a comparison on MODE may produce its result as an integer mask.
+ The same answer is used for conditional vector move with mask
+ register. */
+static bool
+ix86_valid_mask_cmp_mode (machine_mode mode)
+{
+ /* XOP has its own vector conditional movement. */
+ if (TARGET_XOP)
+ return false;
+
+ /* AVX512F is needed for mask operation; non-vector modes are
+ rejected here as well. */
+ if (!(TARGET_AVX512F && VECTOR_MODE_P (mode)))
+ return false;
+
+ /* AVX512BW is needed for vector QI/HImode,
+ AVX512VL is needed for 128/256-bit vector, i.e. for every
+ vector that is not 64 bytes wide. */
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ int vector_size = GET_MODE_SIZE (mode);
+ if ((inner_mode == QImode || inner_mode == HImode)
+ && !TARGET_AVX512BW)
+ return false;
+
+ return vector_size == 64 || TARGET_AVX512VL;
+}
+
/* Expand an SSE comparison. Return the register with the result. */
static rtx
@@ -3438,11 +3462,13 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
bool maskcmp = false;
rtx x;
- if (GET_MODE_SIZE (cmp_ops_mode) == 64)
+ if (ix86_valid_mask_cmp_mode (cmp_ops_mode))
{
unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
- cmp_mode = int_mode_for_size (nbits, 0).require ();
maskcmp = true;
+ cmp_mode = nbits > 8 ?
+ int_mode_for_size (nbits, 0).require ()
+ : E_QImode;
}
else
cmp_mode = cmp_ops_mode;
@@ -3461,37 +3487,6 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
|| (op_false && reg_overlap_mentioned_p (dest, op_false)))
dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
- /* Compare patterns for int modes are unspec in AVX512F only. */
- if (maskcmp && (code == GT || code == EQ))
- {
- rtx (*gen)(rtx, rtx, rtx);
-
- switch (cmp_ops_mode)
- {
- case E_V64QImode:
- gcc_assert (TARGET_AVX512BW);
- gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
- break;
- case E_V32HImode:
- gcc_assert (TARGET_AVX512BW);
- gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
- break;
- case E_V16SImode:
- gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
- break;
- case E_V8DImode:
- gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
- break;
- default:
- gen = NULL;
- }
-
- if (gen)
- {
- emit_insn (gen (dest, cmp_op0, cmp_op1));
- return dest;
- }
- }
x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
if (cmp_mode != mode && !maskcmp)
@@ -3515,7 +3510,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
machine_mode cmpmode = GET_MODE (cmp);
/* In AVX512F the result of comparison is an integer mask. */
- bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
+ bool maskcmp = ((mode != cmpmode) && ix86_valid_mask_cmp_mode (mode));
rtx t2, t3, x;
@@ -3529,85 +3524,38 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
if (maskcmp)
{
- rtx (*gen) (rtx, rtx) = NULL;
- if ((op_true == CONST0_RTX (mode)
- && vector_all_ones_operand (op_false, mode))
- || (op_false == CONST0_RTX (mode)
- && vector_all_ones_operand (op_true, mode)))
- switch (mode)
- {
- case E_V64QImode:
- if (TARGET_AVX512BW)
- gen = gen_avx512bw_cvtmask2bv64qi;
- break;
- case E_V32QImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_cvtmask2bv32qi;
- break;
- case E_V16QImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_cvtmask2bv16qi;
- break;
- case E_V32HImode:
- if (TARGET_AVX512BW)
- gen = gen_avx512bw_cvtmask2wv32hi;
- break;
- case E_V16HImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_cvtmask2wv16hi;
- break;
- case E_V8HImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_cvtmask2wv8hi;
- break;
- case E_V16SImode:
- if (TARGET_AVX512DQ)
- gen = gen_avx512f_cvtmask2dv16si;
- break;
- case E_V8SImode:
- if (TARGET_AVX512VL && TARGET_AVX512DQ)
- gen = gen_avx512vl_cvtmask2dv8si;
- break;
- case E_V4SImode:
- if (TARGET_AVX512VL && TARGET_AVX512DQ)
- gen = gen_avx512vl_cvtmask2dv4si;
- break;
- case E_V8DImode:
- if (TARGET_AVX512DQ)
- gen = gen_avx512f_cvtmask2qv8di;
- break;
- case E_V4DImode:
- if (TARGET_AVX512VL && TARGET_AVX512DQ)
- gen = gen_avx512vl_cvtmask2qv4di;
- break;
- case E_V2DImode:
- if (TARGET_AVX512VL && TARGET_AVX512DQ)
- gen = gen_avx512vl_cvtmask2qv2di;
- break;
- default:
- break;
- }
- if (gen && SCALAR_INT_MODE_P (cmpmode))
- {
- cmp = force_reg (cmpmode, cmp);
- if (op_true == CONST0_RTX (mode))
+ /* Use a vector move with mask register. */
+ cmp = force_reg (cmpmode, cmp);
+ /* Optimize for mask zero. */
+ op_true = op_true != CONST0_RTX (mode)
+ ? force_reg (mode, op_true)
+ : op_true;
+ op_false = op_false != CONST0_RTX (mode)
+ ? force_reg (mode, op_false)
+ : op_false;
+ if (op_true == CONST0_RTX (mode))
+ {
+ rtx (*gen_not) (rtx, rtx);
+ switch (cmpmode)
{
- rtx (*gen_not) (rtx, rtx);
- switch (cmpmode)
- {
- case E_QImode: gen_not = gen_knotqi; break;
- case E_HImode: gen_not = gen_knothi; break;
- case E_SImode: gen_not = gen_knotsi; break;
- case E_DImode: gen_not = gen_knotdi; break;
- default: gcc_unreachable ();
- }
- rtx n = gen_reg_rtx (cmpmode);
- emit_insn (gen_not (n, cmp));
- cmp = n;
+ case E_QImode: gen_not = gen_knotqi; break;
+ case E_HImode: gen_not = gen_knothi; break;
+ case E_SImode: gen_not = gen_knotsi; break;
+ case E_DImode: gen_not = gen_knotdi; break;
+ default: gcc_unreachable ();
}
- emit_insn (gen (dest, cmp));
- return;
+ rtx n = gen_reg_rtx (cmpmode);
+ emit_insn (gen_not (n, cmp));
+ cmp = n;
+ /* Swap op_true and op_false to match the inverted mask. */
+ n = op_true;
+ op_true = op_false;
+ op_false = n;
}
+
+ rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
+ emit_insn (gen_rtx_SET (dest, vec_merge));
+ return;
}
else if (vector_all_ones_operand (op_true, mode)
&& op_false == CONST0_RTX (mode))
@@ -4068,6 +4016,10 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
&& (mode == V16QImode || mode == V8HImode
|| mode == V4SImode || mode == V2DImode))
;
+ /* AVX512F supports all of the comparisons
+ on all 128/256/512-bit vector int types. */
+ else if (ix86_valid_mask_cmp_mode (mode))
+ ;
else
{
/* Canonicalize the comparison to EQ, GT, GTU. */
@@ -12468,6 +12468,38 @@ ix86_print_operand (FILE *file, rtx x, int code)
}
return;
+ case 'I':
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("$0", file);
+ break;
+ case NE:
+ fputs ("$4", file);
+ break;
+ case GE:
+ case GEU:
+ fputs ("$5", file);
+ break;
+ case GT:
+ case GTU:
+ fputs ("$6", file);
+ break;
+ case LE:
+ case LEU:
+ fputs ("$2", file);
+ break;
+ case LT:
+ case LTU:
+ fputs ("$1", file);
+ break;
+ default:
+ output_operand_lossage ("operand is not a condition code, "
+ "invalid operand code 'I'");
+ return;
+ }
+ return;
+
case 'Y':
switch (GET_CODE (x))
{
@@ -3050,6 +3050,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
+ [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")]))]
+ "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
+ "vpcmp<ssemodesuffix>\t{%I3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %I3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -3064,6 +3076,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
+ [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))]
+ "TARGET_AVX512BW"
+ "vpcmp<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -3078,6 +3102,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (match_operator:<avx512fmaskmode> 3 "ix86_comparison_uns_operator"
+ [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))]
+ "TARGET_AVX512BW"
+ "vpcmpu<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -3092,6 +3128,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (match_operator:<avx512fmaskmode> 3 "ix86_comparison_uns_operator"
+ [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]))]
+ "TARGET_AVX512F"
+ "vpcmpu<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(and:<avx512fmaskmode>
new file mode 100644
@@ -0,0 +1,133 @@
+/* PR target/92686 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+
+__attribute__((noipa)) void
+f1 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
+{
+ for (int i = 0; i != 64; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f2 (unsigned char *__restrict dst, unsigned char *__restrict src1,
+ unsigned char *__restrict src2)
+{
+ for (int i = 0; i != 64; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f3 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
+{
+ for (int i = 0; i != 64; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f4 (unsigned char *__restrict dst, unsigned char *__restrict src1,
+ unsigned char *__restrict src2)
+{
+ for (int i = 0; i != 64; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f5 (short *__restrict dst, short *__restrict src1, short *__restrict src2)
+{
+ for (int i = 0; i != 32; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f6 (unsigned short *__restrict dst, unsigned short *__restrict src1,
+ unsigned short *__restrict src2)
+{
+ for (int i = 0; i != 32; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f7 (short *__restrict dst, short *__restrict src1, short *__restrict src2)
+{
+ for (int i = 0; i != 32; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f8 (unsigned short *__restrict dst, unsigned short *__restrict src1,
+ unsigned short *__restrict src2)
+{
+ for (int i = 0; i != 32; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f9 (int *__restrict dst, int *__restrict src1, int *__restrict src2)
+{
+ for (int i = 0; i != 16; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f10 (unsigned int *__restrict dst, unsigned int *__restrict src1,
+ unsigned int *__restrict src2)
+{
+ for (int i = 0; i != 16; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f11 (int *__restrict dst, int *__restrict src1, int *__restrict src2)
+{
+ for (int i = 0; i != 16; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f12 (unsigned int *__restrict dst, unsigned int *__restrict src1,
+ unsigned int *__restrict src2)
+{
+ for (int i = 0; i != 16; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f13 (long long int *__restrict dst, long long int *__restrict src1,
+ long long int *__restrict src2)
+{
+ for (int i = 0; i != 8; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f14 (unsigned long long int *__restrict dst,
+ unsigned long long int *__restrict src1,
+ unsigned long long int *__restrict src2)
+{
+ for (int i = 0; i != 8; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f15 (long long int *__restrict dst, long long int *__restrict src1,
+ long long int *__restrict src2)
+{
+ for (int i = 0; i != 8; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f16 (unsigned long long int *__restrict dst,
+ unsigned long long int *__restrict src1,
+ unsigned long long int *__restrict src2)
+{
+ for (int i = 0; i != 8; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
new file mode 100644
@@ -0,0 +1,102 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512bw_test
+#endif
+
+#include "avx512bw-pr92686-movcc-1.c"
+#include "pr92686.inc"
+
+#define NUM 512
+
+
+#define TEST_SIGNED(vtype, type, N, fn, fn2, op) \
+do \
+ { \
+ type dst[NUM], src1[NUM], src2[NUM]; \
+ int i, j, sign = 1; \
+ type res[N]; \
+ for (i = 0; i < NUM; i++) \
+ { \
+ src1[i] = i * i * sign; \
+ src2[i] = (i + 20) * sign; \
+ dst[i] = i * i * i + 100; \
+ sign = -sign; \
+ } \
+ for (i = 0; i < NUM; i += N) \
+ { \
+ for (j = 0; j < N; j++) \
+ res[j] = dst[i + j]; \
+ fn (&dst[i], &src1[i], &src2[i]); \
+ for (j = 0; j < N; j++) \
+ { \
+ res[j] = fn2 (res[j], src1[i + j], \
+ src2[i+ j], op); \
+ if (res[j] != dst[i+ j]) \
+ abort(); \
+ } \
+ } \
+ } \
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, fn2, op) \
+do \
+ { \
+ type dst[NUM], src1[NUM], src2[NUM]; \
+ int i,j; \
+ type res[N]; \
+ \
+ for (i = 0; i < NUM; i++) \
+ { \
+ src1[i] = i * i; \
+ src2[i] = i + 20; \
+ dst[i] = i * i * i + 100; \
+ if ((i % 4)) \
+ src2[i] |= (1ULL << (sizeof (type) \
+ * __CHAR_BIT__ - 1)); \
+ } \
+ for (i = 0; i < NUM; i += N) \
+ { \
+ for (j = 0; j < N; j++) \
+ res[j] = dst[i + j]; \
+ fn (&dst[i], &src1[i], &src2[i]); \
+ for (j = 0; j < N; j++) \
+ { \
+ res[j] = fn2 (res[j], src1[i + j], \
+ src2[i + j], op); \
+ if (res[j] != dst[i + j]) \
+ abort(); \
+ } \
+ } \
+ } \
+while (0)
+
+static void
+TEST (void)
+{
+ TEST_SIGNED (v64qi, signed char, 64, f1, cmpb, 5);
+ TEST_UNSIGNED (v64uqi, unsigned char, 64, f2, cmpub, 5);
+ TEST_SIGNED (v64qi, signed char, 64, f3, cmpb, 2);
+ TEST_UNSIGNED (v64uqi, unsigned char, 64, f4, cmpub, 2);
+ TEST_SIGNED (v32hi, short int, 32, f5, cmpw, 5);
+ TEST_UNSIGNED (v32uhi, unsigned short int, 32, f6, cmpuw, 5);
+ TEST_SIGNED (v32hi, short int, 32, f7, cmpw, 2);
+ TEST_UNSIGNED (v32uhi, unsigned short int, 32, f8, cmpuw, 2);
+ TEST_SIGNED (v16si, int, 16, f9, cmpd, 5);
+ TEST_UNSIGNED (v16usi, unsigned int, 16, f10, cmpud, 5);
+ TEST_SIGNED (v16si, int, 16, f11, cmpd, 2);
+ TEST_UNSIGNED (v16usi, unsigned int, 16, f12, cmpud, 2);
+ TEST_SIGNED (v8di, long long int, 8, f13, cmpq, 5);
+ TEST_UNSIGNED (v8udi, unsigned long long int, 8, f14, cmpuq, 5);
+ TEST_SIGNED (v8di, long long int, 8, f15, cmpq, 2);
+ TEST_UNSIGNED (v8udi, unsigned long long int, 8, f16, cmpuq, 2);
+}
new file mode 100644
@@ -0,0 +1,112 @@
+/* PR target/92686 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop" } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[\t ]" 8 } } */
+
+typedef char v64qi __attribute__((vector_size(64)));
+typedef unsigned char v64uqi __attribute__((vector_size(64)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef unsigned short v32uhi __attribute__((vector_size(64)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef unsigned v16usi __attribute__((vector_size(64)));
+typedef long long v8di __attribute__((vector_size(64)));
+typedef unsigned long long v8udi __attribute__((vector_size(64)));
+
+__attribute__((noipa)) v64qi
+f1 (v64qi x, v64qi y)
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v64uqi
+f2 (v64uqi x, v64uqi y)
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v64qi
+f3 (v64qi x, v64qi y)
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v64uqi
+f4 (v64uqi x, v64uqi y)
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v32hi
+f5 (v32hi x, v32hi y)
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v32uhi
+f6 (v32uhi x, v32uhi y)
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v32hi
+f7 (v32hi x, v32hi y)
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v32uhi
+f8 (v32uhi x, v32uhi y)
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v16si
+f9 (v16si x, v16si y)
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v16usi
+f10 (v16usi x, v16usi y)
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v16si
+f11 (v16si x, v16si y)
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v16usi
+f12 (v16usi x, v16usi y)
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v8di
+f13 (v8di x, v8di y)
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v8udi
+f14 (v8udi x, v8udi y)
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v8di
+f15 (v8di x, v8di y)
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v8udi
+f16 (v8udi x, v8udi y)
+{
+ return x <= y;
+}
new file mode 100644
@@ -0,0 +1,90 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-options "-O2 -mavx512bw" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512bw_test
+#endif
+
+#include "avx512bw-pr92686-vpcmp-1.c"
+
+#define NUM 512
+
+#define TEST_SIGNED(vtype, type, N, fn, op) \
+do \
+ { \
+ union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \
+ int i, sign = 1; \
+ type res; \
+ for (i = 0; i < NUM; i++) \
+ { \
+ src1.i[i] = i * i * sign; \
+ src2.i[i] = (i + 20) * sign; \
+ sign = -sign; \
+ } \
+ for (i = 0; i < NUM; i += N) \
+ dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \
+ \
+ for (i = 0; i < NUM; i++) \
+ { \
+ res = src1.i[i] op src2.i[i] ? -1 : 0; \
+ if (res != dst.i[i]) \
+ abort (); \
+ } \
+ } \
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, op) \
+do \
+ { \
+ union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \
+ int i; \
+ type res; \
+ \
+ for (i = 0; i < NUM; i++) \
+ { \
+ src1.i[i] = i * i; \
+ src2.i[i] = i + 20; \
+ if ((i % 4)) \
+ src2.i[i] |= (1ULL << (sizeof (type) \
+ * __CHAR_BIT__ - 1)); \
+ } \
+ \
+ for (i = 0; i < NUM; i += N) \
+ dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \
+ \
+ for (i = 0; i < NUM; i++) \
+ { \
+ res = src1.i[i] op src2.i[i] ? -1 : 0; \
+ if (res != dst.i[i]) \
+ abort (); \
+ } \
+ } \
+while (0)
+
+static void
+TEST (void)
+{
+ TEST_SIGNED (v64qi, signed char, 64, f1, >=);
+ TEST_UNSIGNED (v64uqi, unsigned char, 64, f2, >=);
+ TEST_SIGNED (v64qi, signed char, 64, f3, <=);
+ TEST_UNSIGNED (v64uqi, unsigned char, 64, f4, <=);
+ TEST_SIGNED (v32hi, short int, 32, f5, >=);
+ TEST_UNSIGNED (v32uhi, unsigned short int, 32, f6, >=);
+ TEST_SIGNED (v32hi, short int, 32, f7, <=);
+ TEST_UNSIGNED (v32uhi, unsigned short int, 32, f8, <=);
+ TEST_SIGNED (v16si, int, 16, f9, >=);
+ TEST_UNSIGNED (v16usi, unsigned int, 16, f10, >=);
+ TEST_SIGNED (v16si, int, 16, f11, <=);
+ TEST_UNSIGNED (v16usi, unsigned int, 16, f12, <=);
+ TEST_SIGNED (v8di, long long int, 8, f13, >=);
+ TEST_UNSIGNED (v8udi, unsigned long long int, 8, f14, >=);
+ TEST_SIGNED (v8di, long long int, 8, f15, <=);
+ TEST_UNSIGNED (v8udi, unsigned long long int, 8, f16, <=);
+}
@@ -6,9 +6,7 @@
/* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */
/* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */
/* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminsq\[\t ]" 2 } } */
-
+/* { dg-final { scan-assembler-times "vpcmp\[dq\]\[\t ]" 4 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[dq\]\[\t ]" 4 } } */
+/* { dg-final { scan-assembler-times "vpternlog\[qd\]\[\t ]" 8 } } */
#include "avx2-pr88547-1.c"
new file mode 100644
@@ -0,0 +1,133 @@
+/* PR target/92686 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512bw -mavx512vl -mno-xop -mprefer-vector-width=256" } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+
+__attribute__((noipa)) void
+f1 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
+{
+ for (int i = 0; i != 32; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f2 (unsigned char *__restrict dst, unsigned char *__restrict src1,
+ unsigned char *__restrict src2)
+{
+ for (int i = 0; i != 32; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f3 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
+{
+ for (int i = 0; i != 32; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f4 (unsigned char *__restrict dst, unsigned char *__restrict src1,
+ unsigned char *__restrict src2)
+{
+ for (int i = 0; i != 32; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f5 (short *__restrict dst, short *__restrict src1, short *__restrict src2)
+{
+ for (int i = 0; i != 16; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f6 (unsigned short *__restrict dst, unsigned short *__restrict src1,
+ unsigned short *__restrict src2)
+{
+ for (int i = 0; i != 16; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f7 (short *__restrict dst, short *__restrict src1, short *__restrict src2)
+{
+ for (int i = 0; i != 16; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f8 (unsigned short *__restrict dst, unsigned short *__restrict src1,
+ unsigned short *__restrict src2)
+{
+ for (int i = 0; i != 16; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f9 (int *__restrict dst, int *__restrict src1, int *__restrict src2)
+{
+ for (int i = 0; i != 8; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f10 (unsigned int *__restrict dst, unsigned int *__restrict src1,
+ unsigned int *__restrict src2)
+{
+ for (int i = 0; i != 8; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f11 (int *__restrict dst, int *__restrict src1, int *__restrict src2)
+{
+ for (int i = 0; i != 8; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f12 (unsigned int *__restrict dst, unsigned int *__restrict src1,
+ unsigned int *__restrict src2)
+{
+ for (int i = 0; i != 8; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f13 (long long int *__restrict dst, long long int *__restrict src1,
+ long long int *__restrict src2)
+{
+ for (int i = 0; i != 4; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f14 (unsigned long long int *__restrict dst,
+ unsigned long long int *__restrict src1,
+ unsigned long long int *__restrict src2)
+{
+ for (int i = 0; i != 4; i++)
+ dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f15 (long long int *__restrict dst, long long int *__restrict src1,
+ long long int *__restrict src2)
+{
+ for (int i = 0; i != 4; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
+
+__attribute__((noipa)) void
+f16 (unsigned long long int *__restrict dst,
+ unsigned long long int *__restrict src1,
+ unsigned long long int *__restrict src2)
+{
+ for (int i = 0; i != 4; i++)
+ dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i];
+}
new file mode 100644
@@ -0,0 +1,102 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512vl_test
+#endif
+
+#include "avx512vl-pr92686-movcc-1.c"
+#include "pr92686.inc"
+
+#define NUM 256
+
+
+#define TEST_SIGNED(vtype, type, N, fn, fn2, op) \
+do \
+ { \
+ type dst[NUM], src1[NUM], src2[NUM]; \
+ int i, j, sign = 1; \
+ type res[N]; \
+ for (i = 0; i < NUM; i++) \
+ { \
+ src1[i] = i * i * sign; \
+ src2[i] = (i + 20) * sign; \
+ dst[i] = i * i * i + 100; \
+ sign = -sign; \
+ } \
+ for (i = 0; i < NUM; i += N) \
+ { \
+ for (j = 0; j < N; j++) \
+ res[j] = dst[i + j]; \
+ fn (&dst[i], &src1[i], &src2[i]); \
+ for (j = 0; j < N; j++) \
+ { \
+ res[j] = fn2 (res[j], src1[i + j], \
+ src2[i+ j], op); \
+ if (res[j] != dst[i+ j]) \
+ abort(); \
+ } \
+ } \
+ } \
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, fn2, op) \
+do \
+ { \
+ type dst[NUM], src1[NUM], src2[NUM]; \
+ int i,j; \
+ type res[N]; \
+ \
+ for (i = 0; i < NUM; i++) \
+ { \
+ src1[i] = i * i; \
+ src2[i] = i + 20; \
+ dst[i] = i * i * i + 100; \
+ if ((i % 4)) \
+ src2[i] |= (1ULL << (sizeof (type) \
+ * __CHAR_BIT__ - 1)); \
+ } \
+ for (i = 0; i < NUM; i += N) \
+ { \
+ for (j = 0; j < N; j++) \
+ res[j] = dst[i + j]; \
+ fn (&dst[i], &src1[i], &src2[i]); \
+ for (j = 0; j < N; j++) \
+ { \
+ res[j] = fn2 (res[j], src1[i + j], \
+ src2[i + j], op); \
+ if (res[j] != dst[i + j]) \
+ abort(); \
+ } \
+ } \
+ } \
+while (0)
+
+static void
+TEST (void)
+{
+ TEST_SIGNED (v32qi, signed char, 32, f1, cmpb, 5);
+ TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, cmpub, 5);
+ TEST_SIGNED (v32qi, signed char, 32, f3, cmpb, 2);
+ TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, cmpub, 2);
+ TEST_SIGNED (v16hi, short int, 16, f5, cmpw, 5);
+ TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, cmpuw, 5);
+ TEST_SIGNED (v16hi, short int, 16, f7, cmpw, 2);
+ TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, cmpuw, 2);
+ TEST_SIGNED (v8si, int, 8, f9, cmpd, 5);
+ TEST_UNSIGNED (v8usi, unsigned int, 8, f10, cmpud, 5);
+ TEST_SIGNED (v8si, int, 8, f11, cmpd, 2);
+ TEST_UNSIGNED (v8usi, unsigned int, 8, f12, cmpud, 2);
+ TEST_SIGNED (v4di, long long int, 4, f13, cmpq, 5);
+ TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, cmpuq, 5);
+ TEST_SIGNED (v4di, long long int, 4, f15, cmpq, 2);
+ TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, cmpuq, 2);
+}
new file mode 100644
@@ -0,0 +1,112 @@
+/* PR target/92686 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop" } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[\t ]" 8 } } */
+
+typedef signed char v32qi __attribute__((vector_size(32))); /* Every type here is a 32-byte vector.  */
+typedef unsigned char v32uqi __attribute__((vector_size(32)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef unsigned short v16uhi __attribute__((vector_size(32)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef unsigned v8usi __attribute__((vector_size(32)));
+typedef long long v4di __attribute__((vector_size(32)));
+typedef unsigned long long v4udi __attribute__((vector_size(32)));
+
+__attribute__((noipa)) v32qi
+f1 (v32qi x, v32qi y) /* Signed bytes, >=; presumably vpcmpb + vpmovm2b.  */
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v32uqi
+f2 (v32uqi x, v32uqi y) /* Unsigned bytes, >=; presumably vpcmpub + vpmovm2b.  */
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v32qi
+f3 (v32qi x, v32qi y) /* Signed bytes, <=; presumably vpcmpb + vpmovm2b.  */
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v32uqi
+f4 (v32uqi x, v32uqi y) /* Unsigned bytes, <=; presumably vpcmpub + vpmovm2b.  */
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v16hi
+f5 (v16hi x, v16hi y) /* Signed words, >=; presumably vpcmpw + vpmovm2w.  */
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v16uhi
+f6 (v16uhi x, v16uhi y) /* Unsigned words, >=; presumably vpcmpuw + vpmovm2w.  */
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v16hi
+f7 (v16hi x, v16hi y) /* Signed words, <=; presumably vpcmpw + vpmovm2w.  */
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v16uhi
+f8 (v16uhi x, v16uhi y) /* Unsigned words, <=; presumably vpcmpuw + vpmovm2w.  */
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v8si
+f9 (v8si x, v8si y) /* Signed dwords, >=; presumably vpcmpd + vpternlogd.  */
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v8usi
+f10 (v8usi x, v8usi y) /* Unsigned dwords, >=; presumably vpcmpud + vpternlogd.  */
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v8si
+f11 (v8si x, v8si y) /* Signed dwords, <=; presumably vpcmpd + vpternlogd.  */
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v8usi
+f12 (v8usi x, v8usi y) /* Unsigned dwords, <=; presumably vpcmpud + vpternlogd.  */
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v4di
+f13 (v4di x, v4di y) /* Signed qwords, >=; presumably vpcmpq + vpternlogq.  */
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v4udi
+f14 (v4udi x, v4udi y) /* Unsigned qwords, >=; presumably vpcmpuq + vpternlogq.  */
+{
+ return x >= y;
+}
+
+__attribute__((noipa)) v4di
+f15 (v4di x, v4di y) /* Signed qwords, <=; presumably vpcmpq + vpternlogq.  */
+{
+ return x <= y;
+}
+
+__attribute__((noipa)) v4udi
+f16 (v4udi x, v4udi y) /* Unsigned qwords, <=; presumably vpcmpuq + vpternlogq.  */
+{
+ return x <= y;
+}
new file mode 100644
@@ -0,0 +1,91 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512vl_test
+#endif
+
+#include "avx512vl-pr92686-vpcmp-1.c"
+
+#define NUM 256
+
+#define TEST_SIGNED(vtype, type, N, fn, op) \
+do /* Check vector fn lane by lane against the scalar comparison "op".  */ \
+ { \
+ union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; /* Vector and scalar views of the same data.  */ \
+ int i, sign = 1; \
+ type res; \
+ for (i = 0; i < NUM; i++) \
+ { \
+ src1.i[i] = i * i * sign; \
+ src2.i[i] = (i + 20) * sign; \
+ sign = -sign; /* Values at odd indices end up negated.  */ \
+ } \
+ for (i = 0; i < NUM; i += N) \
+ dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \
+ \
+ for (i = 0; i < NUM; i++) \
+ { \
+ res = src1.i[i] op src2.i[i] ? -1 : 0; /* A true lane must be -1, a false lane 0.  */ \
+ if (res != dst.i[i]) \
+ abort (); \
+ } \
+ } \
+while (0)
+
+#define TEST_UNSIGNED(vtype, type, N, fn, op) \
+do /* Check vector fn lane by lane against the scalar comparison "op".  */ \
+ { \
+ union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; /* Vector and scalar views of the same data.  */ \
+ int i; \
+ type res; \
+ \
+ for (i = 0; i < NUM; i++) \
+ { \
+ src1.i[i] = i * i; \
+ src2.i[i] = i + 20; \
+ if ((i % 4)) /* Set the top bit of src2 except when i is a multiple of 4.  */ \
+ src2.i[i] |= (1ULL << (sizeof (type) \
+ * __CHAR_BIT__ - 1)); \
+ } \
+ \
+ for (i = 0; i < NUM; i += N) \
+ dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \
+ \
+ for (i = 0; i < NUM; i++) \
+ { \
+ res = src1.i[i] op src2.i[i] ? -1 : 0; /* -1 converts to all-ones in the unsigned type.  */ \
+ if (res != dst.i[i]) \
+ abort (); \
+ } \
+ } \
+while (0)
+
+static void
+TEST (void) /* TEST expands to avx512vl_test unless it was already defined.  */
+{
+ TEST_SIGNED (v32qi, signed char, 32, f1, >=); /* f1..f16 come from the included vpcmp-1.c.  */
+ TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, >=);
+ TEST_SIGNED (v32qi, signed char, 32, f3, <=);
+ TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, <=);
+ TEST_SIGNED (v16hi, short int, 16, f5, >=);
+ TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, >=);
+ TEST_SIGNED (v16hi, short int, 16, f7, <=);
+ TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, <=);
+ TEST_SIGNED (v8si, int, 8, f9, >=);
+ TEST_UNSIGNED (v8usi, unsigned int, 8, f10, >=);
+ TEST_SIGNED (v8si, int, 8, f11, <=);
+ TEST_UNSIGNED (v8usi, unsigned int, 8, f12, <=);
+ TEST_SIGNED (v4di, long long int, 4, f13, >=);
+ TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, >=);
+ TEST_SIGNED (v4di, long long int, 4, f15, <=);
+ TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, <=);
+}
@@ -1,19 +1,9 @@
/* PR target/88547 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512dq" } */
-/* { dg-final { scan-assembler-not "vpternlog" } } */
-/* { dg-final { scan-assembler-times "vpmovm2b\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpmovm2w\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpmovm2d\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpmovm2q\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "knotb\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "knotw\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "knotd\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "knotq\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-not "vpsubd\[\t ]" } } */
-/* { dg-final { scan-assembler-not "vpsubq\[\t ]" } } */
+/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovm2\[bwdq\]\[\t ]" 16 } } */
typedef signed char v64qi __attribute__((vector_size(64)));
typedef unsigned char v64uqi __attribute__((vector_size(64)));
new file mode 100644
@@ -0,0 +1,189 @@
+/* Included by avx512bw-pr92686-movcc-2.c and avx512vl-pr92686-movcc-2.c.  */
+__attribute__((noipa)) char
+cmpb (char dst, char src1, char src2, int num) /* NOTE(review): plain char, but the caller passes signed char - relies on char being signed.  */
+{ /* Scalar model of one element: src1 if predicate NUM holds for (src1, src2), else dst.  */
+ switch(num) /* NOTE(review): numbering looks like the VPCMP imm8 predicates (3 and 7 absent) - confirm.  */
+ {
+ case 0:
+ return src1 == src2 ? src1 : dst;
+ case 1:
+ return src1 < src2 ? src1 : dst;
+ case 2:
+ return src1 <= src2 ? src1 : dst;
+ case 4:
+ return src1 != src2 ? src1 : dst;
+ case 5:
+ return src1 >= src2 ? src1 : dst;
+ case 6:
+ return src1 > src2 ? src1 : dst;
+ default: /* Any other NUM is rejected.  */
+ abort();
+ }
+ abort(); /* Not reached: every switch path returns or aborts.  */
+}
+
+__attribute__((noipa)) unsigned char
+cmpub (unsigned char dst, unsigned char src1,
+ unsigned char src2, int num)
+{ /* Scalar model of one element: src1 if predicate NUM holds for (src1, src2), else dst.  */
+ switch(num) /* NOTE(review): numbering looks like the VPCMP imm8 predicates (3 and 7 absent) - confirm.  */
+ {
+ case 0:
+ return src1 == src2 ? src1 : dst;
+ case 1:
+ return src1 < src2 ? src1 : dst;
+ case 2:
+ return src1 <= src2 ? src1 : dst;
+ case 4:
+ return src1 != src2 ? src1 : dst;
+ case 5:
+ return src1 >= src2 ? src1 : dst;
+ case 6:
+ return src1 > src2 ? src1 : dst;
+ default: /* Any other NUM is rejected.  */
+ abort();
+ }
+ abort(); /* Not reached: every switch path returns or aborts.  */
+}
+
+__attribute__((noipa)) short
+cmpw (short dst, short src1, short src2, int num)
+{ /* Scalar model of one element: src1 if predicate NUM holds for (src1, src2), else dst.  */
+ switch(num) /* NOTE(review): numbering looks like the VPCMP imm8 predicates (3 and 7 absent) - confirm.  */
+ {
+ case 0:
+ return src1 == src2 ? src1 : dst;
+ case 1:
+ return src1 < src2 ? src1 : dst;
+ case 2:
+ return src1 <= src2 ? src1 : dst;
+ case 4:
+ return src1 != src2 ? src1 : dst;
+ case 5:
+ return src1 >= src2 ? src1 : dst;
+ case 6:
+ return src1 > src2 ? src1 : dst;
+ default: /* Any other NUM is rejected.  */
+ abort();
+ }
+ abort(); /* Not reached: every switch path returns or aborts.  */
+}
+
+__attribute__((noipa)) unsigned short
+cmpuw (unsigned short dst, unsigned short src1,
+ unsigned short src2, int num)
+{ /* Scalar model of one element: src1 if predicate NUM holds for (src1, src2), else dst.  */
+ switch(num) /* NOTE(review): numbering looks like the VPCMP imm8 predicates (3 and 7 absent) - confirm.  */
+ {
+ case 0:
+ return src1 == src2 ? src1 : dst;
+ case 1:
+ return src1 < src2 ? src1 : dst;
+ case 2:
+ return src1 <= src2 ? src1 : dst;
+ case 4:
+ return src1 != src2 ? src1 : dst;
+ case 5:
+ return src1 >= src2 ? src1 : dst;
+ case 6:
+ return src1 > src2 ? src1 : dst;
+ default: /* Any other NUM is rejected.  */
+ abort();
+ }
+ abort(); /* Not reached: every switch path returns or aborts.  */
+}
+
+__attribute__((noipa)) int
+cmpd (int dst, int src1, int src2, int num)
+{ /* Scalar model of one element: src1 if predicate NUM holds for (src1, src2), else dst.  */
+ switch(num) /* NOTE(review): numbering looks like the VPCMP imm8 predicates (3 and 7 absent) - confirm.  */
+ {
+ case 0:
+ return src1 == src2 ? src1 : dst;
+ case 1:
+ return src1 < src2 ? src1 : dst;
+ case 2:
+ return src1 <= src2 ? src1 : dst;
+ case 4:
+ return src1 != src2 ? src1 : dst;
+ case 5:
+ return src1 >= src2 ? src1 : dst;
+ case 6:
+ return src1 > src2 ? src1 : dst;
+ default: /* Any other NUM is rejected.  */
+ abort();
+ }
+ abort(); /* Not reached: every switch path returns or aborts.  */
+}
+
+__attribute__((noipa)) unsigned int
+cmpud (unsigned int dst, unsigned int src1,
+ unsigned int src2, int num)
+{ /* Scalar model of one element: src1 if predicate NUM holds for (src1, src2), else dst.  */
+ switch(num) /* NOTE(review): numbering looks like the VPCMP imm8 predicates (3 and 7 absent) - confirm.  */
+ {
+ case 0:
+ return src1 == src2 ? src1 : dst;
+ case 1:
+ return src1 < src2 ? src1 : dst;
+ case 2:
+ return src1 <= src2 ? src1 : dst;
+ case 4:
+ return src1 != src2 ? src1 : dst;
+ case 5:
+ return src1 >= src2 ? src1 : dst;
+ case 6:
+ return src1 > src2 ? src1 : dst;
+ default: /* Any other NUM is rejected.  */
+ abort();
+ }
+ abort(); /* Not reached: every switch path returns or aborts.  */
+}
+
+__attribute__((noipa)) long long int
+cmpq (long long int dst, long long int src1,
+ long long int src2, int num)
+{ /* Scalar model of one element: src1 if predicate NUM holds for (src1, src2), else dst.  */
+ switch(num) /* NOTE(review): numbering looks like the VPCMP imm8 predicates (3 and 7 absent) - confirm.  */
+ {
+ case 0:
+ return src1 == src2 ? src1 : dst;
+ case 1:
+ return src1 < src2 ? src1 : dst;
+ case 2:
+ return src1 <= src2 ? src1 : dst;
+ case 4:
+ return src1 != src2 ? src1 : dst;
+ case 5:
+ return src1 >= src2 ? src1 : dst;
+ case 6:
+ return src1 > src2 ? src1 : dst;
+ default: /* Any other NUM is rejected.  */
+ abort();
+ }
+ abort(); /* Not reached: every switch path returns or aborts.  */
+}
+
+__attribute__((noipa)) unsigned long long int
+cmpuq (unsigned long long int dst, unsigned long long int src1,
+ unsigned long long int src2, int num)
+{ /* Scalar model of one element: src1 if predicate NUM holds for (src1, src2), else dst.  */
+ switch(num) /* NOTE(review): numbering looks like the VPCMP imm8 predicates (3 and 7 absent) - confirm.  */
+ {
+ case 0:
+ return src1 == src2 ? src1 : dst;
+ case 1:
+ return src1 < src2 ? src1 : dst;
+ case 2:
+ return src1 <= src2 ? src1 : dst;
+ case 4:
+ return src1 != src2 ? src1 : dst;
+ case 5:
+ return src1 >= src2 ? src1 : dst;
+ case 6:
+ return src1 > src2 ? src1 : dst;
+ default: /* Any other NUM is rejected.  */
+ abort();
+ }
+ abort(); /* Not reached: every switch path returns or aborts.  */
+}
\ No newline at end of file
--
2.19.1