Fix __mmask* types on many AVX512 intrinsics

Message ID 20180707081550.GQ7166@tucnak
State New
Headers show
Series
  • Fix __mmask* types on many AVX512 intrinsics
Related show

Commit Message

Jakub Jelinek July 7, 2018, 8:15 a.m.
Hi!

On Fri, Jul 06, 2018 at 12:47:07PM +0200, Jakub Jelinek wrote:
> On Thu, Jul 05, 2018 at 11:57:26PM +0300, Grazvydas Ignotas wrote:
> > I think it would be more efficient if you took care of it. I won't
> > have time for at least a few days anyway.

Here is the complete patch, I found two further issues where
the __mmask mismatch was in between the return type and what was used
in the rest of the intrinsic, so not caught by my earlier greps.

I've added (except for the avx512bitalg which seems to have no runtime
test coverage whatsoever) tests that cover the real bugs and further
fixed the avx512*-vpcmp{,u}b-2.c test because (rel) << i triggered UB
if i could go up to 63.

I don't have AVX512* hw, so I've just bootstrapped/regtested the patch
normally on i686-linux and x86_64-linux AVX2 hw and tried the affected
tests without the config/i386/ changes and with them under SDE.
The patch should fix these FAILs:

FAIL: gcc.target/i386/avx512bw-vpcmpb-2.c execution test
FAIL: gcc.target/i386/avx512bw-vpcmpub-2.c execution test
FAIL: gcc.target/i386/avx512f-vinsertf32x4-3.c execution test
FAIL: gcc.target/i386/avx512f-vinserti32x4-3.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpgeb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpgeub-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpgeuw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpgew-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpleb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpleub-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpleuw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmplew-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpltb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpltub-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpltuw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpltw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpneqb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpnequb-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpnequw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpneqw-2.c execution test
FAIL: gcc.target/i386/avx512vl-vpcmpub-2.c execution test

Ok for trunk?

I guess we want to backport it soon, but would appreciate somebody testing
it on real AVX512-{BW,VL} hw before doing the backports.

Another thing to consider is whether we shouldn't add those grep/sed checks
I've been doing (at least the easy ones that don't cross-check the
i386-builtins.def against the uses in the intrin headers) to config/i386/t-*
some way.

2018-07-07  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/avx512bitalgintrin.h (_mm512_mask_bitshuffle_epi64_mask):
	Use __mmask64 type instead of __mmask8 for __M argument.
	* config/i386/avx512fintrin.h (_mm512_mask_xor_epi64,
	_mm512_maskz_xor_epi64): Use __mmask8 type instead of __mmask16 for
	__U argument.
	(_mm512_mask_cmpneq_epi64_mask): Use __mmask8 type instead of
	__mmask16 for __M argument.
	(_mm512_maskz_insertf32x4, _mm512_maskz_inserti32x4,
	_mm512_mask_insertf32x4, _mm512_mask_inserti32x4): Cast last argument
	to __mmask16 instead of __mmask8.
	* config/i386/avx512vlintrin.h (_mm_mask_add_ps, _mm_maskz_add_ps,
	_mm256_mask_add_ps, _mm256_maskz_add_ps, _mm_mask_sub_ps,
	_mm_maskz_sub_ps, _mm256_mask_sub_ps, _mm256_maskz_sub_ps,
	_mm256_maskz_cvtepi32_ps, _mm_maskz_cvtepi32_ps): Use __mmask8 type
	instead of __mmask16 for __U argument.
	* config/i386/avx512vlbwintrin.h (_mm_mask_cmp_epi8_mask): Use
	__mmask16 instead of __mmask8 for __U argument.
	(_mm256_mask_cmp_epi8_mask): Use __mmask32 instead of __mmask16 for
	__U argument.
	(_mm256_cmp_epi8_mask): Use __mmask32 return type instead of
	__mmask16.
	(_mm_mask_cmp_epu8_mask): Use __mmask16 instead of __mmask8 for __U
	argument.
	(_mm256_mask_cmp_epu8_mask): Use __mmask32 instead of __mmask16 for
	__U argument.
	(_mm256_cmp_epu8_mask): Use __mmask32 return type instead of
	__mmask16.
	(_mm_mask_cmp_epi16_mask): Cast last argument to __mmask8 instead
	of __mmask16.
	(_mm256_mask_cvtepi8_epi16): Use __mmask16 instead of __mmask32 for
	__U argument.
	(_mm_mask_cvtepi8_epi16): Use __mmask8 instead of __mmask32 for
	__U argument.
	(_mm256_mask_cvtepu8_epi16): Use __mmask16 instead of __mmask32 for
	__U argument.
	(_mm_mask_cvtepu8_epi16): Use __mmask8 instead of __mmask32 for
	__U argument.
	(_mm256_mask_cmpneq_epu8_mask, _mm256_mask_cmplt_epu8_mask,
	_mm256_mask_cmpge_epu8_mask, _mm256_mask_cmple_epu8_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask32.
	(_mm256_mask_cmpneq_epu16_mask, _mm256_mask_cmplt_epu16_mask,
	_mm256_mask_cmpge_epu16_mask, _mm256_mask_cmple_epu16_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask16.
	(_mm256_mask_cmpneq_epi8_mask, _mm256_mask_cmplt_epi8_mask,
	_mm256_mask_cmpge_epi8_mask, _mm256_mask_cmple_epi8_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask32.
	(_mm256_mask_cmpneq_epi16_mask, _mm256_mask_cmplt_epi16_mask,
	_mm256_mask_cmpge_epi16_mask, _mm256_mask_cmple_epi16_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask16.
	* config/i386/avx512vbmi2vlintrin.h (_mm_mask_shrdi_epi32,
	_mm_mask_shldi_epi32): Cast last argument to __mmask8 instead of
	__mmask16.

	* gcc.target/i386/avx512bw-vpcmpb-2.c (CMP): Use SIZE macro instead
	of hardcoding size.  Cast (rel) to MASK_TYPE.
	* gcc.target/i386/avx512bw-vpcmpub-2.c (CMP): Likewise.
	* gcc.target/i386/avx512f-vinserti32x4-3.c: New test.
	* gcc.target/i386/avx512f-vinsertf32x4-3.c: New test.
	* gcc.target/i386/avx512vl-vpcmpnequb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpneqb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpnequw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmplew-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgew-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpneqw-2.c: New test.

2018-07-07  Grazvydas Ignotas  <notasas@gmail.com>

	* config/i386/avx512bwintrin.h: (_mm512_mask_cmp_epi8_mask,
	_mm512_mask_cmp_epu8_mask): Use __mmask64 type instead of __mmask32
	for __U argument.

	* gcc.target/i386/avx512bw-vpcmpb-2.c (SIZE): Define to
	(AVX512F_LEN / 8) instead of (AVX512F_LEN / 16).
	* gcc.target/i386/avx512bw-vpcmpub-2.c (SIZE): Likewise.



	Jakub

Comments

Grazvydas Ignotas July 7, 2018, 11:39 p.m. | #1
On Sat, Jul 7, 2018 at 11:15 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> On Fri, Jul 06, 2018 at 12:47:07PM +0200, Jakub Jelinek wrote:
>> On Thu, Jul 05, 2018 at 11:57:26PM +0300, Grazvydas Ignotas wrote:
>> > I think it would be more efficient if you took care of it. I won't
>> > have time for at least a few days anyway.
>
> Here is the complete patch, I found two further issues where
> the __mmask mismatch was in between the return type and what was used
> in the rest of the intrinsic, so not caught by my earlier greps.
>
> I've added (except for the avx512bitalg which seems to have no runtime
> test coverage whatsoever) tests that cover the real bugs and further
> fixed the avx512*-vpcmp{,u}b-2.c test because (rel) << i triggered UB
> if i could go up to 63.
>
> I don't have AVX512* hw, so I've just bootstrapped/regtested the patch
> normally on i686-linux and x86_64-linux AVX2 hw and tried the affected
> tests without the config/i386/ changes and with them under SDE.
> The patch should fix these FAILs:
>
> FAIL: gcc.target/i386/avx512bw-vpcmpb-2.c execution test
> FAIL: gcc.target/i386/avx512bw-vpcmpub-2.c execution test
> FAIL: gcc.target/i386/avx512f-vinsertf32x4-3.c execution test
> FAIL: gcc.target/i386/avx512f-vinserti32x4-3.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpgeb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpgeub-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpgeuw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpgew-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpleb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpleub-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpleuw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmplew-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpltb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpltub-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpltuw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpltw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpneqb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpnequb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpnequw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpneqw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpub-2.c execution test
>
> Ok for trunk?
>
> I guess we want to backport it soon, but would appreciate somebody testing
> it on real AVX512-{BW,VL} hw before doing the backports.

I've run the testsuite with this patch applied and all tests passed on
i7-7800X. There are avx512vl-vmovdqa64-1.c and avx512vl-vpermilpdi-1.c
failures, but those seem unrelated.

thanks,
Gražvydas
Jakub Jelinek July 9, 2018, 7:37 a.m. | #2
On Sun, Jul 08, 2018 at 02:39:40AM +0300, Grazvydas Ignotas wrote:
> > I guess we want to backport it soon, but would appreciate somebody testing
> > it on real AVX512-{BW,VL} hw before doing the backports.
> 
> I've run the testsuite with this patch applied and all tests passed on
> i7-7800X.

Thanks for the testing.

> There are avx512vl-vmovdqa64-1.c and avx512vl-vpermilpdi-1.c
> failures, but those seem unrelated.

These are dg-do compile tests, and they PASS for me, even when doing
make check-gcc RUNTESTFLAGS="--target_board=unix/-march=skylake-avx512 i386.exp='avx512vl-vmovdqa64-1.c avx512vl-vpermilpdi-1.c'"
So, how exactly you've configured your gcc, what kind of options are
passed to the test and how they FAIL?

	Jakub
Grazvydas Ignotas July 9, 2018, 9 a.m. | #3
On Mon, Jul 9, 2018 at 10:37 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Sun, Jul 08, 2018 at 02:39:40AM +0300, Grazvydas Ignotas wrote:
>> > I guess we want to backport it soon, but would appreciate somebody testing
>> > it on real AVX512-{BW,VL} hw before doing the backports.
>>
>> I've run the testsuite with this patch applied and all tests passed on
>> i7-7800X.
>
> Thanks for the testing.
>
>> There are avx512vl-vmovdqa64-1.c and avx512vl-vpermilpdi-1.c
>> failures, but those seem unrelated.
>
> These are dg-do compile tests, and they PASS for me, even when doing
> make check-gcc RUNTESTFLAGS="--target_board=unix/-march=skylake-avx512 i386.exp='avx512vl-vmovdqa64-1.c avx512vl-vpermilpdi-1.c'"
> So, how exactly you've configured your gcc, what kind of options are
> passed to the test and how they FAIL?

I should've mentioned I've tested this patch on top of 8.1 release
tarball and used crosstool-NG to build the toolchain with it's "GCC
test suite" option enabled. It looks like crosstool is applying some
patches, so the results might not be valid. Here is the log (seems to
contain the configuration info), where I just grepped for FAIL and the
new test names to see if they were actually run:

http://notaz.gp2x.de/misc/unsorted/gcc.log.xz

Gražvydas
Jakub Jelinek July 9, 2018, 11:21 a.m. | #4
On Mon, Jul 09, 2018 at 12:00:46PM +0300, Grazvydas Ignotas wrote:
> On Mon, Jul 9, 2018 at 10:37 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> > On Sun, Jul 08, 2018 at 02:39:40AM +0300, Grazvydas Ignotas wrote:
> >> > I guess we want to backport it soon, but would appreciate somebody testing
> >> > it on real AVX512-{BW,VL} hw before doing the backports.
> >>
> >> I've run the testsuite with this patch applied and all tests passed on
> >> i7-7800X.
> >
> > Thanks for the testing.
> >
> >> There are avx512vl-vmovdqa64-1.c and avx512vl-vpermilpdi-1.c
> >> failures, but those seem unrelated.
> >
> > These are dg-do compile tests, and they PASS for me, even when doing
> > make check-gcc RUNTESTFLAGS="--target_board=unix/-march=skylake-avx512 i386.exp='avx512vl-vmovdqa64-1.c avx512vl-vpermilpdi-1.c'"
> > So, how exactly you've configured your gcc, what kind of options are
> > passed to the test and how they FAIL?
> 
> I should've mentioned I've tested this patch on top of 8.1 release
> tarball and used crosstool-NG to build the toolchain with it's "GCC
> test suite" option enabled. It looks like crosstool is applying some
> patches, so the results might not be valid. Here is the log (seems to
> contain the configuration info), where I just grepped for FAIL and the
> new test names to see if they were actually run:
> 
> http://notaz.gp2x.de/misc/unsorted/gcc.log.xz

Don't see any FAILs even in your log file on the above tests:
spawn -ignore SIGHUP gcc /home/notaz/x-tools/x86_64-unknown-linux-gnu/test-suite/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c -fno-diagnostics-show-caret -fdiagnostics-color=never -mavx512vl -O2 -ffat-lto-objects -S -o avx512vl-vmovdqa64-1.s
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c (test for excess errors)
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*%ymm[0-9]+[^\n]*%ymm[0-9]+{%k[1-7]}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*%xmm[0-9]+[^\n]*%xmm[0-9]+{%k[1-7]}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*%ymm[0-9]+[^\n]*%ymm[0-9]+{%k[1-7]}{z}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*%xmm[0-9]+[^\n]*%xmm[0-9]+{%k[1-7]}{z}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+\\([^\n]*%ymm[0-9]+(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+\\([^\n]*%xmm[0-9]+(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*\\)[^\n]*%ymm[0-9]+{%k[1-7]}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*\\)[^\n]*%xmm[0-9]+{%k[1-7]}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*\\)[^\n]*%ymm[0-9]+{%k[1-7]}{z}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*\\)[^\n]*%xmm[0-9]+{%k[1-7]}{z}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*%ymm[0-9]+[^\nxy]*\\(.{5,6}(?:\n|[ \\t]+#) 1
gcc.target/i386/avx512vl-vmovdqa64-1.c: vmovdqa64[ \\t]+[^{\n]*%xmm[0-9]+[^\nxy]*\\((?:\n|[ \\t]+#) found 0 times
XFAIL: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*%xmm[0-9]+[^\nxy]*\\((?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*%ymm[0-9]+[^\n]*\\){%k[1-7]}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vmovdqa64-1.c scan-assembler-times vmovdqa64[ \\t]+[^{\n]*%xmm[0-9]+[^\n]*\\){%k[1-7]}(?:\n|[ \\t]+#) 1

spawn -ignore SIGHUP gcc /home/notaz/x-tools/x86_64-unknown-linux-gnu/test-suite/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpdi-1.c -fno-diagnostics-show-caret -fdiagnostics-color=never -mavx512vl -O2 -ffat-lto-objects -S -o avx512vl-vpermilpdi-1.s
PASS: gcc.target/i386/avx512vl-vpermilpdi-1.c (test for excess errors)
gcc.target/i386/avx512vl-vpermilpdi-1.c: vpermilpd[ \\t]+[^{\n]*13[^\n]*%ymm[0-9]+{%k[1-7]}(?:\n|[ \\t]+#) found 0 times
XFAIL: gcc.target/i386/avx512vl-vpermilpdi-1.c scan-assembler-times vpermilpd[ \\t]+[^{\n]*13[^\n]*%ymm[0-9]+{%k[1-7]}(?:\n|[ \\t]+#) 1
gcc.target/i386/avx512vl-vpermilpdi-1.c: vpermilpd[ \\t]+[^{\n]*13[^\n]*%ymm[0-9]+{%k[1-7]}{z}(?:\n|[ \\t]+#) found 0 times
XFAIL: gcc.target/i386/avx512vl-vpermilpdi-1.c scan-assembler-times vpermilpd[ \\t]+[^{\n]*13[^\n]*%ymm[0-9]+{%k[1-7]}{z}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vpermilpdi-1.c scan-assembler-times vpermilpd[ \\t]+[^{\n]*3[^\n]*%xmm[0-9]+{%k[1-7]}(?:\n|[ \\t]+#) 1
PASS: gcc.target/i386/avx512vl-vpermilpdi-1.c scan-assembler-times vpermilpd[ \\t]+[^{\n]*3[^\n]*%xmm[0-9]+{%k[1-7]}{z}(?:\n|[ \\t]+#) 1

XFAIL is expected fail, not unexpected...

	Jakub
Jeff Law July 11, 2018, 7:59 p.m. | #5
On 07/07/2018 02:15 AM, Jakub Jelinek wrote:
> Hi!
> 
> On Fri, Jul 06, 2018 at 12:47:07PM +0200, Jakub Jelinek wrote:
>> On Thu, Jul 05, 2018 at 11:57:26PM +0300, Grazvydas Ignotas wrote:
>>> I think it would be more efficient if you took care of it. I won't
>>> have time for at least a few days anyway.
> 
> Here is the complete patch, I found two further issues where
> the __mmask mismatch was in between the return type and what was used
> in the rest of the intrinsic, so not caught by my earlier greps.
> 
> I've added (except for the avx512bitalg which seems to have no runtime
> test coverage whatsoever) tests that cover the real bugs and further
> fixed the avx512*-vpcmp{,u}b-2.c test because (rel) << i triggered UB
> if i could go up to 63.
> 
> I don't have AVX512* hw, so I've just bootstrapped/regtested the patch
> normally on i686-linux and x86_64-linux AVX2 hw and tried the affected
> tests without the config/i386/ changes and with them under SDE.
> The patch should fix these FAILs:
> 
> FAIL: gcc.target/i386/avx512bw-vpcmpb-2.c execution test
> FAIL: gcc.target/i386/avx512bw-vpcmpub-2.c execution test
> FAIL: gcc.target/i386/avx512f-vinsertf32x4-3.c execution test
> FAIL: gcc.target/i386/avx512f-vinserti32x4-3.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpgeb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpgeub-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpgeuw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpgew-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpleb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpleub-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpleuw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmplew-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpltb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpltub-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpltuw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpltw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpneqb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpnequb-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpnequw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpneqw-2.c execution test
> FAIL: gcc.target/i386/avx512vl-vpcmpub-2.c execution test
> 
> Ok for trunk?
> 
> I guess we want to backport it soon, but would appreciate somebody testing
> it on real AVX512-{BW,VL} hw before doing the backports.
> 
> Another thing to consider is whether we shouldn't add those grep/sed checks
> I've been doing (at least the easy ones that don't cross-check the
> i386-builtins.def against the uses in the intrin headers) to config/i386/t-*
> some way.
> 
> 2018-07-07  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* config/i386/avx512bitalgintrin.h (_mm512_mask_bitshuffle_epi64_mask):
> 	Use __mmask64 type instead of __mmask8 for __M argument.
> 	* config/i386/avx512fintrin.h (_mm512_mask_xor_epi64,
> 	_mm512_maskz_xor_epi64): Use __mmask8 type instead of __mmask16 for
> 	__U argument.
> 	(_mm512_mask_cmpneq_epi64_mask): Use __mmask8 type instead of
> 	__mmask16 for __M argument.
> 	(_mm512_maskz_insertf32x4, _mm512_maskz_inserti32x4,
> 	_mm512_mask_insertf32x4, _mm512_mask_inserti32x4): Cast last argument
> 	to __mmask16 instead of __mmask8.
> 	* config/i386/avx512vlintrin.h (_mm_mask_add_ps, _mm_maskz_add_ps,
> 	_mm256_mask_add_ps, _mm256_maskz_add_ps, _mm_mask_sub_ps,
> 	_mm_maskz_sub_ps, _mm256_mask_sub_ps, _mm256_maskz_sub_ps,
> 	_mm256_maskz_cvtepi32_ps, _mm_maskz_cvtepi32_ps): Use __mmask8 type
> 	instead of __mmask16 for __U argument.
> 	* config/i386/avx512vlbwintrin.h (_mm_mask_cmp_epi8_mask): Use
> 	__mmask16 instead of __mmask8 for __U argument.
> 	(_mm256_mask_cmp_epi8_mask): Use __mmask32 instead of __mmask16 for
> 	__U argument.
> 	(_mm256_cmp_epi8_mask): Use __mmask32 return type instead of
> 	__mmask16.
> 	(_mm_mask_cmp_epu8_mask): Use __mmask16 instead of __mmask8 for __U
> 	argument.
> 	(_mm256_mask_cmp_epu8_mask): Use __mmask32 instead of __mmask16 for
> 	__U argument.
> 	(_mm256_cmp_epu8_mask): Use __mmask32 return type instead of
> 	__mmask16.
> 	(_mm_mask_cmp_epi16_mask): Cast last argument to __mmask8 instead
> 	of __mmask16.
> 	(_mm256_mask_cvtepi8_epi16): Use __mmask16 instead of __mmask32 for
> 	__U argument.
> 	(_mm_mask_cvtepi8_epi16): Use __mmask8 instead of __mmask32 for
> 	__U argument.
> 	(_mm256_mask_cvtepu8_epi16): Use __mmask16 instead of __mmask32 for
> 	__U argument.
> 	(_mm_mask_cvtepu8_epi16): Use __mmask8 instead of __mmask32 for
> 	__U argument.
> 	(_mm256_mask_cmpneq_epu8_mask, _mm256_mask_cmplt_epu8_mask,
> 	_mm256_mask_cmpge_epu8_mask, _mm256_mask_cmple_epu8_mask): Change
> 	return type as well as __M argument type and all casts from __mmask8
> 	to __mmask32.
> 	(_mm256_mask_cmpneq_epu16_mask, _mm256_mask_cmplt_epu16_mask,
> 	_mm256_mask_cmpge_epu16_mask, _mm256_mask_cmple_epu16_mask): Change
> 	return type as well as __M argument type and all casts from __mmask8
> 	to __mmask16.
> 	(_mm256_mask_cmpneq_epi8_mask, _mm256_mask_cmplt_epi8_mask,
> 	_mm256_mask_cmpge_epi8_mask, _mm256_mask_cmple_epi8_mask): Change
> 	return type as well as __M argument type and all casts from __mmask8
> 	to __mmask32.
> 	(_mm256_mask_cmpneq_epi16_mask, _mm256_mask_cmplt_epi16_mask,
> 	_mm256_mask_cmpge_epi16_mask, _mm256_mask_cmple_epi16_mask): Change
> 	return type as well as __M argument type and all casts from __mmask8
> 	to __mmask16.
> 	* config/i386/avx512vbmi2vlintrin.h (_mm_mask_shrdi_epi32,
> 	_mm_mask_shldi_epi32): Cast last argument to __mmask8 instead of
> 	__mmask16.
> 
> 	* gcc.target/i386/avx512bw-vpcmpb-2.c (CMP): Use SIZE macro instead
> 	of hardcoding size.  Cast (rel) to MASK_TYPE.
> 	* gcc.target/i386/avx512bw-vpcmpub-2.c (CMP): Likewise.
> 	* gcc.target/i386/avx512f-vinserti32x4-3.c: New test.
> 	* gcc.target/i386/avx512f-vinsertf32x4-3.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpnequb-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpgeub-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpleb-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpgeb-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpltb-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpltub-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpleub-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpneqb-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpnequw-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpgeuw-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmplew-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpgew-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpltw-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpltuw-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpleuw-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpcmpneqw-2.c: New test.
> 
> 2018-07-07  Grazvydas Ignotas  <notasas@gmail.com>
> 
> 	* config/i386/avx512bwintrin.h: (_mm512_mask_cmp_epi8_mask,
> 	_mm512_mask_cmp_epu8_mask): Use __mmask64 type instead of __mmask32
> 	for __U argument.
> 
> 	* gcc.target/i386/avx512bw-vpcmpb-2.c (SIZE): Define to
> 	(AVX512F_LEN / 8) instead of (AVX512F_LEN / 16).
> 	* gcc.target/i386/avx512bw-vpcmpub-2.c (SIZE): Likewise.
OK.

FWIW, we have plenty of avx512 machines available in beaker.

You can do queries based on the cpuflags.  Select "Key/Value" for the
table.  "CPUFLAGS" for the Keyvalue "contains" for Operation and
"avx512" for Value.

Jeff

Do a search on Key/Value for CPUFLAGS contains avx512.

jeff

Patch

--- gcc/config/i386/avx512bwintrin.h.jj	2018-01-03 10:20:06.699535804 +0100
+++ gcc/config/i386/avx512bwintrin.h	2018-07-06 23:33:03.782664372 +0200
@@ -3043,7 +3043,7 @@  _mm512_cmp_epi16_mask (__m512i __X, __m5
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epi8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
 			   const int __P)
 {
   return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
@@ -3081,7 +3081,7 @@  _mm512_cmp_epu16_mask (__m512i __X, __m5
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epu8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
 			   const int __P)
 {
   return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
--- gcc/config/i386/avx512bitalgintrin.h.jj	2018-01-26 12:43:26.374922539 +0100
+++ gcc/config/i386/avx512bitalgintrin.h	2018-07-06 23:33:03.782664372 +0200
@@ -107,7 +107,7 @@  _mm512_bitshuffle_epi64_mask (__m512i __
 
 extern __inline __mmask64
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B)
+_mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
 {
   return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
 						 (__v64qi) __B,
--- gcc/config/i386/avx512fintrin.h.jj	2018-05-21 13:15:43.494581775 +0200
+++ gcc/config/i386/avx512fintrin.h	2018-07-06 23:33:03.786664375 +0200
@@ -7377,7 +7377,7 @@  _mm512_xor_epi64 (__m512i __A, __m512i _
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
 						 (__v8di) __B,
@@ -7387,7 +7387,7 @@  _mm512_mask_xor_epi64 (__m512i __W, __mm
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
+_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
 						 (__v8di) __B,
@@ -9615,7 +9615,7 @@  _mm512_cmpneq_epu32_mask (__m512i __X, _
 
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
+_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
 						    (__v8di) __Y, 4,
@@ -10877,22 +10877,22 @@  _mm512_mask_insertf32x4 (__m512 __A, __m
 #define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 
 #define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (),     \
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 
 #define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
-					     (__mmask8)(B)))
+					     (__mmask16)(B)))
 
 #define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
-					      (__mmask8)(B)))
+					      (__mmask16)(B)))
 #endif
 
 extern __inline __m512i
--- gcc/config/i386/avx512vlintrin.h.jj	2018-01-03 10:20:06.152535716 +0100
+++ gcc/config/i386/avx512vlintrin.h	2018-07-06 23:33:03.789664378 +0200
@@ -466,7 +466,7 @@  _mm256_maskz_add_pd (__mmask8 __U, __m25
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
@@ -476,7 +476,7 @@  _mm_mask_add_ps (__m128 __W, __mmask16 _
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
@@ -487,7 +487,7 @@  _mm_maskz_add_ps (__mmask16 __U, __m128
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
@@ -497,7 +497,7 @@  _mm256_mask_add_ps (__m256 __W, __mmask1
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
@@ -551,7 +551,7 @@  _mm256_maskz_sub_pd (__mmask8 __U, __m25
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
@@ -561,7 +561,7 @@  _mm_mask_sub_ps (__m128 __W, __mmask16 _
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
@@ -572,7 +572,7 @@  _mm_maskz_sub_ps (__mmask16 __U, __m128
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
@@ -582,7 +582,7 @@  _mm256_mask_sub_ps (__m256 __W, __mmask1
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
@@ -1320,7 +1320,7 @@  _mm256_mask_cvtepi32_ps (__m256 __W, __m
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
+_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
 {
   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
 						   (__v8sf)
@@ -1339,7 +1339,7 @@  _mm_mask_cvtepi32_ps (__m128 __W, __mmas
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
+_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
 						   (__v4sf)
--- gcc/config/i386/avx512vlbwintrin.h.jj	2018-01-03 10:20:06.598535787 +0100
+++ gcc/config/i386/avx512vlbwintrin.h	2018-07-06 23:33:03.790664378 +0200
@@ -1467,7 +1467,7 @@  _mm256_cmp_epi16_mask (__m256i __X, __m2
 
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epi8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
 			const int __P)
 {
   return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
@@ -1486,7 +1486,7 @@  _mm_cmp_epi8_mask (__m128i __X, __m128i
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epi8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
 			   const int __P)
 {
   return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
@@ -1494,7 +1494,7 @@  _mm256_mask_cmp_epi8_mask (__mmask16 __U
 						  (__mmask32) __U);
 }
 
-extern __inline __mmask16
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
 {
@@ -1543,7 +1543,7 @@  _mm256_cmp_epu16_mask (__m256i __X, __m2
 
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epu8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
 			const int __P)
 {
   return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
@@ -1562,7 +1562,7 @@  _mm_cmp_epu8_mask (__m128i __X, __m128i
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epu8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
 			   const int __P)
 {
   return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
@@ -1570,7 +1570,7 @@  _mm256_mask_cmp_epu8_mask (__mmask16 __U
 						   (__mmask32) __U);
 }
 
-extern __inline __mmask16
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
 {
@@ -1998,7 +1998,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m1
 #define _mm_mask_cmp_epi16_mask(M, X, Y, P)				\
   ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X),	\
 					    (__v8hi)(__m128i)(Y), (int)(P),\
-					    (__mmask16)(M)))
+					    (__mmask8)(M)))
 
 #define _mm_mask_cmp_epi8_mask(M, X, Y, P)				\
   ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X),	\
@@ -2430,7 +2430,7 @@  _mm_maskz_mullo_epi16 (__mmask8 __U, __m
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
 						    (__v16hi) __W,
@@ -2449,7 +2449,7 @@  _mm256_maskz_cvtepi8_epi16 (__mmask16 __
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
 						    (__v8hi) __W,
@@ -2468,7 +2468,7 @@  _mm_maskz_cvtepi8_epi16 (__mmask8 __U, _
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
 						    (__v16hi) __W,
@@ -2487,7 +2487,7 @@  _mm256_maskz_cvtepu8_epi16 (__mmask16 __
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
 						    (__v8hi) __W,
@@ -4541,148 +4541,148 @@  _mm_mask_cmple_epi16_mask (__mmask8 __M,
 						 (__mmask8) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 4,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 4,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 1,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 1,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 5,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 5,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 2,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 2,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 4,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 4,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 1,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 1,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 5,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 5,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 2,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 2,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 4,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 4,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 1,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 1,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 5,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 5,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 2,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 2,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 4,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+						  (__v16hi) __Y, 4,
+						  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 1,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+						  (__v16hi) __Y, 1,
+						  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 5,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+						  (__v16hi) __Y, 5,
+						  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 2,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+						  (__v16hi) __Y, 2,
+						  (__mmask16) __M);
 }
 
 #ifdef __DISABLE_AVX512VLBW__
--- gcc/config/i386/avx512vbmi2vlintrin.h.jj	2018-01-03 10:20:06.085535705 +0100
+++ gcc/config/i386/avx512vbmi2vlintrin.h	2018-07-06 23:33:03.791664379 +0200
@@ -541,7 +541,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __
 	(__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shrdi_epi32(A, B, C, D, E) \
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
-	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shrdi_epi32(A, B, C, D) \
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
 	(__v4si)(__m128i)(C),(int)(D), \
@@ -601,7 +601,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __
 	(__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shldi_epi32(A, B, C, D, E) \
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
-	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shldi_epi32(A, B, C, D) \
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \
 	(__v4si)(__m128i)(C),(int)(D), \
--- gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-2.c.jj	2014-12-01 14:57:15.467700715 +0100
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-2.c	2018-07-06 22:39:20.531825189 +0200
@@ -6,17 +6,15 @@ 
 #include "avx512f-helper.h"
 
 #include <math.h>
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
 #include "avx512f-mask-type.h"
 
 #if AVX512F_LEN == 512
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 64; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm512_loadu_si512 (s1);		\
     source2.x = _mm512_loadu_si512 (s2);		\
     dst1 = _mm512_cmp_epi8_mask (source1.x, source2.x, imm);\
@@ -29,10 +27,8 @@ 
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 32; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm256_loadu_si256 ((__m256i*)s1);	\
     source2.x = _mm256_loadu_si256 ((__m256i*)s2);	\
     dst1 = _mm256_cmp_epi8_mask (source1.x, source2.x, imm);\
@@ -45,10 +41,8 @@ 
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 16; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm_loadu_si128 ((__m128i*)s1);		\
     source2.x = _mm_loadu_si128 ((__m128i*)s2);		\
     dst1 = _mm_cmp_epi8_mask (source1.x, source2.x, imm);\
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltw-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltw-2.c	2018-07-06 23:26:43.443365254 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqw-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqw-2.c	2018-07-06 23:26:43.443365254 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequb-2.c.jj	2018-07-06 21:55:30.376035400 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequb-2.c	2018-07-06 21:56:09.360078733 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequb-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmplew-2.c.jj	2018-07-06 23:26:43.442365253 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmplew-2.c	2018-07-06 23:26:43.442365253 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmplew-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmplew-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeub-2.c.jj	2018-07-06 21:55:30.394035420 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeub-2.c	2018-07-06 21:56:45.496118894 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeub-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleb-2.c.jj	2018-07-06 21:55:30.398035424 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleb-2.c	2018-07-06 21:56:54.170128532 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleb-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeb-2.c.jj	2018-07-06 21:55:30.389035415 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeb-2.c	2018-07-06 21:56:38.217110801 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeb-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequw-2.c.jj	2018-07-06 23:26:43.442365253 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequw-2.c	2018-07-06 23:26:43.442365253 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpnequw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltb-2.c.jj	2018-07-06 21:55:30.380035405 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltb-2.c	2018-07-06 21:56:18.298088665 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltb-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-3.c.jj	2018-07-06 21:03:53.975540200 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-3.c	2018-07-06 21:04:11.356560385 +0200
@@ -0,0 +1,59 @@ 
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void static
+CALC (UNION_TYPE (AVX512F_LEN, i_d) s1, union128i_d s2, int *res_ref, int imm)
+{
+  memcpy (res_ref, s1.a, SIZE * sizeof (int));
+  memcpy (res_ref + imm * 4, s2.a, 16);
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
+  union128i_d s2;
+  int res_ref[SIZE];
+  int j;
+
+  MASK_TYPE mask = (MASK_TYPE) 0xa55a;
+
+  for (j = 0; j < SIZE; j++)
+    {
+      s1.a[j] = j * j;
+      res1.a[j] = DEFAULT_VALUE;
+      res2.a[j] = DEFAULT_VALUE;
+      res3.a[j] = DEFAULT_VALUE;
+    }
+
+  for (j = 0; j < 4; j++)
+    s2.a[j] = j * j * j;
+
+  res1.x = INTRINSIC (_inserti32x4) (s1.x, s2.x, 1);
+  res2.x = INTRINSIC (_mask_inserti32x4) (res2.x, mask, s1.x, s2.x, 1);
+  res3.x = INTRINSIC (_maskz_inserti32x4) (mask, s1.x, s2.x, 1);
+
+  CALC (s1, s2, res_ref, 1);
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_d) (res_ref, mask, SIZE);
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_d) (res_ref, mask, SIZE);
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgew-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgew-2.c	2018-07-06 23:26:43.442365253 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgew-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgew-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuw-2.c.jj	2018-07-06 23:26:43.442365253 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuw-2.c	2018-07-06 23:26:43.442365253 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgeuw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltub-2.c.jj	2018-07-06 21:55:30.385035410 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltub-2.c	2018-07-06 21:56:29.853101506 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltub-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuw-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuw-2.c	2018-07-06 23:26:43.443365254 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpltuw-2.c"
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleub-2.c.jj	2018-07-06 21:55:30.403035430 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleub-2.c	2018-07-06 21:57:04.198139681 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleb-2.c"
--- gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-2.c.jj	2014-12-01 14:57:15.466700732 +0100
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-2.c	2018-07-06 22:40:41.666912357 +0200
@@ -6,17 +6,15 @@ 
 #include "avx512f-helper.h"
 
 #include <math.h>
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
 #include "avx512f-mask-type.h"
 
 #if AVX512F_LEN == 512
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 64; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm512_loadu_si512 (s1);		\
     source2.x = _mm512_loadu_si512 (s2);		\
     dst1 = _mm512_cmp_epu8_mask (source1.x, source2.x, imm);\
@@ -29,10 +27,8 @@ 
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 32; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm256_loadu_si256 ((__m256i*)s1);	\
     source2.x = _mm256_loadu_si256 ((__m256i*)s2);	\
     dst1 = _mm256_cmp_epu8_mask (source1.x, source2.x, imm);\
@@ -45,10 +41,8 @@ 
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 16; i++)				\
-    {							\
-      dst_ref = ((rel) << i) | dst_ref;			\
-    }							\
+    for (i = 0; i < SIZE; i++)				\
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
     source1.x = _mm_loadu_si128 ((__m128i*)s1);		\
     source2.x = _mm_loadu_si128 ((__m128i*)s2);		\
     dst1 = _mm_cmp_epu8_mask (source1.x, source2.x, imm);\
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuw-2.c.jj	2018-07-06 23:26:43.443365254 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuw-2.c	2018-07-06 23:26:43.443365254 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpleuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmplew-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-3.c.jj	2018-07-06 21:00:03.880272894 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-3.c	2018-07-06 21:01:52.615399210 +0200
@@ -0,0 +1,59 @@ 
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void static
+CALC (UNION_TYPE (AVX512F_LEN,) s1, union128 s2, float *res_ref, int imm)
+{
+  memcpy (res_ref, s1.a, SIZE * sizeof (float));
+  memcpy (res_ref + imm * 4, s2.a, 16);
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN,) s1, res1, res2, res3;
+  union128 s2;
+  float res_ref[SIZE];
+  int j;
+
+  MASK_TYPE mask = (MASK_TYPE) 0xa55a;
+
+  for (j = 0; j < SIZE; j++)
+    {
+      s1.a[j] = j * j / 10.2;
+      res1.a[j] = DEFAULT_VALUE;
+      res2.a[j] = DEFAULT_VALUE;
+      res3.a[j] = DEFAULT_VALUE;
+    }
+
+  for (j = 0; j < 4; j++)
+    s2.a[j] = j * j * j / 2.03;
+
+  res1.x = INTRINSIC (_insertf32x4) (s1.x, s2.x, 1);
+  res2.x = INTRINSIC (_mask_insertf32x4) (res2.x, mask, s1.x, s2.x, 1);
+  res3.x = INTRINSIC (_maskz_insertf32x4) (mask, s1.x, s2.x, 1);
+
+  CALC (s1, s2, res_ref, 1);
+
+  if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE () (res_ref, mask, SIZE);
+
+  if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, SIZE);
+
+  if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqb-2.c.jj	2018-07-06 21:55:30.371035395 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqb-2.c	2018-07-06 21:55:52.639060139 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpneqb-2.c"