diff mbox

Add AVX512 k-mask intrinsics

Message ID CAMXFM3u61pLCrWKFzGQL=Rn+rwzKh8iU4ZSZHj_nUEAM3-2BiQ@mail.gmail.com
State New
Headers show

Commit Message

Andrew Senkevich Dec. 15, 2016, 6:55 p.m. UTC
2016-12-15 19:51 GMT+03:00 Uros Bizjak <ubizjak@gmail.com>:
> On Thu, Dec 15, 2016 at 2:31 PM, Andrew Senkevich
> <andrew.n.senkevich@gmail.com> wrote:
>> 2016-12-14 22:55 GMT+03:00 Uros Bizjak <ubizjak@gmail.com>:
>>> On Wed, Dec 14, 2016 at 8:04 PM, Andrew Senkevich
>>> <andrew.n.senkevich@gmail.com> wrote:
>>>
>>>> here is the second part of k-mask intrinsics, is it Ok?
>>>
>>>> --- a/gcc/config/i386/sse.md
>>>> +++ b/gcc/config/i386/sse.md
>>>> @@ -1309,12 +1309,30 @@
>>>>  ;; Mask variant shift mnemonics
>>>>  (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
>>>>
>>>> +(define_expand "kmovb"
>>>> +  [(set (match_operand:QI 0 "nonimmediate_operand")
>>>> + (match_operand:QI 1 "nonimmediate_operand"))]
>>>> +  "TARGET_AVX512DQ
>>>> +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>> +
>>>>  (define_expand "kmovw"
>>>>    [(set (match_operand:HI 0 "nonimmediate_operand")
>>>>   (match_operand:HI 1 "nonimmediate_operand"))]
>>>>    "TARGET_AVX512F
>>>>     && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>>
>>>> +(define_expand "kmovd"
>>>> +  [(set (match_operand:SI 0 "nonimmediate_operand")
>>>> + (match_operand:SI 1 "nonimmediate_operand"))]
>>>> +  "TARGET_AVX512BW
>>>> +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>> +
>>>> +(define_expand "kmovq"
>>>> +  [(set (match_operand:DI 0 "nonimmediate_operand")
>>>> + (match_operand:DI 1 "nonimmediate_operand"))]
>>>> +  "TARGET_AVX512BW
>>>> +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>> +
>>>>  (define_insn "k<code><mode>"
>>>>    [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
>>>>   (any_logic:SWI1248_AVX512BW
>>>
>>> All the above patterns can be macroized with the following patch:
>>>
>>> --cut here--
>>> Index: sse.md
>>> ===================================================================
>>> --- sse.md      (revision 243651)
>>> +++ sse.md      (working copy)
>>> @@ -1309,9 +1309,9 @@
>>>  ;; Mask variant shift mnemonics
>>>  (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
>>>
>>> -(define_expand "kmovw"
>>> -  [(set (match_operand:HI 0 "nonimmediate_operand")
>>> -       (match_operand:HI 1 "nonimmediate_operand"))]
>>> +(define_expand "kmov<mskmodesuffix>"
>>> +  [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
>>> +       (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
>>>    "TARGET_AVX512F
>>>     && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>
>>> --cut here--
>>>
>>> Please also post ChangeLog entry.
>>
>> Thanks,
>>
>> here is with ChangeLogs and renamed internal __builtin_ia32_kmov* to
>> match instruction names.
>> For __builtin_ia32_kmov16 change I will follow up for update in branches.
>>
>> Regtested on x86_64-linux-gnu, Ok for trunk?
>
> OK.

Thanks,

here is one more part for kadd{b,w,d,q}, is it ok?

gcc/
    * config/i386/avx512bwintrin.h: Add new k-mask intrinsics.
    * config/i386/avx512dqintrin.h: Ditto.
    * config/i386/avx512fintrin.h: Ditto.
    * config/i386/i386-builtin.def (__builtin_ia32_kaddqi,
    __builtin_ia32_kaddhi, __builtin_ia32_kaddsi,
    __builtin_ia32_kadddi): New.
    * config/i386/sse.md (kadd<mode>): New.

gcc/testsuite/
    * gcc.target/i386/avx512bw-kaddd-1.c: New test.
    * gcc.target/i386/avx512bw-kaddq-1.c: Ditto.
    * gcc.target/i386/avx512dq-kaddb-1.c: Ditto.
    * gcc.target/i386/avx512f-kaddw-1.c: Ditto.

+}


--
WBR,
Andrew

Comments

Uros Bizjak Dec. 16, 2016, 12:40 p.m. UTC | #1
On Thu, Dec 15, 2016 at 7:55 PM, Andrew Senkevich
<andrew.n.senkevich@gmail.com> wrote:
> 2016-12-15 19:51 GMT+03:00 Uros Bizjak <ubizjak@gmail.com>:
>> On Thu, Dec 15, 2016 at 2:31 PM, Andrew Senkevich
>> <andrew.n.senkevich@gmail.com> wrote:
>>> 2016-12-14 22:55 GMT+03:00 Uros Bizjak <ubizjak@gmail.com>:
>>>> On Wed, Dec 14, 2016 at 8:04 PM, Andrew Senkevich
>>>> <andrew.n.senkevich@gmail.com> wrote:
>>>>
>>>>> here is the second part of k-mask intrinsics, is it Ok?
>>>>
>>>>> --- a/gcc/config/i386/sse.md
>>>>> +++ b/gcc/config/i386/sse.md
>>>>> @@ -1309,12 +1309,30 @@
>>>>>  ;; Mask variant shift mnemonics
>>>>>  (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
>>>>>
>>>>> +(define_expand "kmovb"
>>>>> +  [(set (match_operand:QI 0 "nonimmediate_operand")
>>>>> + (match_operand:QI 1 "nonimmediate_operand"))]
>>>>> +  "TARGET_AVX512DQ
>>>>> +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>>> +
>>>>>  (define_expand "kmovw"
>>>>>    [(set (match_operand:HI 0 "nonimmediate_operand")
>>>>>   (match_operand:HI 1 "nonimmediate_operand"))]
>>>>>    "TARGET_AVX512F
>>>>>     && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>>>
>>>>> +(define_expand "kmovd"
>>>>> +  [(set (match_operand:SI 0 "nonimmediate_operand")
>>>>> + (match_operand:SI 1 "nonimmediate_operand"))]
>>>>> +  "TARGET_AVX512BW
>>>>> +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>>> +
>>>>> +(define_expand "kmovq"
>>>>> +  [(set (match_operand:DI 0 "nonimmediate_operand")
>>>>> + (match_operand:DI 1 "nonimmediate_operand"))]
>>>>> +  "TARGET_AVX512BW
>>>>> +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>>> +
>>>>>  (define_insn "k<code><mode>"
>>>>>    [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
>>>>>   (any_logic:SWI1248_AVX512BW
>>>>
>>>> All the above patterns can be macroized with the following patch:
>>>>
>>>> --cut here--
>>>> Index: sse.md
>>>> ===================================================================
>>>> --- sse.md      (revision 243651)
>>>> +++ sse.md      (working copy)
>>>> @@ -1309,9 +1309,9 @@
>>>>  ;; Mask variant shift mnemonics
>>>>  (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
>>>>
>>>> -(define_expand "kmovw"
>>>> -  [(set (match_operand:HI 0 "nonimmediate_operand")
>>>> -       (match_operand:HI 1 "nonimmediate_operand"))]
>>>> +(define_expand "kmov<mskmodesuffix>"
>>>> +  [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
>>>> +       (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
>>>>    "TARGET_AVX512F
>>>>     && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
>>>>
>>>> --cut here--
>>>>
>>>> Please also post ChangeLog entry.
>>>
>>> Thanks,
>>>
>>> here is with ChangeLogs and renamed internal __builtin_ia32_kmov* to
>>> match instruction names.
>>> For __builtin_ia32_kmov16 change I will follow up for update in branches.
>>>
>>> Regtested on x86_64-linux-gnu, Ok for trunk?
>>
>> OK.
>
> Thanks,
>
> here is one more part for kadd{b,w,d,q}, is it ok?
>
> gcc/
>     * config/i386/avx512bwintrin.h: Add new k-mask intrinsics.
>     * config/i386/avx512dqintrin.h: Ditto.
>     * config/i386/avx512fintrin.h: Ditto.
>     * config/i386/i386-builtin.def (__builtin_ia32_kaddqi,
>     __builtin_ia32_kaddhi, __builtin_ia32_kaddsi,
>     __builtin_ia32_kadddi): New.
>     * config/i386/sse.md (kadd<mode>): New.
>
> gcc/testsuite/
>     * gcc.target/i386/avx512bw-kaddd-1.c: New test.
>     * gcc.target/i386/avx512bw-kaddq-1.c: Ditto.
>     * gcc.target/i386/avx512dq-kaddb-1.c: Ditto.
>     * gcc.target/i386/avx512f-kaddw-1.c: Ditto.

OK.

I'll commit the patch to mainline later today.

Thanks,
Uros.
Andrew Senkevich Jan. 16, 2017, 10:30 p.m. UTC | #2
Hi,

here is one more part of intrinsics for k-mask registers shifts:

gcc/
    * config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
    * config/i386/avx512dqintrin.h: Ditto.
    * config/i386/avx512fintrin.h: Ditto.
    * config/i386/i386-builtin-types.def: Add new types.
    * gcc/config/i386/i386.c: Handle new types.
    * config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi,
    __builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi,
    __builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi,
    __builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi,
    __builtin_ia32_kshiftridi): New.
    * config/i386/sse.md (k<code><mode>2): Rename *k<code><mode>.

gcc/testsuite/
    * gcc.target/i386/avx512bw-kshiftld-1.c: New test.
    * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
    * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
    * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
    * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
    * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.


Is it Ok for trunk?


--
WBR,
Andrew
Jakub Jelinek Jan. 16, 2017, 10:55 p.m. UTC | #3
On Tue, Jan 17, 2017 at 01:30:11AM +0300, Andrew Senkevich wrote:
> here is one more part of intrinsics for k-mask registers shifts:

The software developer manuals describe KSHIFT{L,R}* like:
KSHIFTLW
COUNT <- imm8[7:0]
DEST[MAX_KL-1:0] <- 0
IF COUNT <=15
THEN DEST[15:0] <- SRC1[15:0] << COUNT;
FI;

What is the behavior when src1 == dest, like:
  kshiftld $3, %k3, %k3
?  Is it just a bug in the SDM and will it actually do the expected thing
(set %k3 to %k3 << 3 and clear just the upper bits), or do we need
an early-clobber on the destination to make sure GCC never emits these
insns with the same register as both input and output?

	Jakub
Uros Bizjak Jan. 17, 2017, 8:12 a.m. UTC | #4
On Mon, Jan 16, 2017 at 11:30 PM, Andrew Senkevich
<andrew.n.senkevich@gmail.com> wrote:
> Hi,
>
> here is one more part of intrinsics for k-mask registers shifts:
>
> gcc/
>     * config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
>     * config/i386/avx512dqintrin.h: Ditto.
>     * config/i386/avx512fintrin.h: Ditto.
>     * config/i386/i386-builtin-types.def: Add new types.
>     * gcc/config/i386/i386.c: Handle new types.
>     * config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi,
>     __builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi,
>     __builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi,
>     __builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi,
>     __builtin_ia32_kshiftridi): New.
>     * config/i386/sse.md (k<code><mode>2): Rename *k<code><mode>.
>
> gcc/testsuite/
>     * gcc.target/i386/avx512bw-kshiftld-1.c: New test.
>     * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
>     * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
>     * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
>     * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
>     * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
>
>
> Is it Ok for trunk?

-(define_insn "*k<code><mode>"
+(define_insn "k<code><mode>2"
   [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
  (any_lshift:SWI1248_AVX512BWDQ
   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")

Please do not add "2" to the insn name to follow de-facto convention
of other mask insn names.

Otherwise, OK - but please check Jakub's question first.

Uros.
Andrew Senkevich Jan. 17, 2017, 11:04 a.m. UTC | #5
2017-01-17 1:55 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
> On Tue, Jan 17, 2017 at 01:30:11AM +0300, Andrew Senkevich wrote:
>> here is one more part of intrinsics for k-mask registers shifts:
>
> The software developer manuals describe KSHIFT{L,R}* like:
> KSHIFTLW
> COUNT <- imm8[7:0]
> DEST[MAX_KL-1:0] <- 0
> IF COUNT <=15
> THEN DEST[15:0] <- SRC1[15:0] << COUNT;
> FI;
>
> What is the behavior when src1 == dest, like:
>   kshiftld $3, %k3, %k3
> ?  Is it just a bug in the SDM and will it actually do the expected thing
> (set %k3 to %k3 << 3 and clear just the upper bits), or do we need
> an early-clobber on the destination to make sure GCC never emits these
> insns with the same register as both input and output?

Indeed, it should be different registers, how to do it?


--
WBR,
Andrew
Uros Bizjak Jan. 17, 2017, 11:06 a.m. UTC | #6
On Tue, Jan 17, 2017 at 12:04 PM, Andrew Senkevich
<andrew.n.senkevich@gmail.com> wrote:
> 2017-01-17 1:55 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
>> On Tue, Jan 17, 2017 at 01:30:11AM +0300, Andrew Senkevich wrote:
>>> here is one more part of intrinsics for k-mask registers shifts:
>>
>> The software developer manuals describe KSHIFT{L,R}* like:
>> KSHIFTLW
>> COUNT <- imm8[7:0]
>> DEST[MAX_KL-1:0] <- 0
>> IF COUNT <=15
>> THEN DEST[15:0] <- SRC1[15:0] << COUNT;
>> FI;
>>
>> What is the behavior when src1 == dest, like:
>>   kshiftld $3, %k3, %k3
>> ?  Is it just a bug in the SDM and will it actually do the expected thing
>> (set %k3 to %k3 << 3 and clear just the upper bits), or do we need
>> an early-clobber on the destination to make sure GCC never emits these
>> insns with the same register as both input and output?
>
> Indeed, it should be different registers, how to do it?

"=&k" as operand 0 constraint.

Uros.
Kirill Yukhin Jan. 17, 2017, 12:30 p.m. UTC | #7
Hi Anrey,
On 17 Jan 14:04, Andrew Senkevich wrote:
> 2017-01-17 1:55 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
> > On Tue, Jan 17, 2017 at 01:30:11AM +0300, Andrew Senkevich wrote:
> >> here is one more part of intrinsics for k-mask registers shifts:
> >
> > The software developer manuals describe KSHIFT{L,R}* like:
> > KSHIFTLW
> > COUNT <- imm8[7:0]
> > DEST[MAX_KL-1:0] <- 0
> > IF COUNT <=15
> > THEN DEST[15:0] <- SRC1[15:0] << COUNT;
> > FI;
> >
> > What is the behavior when src1 == dest, like:
> >   kshiftld $3, %k3, %k3
> > ?  Is it just a bug in the SDM and will it actually do the expected thing
> > (set %k3 to %k3 << 3 and clear just the upper bits), or do we need
> > an early-clobber on the destination to make sure GCC never emits these
> > insns with the same register as both input and output?
>
> Indeed, it should be different registers, how to do it?
Are you sure?

I've played a bit w/ SDE. And looks like operands are not early clobber:
TID0: INS 0x00000000004003ee             AVX512VEX kmovd k0, eax
TID0:   k0 := 00000000_ffffffff
...
TID0: INS 0x00000000004003f4             AVX512VEX kshiftlw k0, k0, 0x3
TID0:   k0 := 00000000_0000fff8

You can see that same dest and source works just fine.

--
Thanks, K
>
>
> --
> WBR,
> Andrew
Andrew Senkevich Jan. 17, 2017, 1:03 p.m. UTC | #8
2017-01-17 15:30 GMT+03:00 Kirill Yukhin <kirill.yukhin@gmail.com>:
> Hi Anrey,
> On 17 Jan 14:04, Andrew Senkevich wrote:
>> 2017-01-17 1:55 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
>> > On Tue, Jan 17, 2017 at 01:30:11AM +0300, Andrew Senkevich wrote:
>> >> here is one more part of intrinsics for k-mask registers shifts:
>> >
>> > The software developer manuals describe KSHIFT{L,R}* like:
>> > KSHIFTLW
>> > COUNT <- imm8[7:0]
>> > DEST[MAX_KL-1:0] <- 0
>> > IF COUNT <=15
>> > THEN DEST[15:0] <- SRC1[15:0] << COUNT;
>> > FI;
>> >
>> > What is the behavior when src1 == dest, like:
>> >   kshiftld $3, %k3, %k3
>> > ?  Is it just a bug in the SDM and will it actually do the expected thing
>> > (set %k3 to %k3 << 3 and clear just the upper bits), or do we need
>> > an early-clobber on the destination to make sure GCC never emits these
>> > insns with the same register as both input and output?
>>
>> Indeed, it should be different registers, how to do it?
> Are you sure?
>
> I've played a bit w/ SDE. And looks like operands are not early clobber:
> TID0: INS 0x00000000004003ee             AVX512VEX kmovd k0, eax
> TID0:   k0 := 00000000_ffffffff
> ...
> TID0: INS 0x00000000004003f4             AVX512VEX kshiftlw k0, k0, 0x3
> TID0:   k0 := 00000000_0000fff8
>
> You can see that same dest and source works just fine.

Hmm, I looked only on what ICC generates, and it was not correct way.

Thanks Kirill!


--
WBR,
Andrew
Jakub Jelinek Jan. 17, 2017, 1:51 p.m. UTC | #9
On Tue, Jan 17, 2017 at 04:03:08PM +0300, Andrew Senkevich wrote:
> > I've played a bit w/ SDE. And looks like operands are not early clobber:
> > TID0: INS 0x00000000004003ee             AVX512VEX kmovd k0, eax
> > TID0:   k0 := 00000000_ffffffff
> > ...
> > TID0: INS 0x00000000004003f4             AVX512VEX kshiftlw k0, k0, 0x3
> > TID0:   k0 := 00000000_0000fff8
> >
> > You can see that same dest and source works just fine.
> 
> Hmm, I looked only on what ICC generates, and it was not correct way.

I've just tried
int
main ()
{
  unsigned int a = 0x5555;
  asm volatile ("kmovw %1, %%k6; kshiftlw $1, %%k6, %%k6; kmovw %%k6, %0" : "=r" (a) : "r" (a) : "k6");
  __builtin_printf ("%x\n", a);
  return 0;
}
on KNL and got 0xaaaa.
Are you going to report to the SDM authors so that they fix it up?
E.g. using TEMP <- SRC1[0:...] before DEST[...] <- 0 and using TEMP
instead of SRC1[0:...] would fix it, or filling up TEMP first and only
at the end assigning DEST <- TEMP etc. would do.

	Jakub
Andrew Senkevich Jan. 18, 2017, 12:45 p.m. UTC | #10
2017-01-17 16:51 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
> On Tue, Jan 17, 2017 at 04:03:08PM +0300, Andrew Senkevich wrote:
>> > I've played a bit w/ SDE. And looks like operands are not early clobber:
>> > TID0: INS 0x00000000004003ee             AVX512VEX kmovd k0, eax
>> > TID0:   k0 := 00000000_ffffffff
>> > ...
>> > TID0: INS 0x00000000004003f4             AVX512VEX kshiftlw k0, k0, 0x3
>> > TID0:   k0 := 00000000_0000fff8
>> >
>> > You can see that same dest and source works just fine.
>>
>> Hmm, I looked only on what ICC generates, and it was not correct way.
>
> I've just tried
> int
> main ()
> {
>   unsigned int a = 0x5555;
>   asm volatile ("kmovw %1, %%k6; kshiftlw $1, %%k6, %%k6; kmovw %%k6, %0" : "=r" (a) : "r" (a) : "k6");
>   __builtin_printf ("%x\n", a);
>   return 0;
> }
> on KNL and got 0xaaaa.
> Are you going to report to the SDM authors so that they fix it up?
> E.g. using TEMP <- SRC1[0:...] before DEST[...] <- 0 and using TEMP
> instead of SRC1[0:...] would fix it, or filling up TEMP first and only
> at the end assigning DEST <- TEMP etc. would do.

Yes, we will work on it.

Attached patch refactored in part of builtints declarations and tests, is it Ok?

gcc/
    * config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
    * config/i386/avx512dqintrin.h: Ditto.
    * config/i386/avx512fintrin.h: Ditto.
    * config/i386/i386-builtin-types.def: Add new types.
    * gcc/config/i386/i386.c: Handle new types.
    * config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi,
    __builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi,
    __builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi,
    __builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi,
    __builtin_ia32_kshiftridi): New.
    * config/i386/sse.md (k<code><mode>): Rename *k<code><mode>.

gcc/testsuite/
    * gcc.target/i386/avx512bw-kshiftld-1.c: New test.
    * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
    * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
    * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
    * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
    * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
    * gcc.target/i386/avx-1.c: Test new intrinsics.
    * gcc.target/i386/sse-13.c: Ditto.
    * gcc.target/i386/sse-23.c: Ditto.


--
WBR,
Andrew
Uros Bizjak Jan. 18, 2017, 9:42 p.m. UTC | #11
On Wed, Jan 18, 2017 at 1:45 PM, Andrew Senkevich
<andrew.n.senkevich@gmail.com> wrote:
> 2017-01-17 16:51 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
>> On Tue, Jan 17, 2017 at 04:03:08PM +0300, Andrew Senkevich wrote:
>>> > I've played a bit w/ SDE. And looks like operands are not early clobber:
>>> > TID0: INS 0x00000000004003ee             AVX512VEX kmovd k0, eax
>>> > TID0:   k0 := 00000000_ffffffff
>>> > ...
>>> > TID0: INS 0x00000000004003f4             AVX512VEX kshiftlw k0, k0, 0x3
>>> > TID0:   k0 := 00000000_0000fff8
>>> >
>>> > You can see that same dest and source works just fine.
>>>
>>> Hmm, I looked only on what ICC generates, and it was not correct way.
>>
>> I've just tried
>> int
>> main ()
>> {
>>   unsigned int a = 0x5555;
>>   asm volatile ("kmovw %1, %%k6; kshiftlw $1, %%k6, %%k6; kmovw %%k6, %0" : "=r" (a) : "r" (a) : "k6");
>>   __builtin_printf ("%x\n", a);
>>   return 0;
>> }
>> on KNL and got 0xaaaa.
>> Are you going to report to the SDM authors so that they fix it up?
>> E.g. using TEMP <- SRC1[0:...] before DEST[...] <- 0 and using TEMP
>> instead of SRC1[0:...] would fix it, or filling up TEMP first and only
>> at the end assigning DEST <- TEMP etc. would do.
>
> Yes, we will work on it.
>
> Attached patch refactored in part of builtints declarations and tests, is it Ok?
>
> gcc/
>     * config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
>     * config/i386/avx512dqintrin.h: Ditto.
>     * config/i386/avx512fintrin.h: Ditto.
>     * config/i386/i386-builtin-types.def: Add new types.
>     * gcc/config/i386/i386.c: Handle new types.
>     * config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi,
>     __builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi,
>     __builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi,
>     __builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi,
>     __builtin_ia32_kshiftridi): New.
>     * config/i386/sse.md (k<code><mode>): Rename *k<code><mode>.
>
> gcc/testsuite/
>     * gcc.target/i386/avx512bw-kshiftld-1.c: New test.
>     * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
>     * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
>     * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
>     * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
>     * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
>     * gcc.target/i386/avx-1.c: Test new intrinsics.
>     * gcc.target/i386/sse-13.c: Ditto.
>     * gcc.target/i386/sse-23.c: Ditto.

OK.

Thanks,
Uros.
Kirill Yukhin Jan. 19, 2017, 10:39 a.m. UTC | #12
Hi Andrew,
On 18 Jan 15:45, Andrew Senkevich wrote:
> 2017-01-17 16:51 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
> > On Tue, Jan 17, 2017 at 04:03:08PM +0300, Andrew Senkevich wrote:
> >> > I've played a bit w/ SDE. And looks like operands are not early clobber:
> >> > TID0: INS 0x00000000004003ee             AVX512VEX kmovd k0, eax
> >> > TID0:   k0 := 00000000_ffffffff
> >> > ...
> >> > TID0: INS 0x00000000004003f4             AVX512VEX kshiftlw k0, k0, 0x3
> >> > TID0:   k0 := 00000000_0000fff8
> >> >
> >> > You can see that same dest and source works just fine.
> >>
> >> Hmm, I looked only on what ICC generates, and it was not correct way.
> >
> > I've just tried
> > int
> > main ()
> > {
> >   unsigned int a = 0x5555;
> >   asm volatile ("kmovw %1, %%k6; kshiftlw $1, %%k6, %%k6; kmovw %%k6, %0" : "=r" (a) : "r" (a) : "k6");
> >   __builtin_printf ("%x\n", a);
> >   return 0;
> > }
> > on KNL and got 0xaaaa.
> > Are you going to report to the SDM authors so that they fix it up?
> > E.g. using TEMP <- SRC1[0:...] before DEST[...] <- 0 and using TEMP
> > instead of SRC1[0:...] would fix it, or filling up TEMP first and only
> > at the end assigning DEST <- TEMP etc. would do.
>
> Yes, we will work on it.
>
> Attached patch refactored in part of builtints declarations and tests, is it Ok?

Could you please add runtime tests for new intrinsics as well?


--
Thanks, K

> --
> WBR,
> Andrew
Andrew Senkevich Jan. 19, 2017, 4:42 p.m. UTC | #13
2017-01-19 13:39 GMT+03:00 Kirill Yukhin <kirill.yukhin@gmail.com>:
> Hi Andrew,
> On 18 Jan 15:45, Andrew Senkevich wrote:
>> 2017-01-17 16:51 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
>> > On Tue, Jan 17, 2017 at 04:03:08PM +0300, Andrew Senkevich wrote:
>> >> > I've played a bit w/ SDE. And looks like operands are not early clobber:
>> >> > TID0: INS 0x00000000004003ee             AVX512VEX kmovd k0, eax
>> >> > TID0:   k0 := 00000000_ffffffff
>> >> > ...
>> >> > TID0: INS 0x00000000004003f4             AVX512VEX kshiftlw k0, k0, 0x3
>> >> > TID0:   k0 := 00000000_0000fff8
>> >> >
>> >> > You can see that same dest and source works just fine.
>> >>
>> >> Hmm, I looked only on what ICC generates, and it was not correct way.
>> >
>> > I've just tried
>> > int
>> > main ()
>> > {
>> >   unsigned int a = 0x5555;
>> >   asm volatile ("kmovw %1, %%k6; kshiftlw $1, %%k6, %%k6; kmovw %%k6, %0" : "=r" (a) : "r" (a) : "k6");
>> >   __builtin_printf ("%x\n", a);
>> >   return 0;
>> > }
>> > on KNL and got 0xaaaa.
>> > Are you going to report to the SDM authors so that they fix it up?
>> > E.g. using TEMP <- SRC1[0:...] before DEST[...] <- 0 and using TEMP
>> > instead of SRC1[0:...] would fix it, or filling up TEMP first and only
>> > at the end assigning DEST <- TEMP etc. would do.
>>
>> Yes, we will work on it.
>>
>> Attached patch refactored in part of builtints declarations and tests, is it Ok?
>
> Could you please add runtime tests for new intrinsics as well?

Attached with runtime tests.

gcc/
    * config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
    * config/i386/avx512dqintrin.h: Ditto.
    * config/i386/avx512fintrin.h: Ditto.
    * config/i386/i386-builtin-types.def: Add new types.
    * gcc/config/i386/i386.c: Handle new types.
    * config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi,
    __builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi,
    __builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi,
    __builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi,
    __builtin_ia32_kshiftridi): New.
    * config/i386/sse.md (k<code><mode>): Rename *k<code><mode>.

gcc/testsuite/
    * gcc.target/i386/avx512bw-kshiftld-1.c: New test.
    * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
    * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
    * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
    * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
    * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftld-2.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftlq-2.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftrd-2.c: Ditto.
    * gcc.target/i386/avx512bw-kshiftrq-2.c: Ditto.
    * gcc.target/i386/avx512dq-kshiftlb-2.c: Ditto.
    * gcc.target/i386/avx512dq-kshiftrb-2.c: Ditto.
    * gcc.target/i386/avx512f-kshiftlw-2.c: Ditto.
    * gcc.target/i386/avx512f-kshiftrw-2.c: Ditto.
    * gcc.target/i386/avx-1.c: Test new intrinsics.
    * gcc.target/i386/sse-13.c: Ditto.
    * gcc.target/i386/sse-23.c: Ditto.


--
WBR,
Andrew
Kirill Yukhin Jan. 19, 2017, 5:55 p.m. UTC | #14
On 19 Jan 19:42, Andrew Senkevich wrote:
> 2017-01-19 13:39 GMT+03:00 Kirill Yukhin <kirill.yukhin@gmail.com>:
> > Hi Andrew,
> > On 18 Jan 15:45, Andrew Senkevich wrote:
> >> 2017-01-17 16:51 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
> >> > On Tue, Jan 17, 2017 at 04:03:08PM +0300, Andrew Senkevich wrote:
> >> >> > I've played a bit w/ SDE. And looks like operands are not early clobber:
> >> >> > TID0: INS 0x00000000004003ee             AVX512VEX kmovd k0, eax
> >> >> > TID0:   k0 := 00000000_ffffffff
> >> >> > ...
> >> >> > TID0: INS 0x00000000004003f4             AVX512VEX kshiftlw k0, k0, 0x3
> >> >> > TID0:   k0 := 00000000_0000fff8
> >> >> >
> >> >> > You can see that same dest and source works just fine.
> >> >>
> >> >> Hmm, I looked only on what ICC generates, and it was not correct way.
> >> >
> >> > I've just tried
> >> > int
> >> > main ()
> >> > {
> >> >   unsigned int a = 0x5555;
> >> >   asm volatile ("kmovw %1, %%k6; kshiftlw $1, %%k6, %%k6; kmovw %%k6, %0" : "=r" (a) : "r" (a) : "k6");
> >> >   __builtin_printf ("%x\n", a);
> >> >   return 0;
> >> > }
> >> > on KNL and got 0xaaaa.
> >> > Are you going to report to the SDM authors so that they fix it up?
> >> > E.g. using TEMP <- SRC1[0:...] before DEST[...] <- 0 and using TEMP
> >> > instead of SRC1[0:...] would fix it, or filling up TEMP first and only
> >> > at the end assigning DEST <- TEMP etc. would do.
> >>
> >> Yes, we will work on it.
> >>
> >> Attached patch refactored in part of builtints declarations and tests, is it Ok?
> >
> > Could you please add runtime tests for new intrinsics as well?
>
> Attached with runtime tests.
Great! Thanks. Patch is OK for main trunk.

--
Thanks, K
>
> gcc/
>     * config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
>     * config/i386/avx512dqintrin.h: Ditto.
>     * config/i386/avx512fintrin.h: Ditto.
>     * config/i386/i386-builtin-types.def: Add new types.
>     * gcc/config/i386/i386.c: Handle new types.
>     * config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi,
>     __builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi,
>     __builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi,
>     __builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi,
>     __builtin_ia32_kshiftridi): New.
>     * config/i386/sse.md (k<code><mode>): Rename *k<code><mode>.
>
> gcc/testsuite/
>     * gcc.target/i386/avx512bw-kshiftld-1.c: New test.
>     * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
>     * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
>     * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
>     * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
>     * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftld-2.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftlq-2.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftrd-2.c: Ditto.
>     * gcc.target/i386/avx512bw-kshiftrq-2.c: Ditto.
>     * gcc.target/i386/avx512dq-kshiftlb-2.c: Ditto.
>     * gcc.target/i386/avx512dq-kshiftrb-2.c: Ditto.
>     * gcc.target/i386/avx512f-kshiftlw-2.c: Ditto.
>     * gcc.target/i386/avx512f-kshiftrw-2.c: Ditto.
>     * gcc.target/i386/avx-1.c: Test new intrinsics.
>     * gcc.target/i386/sse-13.c: Ditto.
>     * gcc.target/i386/sse-23.c: Ditto.
>
>
> --
> WBR,
> Andrew
Andrew Senkevich Jan. 20, 2017, 1:32 p.m. UTC | #15
2017-01-19 20:55 GMT+03:00 Kirill Yukhin <kirill.yukhin@gmail.com>:
> On 19 Jan 19:42, Andrew Senkevich wrote:
>> 2017-01-19 13:39 GMT+03:00 Kirill Yukhin <kirill.yukhin@gmail.com>:
>> > Hi Andrew,
>> > On 18 Jan 15:45, Andrew Senkevich wrote:
>> >> 2017-01-17 16:51 GMT+03:00 Jakub Jelinek <jakub@redhat.com>:
>> >> > On Tue, Jan 17, 2017 at 04:03:08PM +0300, Andrew Senkevich wrote:
>> >> >> > I've played a bit w/ SDE. And looks like operands are not early clobber:
>> >> >> > TID0: INS 0x00000000004003ee             AVX512VEX kmovd k0, eax
>> >> >> > TID0:   k0 := 00000000_ffffffff
>> >> >> > ...
>> >> >> > TID0: INS 0x00000000004003f4             AVX512VEX kshiftlw k0, k0, 0x3
>> >> >> > TID0:   k0 := 00000000_0000fff8
>> >> >> >
>> >> >> > You can see that same dest and source works just fine.
>> >> >>
>> >> >> Hmm, I looked only on what ICC generates, and it was not correct way.
>> >> >
>> >> > I've just tried
>> >> > int
>> >> > main ()
>> >> > {
>> >> >   unsigned int a = 0x5555;
>> >> >   asm volatile ("kmovw %1, %%k6; kshiftlw $1, %%k6, %%k6; kmovw %%k6, %0" : "=r" (a) : "r" (a) : "k6");
>> >> >   __builtin_printf ("%x\n", a);
>> >> >   return 0;
>> >> > }
>> >> > on KNL and got 0xaaaa.
>> >> > Are you going to report to the SDM authors so that they fix it up?
>> >> > E.g. using TEMP <- SRC1[0:...] before DEST[...] <- 0 and using TEMP
>> >> > instead of SRC1[0:...] would fix it, or filling up TEMP first and only
>> >> > at the end assigning DEST <- TEMP etc. would do.
>> >>
>> >> Yes, we will work on it.
>> >>
>> >> Attached patch refactored in part of builtints declarations and tests, is it Ok?
>> >
>> > Could you please add runtime tests for new intrinsics as well?
>>
>> Attached with runtime tests.
> Great! Thanks. Patch is OK for main trunk.
>
> --
> Thanks, K
>>
>> gcc/
>>     * config/i386/avx512bwintrin.h: Add k-mask registers shift intrinsics.
>>     * config/i386/avx512dqintrin.h: Ditto.
>>     * config/i386/avx512fintrin.h: Ditto.
>>     * config/i386/i386-builtin-types.def: Add new types.
>>     * gcc/config/i386/i386.c: Handle new types.
>>     * config/i386/i386-builtin.def (__builtin_ia32_kshiftliqi,
>>     __builtin_ia32_kshiftlihi, __builtin_ia32_kshiftlisi,
>>     __builtin_ia32_kshiftlidi, __builtin_ia32_kshiftriqi,
>>     __builtin_ia32_kshiftrihi, __builtin_ia32_kshiftrisi,
>>     __builtin_ia32_kshiftridi): New.
>>     * config/i386/sse.md (k<code><mode>): Rename *k<code><mode>.
>>
>> gcc/testsuite/
>>     * gcc.target/i386/avx512bw-kshiftld-1.c: New test.
>>     * gcc.target/i386/avx512bw-kshiftlq-1.c: Ditto.
>>     * gcc.target/i386/avx512dq-kshiftlb-1.c: Ditto.
>>     * gcc.target/i386/avx512f-kshiftlw-1.c: Ditto.
>>     * gcc.target/i386/avx512bw-kshiftrd-1.c: Ditto.
>>     * gcc.target/i386/avx512bw-kshiftrq-1.c: Ditto.
>>     * gcc.target/i386/avx512dq-kshiftrb-1.c: Ditto.
>>     * gcc.target/i386/avx512f-kshiftrw-1.c: Ditto.
>>     * gcc.target/i386/avx512bw-kshiftld-2.c: Ditto.
>>     * gcc.target/i386/avx512bw-kshiftlq-2.c: Ditto.
>>     * gcc.target/i386/avx512bw-kshiftrd-2.c: Ditto.
>>     * gcc.target/i386/avx512bw-kshiftrq-2.c: Ditto.
>>     * gcc.target/i386/avx512dq-kshiftlb-2.c: Ditto.
>>     * gcc.target/i386/avx512dq-kshiftrb-2.c: Ditto.
>>     * gcc.target/i386/avx512f-kshiftlw-2.c: Ditto.
>>     * gcc.target/i386/avx512f-kshiftrw-2.c: Ditto.
>>     * gcc.target/i386/avx-1.c: Test new intrinsics.
>>     * gcc.target/i386/sse-13.c: Ditto.
>>     * gcc.target/i386/sse-23.c: Ditto.

Hi,

here is intrinsics for ktest{b,w,d,q} and kortest{b,w,d,q}. Is it Ok?

gcc/
    * config/i386/avx512bwintrin.h: Add k-mask test, kortest intrinsics.
    * config/i386/avx512dqintrin.h: Ditto.
    * config/i386/avx512fintrin.h: Ditto.
    * gcc/config/i386/i386.c: Handle new builtins.
    * config/i386/i386-builtin.def: Add new builtins.
    * config/i386/sse.md (ktest<mode>, kortest<mode>): New.
    (UNSPEC_KORTEST, UNSPEC_KTEST): New.

gcc/testsuite/
    * gcc.target/i386/avx512bw-ktestd-1.c: New test.
    * gcc.target/i386/avx512bw-ktestq-1.c: Ditto.
    * gcc.target/i386/avx512dq-ktestb-1.c: Ditto.
    * gcc.target/i386/avx512f-ktestw-1.c: Ditto.
    * gcc.target/i386/avx512bw-kortestd-1.c: Ditto.
    * gcc.target/i386/avx512bw-kortestq-1.c: Ditto.
    * gcc.target/i386/avx512dq-kortestb-1.c: Ditto.
    * gcc.target/i386/avx512f-kortestw-1.c: Ditto.


--
WBR,
Andrew
Uros Bizjak Jan. 20, 2017, 1:46 p.m. UTC | #16
On Fri, Jan 20, 2017 at 2:32 PM, Andrew Senkevich
<andrew.n.senkevich@gmail.com> wrote:

> here is intrinsics for ktest{b,w,d,q} and kortest{b,w,d,q}. Is it Ok?
>
> gcc/
>     * config/i386/avx512bwintrin.h: Add k-mask test, kortest intrinsics.
>     * config/i386/avx512dqintrin.h: Ditto.
>     * config/i386/avx512fintrin.h: Ditto.
>     * gcc/config/i386/i386.c: Handle new builtins.
>     * config/i386/i386-builtin.def: Add new builtins.
>     * config/i386/sse.md (ktest<mode>, kortest<mode>): New.
>     (UNSPEC_KORTEST, UNSPEC_KTEST): New.
>
> gcc/testsuite/
>     * gcc.target/i386/avx512bw-ktestd-1.c: New test.
>     * gcc.target/i386/avx512bw-ktestq-1.c: Ditto.
>     * gcc.target/i386/avx512dq-ktestb-1.c: Ditto.
>     * gcc.target/i386/avx512f-ktestw-1.c: Ditto.
>     * gcc.target/i386/avx512bw-kortestd-1.c: Ditto.
>     * gcc.target/i386/avx512bw-kortestq-1.c: Ditto.
>     * gcc.target/i386/avx512dq-kortestb-1.c: Ditto.
>     * gcc.target/i386/avx512f-kortestw-1.c: Ditto.

IMO, you should add some runtime tests.

Otherwise, the patch LGTM, but I'l leave the final approval to Kirill.

Uros.
Kirill Yukhin Jan. 20, 2017, 5:08 p.m. UTC | #17
Hi,
On 20 Jan 14:46, Uros Bizjak wrote:
> On Fri, Jan 20, 2017 at 2:32 PM, Andrew Senkevich
> <andrew.n.senkevich@gmail.com> wrote:
>
> > here is intrinsics for ktest{b,w,d,q} and kortest{b,w,d,q}. Is it Ok?
> >
> > gcc/
> >     * config/i386/avx512bwintrin.h: Add k-mask test, kortest intrinsics.
> >     * config/i386/avx512dqintrin.h: Ditto.
> >     * config/i386/avx512fintrin.h: Ditto.
> >     * gcc/config/i386/i386.c: Handle new builtins.
> >     * config/i386/i386-builtin.def: Add new builtins.
> >     * config/i386/sse.md (ktest<mode>, kortest<mode>): New.
> >     (UNSPEC_KORTEST, UNSPEC_KTEST): New.
> >
> > gcc/testsuite/
> >     * gcc.target/i386/avx512bw-ktestd-1.c: New test.
> >     * gcc.target/i386/avx512bw-ktestq-1.c: Ditto.
> >     * gcc.target/i386/avx512dq-ktestb-1.c: Ditto.
> >     * gcc.target/i386/avx512f-ktestw-1.c: Ditto.
> >     * gcc.target/i386/avx512bw-kortestd-1.c: Ditto.
> >     * gcc.target/i386/avx512bw-kortestq-1.c: Ditto.
> >     * gcc.target/i386/avx512dq-kortestb-1.c: Ditto.
> >     * gcc.target/i386/avx512f-kortestw-1.c: Ditto.
>
> IMO, you should add some runtime tests.
+1

> Otherwise, the patch LGTM, but I'l leave the final approval to Kirill.
Anyway trunk is frozen, so I suppose you'll need OK from RM.

So, no much hurry. Pls add runtime tests.

--
Thanks, K
>
> Uros.
Andrew Senkevich Jan. 20, 2017, 8:03 p.m. UTC | #18
2017-01-20 20:08 GMT+03:00 Kirill Yukhin <kirill.yukhin@gmail.com>:
> Hi,
> On 20 Jan 14:46, Uros Bizjak wrote:
>> On Fri, Jan 20, 2017 at 2:32 PM, Andrew Senkevich
>> <andrew.n.senkevich@gmail.com> wrote:
>>
>> > here is intrinsics for ktest{b,w,d,q} and kortest{b,w,d,q}. Is it Ok?
>> >
>> > gcc/
>> >     * config/i386/avx512bwintrin.h: Add k-mask test, kortest intrinsics.
>> >     * config/i386/avx512dqintrin.h: Ditto.
>> >     * config/i386/avx512fintrin.h: Ditto.
>> >     * gcc/config/i386/i386.c: Handle new builtins.
>> >     * config/i386/i386-builtin.def: Add new builtins.
>> >     * config/i386/sse.md (ktest<mode>, kortest<mode>): New.
>> >     (UNSPEC_KORTEST, UNSPEC_KTEST): New.
>> >
>> > gcc/testsuite/
>> >     * gcc.target/i386/avx512bw-ktestd-1.c: New test.
>> >     * gcc.target/i386/avx512bw-ktestq-1.c: Ditto.
>> >     * gcc.target/i386/avx512dq-ktestb-1.c: Ditto.
>> >     * gcc.target/i386/avx512f-ktestw-1.c: Ditto.
>> >     * gcc.target/i386/avx512bw-kortestd-1.c: Ditto.
>> >     * gcc.target/i386/avx512bw-kortestq-1.c: Ditto.
>> >     * gcc.target/i386/avx512dq-kortestb-1.c: Ditto.
>> >     * gcc.target/i386/avx512f-kortestw-1.c: Ditto.
>>
>> IMO, you should add some runtime tests.
> +1
>
>> Otherwise, the patch LGTM, but I'l leave the final approval to Kirill.
> Anyway trunk is frozen, so I suppose you'll need OK from RM.

Kirill, attached with runtime tests.

Richard, are you OK to approve commit of this patch?
It is last part of k-mask intrinsics, it would be great to have all
intrinsics of this type available in single GCC release..

Updated changelog:

gcc/
    * config/i386/avx512bwintrin.h: Add k-mask test, kortest intrinsics.
    * config/i386/avx512dqintrin.h: Ditto.
    * config/i386/avx512fintrin.h: Ditto.
    * gcc/config/i386/i386.c: Handle new builtins.
    * config/i386/i386-builtin.def: Add new builtins.
    * config/i386/sse.md (ktest<mode>, kortest<mode>): New.
    (UNSPEC_KORTEST, UNSPEC_KTEST): New.

gcc/testsuite/
    * gcc.target/i386/avx512bw-ktestd-1.c: New test.
    * gcc.target/i386/avx512bw-ktestq-1.c: Ditto.
    * gcc.target/i386/avx512dq-ktestb-1.c: Ditto.
    * gcc.target/i386/avx512f-ktestw-1.c: Ditto.
    * gcc.target/i386/avx512bw-kortestd-1.c: Ditto.
    * gcc.target/i386/avx512bw-kortestq-1.c: Ditto.
    * gcc.target/i386/avx512dq-kortestb-1.c: Ditto.
    * gcc.target/i386/avx512f-kortestw-1.c: Ditto.
    * gcc.target/i386/avx512bw-ktestd-2.c: Ditt
    * gcc.target/i386/avx512bw-ktestq-2.c: Ditto.
    * gcc.target/i386/avx512dq-ktestb-2.c: Ditto.
    * gcc.target/i386/avx512f-ktestw-2.c: Ditto.
    * gcc.target/i386/avx512bw-kortestd-2.c: Ditto.
    * gcc.target/i386/avx512bw-kortestq-2.c: Ditto.
    * gcc.target/i386/avx512dq-kortestb-2.c: Ditto.
    * gcc.target/i386/avx512f-kortestw-2.c: Ditto.


--
WBR,
Andrew
Richard Biener Jan. 21, 2017, 8:23 a.m. UTC | #19
On January 20, 2017 9:03:53 PM GMT+01:00, Andrew Senkevich <andrew.n.senkevich@gmail.com> wrote:
>2017-01-20 20:08 GMT+03:00 Kirill Yukhin <kirill.yukhin@gmail.com>:
>> Hi,
>> On 20 Jan 14:46, Uros Bizjak wrote:
>>> On Fri, Jan 20, 2017 at 2:32 PM, Andrew Senkevich
>>> <andrew.n.senkevich@gmail.com> wrote:
>>>
>>> > here is intrinsics for ktest{b,w,d,q} and kortest{b,w,d,q}. Is it
>Ok?
>>> >
>>> > gcc/
>>> >     * config/i386/avx512bwintrin.h: Add k-mask test, kortest
>intrinsics.
>>> >     * config/i386/avx512dqintrin.h: Ditto.
>>> >     * config/i386/avx512fintrin.h: Ditto.
>>> >     * gcc/config/i386/i386.c: Handle new builtins.
>>> >     * config/i386/i386-builtin.def: Add new builtins.
>>> >     * config/i386/sse.md (ktest<mode>, kortest<mode>): New.
>>> >     (UNSPEC_KORTEST, UNSPEC_KTEST): New.
>>> >
>>> > gcc/testsuite/
>>> >     * gcc.target/i386/avx512bw-ktestd-1.c: New test.
>>> >     * gcc.target/i386/avx512bw-ktestq-1.c: Ditto.
>>> >     * gcc.target/i386/avx512dq-ktestb-1.c: Ditto.
>>> >     * gcc.target/i386/avx512f-ktestw-1.c: Ditto.
>>> >     * gcc.target/i386/avx512bw-kortestd-1.c: Ditto.
>>> >     * gcc.target/i386/avx512bw-kortestq-1.c: Ditto.
>>> >     * gcc.target/i386/avx512dq-kortestb-1.c: Ditto.
>>> >     * gcc.target/i386/avx512f-kortestw-1.c: Ditto.
>>>
>>> IMO, you should add some runtime tests.
>> +1
>>
>>> Otherwise, the patch LGTM, but I'l leave the final approval to
>Kirill.
>> Anyway trunk is frozen, so I suppose you'll need OK from RM.
>
>Kirill, attached with runtime tests.
>
>Richard, are you OK to approve commit of this patch?

OK.  Note trunk is not frozen, it's operated in release branch mode now.

Richard.

>It is last part of k-mask intrinsics, it would be great to have all
>intrinsics of this type available in single GCC release..
>
>Updated changelog:
>
>gcc/
>   * config/i386/avx512bwintrin.h: Add k-mask test, kortest intrinsics.
>    * config/i386/avx512dqintrin.h: Ditto.
>    * config/i386/avx512fintrin.h: Ditto.
>    * gcc/config/i386/i386.c: Handle new builtins.
>    * config/i386/i386-builtin.def: Add new builtins.
>    * config/i386/sse.md (ktest<mode>, kortest<mode>): New.
>    (UNSPEC_KORTEST, UNSPEC_KTEST): New.
>
>gcc/testsuite/
>    * gcc.target/i386/avx512bw-ktestd-1.c: New test.
>    * gcc.target/i386/avx512bw-ktestq-1.c: Ditto.
>    * gcc.target/i386/avx512dq-ktestb-1.c: Ditto.
>    * gcc.target/i386/avx512f-ktestw-1.c: Ditto.
>    * gcc.target/i386/avx512bw-kortestd-1.c: Ditto.
>    * gcc.target/i386/avx512bw-kortestq-1.c: Ditto.
>    * gcc.target/i386/avx512dq-kortestb-1.c: Ditto.
>    * gcc.target/i386/avx512f-kortestw-1.c: Ditto.
>    * gcc.target/i386/avx512bw-ktestd-2.c: Ditt
>    * gcc.target/i386/avx512bw-ktestq-2.c: Ditto.
>    * gcc.target/i386/avx512dq-ktestb-2.c: Ditto.
>    * gcc.target/i386/avx512f-ktestw-2.c: Ditto.
>    * gcc.target/i386/avx512bw-kortestd-2.c: Ditto.
>    * gcc.target/i386/avx512bw-kortestq-2.c: Ditto.
>    * gcc.target/i386/avx512dq-kortestb-2.c: Ditto.
>    * gcc.target/i386/avx512f-kortestw-2.c: Ditto.
>
>
>--
>WBR,
>Andrew
Kirill Yukhin Jan. 23, 2017, 11:32 a.m. UTC | #20
On 20 Jan 23:03, Andrew Senkevich wrote:
> 2017-01-20 20:08 GMT+03:00 Kirill Yukhin <kirill.yukhin@gmail.com>:
> > Hi,
> > On 20 Jan 14:46, Uros Bizjak wrote:
> >> On Fri, Jan 20, 2017 at 2:32 PM, Andrew Senkevich
> >> <andrew.n.senkevich@gmail.com> wrote:
> >>
> >> > here is intrinsics for ktest{b,w,d,q} and kortest{b,w,d,q}. Is it Ok?
> >> >
> >> > gcc/
> >> >     * config/i386/avx512bwintrin.h: Add k-mask test, kortest intrinsics.
> >> >     * config/i386/avx512dqintrin.h: Ditto.
> >> >     * config/i386/avx512fintrin.h: Ditto.
> >> >     * gcc/config/i386/i386.c: Handle new builtins.
> >> >     * config/i386/i386-builtin.def: Add new builtins.
> >> >     * config/i386/sse.md (ktest<mode>, kortest<mode>): New.
> >> >     (UNSPEC_KORTEST, UNSPEC_KTEST): New.
> >> >
> >> > gcc/testsuite/
> >> >     * gcc.target/i386/avx512bw-ktestd-1.c: New test.
> >> >     * gcc.target/i386/avx512bw-ktestq-1.c: Ditto.
> >> >     * gcc.target/i386/avx512dq-ktestb-1.c: Ditto.
> >> >     * gcc.target/i386/avx512f-ktestw-1.c: Ditto.
> >> >     * gcc.target/i386/avx512bw-kortestd-1.c: Ditto.
> >> >     * gcc.target/i386/avx512bw-kortestq-1.c: Ditto.
> >> >     * gcc.target/i386/avx512dq-kortestb-1.c: Ditto.
> >> >     * gcc.target/i386/avx512f-kortestw-1.c: Ditto.
> >>
> >> IMO, you should add some runtime tests.
> > +1
> >
> >> Otherwise, the patch LGTM, but I'l leave the final approval to Kirill.
> > Anyway trunk is frozen, so I suppose you'll need OK from RM.
>
> Kirill, attached with runtime tests.
>
> Richard, are you OK to approve commit of this patch?
> It is last part of k-mask intrinsics, it would be great to have all
> intrinsics of this type available in single GCC release..
OK for main trunk. I'll check it in.

--
Thanks, K
Thomas Schwinge Jan. 26, 2017, 9:14 a.m. UTC | #21
Hi!

On Fri, 20 Jan 2017 23:03:53 +0300, Andrew Senkevich <andrew.n.senkevich@gmail.com> wrote:
> diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ktestw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-ktestw-2.c
> new file mode 100644
> index 0000000..6602c7a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-ktestw-2.c
> @@ -0,0 +1,20 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#include "avx512f-check.h"
> +
> +void
> +avx512f_test ()
> +{
> +  volatile __mmask16 k1, k2;
> +  unsigned char r1, r2;
> +
> +  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (0) );
> +  __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (-1) );
> +
> +  r1 = _ktest_mask16_u8(k1, k2, &r2);
> +
> +  if (r1 != 1 || r2 != 0)
> +    abort ();
> +}

I see:

    {+FAIL: gcc.target/i386/avx512f-ktestw-2.c (test for excess errors)+}
    {+UNRESOLVED: gcc.target/i386/avx512f-ktestw-2.c compilation failed to produce executable+}

... because of:

    /tmp/ccjv3mX2.s: Assembler messages:
    /tmp/ccjv3mX2.s:26: Error: no such instruction: `ktestw %k1,%k0'
    compiler exited with status 1


Grüße
 Thomas
Uros Bizjak Jan. 26, 2017, 9:59 a.m. UTC | #22
On Thu, Jan 26, 2017 at 10:14 AM, Thomas Schwinge
<thomas@codesourcery.com> wrote:
> Hi!
>
> On Fri, 20 Jan 2017 23:03:53 +0300, Andrew Senkevich <andrew.n.senkevich@gmail.com> wrote:
>> diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ktestw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-ktestw-2.c
>> new file mode 100644
>> index 0000000..6602c7a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/avx512f-ktestw-2.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O2 -mavx512f" } */
>> +/* { dg-require-effective-target avx512f } */
>> +
>> +#include "avx512f-check.h"
>> +
>> +void
>> +avx512f_test ()
>> +{
>> +  volatile __mmask16 k1, k2;
>> +  unsigned char r1, r2;
>> +
>> +  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (0) );
>> +  __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (-1) );
>> +
>> +  r1 = _ktest_mask16_u8(k1, k2, &r2);
>> +
>> +  if (r1 != 1 || r2 != 0)
>> +    abort ();
>> +}
>
> I see:
>
>     {+FAIL: gcc.target/i386/avx512f-ktestw-2.c (test for excess errors)+}
>     {+UNRESOLVED: gcc.target/i386/avx512f-ktestw-2.c compilation failed to produce executable+}
>
> ... because of:
>
>     /tmp/ccjv3mX2.s: Assembler messages:
>     /tmp/ccjv3mX2.s:26: Error: no such instruction: `ktestw %k1,%k0'
>     compiler exited with status 1

The problem is with __builtin_ia32_ktesthi (and __builtin_ia32_kaddhi)
intrinsics. These should be enabled only with AVX512DQ, since
corresponding insns are available in AVX512DQ ISA extension.

Andrew, can you please adjust builtins, instruction patterns,
intrinsics and testcases? Also, can you please review if there are any
other inconsistencies w.r.t. ISA throughout mask intrinsics?

Uros.
Kirill Yukhin Jan. 26, 2017, 10:44 a.m. UTC | #23
Hello Thomas,
On 26 Jan 10:14, Thomas Schwinge wrote:
> I see:
>
>     {+FAIL: gcc.target/i386/avx512f-ktestw-2.c (test for excess errors)+}
>     {+UNRESOLVED: gcc.target/i386/avx512f-ktestw-2.c compilation failed to produce executable+}
>
> ... because of:
>
>     /tmp/ccjv3mX2.s: Assembler messages:
>     /tmp/ccjv3mX2.s:26: Error: no such instruction: `ktestw %k1,%k0'
>     compiler exited with status 1
Which version of gas do you use?
It should be OK since v2.25.

--
Thanks, K
>
>
> Grüße
>  Thomas
Jakub Jelinek Jan. 26, 2017, 10:51 a.m. UTC | #24
On Thu, Jan 26, 2017 at 02:44:56AM -0800, Kirill Yukhin wrote:
> Hello Thomas,
> On 26 Jan 10:14, Thomas Schwinge wrote:
> > I see:
> >
> >     {+FAIL: gcc.target/i386/avx512f-ktestw-2.c (test for excess errors)+}
> >     {+UNRESOLVED: gcc.target/i386/avx512f-ktestw-2.c compilation failed to produce executable+}
> >
> > ... because of:
> >
> >     /tmp/ccjv3mX2.s: Assembler messages:
> >     /tmp/ccjv3mX2.s:26: Error: no such instruction: `ktestw %k1,%k0'
> >     compiler exited with status 1
> Which version of gas do you use?
> It should be OK since v2.25.

It is weird, because the test already has:
/* { dg-require-effective-target avx512f } */
Perhaps if there are gas versions with partial avx512f support, we need
to improve the avx512f effective target test.

	Jakub
Uros Bizjak Jan. 26, 2017, 10:54 a.m. UTC | #25
On Thu, Jan 26, 2017 at 11:51 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Jan 26, 2017 at 02:44:56AM -0800, Kirill Yukhin wrote:
>> Hello Thomas,
>> On 26 Jan 10:14, Thomas Schwinge wrote:
>> > I see:
>> >
>> >     {+FAIL: gcc.target/i386/avx512f-ktestw-2.c (test for excess errors)+}
>> >     {+UNRESOLVED: gcc.target/i386/avx512f-ktestw-2.c compilation failed to produce executable+}
>> >
>> > ... because of:
>> >
>> >     /tmp/ccjv3mX2.s: Assembler messages:
>> >     /tmp/ccjv3mX2.s:26: Error: no such instruction: `ktestw %k1,%k0'
>> >     compiler exited with status 1
>> Which version of gas do you use?
>> It should be OK since v2.25.
>
> It is weird, because the test already has:
> /* { dg-require-effective-target avx512f } */
> Perhaps if there are gas versions with partial avx512f support, we need
> to improve the avx512f effective target test.

This is actually AVX512DQ instruction, please see [1], 3-509.

[1] https://software.intel.com/sites/default/files/managed/ad/01/253666-sdm-vol-2a.pdf

Uros.
Jakub Jelinek Jan. 26, 2017, 11 a.m. UTC | #26
On Thu, Jan 26, 2017 at 11:54:52AM +0100, Uros Bizjak wrote:
> On Thu, Jan 26, 2017 at 11:51 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> > On Thu, Jan 26, 2017 at 02:44:56AM -0800, Kirill Yukhin wrote:
> >> Hello Thomas,
> >> On 26 Jan 10:14, Thomas Schwinge wrote:
> >> > I see:
> >> >
> >> >     {+FAIL: gcc.target/i386/avx512f-ktestw-2.c (test for excess errors)+}
> >> >     {+UNRESOLVED: gcc.target/i386/avx512f-ktestw-2.c compilation failed to produce executable+}
> >> >
> >> > ... because of:
> >> >
> >> >     /tmp/ccjv3mX2.s: Assembler messages:
> >> >     /tmp/ccjv3mX2.s:26: Error: no such instruction: `ktestw %k1,%k0'
> >> >     compiler exited with status 1
> >> Which version of gas do you use?
> >> It should be OK since v2.25.
> >
> > It is weird, because the test already has:
> > /* { dg-require-effective-target avx512f } */
> > Perhaps if there are gas versions with partial avx512f support, we need
> > to improve the avx512f effective target test.
> 
> This is actually AVX512DQ instruction, please see [1], 3-509.
> 
> [1] https://software.intel.com/sites/default/files/managed/ad/01/253666-sdm-vol-2a.pdf

You're right.  But then the tests should be named avx512dq-ktestw-{1,2}.c,
should use -mavx512dq, avx512dq effective target etc. and indeed the
intrinsics shouldn't be in avx512fintrin.h header, but dq, and should not be
enabled for f, but only dq.

	Jakub
Uros Bizjak Jan. 26, 2017, 11:04 a.m. UTC | #27
On Thu, Jan 26, 2017 at 12:00 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Jan 26, 2017 at 11:54:52AM +0100, Uros Bizjak wrote:
>> On Thu, Jan 26, 2017 at 11:51 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> > On Thu, Jan 26, 2017 at 02:44:56AM -0800, Kirill Yukhin wrote:
>> >> Hello Thomas,
>> >> On 26 Jan 10:14, Thomas Schwinge wrote:
>> >> > I see:
>> >> >
>> >> >     {+FAIL: gcc.target/i386/avx512f-ktestw-2.c (test for excess errors)+}
>> >> >     {+UNRESOLVED: gcc.target/i386/avx512f-ktestw-2.c compilation failed to produce executable+}
>> >> >
>> >> > ... because of:
>> >> >
>> >> >     /tmp/ccjv3mX2.s: Assembler messages:
>> >> >     /tmp/ccjv3mX2.s:26: Error: no such instruction: `ktestw %k1,%k0'
>> >> >     compiler exited with status 1
>> >> Which version of gas do you use?
>> >> It should be OK since v2.25.
>> >
>> > It is weird, because the test already has:
>> > /* { dg-require-effective-target avx512f } */
>> > Perhaps if there are gas versions with partial avx512f support, we need
>> > to improve the avx512f effective target test.
>>
>> This is actually AVX512DQ instruction, please see [1], 3-509.
>>
>> [1] https://software.intel.com/sites/default/files/managed/ad/01/253666-sdm-vol-2a.pdf
>
> You're right.  But then the tests should be named avx512dq-ktestw-{1,2}.c,
> should use -mavx512dq, avx512dq effective target etc. and indeed the
> intrinsics shouldn't be in avx512fintrin.h header, but dq, and should not be
> enabled for f, but only dq.

Yes, all this is needed to fix this oversight (and one more with
kaddw), as I proposed a couple of messages earlier.

Uros.
Thomas Schwinge Jan. 26, 2017, 11:49 a.m. UTC | #28
Hi!

On Thu, 26 Jan 2017 02:44:56 -0800, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> On 26 Jan 10:14, Thomas Schwinge wrote:
> > I see:
> >
> >     {+FAIL: gcc.target/i386/avx512f-ktestw-2.c (test for excess errors)+}
> >     {+UNRESOLVED: gcc.target/i386/avx512f-ktestw-2.c compilation failed to produce executable+}
> >
> > ... because of:
> >
> >     /tmp/ccjv3mX2.s: Assembler messages:
> >     /tmp/ccjv3mX2.s:26: Error: no such instruction: `ktestw %k1,%k0'
> >     compiler exited with status 1
> Which version of gas do you use?

A rather old one on that Ubuntu 12.10 system:

    $ as --version
    GNU assembler (GNU Binutils for Ubuntu) 2.22.90.20120924
    [...]

> It should be OK since v2.25.

OK, but as done for other tests, for older versions such testing then
should be UNSUPPORTED instead of FAIL/UNRESOLVED (as long as that is
practicable, which has already been described how to do, as I understand
the other messages).


Grüße
 Thomas
Kirill Yukhin Jan. 26, 2017, 11:53 a.m. UTC | #29
Hi,
On 26 Jan 12:49, Thomas Schwinge wrote:
> Hi!
>
> On Thu, 26 Jan 2017 02:44:56 -0800, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> > On 26 Jan 10:14, Thomas Schwinge wrote:
> > > I see:
> > >
> > >     {+FAIL: gcc.target/i386/avx512f-ktestw-2.c (test for excess errors)+}
> > >     {+UNRESOLVED: gcc.target/i386/avx512f-ktestw-2.c compilation failed to produce executable+}
> > >
> > > ... because of:
> > >
> > >     /tmp/ccjv3mX2.s: Assembler messages:
> > >     /tmp/ccjv3mX2.s:26: Error: no such instruction: `ktestw %k1,%k0'
> > >     compiler exited with status 1
> > Which version of gas do you use?
>
> A rather old one on that Ubuntu 12.10 system:
>
>     $ as --version
>     GNU assembler (GNU Binutils for Ubuntu) 2.22.90.20120924
>     [...]
>
> > It should be OK since v2.25.
>
> OK, but as done for other tests, for older versions such testing then
> should be UNSUPPORTED instead of FAIL/UNRESOLVED (as long as that is
> practicable, which has already been described how to do, as I understand
> the other messages).
This is a bug as Uroš properly mentioned. Will fix.

--
Thanks, K

>
>
> Grüße
>  Thomas
diff mbox

Patch

diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index b35ae2b..e38055c 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -40,6 +40,20 @@  typedef char __v64qi __attribute__ ((__vector_size__ (64)));

 typedef unsigned long long __mmask64;

+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kadd_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kadd_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B);
+}
+
 extern __inline unsigned int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _cvtmask32_u32 (__mmask32 __A)
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 4db44e4..ccc6a4d 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -34,6 +34,13 @@ 
 #define __DISABLE_AVX512DQ__
 #endif /* __AVX512DQ__ */

+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kadd_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B);
+}
+
 extern __inline unsigned int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _cvtmask8_u32 (__mmask8 __A)
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index a889c83..820741c 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -9984,6 +9984,13 @@  _mm512_maskz_expandloadu_epi32 (__mmask16 __U,
void const *__P)
 #define _kxnor_mask16 _mm512_kxnor
 #define _kxor_mask16 _mm512_kxor

+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kadd_mask16 (__mmask16 __A, __mmask16 __B)
+{
+  return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
+}
+
 extern __inline unsigned int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _cvtmask16_u32 (__mmask16 __A)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 71382c8..7d86008 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1471,6 +1471,10 @@  BDESC (OPTION_MASK_ISA_AVX512DQ,
CODE_FOR_kmovb, "__builtin_ia32_kmovb", IX86_BU
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw,
"__builtin_ia32_kmovw", IX86_BUILTIN_KMOV16, UNKNOWN, (int)
UHI_FTYPE_UHI)
 BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kmovd,
"__builtin_ia32_kmovd", IX86_BUILTIN_KMOV32, UNKNOWN, (int)
USI_FTYPE_USI)
 BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kmovq,
"__builtin_ia32_kmovq", IX86_BUILTIN_KMOV64, UNKNOWN, (int)
UDI_FTYPE_UDI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kaddqi,
"__builtin_ia32_kaddqi", IX86_BUILTIN_KADD8, UNKNOWN, (int)
UQI_FTYPE_UQI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kaddhi,
"__builtin_ia32_kaddhi", IX86_BUILTIN_KADD16, UNKNOWN, (int)
UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kaddsi,
"__builtin_ia32_kaddsi", IX86_BUILTIN_KADD32, UNKNOWN, (int)
USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kadddi,
"__builtin_ia32_kadddi", IX86_BUILTIN_KADD64, UNKNOWN, (int)
UDI_FTYPE_UDI_UDI)

 /* SHA */
 BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0,
IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6dc57aa..4c9bdec 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1309,6 +1309,18 @@ 
 ;; Mask variant shift mnemonics
 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])

+(define_insn "kadd<mode>"
+  [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
+ (plus:SWI1248_AVX512BWDQ
+  (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
+  (match_operand:SWI1248_AVX512BWDQ 2 "register_operand" "k")))
+   (unspec [(const_int 0)] UNSPEC_MASKOP)]
+  "TARGET_AVX512F"
+  "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "msklog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_expand "kmov<mskmodesuffix>"
   [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
  (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kaddd-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kaddd-1.c
new file mode 100644
index 0000000..1f6c61f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kaddd-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kaddd\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k = _kadd_mask32 (11, 12);
+  asm volatile ("" : "+k" (k));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kaddq-1.c
b/gcc/testsuite/gcc.target/i386/avx512bw-kaddq-1.c
new file mode 100644
index 0000000..9e9aaae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kaddq-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kaddq\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k = _kadd_mask64 (11, 12);
+  asm volatile ("" : "+k" (k));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kaddb-1.c
b/gcc/testsuite/gcc.target/i386/avx512dq-kaddb-1.c
new file mode 100644
index 0000000..4be7b0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kaddb-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kaddb\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k = _kadd_mask8 (11, 12);
+  asm volatile ("" : "+k" (k));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kaddw-1.c
b/gcc/testsuite/gcc.target/i386/avx512f-kaddw-1.c
new file mode 100644
index 0000000..957a395
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kaddw-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kaddw\[
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+  __mmask16 k = _kadd_mask16 (11, 12);
+  asm volatile ("" : "+k" (k));