Message ID | 20141208214210.GL1667@tucnak.redhat.com |
---|---|
State | New |
Headers | show |
On Mon, Dec 8, 2014 at 10:42 PM, Jakub Jelinek <jakub@redhat.com> wrote: > Hi! > > This patch attempts to fix > (set (reg:V*<mode>) (vec_duplicate:V*<mode> (reg/mem:<mode>))) > patterns. One issue is that there were separate patterns for > broadcast from gpr and separate patterns for broadcast from memory > (and vector reg), that isn't a good idea for reload, which can't then > freely choose. Another issue is that some pre-AVX2 broadcast patterns > were present above the avx512vl broadcast patterns, so again, reload didn't > have the possibility to use %xmm16-31/%ymm16-31 registers. Also, the > splitter written for AVX2 broadcasts from gpr went into the way of AVX512VL > broadcasts. And finally, the avx512*intrin.h headers were using > #ifdef TARGET_64BIT, macro not used anywhere (probably meant to write > __x86_64__ instead, but with the patch we actually just have one set of > builtins. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2014-12-08 Jakub Jelinek <jakub@redhat.com> > > PR target/63594 > * config/i386/sse.md (vec_dupv4sf): Move after > <mask_codefor><avx512>_vec_dup_gpr<mode><mask_name> pattern. > (*vec_dupv4si, *vec_dupv2di): Likewise. > (<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>): Merge into ... > (<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>): ... this > pattern. > (*vec_dup<mode> AVX2_VEC_DUP_MODE splitter): Disable for > TARGET_AVX512VL (for QI/HI scalar modes only if TARGET_AVX512BW > is set too). > * config/i386/i386.c (enum ix86_builtins): Remove > IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, > IX86_BUILTIN_PBROADCASTQ128_MEM_MASK and > IX86_BUILTIN_PBROADCASTQ512_MEM. > (bdesc_args): Use __builtin_ia32_pbroadcastq512_gpr_mask, > __builtin_ia32_pbroadcastq256_gpr_mask and > __builtin_ia32_pbroadcastq128_gpr_mask instead of *_mem_mask > regardless of OPTION_MASK_ISA_64BIT. > * config/i386/avx512fintrin.h (_mm512_set1_epi64, > _mm512_mask_set1_epi64, _mm512_maskz_set1_epi64): Use *_gpr_mask > builtins regardless of whether TARGET_64BIT is defined or not. > * config/i386/avx512vlintrin.h (_mm256_mask_set1_epi64, > _mm256_maskz_set1_epi64, _mm_mask_set1_epi64, _mm_maskz_set1_epi64): > Likewise. LGTM, but please see inline comment below. > --- gcc/config/i386/sse.md.jj 2014-12-03 11:52:41.000000000 +0100 > +++ gcc/config/i386/sse.md 2014-12-08 13:26:06.505543457 +0100 > @@ -6319,22 +6319,6 @@ (define_insn "avx512f_vec_dup<mode>_1" > (set_attr "prefix" "evex") > (set_attr "mode" "<MODE>")]) > > -(define_insn "vec_dupv4sf" > - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") > - (vec_duplicate:V4SF > - (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))] > - "TARGET_SSE" > - "@ > - vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} > - vbroadcastss\t{%1, %0|%0, %1} > - shufps\t{$0, %0, %0|%0, %0, 0}" > - [(set_attr "isa" "avx,avx,noavx") > - (set_attr "type" "sseshuf1,ssemov,sseshuf1") > - (set_attr "length_immediate" "1,0,1") > - (set_attr "prefix_extra" "0,1,*") > - (set_attr "prefix" "vex,vex,orig") > - (set_attr "mode" "V4SF")]) > - > ;; Although insertps takes register source, we prefer > ;; unpcklps with register source since it is shorter. > (define_insn "*vec_concatv2sf_sse4_1" > @@ -12821,37 +12805,6 @@ (define_split > operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs); > }) > > -(define_insn "*vec_dupv4si" > - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") > - (vec_duplicate:V4SI > - (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))] > - "TARGET_SSE" > - "@ > - %vpshufd\t{$0, %1, %0|%0, %1, 0} > - vbroadcastss\t{%1, %0|%0, %1} > - shufps\t{$0, %0, %0|%0, %0, 0}" > - [(set_attr "isa" "sse2,avx,noavx") > - (set_attr "type" "sselog1,ssemov,sselog1") > - (set_attr "length_immediate" "1,0,1") > - (set_attr "prefix_extra" "0,1,*") > - (set_attr "prefix" "maybe_vex,vex,orig") > - (set_attr "mode" "TI,V4SF,V4SF")]) > - > -(define_insn "*vec_dupv2di" > - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x") > - (vec_duplicate:V2DI > - (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))] > - "TARGET_SSE" > - "@ > - punpcklqdq\t%0, %0 > - vpunpcklqdq\t{%d1, %0|%0, %d1} > - %vmovddup\t{%1, %0|%0, %1} > - movlhps\t%0, %0" > - [(set_attr "isa" "sse2_noavx,avx,sse3,noavx") > - (set_attr "type" "sselog1,sselog1,sselog1,ssemov") > - (set_attr "prefix" "orig,vex,maybe_vex,orig") > - (set_attr "mode" "TI,TI,DF,V4SF")]) > - > (define_insn "*vec_concatv2si_sse4_1" > [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y") > (vec_concat:V2SI > @@ -16665,46 +16618,78 @@ (define_insn "<mask_codefor>avx512f_broa > (set_attr "mode" "<sseinsnmode>")]) > > (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>" > - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") > + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") > (vec_duplicate:VI12_AVX512VL > - (match_operand:<ssescalarmode> 1 "register_operand" "r")))] > + (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))] > "TARGET_AVX512BW" > - "vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}" > + "@ > + vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1} > + vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > > (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>" > - [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") > - (vec_duplicate:VI48_AVX512VL > - (match_operand:<ssescalarmode> 1 "register_operand" "r")))] > - "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)" > -{ > - return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; > -} > - [(set_attr "type" "ssemov") > - (set_attr "prefix" "evex") > - (set_attr "mode" "<sseinsnmode>")]) > - > -(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>" > - [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v") > - (vec_duplicate:V48_AVX512VL > - (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))] > + [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v") > + (vec_duplicate:V48_AVX512VL > + (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))] > "TARGET_AVX512F" > "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "evex") > - (set_attr "mode" "<sseinsnmode>")]) > + (set_attr "mode" "<sseinsnmode>") > + (set (attr "enabled") > + (if_then_else (eq_attr "alternative" "1") > + (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT > + && (<ssescalarmode>mode != DImode || TARGET_64BIT)") > + (const_int 1)))]) > > -(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>" > - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") > - (vec_duplicate:VI12_AVX512VL > - (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))] > - "TARGET_AVX512BW" > - "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" > - [(set_attr "type" "ssemov") > - (set_attr "prefix" "evex") > - (set_attr "mode" "<sseinsnmode>")]) > +(define_insn "vec_dupv4sf" > + [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") > + (vec_duplicate:V4SF > + (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))] > + "TARGET_SSE" > + "@ > + vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} > + vbroadcastss\t{%1, %0|%0, %1} > + shufps\t{$0, %0, %0|%0, %0, 0}" > + [(set_attr "isa" "avx,avx,noavx") > + (set_attr "type" "sseshuf1,ssemov,sseshuf1") > + (set_attr "length_immediate" "1,0,1") > + (set_attr "prefix_extra" "0,1,*") > + (set_attr "prefix" "vex,vex,orig") > + (set_attr "mode" "V4SF")]) > + > +(define_insn "*vec_dupv4si" > + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") > + (vec_duplicate:V4SI > + (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))] > + "TARGET_SSE" > + "@ > + %vpshufd\t{$0, %1, %0|%0, %1, 0} > + vbroadcastss\t{%1, %0|%0, %1} > + shufps\t{$0, %0, %0|%0, %0, 0}" > + [(set_attr "isa" "sse2,avx,noavx") > + (set_attr "type" "sselog1,ssemov,sselog1") > + (set_attr "length_immediate" "1,0,1") > + (set_attr "prefix_extra" "0,1,*") > + (set_attr "prefix" "maybe_vex,vex,orig") > + (set_attr "mode" "TI,V4SF,V4SF")]) > + > +(define_insn "*vec_dupv2di" > + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x") > + (vec_duplicate:V2DI > + (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))] > + "TARGET_SSE" > + "@ > + punpcklqdq\t%0, %0 > + vpunpcklqdq\t{%d1, %0|%0, %d1} > + %vmovddup\t{%1, %0|%0, %1} > + movlhps\t%0, %0" > + [(set_attr "isa" "sse2_noavx,avx,sse3,noavx") > + (set_attr "type" "sselog1,sselog1,sselog1,ssemov") > + (set_attr "prefix" "orig,vex,maybe_vex,orig") > + (set_attr "mode" "TI,TI,DF,V4SF")]) > > (define_insn "avx2_vbroadcasti128_<mode>" > [(set (match_operand:VI_256 0 "register_operand" "=x") > @@ -16759,7 +16744,10 @@ (define_split > [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand") > (vec_duplicate:AVX2_VEC_DUP_MODE > (match_operand:<ssescalarmode> 1 "register_operand")))] > - "TARGET_AVX2 && reload_completed && GENERAL_REG_P (operands[1])" > + "TARGET_AVX2 > + && (!TARGET_AVX512VL > + || (!TARGET_AVX512BW && GET_MODE_SIZE (<ssescalarmode>mode) > 2)) > + && reload_completed && GENERAL_REG_P (operands[1])" > [(const_int 0)] We would like to avoid convoluted insn enable condition by moving the target delated complexity to the mode iterator, even if it requires additional single-use mode iterator. In the ideal case, the remaining target-dependant condition would represent the baseline target for an insn and all other target-related conditions would be inside mode iterator. > { > emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]), > --- gcc/config/i386/i386.c.jj 2014-12-08 10:57:17.000000000 +0100 > +++ gcc/config/i386/i386.c 2014-12-08 12:18:51.377459354 +0100 > @@ -28819,7 +28819,6 @@ enum ix86_builtins > IX86_BUILTIN_PBROADCASTMW512, > IX86_BUILTIN_PBROADCASTQ512, > IX86_BUILTIN_PBROADCASTQ512_GPR, > - IX86_BUILTIN_PBROADCASTQ512_MEM, > IX86_BUILTIN_PCMPEQD512_MASK, > IX86_BUILTIN_PCMPEQQ512_MASK, > IX86_BUILTIN_PCMPGTD512_MASK, > @@ -29257,10 +29256,8 @@ enum ix86_builtins > IX86_BUILTIN_PBROADCASTD128_GPR_MASK, > IX86_BUILTIN_PBROADCASTQ256_MASK, > IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, > - IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, > IX86_BUILTIN_PBROADCASTQ128_MASK, > IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, > - IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, > IX86_BUILTIN_BROADCASTSS256, > IX86_BUILTIN_BROADCASTSS128, > IX86_BUILTIN_BROADCASTSD256, > @@ -31799,8 +31796,7 @@ static const struct builtin_description > { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI }, > { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI }, > { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI }, > - { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, > - { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, > + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, > { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, > { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI }, > { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, > @@ -32074,11 +32070,9 @@ static const struct builtin_description > { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, > { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI }, > { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI }, > - { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI }, > - { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI }, > + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI }, > { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, > - { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI }, > - { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI }, > + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI }, > { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI }, > { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, > { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI }, > --- gcc/config/i386/avx512fintrin.h.jj 2014-11-18 08:26:47.000000000 +0100 > +++ gcc/config/i386/avx512fintrin.h 2014-12-08 13:07:40.657521773 +0100 > @@ -3603,47 +3603,28 @@ extern __inline __m512i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_set1_epi64 (long long __A) > { > -#ifdef TARGET_64BIT > return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, > (__v8di) > _mm512_undefined_si512 (), > (__mmask8)(-1)); > -#else > - return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, > - (__v8di) > - _mm512_undefined_si512 (), > - (__mmask8)(-1)); > -#endif > } > > extern __inline __m512i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) > { > -#ifdef TARGET_64BIT > return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, > __M); > -#else > - return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O, > - __M); > -#endif > } > > extern __inline __m512i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) > { > -#ifdef TARGET_64BIT > return (__m512i) > __builtin_ia32_pbroadcastq512_gpr_mask (__A, > (__v8di) _mm512_setzero_si512 (), > __M); > -#else > - return (__m512i) > - __builtin_ia32_pbroadcastq512_mem_mask (__A, > - (__v8di) _mm512_setzero_si512 (), > - __M); > -#endif > } > > extern __inline __m512 > --- gcc/config/i386/avx512vlintrin.h.jj 2014-11-11 00:06:22.000000000 +0100 > +++ gcc/config/i386/avx512vlintrin.h 2014-12-08 12:20:06.498102723 +0100 > @@ -2642,30 +2642,18 @@ extern __inline __m256i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) > { > -#ifdef TARGET_64BIT > return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O, > __M); > -#else > - return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, (__v4di) __O, > - __M); > -#endif > } > > extern __inline __m256i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) > { > -#ifdef TARGET_64BIT > return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, > (__v4di) > _mm256_setzero_si256 (), > __M); > -#else > - return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, > - (__v4di) > - _mm256_setzero_si256 (), > - __M); > -#endif > } > > extern __inline __m128i > @@ -2691,30 +2679,18 @@ extern __inline __m128i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) > { > -#ifdef TARGET_64BIT > return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O, > __M); > -#else > - return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, (__v2di) __O, > - __M); > -#endif > } > > extern __inline __m128i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) > { > -#ifdef TARGET_64BIT > return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, > (__v2di) > _mm_setzero_si128 (), > __M); > -#else > - return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, > - (__v2di) > - _mm_setzero_si128 (), > - __M); > -#endif > } > > extern __inline __m256 > > Jakub
--- gcc/config/i386/sse.md.jj 2014-12-03 11:52:41.000000000 +0100 +++ gcc/config/i386/sse.md 2014-12-08 13:26:06.505543457 +0100 @@ -6319,22 +6319,6 @@ (define_insn "avx512f_vec_dup<mode>_1" (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "vec_dupv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") - (vec_duplicate:V4SF - (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))] - "TARGET_SSE" - "@ - vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} - vbroadcastss\t{%1, %0|%0, %1} - shufps\t{$0, %0, %0|%0, %0, 0}" - [(set_attr "isa" "avx,avx,noavx") - (set_attr "type" "sseshuf1,ssemov,sseshuf1") - (set_attr "length_immediate" "1,0,1") - (set_attr "prefix_extra" "0,1,*") - (set_attr "prefix" "vex,vex,orig") - (set_attr "mode" "V4SF")]) - ;; Although insertps takes register source, we prefer ;; unpcklps with register source since it is shorter. (define_insn "*vec_concatv2sf_sse4_1" @@ -12821,37 +12805,6 @@ (define_split operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs); }) -(define_insn "*vec_dupv4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") - (vec_duplicate:V4SI - (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))] - "TARGET_SSE" - "@ - %vpshufd\t{$0, %1, %0|%0, %1, 0} - vbroadcastss\t{%1, %0|%0, %1} - shufps\t{$0, %0, %0|%0, %0, 0}" - [(set_attr "isa" "sse2,avx,noavx") - (set_attr "type" "sselog1,ssemov,sselog1") - (set_attr "length_immediate" "1,0,1") - (set_attr "prefix_extra" "0,1,*") - (set_attr "prefix" "maybe_vex,vex,orig") - (set_attr "mode" "TI,V4SF,V4SF")]) - -(define_insn "*vec_dupv2di" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x") - (vec_duplicate:V2DI - (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))] - "TARGET_SSE" - "@ - punpcklqdq\t%0, %0 - vpunpcklqdq\t{%d1, %0|%0, %d1} - %vmovddup\t{%1, %0|%0, %1} - movlhps\t%0, %0" - [(set_attr "isa" "sse2_noavx,avx,sse3,noavx") - (set_attr "type" "sselog1,sselog1,sselog1,ssemov") - (set_attr "prefix" "orig,vex,maybe_vex,orig") - (set_attr "mode" "TI,TI,DF,V4SF")]) - (define_insn "*vec_concatv2si_sse4_1" [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y") (vec_concat:V2SI @@ -16665,46 +16618,78 @@ (define_insn "<mask_codefor>avx512f_broa (set_attr "mode" "<sseinsnmode>")]) (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") (vec_duplicate:VI12_AVX512VL - (match_operand:<ssescalarmode> 1 "register_operand" "r")))] + (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))] "TARGET_AVX512BW" - "vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}" + "@ + vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1} + vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>" - [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") - (vec_duplicate:VI48_AVX512VL - (match_operand:<ssescalarmode> 1 "register_operand" "r")))] - "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)" -{ - return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; -} - [(set_attr "type" "ssemov") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>" - [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v") - (vec_duplicate:V48_AVX512VL - (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))] + [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v") + (vec_duplicate:V48_AVX512VL + (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))] "TARGET_AVX512F" "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + (set_attr "mode" "<sseinsnmode>") + (set (attr "enabled") + (if_then_else (eq_attr "alternative" "1") + (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT + && (<ssescalarmode>mode != DImode || TARGET_64BIT)") + (const_int 1)))]) -(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") - (vec_duplicate:VI12_AVX512VL - (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512BW" - "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) +(define_insn "vec_dupv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + (vec_duplicate:V4SF + (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))] + "TARGET_SSE" + "@ + vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} + vbroadcastss\t{%1, %0|%0, %1} + shufps\t{$0, %0, %0|%0, %0, 0}" + [(set_attr "isa" "avx,avx,noavx") + (set_attr "type" "sseshuf1,ssemov,sseshuf1") + (set_attr "length_immediate" "1,0,1") + (set_attr "prefix_extra" "0,1,*") + (set_attr "prefix" "vex,vex,orig") + (set_attr "mode" "V4SF")]) + +(define_insn "*vec_dupv4si" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (vec_duplicate:V4SI + (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))] + "TARGET_SSE" + "@ + %vpshufd\t{$0, %1, %0|%0, %1, 0} + vbroadcastss\t{%1, %0|%0, %1} + shufps\t{$0, %0, %0|%0, %0, 0}" + [(set_attr "isa" "sse2,avx,noavx") + (set_attr "type" "sselog1,ssemov,sselog1") + (set_attr "length_immediate" "1,0,1") + (set_attr "prefix_extra" "0,1,*") + (set_attr "prefix" "maybe_vex,vex,orig") + (set_attr "mode" "TI,V4SF,V4SF")]) + +(define_insn "*vec_dupv2di" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x") + (vec_duplicate:V2DI + (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))] + "TARGET_SSE" + "@ + punpcklqdq\t%0, %0 + vpunpcklqdq\t{%d1, %0|%0, %d1} + %vmovddup\t{%1, %0|%0, %1} + movlhps\t%0, %0" + [(set_attr "isa" "sse2_noavx,avx,sse3,noavx") + (set_attr "type" "sselog1,sselog1,sselog1,ssemov") + (set_attr "prefix" "orig,vex,maybe_vex,orig") + (set_attr "mode" "TI,TI,DF,V4SF")]) (define_insn "avx2_vbroadcasti128_<mode>" [(set (match_operand:VI_256 0 "register_operand" "=x") @@ -16759,7 +16744,10 @@ (define_split [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand") (vec_duplicate:AVX2_VEC_DUP_MODE (match_operand:<ssescalarmode> 1 "register_operand")))] - "TARGET_AVX2 && reload_completed && GENERAL_REG_P (operands[1])" + "TARGET_AVX2 + && (!TARGET_AVX512VL + || (!TARGET_AVX512BW && GET_MODE_SIZE (<ssescalarmode>mode) > 2)) + && reload_completed && GENERAL_REG_P (operands[1])" [(const_int 0)] { emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]), --- gcc/config/i386/i386.c.jj 2014-12-08 10:57:17.000000000 +0100 +++ gcc/config/i386/i386.c 2014-12-08 12:18:51.377459354 +0100 @@ -28819,7 +28819,6 @@ enum ix86_builtins IX86_BUILTIN_PBROADCASTMW512, IX86_BUILTIN_PBROADCASTQ512, IX86_BUILTIN_PBROADCASTQ512_GPR, - IX86_BUILTIN_PBROADCASTQ512_MEM, IX86_BUILTIN_PCMPEQD512_MASK, IX86_BUILTIN_PCMPEQQ512_MASK, IX86_BUILTIN_PCMPGTD512_MASK, @@ -29257,10 +29256,8 @@ enum ix86_builtins IX86_BUILTIN_PBROADCASTD128_GPR_MASK, IX86_BUILTIN_PBROADCASTQ256_MASK, IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, - IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, IX86_BUILTIN_PBROADCASTQ128_MASK, IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, - IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, IX86_BUILTIN_BROADCASTSS256, IX86_BUILTIN_BROADCASTSS128, IX86_BUILTIN_BROADCASTSD256, @@ -31799,8 +31796,7 @@ static const struct builtin_description { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI }, { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI }, - { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, - { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI }, @@ -32074,11 +32070,9 @@ static const struct builtin_description { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI }, - { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI }, - { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI }, - { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI }, - { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI }, { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI }, --- gcc/config/i386/avx512fintrin.h.jj 2014-11-18 08:26:47.000000000 +0100 +++ gcc/config/i386/avx512fintrin.h 2014-12-08 13:07:40.657521773 +0100 @@ -3603,47 +3603,28 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_set1_epi64 (long long __A) { -#ifdef TARGET_64BIT return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) _mm512_undefined_si512 (), (__mmask8)(-1)); -#else - return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8)(-1)); -#endif } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) { -#ifdef TARGET_64BIT return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, __M); -#else - return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O, - __M); -#endif } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) { -#ifdef TARGET_64BIT return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) _mm512_setzero_si512 (), __M); -#else - return (__m512i) - __builtin_ia32_pbroadcastq512_mem_mask (__A, - (__v8di) _mm512_setzero_si512 (), - __M); -#endif } extern __inline __m512 --- gcc/config/i386/avx512vlintrin.h.jj 2014-11-11 00:06:22.000000000 +0100 +++ gcc/config/i386/avx512vlintrin.h 2014-12-08 12:20:06.498102723 +0100 @@ -2642,30 +2642,18 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) { -#ifdef TARGET_64BIT return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O, __M); -#else - return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, (__v4di) __O, - __M); -#endif } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) { -#ifdef TARGET_64BIT return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) _mm256_setzero_si256 (), __M); -#else - return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, - (__v4di) - _mm256_setzero_si256 (), - __M); -#endif } extern __inline __m128i @@ -2691,30 +2679,18 @@ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) { -#ifdef TARGET_64BIT return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O, __M); -#else - return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, (__v2di) __O, - __M); -#endif } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) { -#ifdef TARGET_64BIT return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) _mm_setzero_si128 (), __M); -#else - return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, - (__v2di) - _mm_setzero_si128 (), - __M); -#endif } extern __inline __m256