From b8809a6ae86c32cb97f38ecda5d6c8b167b4259c Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 2 Oct 2018 14:27:55 -0700
Subject: [PATCH] i386: Use scalar operand in floating point vec_dup patterns
Since vector registers are also used for scalar floating point values,
we can use scalar operand in floating point vec_dup patterns, which
enables combiner to generate
(set (reg:V8SF 84)
(vec_duplicate:V8SF (mem/c:SF (symbol_ref:DI ("y")))))
const_vector_duplicate_operand is added for constant vector broadcast.
We split
(set (reg:V16SF 86)
(const_vector:V16SF
[(const_double:SF 2.0e+0 [0x0.8p+2]) repeated x16])
to
(set (reg:V16SF 86)
(vec_duplicate:V16SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC1")))))
before register allocation so tha IRA can turn
(set (reg:V16SF 86)
(vec_duplicate:V16SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC1")))))
(set (reg:V16SF 90)
(plus:V16SF (reg/v:V16SF 85 [ x ])
(reg:V16SF 86)))
into
(set (reg:V16SF 90)
(plus:V16SF
(vec_duplicate:V16SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC1"))))
(reg/v:V16SF 85 [ x ])))
For AVX512 broadcast instructions from integer register operand, we only
need to broadcast integer to integer vectors.
gcc/
PR target/87537
* config/i386/i386-builtin-types.def: Replace
CODE_FOR_avx2_vec_dupv4sf, CODE_FOR_avx2_vec_dupv8sf and
CODE_FOR_avx2_vec_dupv4df with CODE_FOR_vec_dupv4sf,
CODE_FOR_vec_dupv8sf and CODE_FOR_vec_dupv4df, respectively.
* config/i386/i386.c (expand_vec_perm_1): Replace
gen_avx512f_vec_dupv16sf_1, gen_avx2_vec_dupv8sf_1 and
gen_avx512f_vec_dupv8df_1 with gen_avx512f_vec_dupv16sf,
gen_vec_dupv8sf and gen_avx512f_vec_dupv8df, respectively.
Duplicate them from scalar operand.
* config/i386/i386.md (SF to DF splitter): Replace
gen_avx512f_vec_dupv16sf_1 with gen_avx512f_vec_dupv16sf.
* config/i386/predicates.md (const_vector_duplicate_operand): New.
* config/i386/sse.md (VF48_AVX512VL): New.
(avx2_vec_dup<mode>): Removed.
(avx2_vec_dupv8sf_1): Likewise.
(avx512f_vec_dup<mode>_1): Likewise.
(avx2_vec_dupv4df): Likewise.
(<avx512>_vec_dup<mode><mask_name>:V48_AVX512VL): Likewise.
(<avx512>_vec_dup<mode><mask_name>:VF48_AVX512VL): New.
(*<avx512>_const_vec_dup<mode>): Likewise.
(<avx512>_vec_dup<mode><mask_name>:VI48_AVX512VL): Likewise.
(<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>): Replace
V48_AVX512VL with VI48_AVX512VL.
(*avx_vperm_broadcast_<mode>): Replace gen_avx2_vec_dupv8sf with
gen_vec_dupv8sf.
gcc/testsuite/
PR target/87537
* gcc.target/i386/avx2-vbroadcastss_ps256-1.c: Updated.
* gcc.target/i386/avx512vl-vbroadcast-3.c: Likewise.
* gcc.target/i386/avx512-binop-7.h: New file.
* gcc.target/i386/avx512f-add-sf-zmm-7.c: Likewise.
* gcc.target/i386/pr87537-2.c: Likewise.
* gcc.target/i386/pr87537-3.c: Likewise.
* gcc.target/i386/pr87537-4.c: Likewise.
* gcc.target/i386/pr87537-5.c: Likewise.
* gcc.target/i386/pr87537-6.c: Likewise.
* gcc.target/i386/pr87537-7.c: Likewise.
* gcc.target/i386/pr87537-8.c: Likewise.
* gcc.target/i386/pr87537-9.c: Likewise.
---
gcc/config/i386/i386-builtin.def | 6 +-
gcc/config/i386/i386.c | 28 +++++-
gcc/config/i386/i386.md | 2 +-
gcc/config/i386/predicates.md | 13 +++
gcc/config/i386/sse.md | 95 +++++++------------
.../i386/avx2-vbroadcastss_ps256-1.c | 3 +-
.../gcc.target/i386/avx512-binop-7.h | 12 +++
.../gcc.target/i386/avx512f-add-sf-zmm-7.c | 12 +++
.../gcc.target/i386/avx512vl-vbroadcast-3.c | 5 +-
gcc/testsuite/gcc.target/i386/pr87537-2.c | 12 +++
gcc/testsuite/gcc.target/i386/pr87537-3.c | 12 +++
gcc/testsuite/gcc.target/i386/pr87537-4.c | 12 +++
gcc/testsuite/gcc.target/i386/pr87537-5.c | 12 +++
gcc/testsuite/gcc.target/i386/pr87537-6.c | 12 +++
gcc/testsuite/gcc.target/i386/pr87537-7.c | 12 +++
gcc/testsuite/gcc.target/i386/pr87537-8.c | 12 +++
gcc/testsuite/gcc.target/i386/pr87537-9.c | 12 +++
17 files changed, 202 insertions(+), 70 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512-binop-7.h
create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-add-sf-zmm-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-8.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-9.c
@@ -1194,9 +1194,9 @@ BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
-BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF)
+BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT)
@@ -45963,6 +45963,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
/* Use vpbroadcast{b,w,d}. */
rtx (*gen) (rtx, rtx) = NULL;
+ machine_mode scalar_mode = VOIDmode;
switch (d->vmode)
{
case E_V64QImode:
@@ -45993,15 +45994,18 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
gen = gen_avx2_pbroadcastv8hi;
break;
case E_V16SFmode:
+ scalar_mode = SFmode;
if (TARGET_AVX512F)
- gen = gen_avx512f_vec_dupv16sf_1;
+ gen = gen_avx512f_vec_dupv16sf;
break;
case E_V8SFmode:
- gen = gen_avx2_vec_dupv8sf_1;
+ scalar_mode = SFmode;
+ gen = gen_vec_dupv8sf;
break;
case E_V8DFmode:
+ scalar_mode = DFmode;
if (TARGET_AVX512F)
- gen = gen_avx512f_vec_dupv8df_1;
+ gen = gen_avx512f_vec_dupv8df;
break;
case E_V8DImode:
if (TARGET_AVX512F)
@@ -46013,7 +46017,23 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
if (gen != NULL)
{
if (!d->testing_p)
- emit_insn (gen (d->target, d->op0));
+ {
+ if (scalar_mode == VOIDmode)
+ emit_insn (gen (d->target, d->op0));
+ else
+ {
+ rtx op = d->op0;
+ unsigned int oppos = 0;
+ if (SUBREG_P (op))
+ {
+ op = SUBREG_REG (op);
+ oppos = SUBREG_BYTE (op);
+ }
+ emit_insn (gen (d->target,
+ gen_rtx_SUBREG (scalar_mode,
+ op, oppos)));
+ }
+ }
return true;
}
}
@@ -4399,7 +4399,7 @@
else
{
rtx tmp = lowpart_subreg (V16SFmode, operands[3], V4SFmode);
- emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
+ emit_insn (gen_avx512f_vec_dupv16sf (tmp, tmp));
}
}
else
@@ -1048,6 +1048,19 @@
(ior (match_operand 0 "nonimmediate_operand")
(match_code "const_vector")))
+;; Return true when OP is CONST_VECTOR which can be represented by
+;; VEC_DUPLICATE.
+(define_predicate "const_vector_duplicate_operand"
+ (and (match_code "const_vector")
+ (match_test "!standard_sse_constant_p (op, mode)"))
+{
+ int i, nunits = GET_MODE_NUNITS (mode);
+ for (i = 1; i < nunits; i++)
+ if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, 0))
+ return false;
+ return true;
+})
+
;; Return true when OP is nonimmediate or standard SSE constant.
(define_predicate "nonimmediate_or_sse_const_operand"
(ior (match_operand 0 "nonimmediate_operand")
@@ -304,6 +304,10 @@
(define_mode_iterator VF_512
[V16SF V8DF])
+(define_mode_iterator VF48_AVX512VL
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
(define_mode_iterator VI48_AVX512VL
[V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
@@ -7114,42 +7118,6 @@
(set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "SF")])
-(define_insn "avx2_vec_dup<mode>"
- [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
- (vec_duplicate:VF1_128_256
- (vec_select:SF
- (match_operand:V4SF 1 "register_operand" "v")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX2"
- "vbroadcastss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_evex")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "avx2_vec_dupv8sf_1"
- [(set (match_operand:V8SF 0 "register_operand" "=v")
- (vec_duplicate:V8SF
- (vec_select:SF
- (match_operand:V8SF 1 "register_operand" "v")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX2"
- "vbroadcastss\t{%x1, %0|%0, %x1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_evex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "avx512f_vec_dup<mode>_1"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_duplicate:VF_512
- (vec_select:<ssescalarmode>
- (match_operand:VF_512 1 "register_operand" "v")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX512F"
- "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<MODE>")])
-
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1"
@@ -18059,18 +18027,6 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx2_vec_dupv4df"
- [(set (match_operand:V4DF 0 "register_operand" "=v")
- (vec_duplicate:V4DF
- (vec_select:DF
- (match_operand:V2DF 1 "register_operand" "v")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX2"
- "vbroadcastsd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_evex")
- (set_attr "mode" "V4DF")])
-
(define_insn "<avx512>_vec_dup<mode>_1"
[(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
(vec_duplicate:VI_AVX512BW
@@ -18086,11 +18042,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx512>_vec_dup<mode><mask_name>"
- [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
- (vec_duplicate:V48_AVX512VL
- (vec_select:<ssescalarmode>
- (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
- (parallel [(const_int 0)]))))]
+ [(set (match_operand:VF48_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:VF48_AVX512VL
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
{
/* There is no DF broadcast (in AVX-512*) to 128b register.
@@ -18104,6 +18058,31 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_const_vec_dup<mode>"
+ [(set (match_operand:VF48_AVX512VL 0 "register_operand" "=v")
+ (match_operand:VF48_AVX512VL 1 "const_vector_duplicate_operand" "C"))]
+ "TARGET_AVX512F"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx val = CONST_VECTOR_ELT (operands[1], 0);
+ val = validize_mem (force_const_mem (GET_MODE (val), val));
+ operands[1] = gen_rtx_VEC_DUPLICATE (<MODE>mode, val);
+})
+
+(define_insn "<avx512>_vec_dup<mode><mask_name>"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:VI48_AVX512VL
+ (vec_select:<ssescalarmode>
+ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<avx512>_vec_dup<mode><mask_name>"
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
(vec_duplicate:VI12_AVX512VL
@@ -18153,8 +18132,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
- [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
- (vec_duplicate:V48_AVX512VL
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
+ (vec_duplicate:VI48_AVX512VL
(match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
"TARGET_AVX512F"
"v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -18163,8 +18142,7 @@
(set_attr "mode" "<sseinsnmode>")
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "1")
- (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
- && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
+ (symbol_ref "<ssescalarmode>mode != DImode || TARGET_64BIT")
(const_int 1)))])
(define_insn "vec_dupv4sf"
@@ -18493,8 +18471,7 @@
or VSHUFF128. */
gcc_assert (<MODE>mode == V8SFmode);
if ((mask & 1) == 0)
- emit_insn (gen_avx2_vec_dupv8sf (op0,
- gen_lowpart (V4SFmode, op0)));
+ emit_insn (gen_vec_dupv8sf (op0, gen_lowpart (V4SFmode, op0)));
else
emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
GEN_INT (4), GEN_INT (5),
@@ -1,6 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mavx2 -O2" } */
-/* { dg-final { scan-assembler "vbroadcastss\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vbroadcastss\[ \\t\]+\[^\n\]*%ymm\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "vmovaps\[\t \]*\[^,\]*,%xmm\[0-9\]" } } */
#include <immintrin.h>
new file mode 100644
@@ -0,0 +1,12 @@
+#include <immintrin.h>
+
+#define PASTER2(x,y) x##y
+#define PASTER3(x,y,z) _mm##x##_##y##_##z
+#define OP(vec, op, suffix) PASTER3 (vec, op, suffix)
+#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val)
+
+type
+foo (type x)
+{
+ return OP (vec, op, suffix) (DUP (vec, suffix, 2.1f), x);
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op add
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-binop-7.h"
@@ -151,8 +151,8 @@ f16 (V2 *x)
}
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%xmm16" 4 } } */
-/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%xmm16\[^\n\r]*%ymm16" 3 } } */
-/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%ymm16" 3 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%xmm16\[^\n\r]*%ymm16" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%ymm16" 4 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$170\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
@@ -160,3 +160,4 @@ f16 (V2 *x)
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
/* { dg-final { scan-assembler-times "vshuff32x4\[^\n\r]*\\\$3\[^\n\r]*%ymm16\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
+/* { dg-final { scan-assembler-times "vshuff32x4\[^\n\r]*\\\$0\[^\n\r]*%ymm16\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 1 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2 -mtune=skylake" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+
+#include <immintrin.h>
+
+__m512
+foo (float *x)
+{
+ return _mm512_broadcastss_ps (_mm_load_ss(x));
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2 -mtune=skylake" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+
+#include <immintrin.h>
+
+__m512
+foo (void)
+{
+ return _mm512_set1_ps (2.0f);
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2 -mtune=skylake" } */
+/* { dg-final { scan-assembler-times "vbroadcastsd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovsd" } } */
+
+#include <immintrin.h>
+
+__m512d
+foo (double *x)
+{
+ return _mm512_broadcastsd_pd (_mm_load_sd(x));
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2 -mtune=skylake" } */
+/* { dg-final { scan-assembler-times "vbroadcastsd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovsd" } } */
+
+#include <immintrin.h>
+
+__m512d
+foo (void)
+{
+ return _mm512_set1_pd (2.0f);
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2 -mtune=skylake" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+
+#include <immintrin.h>
+
+__m256
+foo (float *x)
+{
+ return _mm256_broadcastss_ps (_mm_load_ss(x));
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2 -mtune=skylake" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+
+#include <immintrin.h>
+
+__m256
+foo (void)
+{
+ return _mm256_set1_ps (2.0f);
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2 -mtune=skylake" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+
+#include <immintrin.h>
+
+__m128
+foo (float *x)
+{
+ return _mm_broadcastss_ps (_mm_load_ss(x));
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2 -mtune=skylake" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+
+#include <immintrin.h>
+
+__m128
+foo (void)
+{
+ return _mm_set1_ps (2.0f);
+}
--
2.17.2