From bb2d5a4f79dd79a5b3772987d51123547b9319fa Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 6 Mar 2016 06:38:21 -0800
Subject: [PATCH] Add the TARGET_GEN_MEMSET_VALUE hook
builtin_memset_gen_str returns a register used for memset, but it only
supports integer registers, while a target may use vector registers in memset.
This patch adds the TARGET_GEN_MEMSET_VALUE hook to duplicate a QImode value
to the mode specified by STORE_MAX_PIECES with vector instructions. The default
hook is the same as the original builtin_memset_gen_str. A target can
override it to support a larger STORE_MAX_PIECES.
gcc/
* builtins.c (c_readstr): Make it global.
(builtin_memset_gen_str): Call targetm.gen_memset_value.
* builtins.h: Add c_readstr.
* target.def (gen_memset_value): New hook.
* targhooks.c: Include "expmed.h" and "builtins.h".
(default_gen_memset_value): New function.
* targhooks.h (default_gen_memset_value): New prototype.
* config/i386/i386.c (ix86_gen_memset_value): New function.
(TARGET_GEN_MEMSET_VALUE): New.
* config/i386/i386.h (STORE_MAX_PIECES): Likewise.
* doc/tm.texi.in: Add TARGET_GEN_MEMSET_VALUE hook.
* doc/tm.texi: Updated.
gcc/testsuite/
* gcc.target/i386/pieces-memset-1.c: New test.
* gcc.target/i386/pieces-memset-2.c: Likewise.
* gcc.target/i386/pieces-memset-3.c: Likewise.
* gcc.target/i386/pieces-memset-4.c: Likewise.
* gcc.target/i386/pieces-memset-5.c: Likewise.
* gcc.target/i386/pieces-memset-6.c: Likewise.
* gcc.target/i386/pieces-memset-7.c: Likewise.
* gcc.target/i386/pieces-memset-8.c: Likewise.
* gcc.target/i386/pieces-memset-9.c: Likewise.
* gcc.target/i386/pieces-memset-10.c: Likewise.
* gcc.target/i386/pieces-memset-11.c: Likewise.
* gcc.target/i386/pieces-memset-12.c: Likewise.
* gcc.target/i386/pieces-memset-13.c: Likewise.
* gcc.target/i386/pieces-memset-14.c: Likewise.
* gcc.target/i386/pieces-memset-15.c: Likewise.
* gcc.target/i386/pieces-memset-16.c: Likewise.
* gcc.target/i386/pieces-memset-17.c: Likewise.
* gcc.target/i386/pieces-memset-18.c: Likewise.
* gcc.target/i386/pieces-memset-19.c: Likewise.
* gcc.target/i386/pieces-memset-20.c: Likewise.
* gcc.target/i386/pieces-memset-21.c: Likewise.
* gcc.target/i386/pieces-memset-22.c: Likewise.
* gcc.target/i386/pieces-memset-23.c: Likewise.
* gcc.target/i386/pieces-memset-24.c: Likewise.
* gcc.target/i386/pieces-memset-25.c: Likewise.
* gcc.target/i386/pieces-memset-26.c: Likewise.
* gcc.target/i386/pieces-memset-27.c: Likewise.
* gcc.target/i386/pieces-memset-28.c: Likewise.
* gcc.target/i386/pieces-memset-29.c: Likewise.
* gcc.target/i386/pieces-memset-30.c: Likewise.
* gcc.target/i386/pieces-memset-31.c: Likewise.
* gcc.target/i386/pieces-memset-32.c: Likewise.
* gcc.target/i386/pieces-memset-33.c: Likewise.
* gcc.target/i386/pieces-memset-34.c: Likewise.
* gcc.target/i386/pieces-memset-35.c: Likewise.
* gcc.target/i386/pieces-memset-36.c: Likewise.
* gcc.target/i386/pieces-memset-37.c: Likewise.
* gcc.target/i386/pieces-memset-38.c: Likewise.
* gcc.target/i386/pieces-memset-39.c: Likewise.
* gcc.target/i386/pieces-memset-40.c: Likewise.
* gcc.target/i386/pieces-memset-41.c: Likewise.
* gcc.target/i386/pieces-memset-42.c: Likewise.
* gcc.target/i386/pieces-memset-43.c: Likewise.
* gcc.target/i386/pieces-memset-44.c: Likewise.
@@ -89,7 +89,6 @@ builtin_info_type builtin_info[(int)END_BUILTINS];
/* Non-zero if __builtin_constant_p should be folded right away. */
bool force_folding_builtin_constant_p;
-static rtx c_readstr (const char *, machine_mode);
static int target_char_cast (tree, char *);
static rtx get_memory_rtx (tree, tree);
static int apply_args_size (void);
@@ -616,7 +615,7 @@ c_strlen (tree src, int only_value)
/* Return a constant integer corresponding to target reading
GET_MODE_BITSIZE (MODE) bits from string constant STR. */
-static rtx
+rtx
c_readstr (const char *str, machine_mode mode)
{
HOST_WIDE_INT ch;
@@ -3411,21 +3410,10 @@ static rtx
builtin_memset_gen_str (void *data, HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
machine_mode mode)
{
- rtx target, coeff;
- size_t size;
- char *p;
-
- size = GET_MODE_SIZE (mode);
- if (size == 1)
+ if (GET_MODE_SIZE (mode) == 1)
return (rtx) data;
- p = XALLOCAVEC (char, size);
- memset (p, 1, size);
- coeff = c_readstr (p, mode);
-
- target = convert_to_mode (mode, (rtx) data, 1);
- target = expand_mult (mode, target, coeff, NULL_RTX, 1);
- return force_reg (mode, target);
+ return targetm.gen_memset_value ((rtx) data, mode);
}
/* Expand expression EXP, which is a call to the memset builtin. Return
@@ -101,4 +101,6 @@ extern char target_percent_s_newline[4];
extern internal_fn associated_internal_fn (tree);
extern internal_fn replacement_internal_fn (gcall *);
+extern rtx c_readstr (const char *, machine_mode);
+
#endif
@@ -50292,6 +50292,41 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
}
}
+/* Implement the TARGET_GEN_MEMSET_VALUE hook. */
+
+static rtx
+ix86_gen_memset_value (rtx data, machine_mode mode)
+{
+ if (GET_MODE_SIZE (mode) <= 8)
+ return default_gen_memset_value (data, mode);
+
+ if (GET_MODE_SIZE (mode) != 16)
+ gcc_unreachable ();
+
+ rtx one, target;
+ machine_mode one_mode;
+
+ if (TARGET_AVX2)
+ {
+ one_mode = QImode;
+ one = data;
+ }
+ else
+ {
+ one_mode = SImode;
+ one = default_gen_memset_value (data, one_mode);
+ }
+
+ machine_mode vector_mode
+ = mode_for_vector (one_mode,
+ GET_MODE_SIZE (mode) / GET_MODE_SIZE (one_mode));
+ target = gen_reg_rtx (vector_mode);
+ if (ix86_vector_duplicate_value (vector_mode, target, one))
+ return convert_to_mode (mode, target, 1);
+ else
+ gcc_unreachable ();
+}
+
/* Address space support.
This is not "far pointers" in the 16-bit sense, but an easy way
@@ -50792,6 +50827,9 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
+#undef TARGET_GEN_MEMSET_VALUE
+#define TARGET_GEN_MEMSET_VALUE ix86_gen_memset_value
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"
@@ -1963,6 +1963,19 @@ typedef struct ix86_args {
&& TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
+/* STORE_MAX_PIECES is the number of bytes at a time which we can
+ store efficiently.
+
+ ??? We should use TImode in 32-bit mode and use OImode or XImode
+ if they are available. But since by_pieces_ninsns determines the
+ widest mode with MAX_FIXED_MODE_SIZE, we can only use TImode in
+ 64-bit mode. */
+#define STORE_MAX_PIECES \
+ ((TARGET_64BIT \
+ && TARGET_SSE2 \
+ && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
+ ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
+
/* If a memory-to-memory move would take MOVE_RATIO or more simple
move-instruction pairs, we will do a movmem or libcall instead.
Increasing the value will always make code faster, but eventually
@@ -11458,6 +11458,13 @@ This function prepares to emit a conditional comparison within a sequence
@var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
@end deftypefn
+@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, machine_mode @var{mode})
+This function returns the RTL of a register containing
+@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
+char value given in the RTL register @var{data}. For example, if
+@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
+@end deftypefn
+
@deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, struct loop *@var{loop})
This target hook returns a new value for the number of times @var{loop}
should be unrolled. The parameter @var{nunroll} is the number of times
@@ -8112,6 +8112,8 @@ build_type_attribute_variant (@var{mdecl},
@hook TARGET_GEN_CCMP_NEXT
+@hook TARGET_GEN_MEMSET_VALUE
+
@hook TARGET_LOOP_UNROLL_ADJUST
@defmac POWI_MAX_MULTS
@@ -2631,6 +2631,15 @@ DEFHOOK
 rtx, (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
NULL)
+DEFHOOK
+(gen_memset_value,
+ "This function returns the RTL of a register containing\n\
+@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned\n\
+char value given in the RTL register @var{data}. For example, if\n\
+@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.",
+ rtx, (rtx data, machine_mode mode),
+ default_gen_memset_value)
+
/* Return a new value for loop unroll size. */
DEFHOOK
(loop_unroll_adjust,
@@ -76,6 +76,8 @@ along with GCC; see the file COPYING3. If not see
#include "gimplify.h"
#include "predict.h"
#include "params.h"
+#include "expmed.h"
+#include "builtins.h"
bool
@@ -2008,4 +2010,21 @@ default_max_noce_ifcvt_seq_cost (edge e)
return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (3);
}
+/* Default implementation of TARGET_GEN_MEMSET_VALUE. */
+
+rtx
+default_gen_memset_value (rtx data, machine_mode mode)
+{
+ rtx target, coeff;
+ char *p;
+ size_t size = GET_MODE_SIZE (mode);
+ p = XALLOCAVEC (char, size);
+ memset (p, 1, size);
+ coeff = c_readstr (p, mode);
+
+ target = convert_to_mode (mode, (rtx) data, 1);
+ target = expand_mult (mode, target, coeff, NULL_RTX, 1);
+ return force_reg (mode, target);
+}
+
#include "gt-targhooks.h"
@@ -257,5 +257,6 @@ extern bool default_optab_supported_p (int, machine_mode, machine_mode,
optimization_type);
extern unsigned int default_max_noce_ifcvt_seq_cost (edge);
+extern rtx default_gen_memset_value (rtx, machine_mode);
#endif /* GCC_TARGHOOKS_H */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 64);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 64);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512bw -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 34);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512bw -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512bw -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+ __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */