diff mbox

Add a TARGET_GEN_MEMSET_VALUE hook

Message ID 20160817201104.GA21304@intel.com
State New
Headers show

Commit Message

H.J. Lu Aug. 17, 2016, 8:11 p.m. UTC
builtin_memset_gen_str returns a register used for memset, which only
supports integer registers.  But a target may use vector registers in
memset.  This patch adds a TARGET_GEN_MEMSET_VALUE hook to duplicate
QImode value to mode derived from STORE_MAX_PIECES, which can be used
with vector instructions.  The default hook is the same as the original
builtin_memset_gen_str.  A target can override it to support vector
instructions for STORE_MAX_PIECES.

Tested on x86-64 and i686.  Any comments?

H.J.
---
gcc/

	* builtins.c (builtin_memset_gen_str): Call targetm.gen_memset_value.
	(default_gen_memset_value): New function.
	* target.def (gen_memset_value): New hook.
	* targhooks.c: Include "expmed.h" and "builtins.h".
	(default_gen_memset_value): New function.
	* targhooks.h (default_gen_memset_value): New prototype.
	* config/i386/i386.c (ix86_gen_memset_value): New function.
	(TARGET_GEN_MEMSET_VALUE): New.
	* config/i386/i386.h (STORE_MAX_PIECES): Likewise.
	* doc/tm.texi.in: Add TARGET_GEN_MEMSET_VALUE hook.
	* doc/tm.texi: Updated.

gcc/testsuite/

	* gcc.target/i386/pieces-memset-1.c: New test.
	* gcc.target/i386/pieces-memset-2.c: Likewise.
	* gcc.target/i386/pieces-memset-3.c: Likewise.
	* gcc.target/i386/pieces-memset-4.c: Likewise.
	* gcc.target/i386/pieces-memset-5.c: Likewise.
	* gcc.target/i386/pieces-memset-6.c: Likewise.
	* gcc.target/i386/pieces-memset-7.c: Likewise.
	* gcc.target/i386/pieces-memset-8.c: Likewise.
	* gcc.target/i386/pieces-memset-9.c: Likewise.
	* gcc.target/i386/pieces-memset-10.c: Likewise.
	* gcc.target/i386/pieces-memset-11.c: Likewise.
	* gcc.target/i386/pieces-memset-12.c: Likewise.
	* gcc.target/i386/pieces-memset-13.c: Likewise.
	* gcc.target/i386/pieces-memset-14.c: Likewise.
	* gcc.target/i386/pieces-memset-15.c: Likewise.
	* gcc.target/i386/pieces-memset-16.c: Likewise.
	* gcc.target/i386/pieces-memset-17.c: Likewise.
	* gcc.target/i386/pieces-memset-18.c: Likewise.
	* gcc.target/i386/pieces-memset-19.c: Likewise.
	* gcc.target/i386/pieces-memset-20.c: Likewise.
	* gcc.target/i386/pieces-memset-21.c: Likewise.
	* gcc.target/i386/pieces-memset-22.c: Likewise.
	* gcc.target/i386/pieces-memset-23.c: Likewise.
	* gcc.target/i386/pieces-memset-24.c: Likewise.
	* gcc.target/i386/pieces-memset-25.c: Likewise.
	* gcc.target/i386/pieces-memset-26.c: Likewise.
	* gcc.target/i386/pieces-memset-27.c: Likewise.
	* gcc.target/i386/pieces-memset-28.c: Likewise.
	* gcc.target/i386/pieces-memset-29.c: Likewise.
	* gcc.target/i386/pieces-memset-30.c: Likewise.
	* gcc.target/i386/pieces-memset-31.c: Likewise.
	* gcc.target/i386/pieces-memset-32.c: Likewise.
	* gcc.target/i386/pieces-memset-33.c: Likewise.
	* gcc.target/i386/pieces-memset-34.c: Likewise.
	* gcc.target/i386/pieces-memset-35.c: Likewise.
	* gcc.target/i386/pieces-memset-36.c: Likewise.
	* gcc.target/i386/pieces-memset-37.c: Likewise.
	* gcc.target/i386/pieces-memset-38.c: Likewise.
	* gcc.target/i386/pieces-memset-39.c: Likewise.
	* gcc.target/i386/pieces-memset-40.c: Likewise.
	* gcc.target/i386/pieces-memset-41.c: Likewise.
	* gcc.target/i386/pieces-memset-42.c: Likewise.
	* gcc.target/i386/pieces-memset-43.c: Likewise.
	* gcc.target/i386/pieces-memset-44.c: Likewise.
---
 gcc/builtins.c                                   | 32 ++++++++++++--------
 gcc/config/i386/i386.c                           | 38 ++++++++++++++++++++++++
 gcc/config/i386/i386.h                           | 13 ++++++++
 gcc/doc/tm.texi                                  |  7 +++++
 gcc/doc/tm.texi.in                               |  2 ++
 gcc/target.def                                   |  9 ++++++
 gcc/targhooks.h                                  |  1 +
 gcc/testsuite/gcc.target/i386/pieces-memset-1.c  | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-10.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-11.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-12.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-13.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-14.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-15.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-16.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-17.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-18.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-19.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-2.c  | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-20.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-21.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-22.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-23.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-24.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-25.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-26.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-27.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-28.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-29.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-3.c  | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-30.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-31.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-32.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-33.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-34.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-35.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-36.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-37.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-39.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-4.c  | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-44.c | 13 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-5.c  | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-6.c  | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-7.c  | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-8.c  | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pieces-memset-9.c  | 12 ++++++++
 51 files changed, 643 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-18.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-19.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-20.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-21.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-22.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-23.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-24.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-25.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-26.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-27.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-28.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-29.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-30.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-31.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-33.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-34.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-35.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-36.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-37.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-38.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-39.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-40.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-41.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-42.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-43.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-44.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-9.c

Comments

Uros Bizjak Aug. 18, 2016, 6:21 a.m. UTC | #1
On Wed, Aug 17, 2016 at 10:11 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> builtin_memset_gen_str returns a register used for memset, which only
> supports integer registers.  But a target may use vector registers in
> memset.  This patch adds a TARGET_GEN_MEMSET_VALUE hook to duplicate
> QImode value to mode derived from STORE_MAX_PIECES, which can be used
> with vector instructions.  The default hook is the same as the original
> builtin_memset_gen_str.  A target can override it to support vector
> instructions for STORE_MAX_PIECES.
>
> Tested on x86-64 and i686.  Any comments?

It looks to me you have attached an older version of the patch,
STORE_MAX_PIECES change in i386.h is already in the mainline.

(The patch needs middle-end review first).

Uros.

> H.J.
> ---
> gcc/
>
>         * builtins.c (builtin_memset_gen_str): Call targetm.gen_memset_value.
>         (default_gen_memset_value): New function.
>         * target.def (gen_memset_value): New hook.
>         * targhooks.c: Include "expmed.h" and "builtins.h".
>         (default_gen_memset_value): New function.
>         * targhooks.h (default_gen_memset_value): New prototype.
>         * config/i386/i386.c (ix86_gen_memset_value): New function.
>         (TARGET_GEN_MEMSET_VALUE): New.
>         * config/i386/i386.h (STORE_MAX_PIECES): Likewise.
>         * doc/tm.texi.in: Add TARGET_GEN_MEMSET_VALUE hook.
>         * doc/tm.texi: Updated.
>
> gcc/testsuite/
>
>         * gcc.target/i386/pieces-memset-1.c: New test.
>         * gcc.target/i386/pieces-memset-2.c: Likewise.
>         * gcc.target/i386/pieces-memset-3.c: Likewise.
>         * gcc.target/i386/pieces-memset-4.c: Likewise.
>         * gcc.target/i386/pieces-memset-5.c: Likewise.
>         * gcc.target/i386/pieces-memset-6.c: Likewise.
>         * gcc.target/i386/pieces-memset-7.c: Likewise.
>         * gcc.target/i386/pieces-memset-8.c: Likewise.
>         * gcc.target/i386/pieces-memset-9.c: Likewise.
>         * gcc.target/i386/pieces-memset-10.c: Likewise.
>         * gcc.target/i386/pieces-memset-11.c: Likewise.
>         * gcc.target/i386/pieces-memset-12.c: Likewise.
>         * gcc.target/i386/pieces-memset-13.c: Likewise.
>         * gcc.target/i386/pieces-memset-14.c: Likewise.
>         * gcc.target/i386/pieces-memset-15.c: Likewise.
>         * gcc.target/i386/pieces-memset-16.c: Likewise.
>         * gcc.target/i386/pieces-memset-17.c: Likewise.
>         * gcc.target/i386/pieces-memset-18.c: Likewise.
>         * gcc.target/i386/pieces-memset-19.c: Likewise.
>         * gcc.target/i386/pieces-memset-20.c: Likewise.
>         * gcc.target/i386/pieces-memset-21.c: Likewise.
>         * gcc.target/i386/pieces-memset-22.c: Likewise.
>         * gcc.target/i386/pieces-memset-23.c: Likewise.
>         * gcc.target/i386/pieces-memset-24.c: Likewise.
>         * gcc.target/i386/pieces-memset-25.c: Likewise.
>         * gcc.target/i386/pieces-memset-26.c: Likewise.
>         * gcc.target/i386/pieces-memset-27.c: Likewise.
>         * gcc.target/i386/pieces-memset-28.c: Likewise.
>         * gcc.target/i386/pieces-memset-29.c: Likewise.
>         * gcc.target/i386/pieces-memset-30.c: Likewise.
>         * gcc.target/i386/pieces-memset-31.c: Likewise.
>         * gcc.target/i386/pieces-memset-32.c: Likewise.
>         * gcc.target/i386/pieces-memset-33.c: Likewise.
>         * gcc.target/i386/pieces-memset-34.c: Likewise.
>         * gcc.target/i386/pieces-memset-35.c: Likewise.
>         * gcc.target/i386/pieces-memset-36.c: Likewise.
>         * gcc.target/i386/pieces-memset-37.c: Likewise.
>         * gcc.target/i386/pieces-memset-38.c: Likewise.
>         * gcc.target/i386/pieces-memset-39.c: Likewise.
>         * gcc.target/i386/pieces-memset-40.c: Likewise.
>         * gcc.target/i386/pieces-memset-41.c: Likewise.
>         * gcc.target/i386/pieces-memset-42.c: Likewise.
>         * gcc.target/i386/pieces-memset-43.c: Likewise.
>         * gcc.target/i386/pieces-memset-44.c: Likewise.
> ---
>  gcc/builtins.c                                   | 32 ++++++++++++--------
>  gcc/config/i386/i386.c                           | 38 ++++++++++++++++++++++++
>  gcc/config/i386/i386.h                           | 13 ++++++++
>  gcc/doc/tm.texi                                  |  7 +++++
>  gcc/doc/tm.texi.in                               |  2 ++
>  gcc/target.def                                   |  9 ++++++
>  gcc/targhooks.h                                  |  1 +
>  gcc/testsuite/gcc.target/i386/pieces-memset-1.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-10.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-11.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-12.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-13.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-14.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-15.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-16.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-17.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-18.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-19.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-2.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-20.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-21.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-22.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-23.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-24.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-25.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-26.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-27.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-28.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-29.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-3.c  | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-30.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-31.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-32.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-33.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-34.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-35.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-36.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-37.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-39.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-4.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-44.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-5.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-6.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-7.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-8.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-9.c  | 12 ++++++++
>  51 files changed, 643 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-10.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-13.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-14.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-15.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-16.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-17.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-18.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-19.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-20.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-21.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-22.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-23.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-24.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-25.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-26.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-27.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-28.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-29.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-30.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-31.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-32.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-33.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-34.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-35.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-36.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-37.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-38.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-39.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-40.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-41.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-42.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-43.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-44.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-8.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-9.c
>
> diff --git a/gcc/builtins.c b/gcc/builtins.c
> index 03a0dc8..79be38e 100644
> --- a/gcc/builtins.c
> +++ b/gcc/builtins.c
> @@ -3411,21 +3411,10 @@ static rtx
>  builtin_memset_gen_str (void *data, HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>                         machine_mode mode)
>  {
> -  rtx target, coeff;
> -  size_t size;
> -  char *p;
> -
> -  size = GET_MODE_SIZE (mode);
> -  if (size == 1)
> +  if (GET_MODE_SIZE (mode) == 1)
>      return (rtx) data;
>
> -  p = XALLOCAVEC (char, size);
> -  memset (p, 1, size);
> -  coeff = c_readstr (p, mode);
> -
> -  target = convert_to_mode (mode, (rtx) data, 1);
> -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> -  return force_reg (mode, target);
> +  return targetm.gen_memset_value ((rtx) data, mode);
>  }
>
>  /* Expand expression EXP, which is a call to the memset builtin.  Return
> @@ -10172,3 +10161,20 @@ is_inexpensive_builtin (tree decl)
>
>    return false;
>  }
> +
> +/* Default implementation of TARGET_GEN_MEMSET_VALUE.  */
> +
> +rtx
> +default_gen_memset_value (rtx data, machine_mode mode)
> +{
> +  rtx target, coeff;
> +  char *p;
> +  size_t size = GET_MODE_SIZE (mode);
> +  p = XALLOCAVEC (char, size);
> +  memset (p, 1, size);
> +  coeff = c_readstr (p, mode);
> +
> +  target = convert_to_mode (mode, (rtx) data, 1);
> +  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> +  return force_reg (mode, target);
> +}
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 3805817..23ec694 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -55314,6 +55314,41 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
>      }
>  }
>
> +/* Implement the TARGET_GEN_MEMSET_VALUE hook.  */
> +
> +static rtx
> +ix86_gen_memset_value (rtx data, machine_mode mode)
> +{
> +  if (GET_MODE_SIZE (mode) <= 8)
> +    return default_gen_memset_value (data, mode);
> +
> +  if (GET_MODE_SIZE (mode) != 16)
> +    gcc_unreachable ();
> +
> +  rtx one, target;
> +  machine_mode one_mode;
> +
> +  if (TARGET_AVX2)
> +    {
> +      one_mode = QImode;
> +      one = data;
> +    }
> +  else
> +    {
> +      one_mode = SImode;
> +      one = default_gen_memset_value (data, one_mode);
> +    }
> +
> +  machine_mode vector_mode
> +    = mode_for_vector (one_mode,
> +                      GET_MODE_SIZE (mode) / GET_MODE_SIZE (one_mode));
> +  target = gen_reg_rtx (vector_mode);
> +  if (ix86_vector_duplicate_value (vector_mode, target, one))
> +    return convert_to_mode (mode, target, 1);
> +  else
> +    gcc_unreachable ();
> +}
> +
>  /* Address space support.
>
>     This is not "far pointers" in the 16-bit sense, but an easy way
> @@ -55814,6 +55849,9 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
>  #undef TARGET_HARD_REGNO_SCRATCH_OK
>  #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
>
> +#undef TARGET_GEN_MEMSET_VALUE
> +#define TARGET_GEN_MEMSET_VALUE ix86_gen_memset_value
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
>
>  #include "gt-i386.h"
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 8751143..7ce2e4a 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1963,6 +1963,19 @@ typedef struct ix86_args {
>      && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
>     ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
>
> +/* STORE_MAX_PIECES is the number of bytes at a time which we can
> +   store efficiently.
> +
> +   ??? We should use TImode in 32-bit mode and use OImode or XImode
> +   if they are available.  But since by_pieces_ninsns determines the
> +   widest mode with MAX_FIXED_MODE_SIZE, we can only use TImode in
> +   64-bit mode.  */
> +#define STORE_MAX_PIECES \
> +  ((TARGET_64BIT \
> +    && TARGET_SSE2 \
> +    && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
> +   ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
> +
>  /* If a memory-to-memory move would take MOVE_RATIO or more simple
>     move-instruction pairs, we will do a movmem or libcall instead.
>     Increasing the value will always make code faster, but eventually
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 9edb006..abab7dd 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11458,6 +11458,13 @@ This function prepares to emit a conditional comparison within a sequence
>   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
>  @end deftypefn
>
> +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, machine_mode @var{mode})
> +This function returns the RTL of a register containing
> +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
> +char value given in the RTL register @var{data}.  For example, if
> +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, struct loop *@var{loop})
>  This target hook returns a new value for the number of times @var{loop}
>  should be unrolled. The parameter @var{nunroll} is the number of times
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index a72c3d8..564c11ba 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -8112,6 +8112,8 @@ build_type_attribute_variant (@var{mdecl},
>
>  @hook TARGET_GEN_CCMP_NEXT
>
> +@hook TARGET_GEN_MEMSET_VALUE
> +
>  @hook TARGET_LOOP_UNROLL_ADJUST
>
>  @defmac POWI_MAX_MULTS
> diff --git a/gcc/target.def b/gcc/target.def
> index 929d9ea..c4eb6b4 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -2631,6 +2631,15 @@ DEFHOOK
>   rtx, (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
>   NULL)
>
> +DEFHOOK
> +(gen_memset_value,
> + "This function returns the RTL of a register containing\n\
> +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned\n\
> +char value given in the RTL register @var{data}.  For example, if\n\
> +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.",
> + rtx, (rtx data, machine_mode mode),
> + default_gen_memset_value)
> +
>  /* Return a new value for loop unroll size.  */
>  DEFHOOK
>  (loop_unroll_adjust,
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index 2e7ca72..798a998 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -257,5 +257,6 @@ extern bool default_optab_supported_p (int, machine_mode, machine_mode,
>                                        optimization_type);
>
>  extern unsigned int default_max_noce_ifcvt_seq_cost (edge);
> +extern rtx default_gen_memset_value (rtx, machine_mode);
>
>  #endif /* GCC_TARGHOOKS_H */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-1.c b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
> new file mode 100644
> index 0000000..b33c073
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-10.c b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
> new file mode 100644
> index 0000000..90df34a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
> new file mode 100644
> index 0000000..e234196
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-12.c b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
> new file mode 100644
> index 0000000..ebc0db1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-13.c b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
> new file mode 100644
> index 0000000..251061f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
> new file mode 100644
> index 0000000..e6426e6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-15.c b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
> new file mode 100644
> index 0000000..7ade143
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-16.c b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
> new file mode 100644
> index 0000000..d8ef81e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-17.c b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
> new file mode 100644
> index 0000000..c997cda
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-18.c b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
> new file mode 100644
> index 0000000..a611040
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-19.c b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
> new file mode 100644
> index 0000000..8367654
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
> new file mode 100644
> index 0000000..15f4396
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
> new file mode 100644
> index 0000000..247936c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-21.c b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
> new file mode 100644
> index 0000000..e4319dc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-22.c b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
> new file mode 100644
> index 0000000..e7b6368
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
> new file mode 100644
> index 0000000..703d663
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-24.c b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
> new file mode 100644
> index 0000000..aaec8c1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-25.c b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
> new file mode 100644
> index 0000000..9c38f86
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-26.c b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
> new file mode 100644
> index 0000000..0040c2b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-27.c b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
> new file mode 100644
> index 0000000..e742aa3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-28.c b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
> new file mode 100644
> index 0000000..7d6a8dd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
> new file mode 100644
> index 0000000..9847d5e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-3.c b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
> new file mode 100644
> index 0000000..907ee6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512bw -mavx512f -mtune=intel" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
> new file mode 100644
> index 0000000..cff88f7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-31.c b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
> new file mode 100644
> index 0000000..1221f72
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-32.c b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
> new file mode 100644
> index 0000000..0a8d0aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
> new file mode 100644
> index 0000000..65d4bfa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
> new file mode 100644
> index 0000000..e2c1fc8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-35.c b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
> new file mode 100644
> index 0000000..df6dd0d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 34);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-36.c b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
> new file mode 100644
> index 0000000..7f9829a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
> new file mode 100644
> index 0000000..ad8d996
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> new file mode 100644
> index 0000000..907ee6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512bw -mavx512f -mtune=intel" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-39.c b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
> new file mode 100644
> index 0000000..ab560c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512bw -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-4.c b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
> new file mode 100644
> index 0000000..17b0c25
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> new file mode 100644
> index 0000000..d818f35
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> new file mode 100644
> index 0000000..5f3fea5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> new file mode 100644
> index 0000000..948cf09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> new file mode 100644
> index 0000000..9ab7c53
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
> new file mode 100644
> index 0000000..de45cb9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
> new file mode 100644
> index 0000000..b47c666
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-6.c b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
> new file mode 100644
> index 0000000..9535277
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=intel" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-7.c b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
> new file mode 100644
> index 0000000..f0466e8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-8.c b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
> new file mode 100644
> index 0000000..9147460
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-9.c b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
> new file mode 100644
> index 0000000..cedf8a3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> --
> 2.7.4
>
Richard Biener Aug. 18, 2016, 8:18 a.m. UTC | #2
On Wed, Aug 17, 2016 at 10:11 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> builtin_memset_gen_str returns a register used for memset, which only
> supports integer registers.  But a target may use vector registers in
> memset.  This patch adds a TARGET_GEN_MEMSET_VALUE hook to duplicate
> QImode value to mode derived from STORE_MAX_PIECES, which can be used
> with vector instructions.  The default hook is the same as the original
> builtin_memset_gen_str.  A target can override it to support vector
> instructions for STORE_MAX_PIECES.
>
> Tested on x86-64 and i686.  Any comments?
>
> H.J.
> ---
> gcc/
>
>         * builtins.c (builtin_memset_gen_str): Call targetm.gen_memset_value.
>         (default_gen_memset_value): New function.
>         * target.def (gen_memset_value): New hook.
>         * targhooks.c: Include "expmed.h" and "builtins.h".
>         (default_gen_memset_value): New function.

I see default_gen_memset_value in builtins.c but it belongs here.

>         * targhooks.h (default_gen_memset_value): New prototype.
>         * config/i386/i386.c (ix86_gen_memset_value): New function.
>         (TARGET_GEN_MEMSET_VALUE): New.
>         * config/i386/i386.h (STORE_MAX_PIECES): Likewise.
>         * doc/tm.texi.in: Add TARGET_GEN_MEMSET_VALUE hook.
>         * doc/tm.texi: Updated.
>
> gcc/testsuite/
>
>         * gcc.target/i386/pieces-memset-1.c: New test.
>         * gcc.target/i386/pieces-memset-2.c: Likewise.
>         * gcc.target/i386/pieces-memset-3.c: Likewise.
>         * gcc.target/i386/pieces-memset-4.c: Likewise.
>         * gcc.target/i386/pieces-memset-5.c: Likewise.
>         * gcc.target/i386/pieces-memset-6.c: Likewise.
>         * gcc.target/i386/pieces-memset-7.c: Likewise.
>         * gcc.target/i386/pieces-memset-8.c: Likewise.
>         * gcc.target/i386/pieces-memset-9.c: Likewise.
>         * gcc.target/i386/pieces-memset-10.c: Likewise.
>         * gcc.target/i386/pieces-memset-11.c: Likewise.
>         * gcc.target/i386/pieces-memset-12.c: Likewise.
>         * gcc.target/i386/pieces-memset-13.c: Likewise.
>         * gcc.target/i386/pieces-memset-14.c: Likewise.
>         * gcc.target/i386/pieces-memset-15.c: Likewise.
>         * gcc.target/i386/pieces-memset-16.c: Likewise.
>         * gcc.target/i386/pieces-memset-17.c: Likewise.
>         * gcc.target/i386/pieces-memset-18.c: Likewise.
>         * gcc.target/i386/pieces-memset-19.c: Likewise.
>         * gcc.target/i386/pieces-memset-20.c: Likewise.
>         * gcc.target/i386/pieces-memset-21.c: Likewise.
>         * gcc.target/i386/pieces-memset-22.c: Likewise.
>         * gcc.target/i386/pieces-memset-23.c: Likewise.
>         * gcc.target/i386/pieces-memset-24.c: Likewise.
>         * gcc.target/i386/pieces-memset-25.c: Likewise.
>         * gcc.target/i386/pieces-memset-26.c: Likewise.
>         * gcc.target/i386/pieces-memset-27.c: Likewise.
>         * gcc.target/i386/pieces-memset-28.c: Likewise.
>         * gcc.target/i386/pieces-memset-29.c: Likewise.
>         * gcc.target/i386/pieces-memset-30.c: Likewise.
>         * gcc.target/i386/pieces-memset-31.c: Likewise.
>         * gcc.target/i386/pieces-memset-32.c: Likewise.
>         * gcc.target/i386/pieces-memset-33.c: Likewise.
>         * gcc.target/i386/pieces-memset-34.c: Likewise.
>         * gcc.target/i386/pieces-memset-35.c: Likewise.
>         * gcc.target/i386/pieces-memset-36.c: Likewise.
>         * gcc.target/i386/pieces-memset-37.c: Likewise.
>         * gcc.target/i386/pieces-memset-38.c: Likewise.
>         * gcc.target/i386/pieces-memset-39.c: Likewise.
>         * gcc.target/i386/pieces-memset-40.c: Likewise.
>         * gcc.target/i386/pieces-memset-41.c: Likewise.
>         * gcc.target/i386/pieces-memset-42.c: Likewise.
>         * gcc.target/i386/pieces-memset-43.c: Likewise.
>         * gcc.target/i386/pieces-memset-44.c: Likewise.
> ---
>  gcc/builtins.c                                   | 32 ++++++++++++--------
>  gcc/config/i386/i386.c                           | 38 ++++++++++++++++++++++++
>  gcc/config/i386/i386.h                           | 13 ++++++++
>  gcc/doc/tm.texi                                  |  7 +++++
>  gcc/doc/tm.texi.in                               |  2 ++
>  gcc/target.def                                   |  9 ++++++
>  gcc/targhooks.h                                  |  1 +
>  gcc/testsuite/gcc.target/i386/pieces-memset-1.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-10.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-11.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-12.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-13.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-14.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-15.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-16.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-17.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-18.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-19.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-2.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-20.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-21.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-22.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-23.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-24.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-25.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-26.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-27.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-28.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-29.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-3.c  | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-30.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-31.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-32.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-33.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-34.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-35.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-36.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-37.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-39.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-4.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-44.c | 13 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-5.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-6.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-7.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-8.c  | 12 ++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memset-9.c  | 12 ++++++++
>  51 files changed, 643 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-10.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-13.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-14.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-15.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-16.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-17.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-18.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-19.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-20.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-21.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-22.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-23.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-24.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-25.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-26.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-27.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-28.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-29.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-30.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-31.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-32.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-33.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-34.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-35.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-36.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-37.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-38.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-39.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-40.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-41.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-42.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-43.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-44.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-8.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-9.c
>
> diff --git a/gcc/builtins.c b/gcc/builtins.c
> index 03a0dc8..79be38e 100644
> --- a/gcc/builtins.c
> +++ b/gcc/builtins.c
> @@ -3411,21 +3411,10 @@ static rtx
>  builtin_memset_gen_str (void *data, HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
>                         machine_mode mode)
>  {
> -  rtx target, coeff;
> -  size_t size;
> -  char *p;
> -
> -  size = GET_MODE_SIZE (mode);
> -  if (size == 1)
> +  if (GET_MODE_SIZE (mode) == 1)
>      return (rtx) data;
>
> -  p = XALLOCAVEC (char, size);
> -  memset (p, 1, size);
> -  coeff = c_readstr (p, mode);
> -
> -  target = convert_to_mode (mode, (rtx) data, 1);
> -  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> -  return force_reg (mode, target);
> +  return targetm.gen_memset_value ((rtx) data, mode);
>  }
>
>  /* Expand expression EXP, which is a call to the memset builtin.  Return
> @@ -10172,3 +10161,20 @@ is_inexpensive_builtin (tree decl)
>
>    return false;
>  }
> +
> +/* Default implementation of TARGET_GEN_MEMSET_VALUE.  */
> +
> +rtx
> +default_gen_memset_value (rtx data, machine_mode mode)
> +{
> +  rtx target, coeff;
> +  char *p;
> +  size_t size = GET_MODE_SIZE (mode);
> +  p = XALLOCAVEC (char, size);
> +  memset (p, 1, size);
> +  coeff = c_readstr (p, mode);
> +
> +  target = convert_to_mode (mode, (rtx) data, 1);
> +  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> +  return force_reg (mode, target);
> +}
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 3805817..23ec694 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -55314,6 +55314,41 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
>      }
>  }
>
> +/* Implement the TARGET_GEN_MEMSET_VALUE hook.  */
> +
> +static rtx
> +ix86_gen_memset_value (rtx data, machine_mode mode)
> +{
> +  if (GET_MODE_SIZE (mode) <= 8)
> +    return default_gen_memset_value (data, mode);
> +
> +  if (GET_MODE_SIZE (mode) != 16)
> +    gcc_unreachable ();
> +
> +  rtx one, target;
> +  machine_mode one_mode;
> +
> +  if (TARGET_AVX2)
> +    {
> +      one_mode = QImode;
> +      one = data;
> +    }
> +  else
> +    {
> +      one_mode = SImode;
> +      one = default_gen_memset_value (data, one_mode);
> +    }
> +
> +  machine_mode vector_mode
> +    = mode_for_vector (one_mode,
> +                      GET_MODE_SIZE (mode) / GET_MODE_SIZE (one_mode));
> +  target = gen_reg_rtx (vector_mode);
> +  if (ix86_vector_duplicate_value (vector_mode, target, one))
> +    return convert_to_mode (mode, target, 1);
> +  else
> +    gcc_unreachable ();
> +}
> +
>  /* Address space support.
>
>     This is not "far pointers" in the 16-bit sense, but an easy way
> @@ -55814,6 +55849,9 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
>  #undef TARGET_HARD_REGNO_SCRATCH_OK
>  #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
>
> +#undef TARGET_GEN_MEMSET_VALUE
> +#define TARGET_GEN_MEMSET_VALUE ix86_gen_memset_value
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
>
>  #include "gt-i386.h"
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 8751143..7ce2e4a 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1963,6 +1963,19 @@ typedef struct ix86_args {
>      && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
>     ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
>
> +/* STORE_MAX_PIECES is the number of bytes at a time which we can
> +   store efficiently.
> +
> +   ??? We should use TImode in 32-bit mode and use OImode or XImode
> +   if they are available.  But since by_pieces_ninsns determines the
> +   widest mode with MAX_FIXED_MODE_SIZE, we can only use TImode in
> +   64-bit mode.  */
> +#define STORE_MAX_PIECES \
> +  ((TARGET_64BIT \
> +    && TARGET_SSE2 \
> +    && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
> +   ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
> +
>  /* If a memory-to-memory move would take MOVE_RATIO or more simple
>     move-instruction pairs, we will do a movmem or libcall instead.
>     Increasing the value will always make code faster, but eventually
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 9edb006..abab7dd 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11458,6 +11458,13 @@ This function prepares to emit a conditional comparison within a sequence
>   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
>  @end deftypefn
>
> +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, machine_mode @var{mode})
> +This function returns the RTL of a register containing
> +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
> +char value given in the RTL register @var{data}.  For example, if
> +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, struct loop *@var{loop})
>  This target hook returns a new value for the number of times @var{loop}
>  should be unrolled. The parameter @var{nunroll} is the number of times
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index a72c3d8..564c11ba 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -8112,6 +8112,8 @@ build_type_attribute_variant (@var{mdecl},
>
>  @hook TARGET_GEN_CCMP_NEXT
>
> +@hook TARGET_GEN_MEMSET_VALUE
> +
>  @hook TARGET_LOOP_UNROLL_ADJUST
>
>  @defmac POWI_MAX_MULTS
> diff --git a/gcc/target.def b/gcc/target.def
> index 929d9ea..c4eb6b4 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -2631,6 +2631,15 @@ DEFHOOK
>   rtx, (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
>   NULL)
>
> +DEFHOOK
> +(gen_memset_value,
> + "This function returns the RTL of a register containing\n\
> +@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned\n\
> +char value given in the RTL register @var{data}.  For example, if\n\
> +@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.",
> + rtx, (rtx data, machine_mode mode),
> + default_gen_memset_value)
> +
>  /* Return a new value for loop unroll size.  */
>  DEFHOOK
>  (loop_unroll_adjust,
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index 2e7ca72..798a998 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -257,5 +257,6 @@ extern bool default_optab_supported_p (int, machine_mode, machine_mode,
>                                        optimization_type);
>
>  extern unsigned int default_max_noce_ifcvt_seq_cost (edge);
> +extern rtx default_gen_memset_value (rtx, machine_mode);
>
>  #endif /* GCC_TARGHOOKS_H */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-1.c b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
> new file mode 100644
> index 0000000..b33c073
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-10.c b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
> new file mode 100644
> index 0000000..90df34a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
> new file mode 100644
> index 0000000..e234196
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-12.c b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
> new file mode 100644
> index 0000000..ebc0db1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-13.c b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
> new file mode 100644
> index 0000000..251061f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
> new file mode 100644
> index 0000000..e6426e6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-15.c b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
> new file mode 100644
> index 0000000..7ade143
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-16.c b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
> new file mode 100644
> index 0000000..d8ef81e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-17.c b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
> new file mode 100644
> index 0000000..c997cda
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-18.c b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
> new file mode 100644
> index 0000000..a611040
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 3, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-19.c b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
> new file mode 100644
> index 0000000..8367654
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
> new file mode 100644
> index 0000000..15f4396
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
> new file mode 100644
> index 0000000..247936c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-21.c b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
> new file mode 100644
> index 0000000..e4319dc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-22.c b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
> new file mode 100644
> index 0000000..e7b6368
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
> new file mode 100644
> index 0000000..703d663
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-24.c b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
> new file mode 100644
> index 0000000..aaec8c1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-25.c b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
> new file mode 100644
> index 0000000..9c38f86
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-26.c b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
> new file mode 100644
> index 0000000..0040c2b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-27.c b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
> new file mode 100644
> index 0000000..e742aa3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-28.c b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
> new file mode 100644
> index 0000000..7d6a8dd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
> new file mode 100644
> index 0000000..9847d5e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-3.c b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
> new file mode 100644
> index 0000000..907ee6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512bw -mavx512f -mtune=intel" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
> new file mode 100644
> index 0000000..cff88f7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-31.c b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
> new file mode 100644
> index 0000000..1221f72
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-32.c b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
> new file mode 100644
> index 0000000..0a8d0aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
> new file mode 100644
> index 0000000..65d4bfa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
> new file mode 100644
> index 0000000..e2c1fc8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-35.c b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
> new file mode 100644
> index 0000000..df6dd0d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 34);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-36.c b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
> new file mode 100644
> index 0000000..7f9829a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
> new file mode 100644
> index 0000000..ad8d996
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> new file mode 100644
> index 0000000..907ee6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512bw -mavx512f -mtune=intel" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-39.c b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
> new file mode 100644
> index 0000000..ab560c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512bw -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-4.c b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
> new file mode 100644
> index 0000000..17b0c25
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> new file mode 100644
> index 0000000..d818f35
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 66);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> new file mode 100644
> index 0000000..5f3fea5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> new file mode 100644
> index 0000000..948cf09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, 0, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> new file mode 100644
> index 0000000..9ab7c53
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
> new file mode 100644
> index 0000000..de45cb9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
> +
> +extern char *dst;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memset (dst, -1, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
> new file mode 100644
> index 0000000..b47c666
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-6.c b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
> new file mode 100644
> index 0000000..9535277
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=intel" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-7.c b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
> new file mode 100644
> index 0000000..f0466e8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-8.c b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
> new file mode 100644
> index 0000000..9147460
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-9.c b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
> new file mode 100644
> index 0000000..cedf8a3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512f -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 17);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> --
> 2.7.4
>
H.J. Lu Aug. 18, 2016, 2:53 p.m. UTC | #3
On Wed, Aug 17, 2016 at 11:21 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Wed, Aug 17, 2016 at 10:11 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>> builtin_memset_gen_str returns a register used for memset, which only
>> supports integer registers.  But a target may use vector registers in
>> memset.  This patch adds a TARGET_GEN_MEMSET_VALUE hook to duplicate
>> QImode value to mode derived from STORE_MAX_PIECES, which can be used
>> with vector instructions.  The default hook is the same as the original
>> builtin_memset_gen_str.  A target can override it to support vector
>> instructions for STORE_MAX_PIECES.
>>
>> Tested on x86-64 and i686.  Any comments?
>
> It looks to me you have attached an older version of the patch,
> STORE_MAX_PIECES change in i386.h is already in the mainline.
>

Did you mean MOVE_MAX_PIECES?  There is no STORE_MAX_PIECES
in i386.h

H.J.
Uros Bizjak Aug. 18, 2016, 3:26 p.m. UTC | #4
On Thu, Aug 18, 2016 at 4:53 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Aug 17, 2016 at 11:21 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Wed, Aug 17, 2016 at 10:11 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>> builtin_memset_gen_str returns a register used for memset, which only
>>> supports integer registers.  But a target may use vector registers in
>>> memset.  This patch adds a TARGET_GEN_MEMSET_VALUE hook to duplicate
>>> QImode value to mode derived from STORE_MAX_PIECES, which can be used
>>> with vector instructions.  The default hook is the same as the original
>>> builtin_memset_gen_str.  A target can override it to support vector
>>> instructions for STORE_MAX_PIECES.
>>>
>>> Tested on x86-64 and i686.  Any comments?
>>
>> It looks to me you have attached an older version of the patch,
>> STORE_MAX_PIECES change in i386.h is already in the mainline.
>>
>
> Did you mean MOVE_MAX_PIECES?  There is no STORE_MAX_PIECES
> in i386.h

Uh, yes. Sorry for the confusion.

Uros.
diff mbox

Patch

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 03a0dc8..79be38e 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3411,21 +3411,10 @@  static rtx
 builtin_memset_gen_str (void *data, HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
 			machine_mode mode)
 {
-  rtx target, coeff;
-  size_t size;
-  char *p;
-
-  size = GET_MODE_SIZE (mode);
-  if (size == 1)
+  if (GET_MODE_SIZE (mode) == 1)
     return (rtx) data;
 
-  p = XALLOCAVEC (char, size);
-  memset (p, 1, size);
-  coeff = c_readstr (p, mode);
-
-  target = convert_to_mode (mode, (rtx) data, 1);
-  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
-  return force_reg (mode, target);
+  return targetm.gen_memset_value ((rtx) data, mode);
 }
 
 /* Expand expression EXP, which is a call to the memset builtin.  Return
@@ -10172,3 +10161,20 @@  is_inexpensive_builtin (tree decl)
 
   return false;
 }
+
+/* Default implementation of TARGET_GEN_MEMSET_VALUE.  Return a MODE register
+   holding GET_MODE_SIZE (MODE) copies of the unsigned char value DATA.  */
+
+rtx
+default_gen_memset_value (rtx data, machine_mode mode)
+{
+  /* COEFF is read from SIZE bytes of 1; DATA * COEFF replicates DATA.  */
+  size_t size = GET_MODE_SIZE (mode);
+  char *p = XALLOCAVEC (char, size);
+  memset (p, 1, size);
+  rtx coeff = c_readstr (p, mode);
+
+  rtx target = convert_to_mode (mode, data, 1);
+  target = expand_mult (mode, target, coeff, NULL_RTX, 1);
+  return force_reg (mode, target);
+}
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3805817..23ec694 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -55314,6 +55314,41 @@  ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
     }
 }
 
+/* Implement the TARGET_GEN_MEMSET_VALUE hook.  Modes of at most 8 bytes
+   are handled by default_gen_memset_value.  A 16-byte MODE is built by
+   duplicating a scalar element across a vector register and converting
+   that register to MODE.  */
+
+static rtx
+ix86_gen_memset_value (rtx data, machine_mode mode)
+{
+  unsigned int nbytes = GET_MODE_SIZE (mode);
+
+  if (nbytes <= 8)
+    return default_gen_memset_value (data, mode);
+
+  /* Only 16-byte modes are handled beyond this point.  */
+  gcc_assert (nbytes == 16);
+
+  /* With AVX2, DATA itself is the QImode element to duplicate; otherwise
+     DATA is first replicated into an SImode element.  */
+  machine_mode elt_mode = TARGET_AVX2 ? QImode : SImode;
+  rtx elt = data;
+  if (!TARGET_AVX2)
+    elt = default_gen_memset_value (data, elt_mode);
+
+  machine_mode vec_mode
+    = mode_for_vector (elt_mode, nbytes / GET_MODE_SIZE (elt_mode));
+  rtx vec = gen_reg_rtx (vec_mode);
+
+  /* Failing to emit the duplication is treated as unreachable.  */
+  if (!ix86_vector_duplicate_value (vec_mode, vec, elt))
+    gcc_unreachable ();
+
+  /* Hand the vector register back in the requested MODE.  */
+  return convert_to_mode (mode, vec, 1);
+}
+
 /* Address space support.
 
    This is not "far pointers" in the 16-bit sense, but an easy way
@@ -55814,6 +55849,9 @@  ix86_addr_space_zero_address_valid (addr_space_t as)
 #undef TARGET_HARD_REGNO_SCRATCH_OK
 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
 
+#undef TARGET_GEN_MEMSET_VALUE
+#define TARGET_GEN_MEMSET_VALUE ix86_gen_memset_value
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-i386.h"
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8751143..7ce2e4a 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1963,6 +1963,19 @@  typedef struct ix86_args {
     && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
    ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
 
+/* STORE_MAX_PIECES is the number of bytes at a time which we can
+   store efficiently: GET_MODE_SIZE (TImode) for 64-bit targets with
+   SSE2 and TARGET_SSE_UNALIGNED_STORE_OPTIMAL, else UNITS_PER_WORD.
+
+   ??? We should use TImode in 32-bit mode and use OImode or XImode if they
+   are available.  But since by_pieces_ninsns determines the widest mode
+   with MAX_FIXED_MODE_SIZE, we can only use TImode in 64-bit mode.  */
+#define STORE_MAX_PIECES \
+  ((TARGET_64BIT \
+    && TARGET_SSE2 \
+    && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
+   ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
+
 /* If a memory-to-memory move would take MOVE_RATIO or more simple
    move-instruction pairs, we will do a movmem or libcall instead.
    Increasing the value will always make code faster, but eventually
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 9edb006..abab7dd 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11458,6 +11458,13 @@  This function prepares to emit a conditional comparison within a sequence
  @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.
 @end deftypefn
 
+@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_VALUE (rtx @var{data}, machine_mode @var{mode})
+This function returns the RTL of a register containing
+@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned
+char value given in the RTL register @var{data}.  For example, if
+@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.
+@end deftypefn
+
 @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, struct loop *@var{loop})
 This target hook returns a new value for the number of times @var{loop}
 should be unrolled. The parameter @var{nunroll} is the number of times
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index a72c3d8..564c11ba 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8112,6 +8112,8 @@  build_type_attribute_variant (@var{mdecl},
 
 @hook TARGET_GEN_CCMP_NEXT
 
+@hook TARGET_GEN_MEMSET_VALUE
+
 @hook TARGET_LOOP_UNROLL_ADJUST
 
 @defmac POWI_MAX_MULTS
diff --git a/gcc/target.def b/gcc/target.def
index 929d9ea..c4eb6b4 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2631,6 +2631,15 @@  DEFHOOK
  rtx, (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
  NULL)
 
+DEFHOOK
+(gen_memset_value,
+ "This function returns the RTL of a register containing\n\
+@code{GET_MODE_SIZE (@var{mode})} consecutive copies of the unsigned\n\
+char value given in the RTL register @var{data}.  For example, if\n\
+@var{mode} is 4 bytes wide, return the RTL for 0x01010101*@var{data}.",
+ rtx, (rtx data, machine_mode mode),
+ default_gen_memset_value)
+
 /* Return a new value for loop unroll size.  */
 DEFHOOK
 (loop_unroll_adjust,
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 2e7ca72..798a998 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -257,5 +257,6 @@  extern bool default_optab_supported_p (int, machine_mode, machine_mode,
 				       optimization_type);
 
 extern unsigned int default_max_noce_ifcvt_seq_cost (edge);
+extern rtx default_gen_memset_value (rtx, machine_mode);
 
 #endif /* GCC_TARGHOOKS_H */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-1.c b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
new file mode 100644
index 0000000..b33c073
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-10.c b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
new file mode 100644
index 0000000..90df34a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-10.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 64);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
new file mode 100644
index 0000000..e234196
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-12.c b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
new file mode 100644
index 0000000..ebc0db1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-12.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-13.c b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
new file mode 100644
index 0000000..251061f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-13.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
new file mode 100644
index 0000000..e6426e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-15.c b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
new file mode 100644
index 0000000..7ade143
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-15.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-16.c b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
new file mode 100644
index 0000000..d8ef81e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-16.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-17.c b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
new file mode 100644
index 0000000..c997cda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-17.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-18.c b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
new file mode 100644
index 0000000..a611040
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-18.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-19.c b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
new file mode 100644
index 0000000..8367654
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-19.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 64);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
new file mode 100644
index 0000000..15f4396
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
new file mode 100644
index 0000000..247936c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-21.c b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
new file mode 100644
index 0000000..e4319dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-22.c b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
new file mode 100644
index 0000000..e7b6368
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-22.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
new file mode 100644
index 0000000..703d663
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-24.c b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
new file mode 100644
index 0000000..aaec8c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-24.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-25.c b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
new file mode 100644
index 0000000..9c38f86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-25.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-26.c b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
new file mode 100644
index 0000000..0040c2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-26.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-27.c b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
new file mode 100644
index 0000000..e742aa3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-27.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 17);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-28.c b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
new file mode 100644
index 0000000..7d6a8dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-28.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
new file mode 100644
index 0000000..9847d5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-3.c b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
new file mode 100644
index 0000000..907ee6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-3.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512bw -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
new file mode 100644
index 0000000..cff88f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 64);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-31.c b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
new file mode 100644
index 0000000..1221f72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-31.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-32.c b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
new file mode 100644
index 0000000..0a8d0aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-32.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
new file mode 100644
index 0000000..65d4bfa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
new file mode 100644
index 0000000..e2c1fc8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-35.c b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
new file mode 100644
index 0000000..df6dd0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-35.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 34);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-36.c b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
new file mode 100644
index 0000000..7f9829a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-36.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
new file mode 100644
index 0000000..ad8d996
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
new file mode 100644
index 0000000..907ee6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512bw -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-39.c b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
new file mode 100644
index 0000000..ab560c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-39.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512bw -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-4.c b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
new file mode 100644
index 0000000..17b0c25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-4.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
new file mode 100644
index 0000000..d818f35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 66);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
new file mode 100644
index 0000000..5f3fea5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
new file mode 100644
index 0000000..948cf09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 0, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
new file mode 100644
index 0000000..9ab7c53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
new file mode 100644
index 0000000..de45cb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, -1, 33);
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
new file mode 100644
index 0000000..b47c666
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-6.c b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
new file mode 100644
index 0000000..9535277
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-6.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=intel" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-7.c b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
new file mode 100644
index 0000000..f0466e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-7.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-8.c b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
new file mode 100644
index 0000000..9147460
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-8.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-9.c b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
new file mode 100644
index 0000000..cedf8a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-9.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 17);
+}
+
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */