From 6944526e088df6c4ef73ab797ee90f99c0ad566a Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Fri, 16 Apr 2021 11:29:10 +0800
Subject: [PATCH] [i386] MASK_AVX256_SPLIT_UNALIGNED_LOAD/STORE should be
cleared in opts->x_target_flags when
X86_TUNE_AVX256_UNALIGNED_LOAD/STORE_OPTIMAL is enabled by target attribute.
gcc/ChangeLog:
PR target/100093
* config/i386/i386-options.c (ix86_option_override_internal):
Clear MASK_AVX256_SPLIT_UNALIGNED_LOAD/STORE in x_target_flags
when X86_TUNE_AVX256_UNALIGNED_LOAD/STORE_OPTIMAL is enabled
by target attribute.
gcc/testsuite/ChangeLog:
PR target/100093
* gcc.target/i386/pr100093.c: New test.
---
gcc/config/i386/i386-options.c | 7 +++++++
gcc/testsuite/gcc.target/i386/pr100093.c | 12 ++++++++++++
2 files changed, 19 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr100093.c
@@ -2853,9 +2853,16 @@ ix86_option_override_internal (bool main_args_p,
if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
&& !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
+ else if (!main_args_p
+ && ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL])
+ opts->x_target_flags &= ~MASK_AVX256_SPLIT_UNALIGNED_LOAD;
+
if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
&& !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+ else if (!main_args_p
+ && ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL])
+ opts->x_target_flags &= ~MASK_AVX256_SPLIT_UNALIGNED_STORE;
/* Enable 128-bit AVX instruction generation
for the auto-vectorizer. */
new file mode 100644
@@ -0,0 +1,12 @@
+/* PR target/100093 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=znver1" } */
+/* { dg-final { scan-assembler-not "vextractf128" } } */
+
+__attribute__((target("tune=skylake-avx512"))) /* Per-function tune override; dg-options compiles the file with -march=znver1.  */
+void fill_avx2(double *__restrict__ data, int n, double value) /* Writes VALUE to data[0] .. data[n * 16 - 1].  */
+{
+ for (int i = 0; i < n * 16; i++) { /* Plain fill loop compiled at -O3; dg-final requires no vextractf128 in the output.  */
+ data[i] = value; /* NOTE(review): presumably expected to be emitted as an unsplit 256-bit store -- confirm against PR target/100093.  */
+ }
+}
--
2.18.1