From 78eb1a4c4938494349032f0e10017ce553fb8fdd Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 21 Aug 2020 09:42:49 -0700
Subject: [PATCH] x86: Use target("general-regs-only,baseline-isas-only") in
<cpuid.h>
Add the -mbaseline-isas-only option and the target("baseline-isas-only")
attribute to restrict code generation to the baseline ISAs, which are
FXSR, MMX, SSE and SSE2 in 64-bit mode. Use only general registers and
baseline ISAs to perform the CPUID check in <cpuid.h>. Functions with the
general-regs-only and baseline-isas-only attributes can be inlined if the
caller supports the same set of ISAs.
gcc/
PR target/96744
* common/config/i386/i386-common.c (ix86_handle_option): Support
-mbaseline-isas-only.
* config/i386/cpuid.h: Add #pragma GCC
target("general-regs-only,baseline-isas-only").
* config/i386/i386-options.c (ix86_valid_target_attribute_inner_p):
Handle baseline-isas-only.
* config/i386/i386.c (ix86_can_inline_p): Allow inline functions
with baseline-isas-only and general-regs-only attributes if caller
supports the same set of ISAs.
* config/i386/i386.h (TARGET_64BIT_BASELINE_ISAS): New.
* config/i386/i386.opt: Add -mbaseline-isas-only.
* doc/extend.texi: Document target("baseline-isas-only") function
attribute.
* doc/invoke.texi: Document -mbaseline-isas-only.
gcc/testsuite/
PR target/96744
* gcc.target/i386/avx512-check.h: Add #pragma GCC
target("baseline-isas-only") for CPUID check.
* gcc.target/i386/pr96744-10.c: New test.
* gcc.target/i386/pr96744-11.c: Likewise.
* gcc.target/i386/pr96744-12.c: Likewise.
* gcc.target/i386/pr96744-13.c: Likewise.
* gcc.target/i386/pr96744-14.c: Likewise.
* gcc.target/i386/pr96744-15.c: Likewise.
---
gcc/common/config/i386/i386-common.c | 28 +++++++++++++
gcc/config/i386/cpuid.h | 13 ++++++
gcc/config/i386/i386-options.c | 7 +++-
gcc/config/i386/i386.c | 34 ++++++++-------
gcc/config/i386/i386.h | 4 ++
gcc/config/i386/i386.opt | 6 ++-
gcc/doc/extend.texi | 4 ++
gcc/doc/invoke.texi | 5 +++
gcc/testsuite/gcc.target/i386/avx512-check.h | 5 +++
gcc/testsuite/gcc.target/i386/pr96744-10.c | 27 ++++++++++++
gcc/testsuite/gcc.target/i386/pr96744-11.c | 27 ++++++++++++
gcc/testsuite/gcc.target/i386/pr96744-12.c | 27 ++++++++++++
gcc/testsuite/gcc.target/i386/pr96744-13.c | 32 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr96744-14.c | 44 ++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr96744-15.c | 44 ++++++++++++++++++++
15 files changed, 290 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-10.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-11.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-12.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-13.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-14.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr96744-15.c
@@ -338,6 +338,34 @@ ix86_handle_option (struct gcc_options *opts,
gcc_unreachable ();
return true;
+ case OPT_mbaseline_isas_only:
+ if (value)
+ {
+ /* Only enable baseline ISAs. */
+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
+ {
+ HOST_WIDE_INT x_ix86_isa_flags;
+ if (TARGET_LP64_P (opts->x_ix86_isa_flags))
+ x_ix86_isa_flags = (OPTION_MASK_ISA_64BIT
+ | OPTION_MASK_ABI_64);
+ else
+ x_ix86_isa_flags = (OPTION_MASK_ISA_64BIT
+ | OPTION_MASK_ABI_X32);
+ if (!TARGET_GENERAL_REGS_ONLY_P (opts->x_ix86_target_flags))
+ x_ix86_isa_flags |= TARGET_64BIT_BASELINE_ISAS;
+ opts->x_ix86_isa_flags = x_ix86_isa_flags;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags = 0;
+ opts->x_ix86_fpmath = FPMATH_387;
+ }
+ opts->x_ix86_isa_flags2 = 0;
+ }
+ else
+ gcc_unreachable ();
+ return true;
+
case OPT_mmmx:
if (value)
{
@@ -24,6 +24,17 @@
#ifndef _CPUID_H_INCLUDED
#define _CPUID_H_INCLUDED
+#pragma GCC push_options
+#if __GNUC__ >= 11
+#pragma GCC target("general-regs-only,baseline-isas-only")
+#else
+#ifdef __x86_64__
+#pragma GCC target("arch=x86-64")
+#else
+#pragma GCC target("arch=i386")
+#endif
+#endif
+
/* %eax */
#define bit_AVX512BF16 (1 << 5)
@@ -324,4 +335,6 @@ __cpuidex (int __cpuid_info[4], int __leaf, int __subleaf)
__cpuid_info[2], __cpuid_info[3]);
}
+#pragma GCC pop_options
+
#endif /* _CPUID_H_INCLUDED */
@@ -1072,6 +1072,10 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_IX86_YES ("general-regs-only",
OPT_mgeneral_regs_only,
OPTION_MASK_GENERAL_REGS_ONLY),
+
+ IX86_ATTR_IX86_YES ("baseline-isas-only",
+ OPT_mbaseline_isas_only,
+ OPTION_MASK_BASELINE_ISAS_ONLY),
};
location_t loc
@@ -1187,7 +1191,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
else if (type == ix86_opt_ix86_yes || type == ix86_opt_ix86_no)
{
- if (mask == OPTION_MASK_GENERAL_REGS_ONLY)
+ if (mask == OPTION_MASK_GENERAL_REGS_ONLY
+ || mask == OPTION_MASK_BASELINE_ISAS_ONLY)
{
if (type != ix86_opt_ix86_yes)
gcc_unreachable ();
@@ -551,18 +551,6 @@ ix86_can_inline_p (tree caller, tree callee)
tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
- /* Changes of those flags can be tolerated for always inlines. Lets hope
- user knows what he is doing. */
- const unsigned HOST_WIDE_INT always_inline_safe_mask
- = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
- | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
- | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
- | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
- | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
- | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
- | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
-
-
if (!callee_tree)
callee_tree = target_option_default_node;
if (!caller_tree)
@@ -573,10 +561,26 @@ ix86_can_inline_p (tree caller, tree callee)
struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
bool ret = false;
+ /* NB: Also tolerate those flags, as for always inlines, when the callee
+ has the target ("baseline-isas-only") attribute. */
bool always_inline
- = (DECL_DISREGARD_INLINE_LIMITS (callee)
- && lookup_attribute ("always_inline",
- DECL_ATTRIBUTES (callee)));
+ = ((DECL_DISREGARD_INLINE_LIMITS (callee)
+ && lookup_attribute ("always_inline",
+ DECL_ATTRIBUTES (callee)))
+ || TARGET_BASELINE_ISAS_ONLY_P (callee_opts->x_ix86_target_flags));
+
+ /* Changes of those flags can be tolerated for always inlines. Lets hope
+ user knows what he is doing. */
+ const unsigned HOST_WIDE_INT always_inline_safe_mask
+ = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
+ | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
+ | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
+ | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
+ | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
+ | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
+ | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER
+ | (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags)
+ ? MASK_80387 : 0));
cgraph_node *callee_node = cgraph_node::get (callee);
/* Callee's isa options should be a subset of the caller's, i.e. a SSE4
@@ -211,6 +211,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_16BIT TARGET_CODE16
#define TARGET_16BIT_P(x) TARGET_CODE16_P(x)
+#define TARGET_64BIT_BASELINE_ISAS \
+ (OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE \
+ | OPTION_MASK_ISA_SSE2)
+
#define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
#include "config/vxworks-dummy.h"
@@ -1015,6 +1015,10 @@ mgeneral-regs-only
Target Report RejectNegative Mask(GENERAL_REGS_ONLY) Var(ix86_target_flags) Save
Generate code which uses only the general registers.
+mbaseline-isas-only
+Target Report RejectNegative Mask(BASELINE_ISAS_ONLY) Var(ix86_target_flags) Save
+Generate code which uses only the baseline ISAs.
+
mshstk
Target Report Mask(ISA_SHSTK) Var(ix86_isa_flags) Save
Enable shadow stack built-in functions from Control-flow Enforcement
@@ -1114,4 +1118,4 @@ Support SERIALIZE built-in functions and code generation.
mtsxldtrk
Target Report Mask(ISA2_TSXLDTRK) Var(ix86_isa_flags2) Save
-Support TSXLDTRK built-in functions and code generation.
\ No newline at end of file
+Support TSXLDTRK built-in functions and code generation.
@@ -6660,6 +6660,10 @@ doing a floating-point division.
@cindex @code{target("general-regs-only")} function attribute, x86
Generate code which uses only the general registers.
+@item baseline-isas-only
+@cindex @code{target("baseline-isas-only")} function attribute, x86
+Generate code which uses only the baseline ISAs.
+
@item arch=@var{ARCH}
@cindex @code{target("arch=@var{ARCH}")} function attribute, x86
Specify the architecture to generate code for in compiling the function.
@@ -30579,6 +30579,11 @@ Generate code that uses only the general-purpose registers. This
prevents the compiler from using floating-point, vector, mask and bound
registers.
+@item -mbaseline-isas-only
+@opindex mbaseline-isas-only
+Generate code that uses only the baseline ISAs: FXSR, MMX, SSE and SSE2
+in 64-bit mode, and no ISA extensions in 32-bit mode.
+
@item -mindirect-branch=@var{choice}
@opindex mindirect-branch
Convert indirect call and jump with @var{choice}. The default is
@@ -25,6 +25,9 @@ do_test (void)
}
#endif
+#pragma GCC push_options
+#pragma GCC target("baseline-isas-only")
+
static int
check_osxsave (void)
{
@@ -110,3 +113,5 @@ main ()
#endif
return 0;
}
+
+#pragma GCC pop_options
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <cpuid.h>
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+ int cpuid_info[4];
+
+ if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ __cpuidex (cpuid_info, 7, 0);
+
+ if (cpuid_info[0] != eax
+ || cpuid_info[1] != ebx
+ || cpuid_info[2] != ecx
+ || cpuid_info[3] != edx)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not {call[ \t]+_?__get_cpuid_count} } } */
+/* { dg-final { scan-assembler-not {call[ \t]+_?__cpuidex} } } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mgeneral-regs-only" } */
+
+#include <cpuid.h>
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+ int cpuid_info[4];
+
+ if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ __cpuidex (cpuid_info, 7, 0);
+
+ if (cpuid_info[0] != eax
+ || cpuid_info[1] != ebx
+ || cpuid_info[2] != ecx
+ || cpuid_info[3] != edx)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not {call[ \t]+_?__get_cpuid_count} } } */
+/* { dg-final { scan-assembler-not {call[ \t]+_?__cpuidex} } } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse" } */
+
+#include <cpuid.h>
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+ int cpuid_info[4];
+
+ if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ __cpuidex (cpuid_info, 7, 0);
+
+ if (cpuid_info[0] != eax
+ || cpuid_info[1] != ebx
+ || cpuid_info[2] != ecx
+ || cpuid_info[3] != edx)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not {call[ \t]+_?__get_cpuid_count} } } */
+/* { dg-final { scan-assembler-not {call[ \t]+_?__cpuidex} } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse" } */
+
+#include <cpuid.h>
+
+#pragma GCC push_options
+#pragma GCC target("general-regs-only,baseline-isas-only")
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+ int cpuid_info[4];
+
+ if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ __cpuidex (cpuid_info, 7, 0);
+
+ if (cpuid_info[0] != eax
+ || cpuid_info[1] != ebx
+ || cpuid_info[2] != ecx
+ || cpuid_info[3] != edx)
+ __builtin_abort ();
+
+ return 0;
+}
+
+#pragma GCC pop_options
+
+/* { dg-final { scan-assembler-not {call[ \t]+_?__get_cpuid_count} } } */
+/* { dg-final { scan-assembler-not {call[ \t]+_?__cpuidex} } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+#include <cpuid.h>
+
+#pragma GCC push_options
+#pragma GCC target("baseline-isas-only")
+
+static __inline void
+foo (int __cpuid_info[4], int __leaf, int __subleaf)
+{
+ __cpuid_count (__leaf, __subleaf, __cpuid_info[0], __cpuid_info[1],
+ __cpuid_info[2], __cpuid_info[3]);
+}
+
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target("general-regs-only")
+
+int
+bar (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ int cpuid_info[4];
+
+ if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ foo (cpuid_info, 7, 0);
+
+ if (cpuid_info[0] != eax
+ || cpuid_info[1] != ebx
+ || cpuid_info[2] != ecx
+ || cpuid_info[3] != edx)
+ __builtin_abort ();
+
+ return 0;
+}
+
+#pragma GCC pop_options
+
+/* { dg-final { scan-assembler-not {call[ \t]+_?__get_cpuid_count} } } */
+/* { dg-final { scan-assembler {call[ \t]+_?foo} } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+#include <cpuid.h>
+
+#pragma GCC push_options
+#pragma GCC target("general-regs-only")
+
+static __inline void
+foo (int __cpuid_info[4], int __leaf, int __subleaf)
+{
+ __cpuid_count (__leaf, __subleaf, __cpuid_info[0], __cpuid_info[1],
+ __cpuid_info[2], __cpuid_info[3]);
+}
+
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target("baseline-isas-only")
+
+int
+bar (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ int cpuid_info[4];
+
+ if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ foo (cpuid_info, 7, 0);
+
+ if (cpuid_info[0] != eax
+ || cpuid_info[1] != ebx
+ || cpuid_info[2] != ecx
+ || cpuid_info[3] != edx)
+ __builtin_abort ();
+
+ return 0;
+}
+
+#pragma GCC pop_options
+
+/* { dg-final { scan-assembler-not {call[ \t]+_?__get_cpuid_count} } } */
+/* { dg-final { scan-assembler {call[ \t]+_?foo} } } */
--
2.26.2