From 113c797d59102d400b3a7342192f68b789bba0de Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 18 May 2020 05:58:41 -0700
Subject: [PATCH] x86: Move cpuinfo.h from libgcc to common/config/i386
Move cpuinfo.h from libgcc to common/config/i386 so that get_intel_cpu
can be shared by libgcc, GCC driver, gcc.target/i386/builtin_target.c
and libgfortran to detect the specific type of Intel and AMD CPUs. Use
the same enum processor_features in libgcc and x86 backend. Add static
assert to x86 backend to verify that libgcc and x86 backend are in sync.
Update processor feature check in libgcc:
1. Update FEATURE_GFNI check to support SSE, AVX and AVX512 versions of
GFNI.
2. Add missing FEATURE_AVX512VP2INTERSECT.
3. Add FEATURE_XSAVEOPT, FEATURE_CLWB and FEATURE_CLZERO for get_amd_cpu.
4. Always set __cpu_features2 for has_cpu_feature.
Update libgfortran to use has_cpu_feature to detect x86 CPU features.
gcc/
PR target/95212
PR target/95220
* common/config/i386/cpuinfo.h: Moved from libgcc/config/i386.
(processor_features): Add FEATURE_AVX512VP2INTERSECT,
FEATURE_XSAVEOPT, FEATURE_CLWB and FEATURE_CLZERO.
(CHECK___builtin_cpu_is): New. Defined as empty if not defined.
(get_amd_cpu): Moved from libgcc/config/i386/cpuinfo.c. Add
arguments for BMI, AVX2, XOP, XSAVEOPT, CLWB and CLZERO. Use
CHECK___builtin_cpu_is. Return AMD CPU name.
(get_intel_cpu): Moved from libgcc/config/i386/cpuinfo.c. Add
an argument for AVX512VNNI. Use CHECK___builtin_cpu_is. Return
Intel CPU name.
(has_cpu_feature): New function.
* config/i386/driver-i386.c: Include
"common/config/i386/cpuinfo.h".
(host_detect_local_cpu): Call get_amd_cpu to get AMD CPU name.
Call get_intel_cpu to get Intel CPU name.
* config/i386/i386-builtins.c: Include
"common/config/i386/cpuinfo.h".
(processor_features): Removed. Replace F_XXX with FEATURE_XXX.
(isa_names_table): Add xsaveopt, clwb and clzero.
(CHECK_processor_vendor, CHECK_processor_types,
CHECK_processor_subtypes): New. Add static assert to x86
backend to verify that libgcc and x86 backend are in sync.
gcc/testsuite/
PR target/95212
PR target/95220
* gcc.target/i386/builtin_target.c: Include <stddef.h> and
../../../common/config/i386/cpuinfo.h.
(CHECK___builtin_cpu_is): New.
(inline): New. Defined as empty.
(check_amd_cpu_model): Removed.
	(check_intel_cpu_model): Likewise.
(check_detailed): Call get_amd_cpu instead of check_amd_cpu_model.
Call get_intel_cpu instead of check_intel_cpu_model.
libgcc/
PR target/95212
PR target/95220
* config/i386/cpuinfo.h: Moved to ... gcc/common/config/i386.
* config/i386/cpuinfo.c: Include "common/config/i386/cpuinfo.h".
(__cpu_features2): Make it static in libgcc_s.so.1.
(get_amd_cpu): Moved to ... gcc/common/config/i386/cpuinfo.h.
(get_intel_cpu): Moved to ... gcc/common/config/i386/cpuinfo.h.
(get_available_features): Fix FEATURE_GFNI check. Also check
FEATURE_AVX512VP2INTERSECT, FEATURE_XSAVEOPT, FEATURE_CLWB and
FEATURE_CLZERO. Always set __cpu_features2.
(__cpu_indicator_init): Call get_available_features before
calling get_amd_cpu. Pass has_cpu_feature (FEATURE_XXX) to
get_amd_cpu. Call get_available_features before calling
get_intel_cpu. Pass has_cpu_feature (FEATURE_AVX512VNNI) to
get_intel_cpu.
libgfortran/
PR target/95212
PR target/95220
* m4/matmul.m4: Include <common/config/i386/cpuinfo.h> instead
of <config/i386/cpuinfo.h>. Use has_cpu_feature.
* generated/matmul_c10.c: Regenerated.
* generated/matmul_c16.c: Likewise.
* generated/matmul_c4.c: Likewise.
* generated/matmul_c8.c: Likewise.
* generated/matmul_i1.c: Likewise.
* generated/matmul_i16.c: Likewise.
* generated/matmul_i2.c: Likewise.
* generated/matmul_i4.c: Likewise.
* generated/matmul_i8.c: Likewise.
* generated/matmul_r10.c: Likewise.
* generated/matmul_r16.c: Likewise.
* generated/matmul_r4.c: Likewise.
* generated/matmul_r8.c: Likewise.
---
gcc/common/config/i386/cpuinfo.h | 519 ++++++++++++++++++
gcc/config/i386/driver-i386.c | 221 ++------
gcc/config/i386/i386-builtins.c | 203 ++++---
.../gcc.target/i386/builtin_target.c | 194 +------
libgcc/config/i386/cpuinfo.c | 263 ++-------
libgcc/config/i386/cpuinfo.h | 135 -----
libgfortran/generated/matmul_c10.c | 18 +-
libgfortran/generated/matmul_c16.c | 18 +-
libgfortran/generated/matmul_c4.c | 18 +-
libgfortran/generated/matmul_c8.c | 18 +-
libgfortran/generated/matmul_i1.c | 18 +-
libgfortran/generated/matmul_i16.c | 18 +-
libgfortran/generated/matmul_i2.c | 18 +-
libgfortran/generated/matmul_i4.c | 18 +-
libgfortran/generated/matmul_i8.c | 18 +-
libgfortran/generated/matmul_r10.c | 18 +-
libgfortran/generated/matmul_r16.c | 18 +-
libgfortran/generated/matmul_r4.c | 18 +-
libgfortran/generated/matmul_r8.c | 18 +-
libgfortran/m4/matmul.m4 | 18 +-
20 files changed, 855 insertions(+), 932 deletions(-)
create mode 100644 gcc/common/config/i386/cpuinfo.h
delete mode 100644 libgcc/config/i386/cpuinfo.h
new file mode 100644
@@ -0,0 +1,519 @@
+/* Get CPU type and Features for x86 processors.
+ Copyright (C) 2012-2020 Free Software Foundation, Inc.
+ Contributed by Sriraman Tallam (tmsriram@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Processor Vendor and Models. */
+
+enum processor_vendor
+{
+ VENDOR_INTEL = 1,
+ VENDOR_AMD,
+ VENDOR_OTHER,
+ VENDOR_MAX
+};
+
+/* Any new types or subtypes have to be inserted at the end. */
+
+enum processor_types
+{
+ INTEL_BONNELL = 1,
+ INTEL_CORE2,
+ INTEL_COREI7,
+ AMDFAM10H,
+ AMDFAM15H,
+ INTEL_SILVERMONT,
+ INTEL_KNL,
+ AMD_BTVER1,
+ AMD_BTVER2,
+ AMDFAM17H,
+ INTEL_KNM,
+ INTEL_GOLDMONT,
+ INTEL_GOLDMONT_PLUS,
+ INTEL_TREMONT,
+ CPU_TYPE_MAX
+};
+
+enum processor_subtypes
+{
+ INTEL_COREI7_NEHALEM = 1,
+ INTEL_COREI7_WESTMERE,
+ INTEL_COREI7_SANDYBRIDGE,
+ AMDFAM10H_BARCELONA,
+ AMDFAM10H_SHANGHAI,
+ AMDFAM10H_ISTANBUL,
+ AMDFAM15H_BDVER1,
+ AMDFAM15H_BDVER2,
+ AMDFAM15H_BDVER3,
+ AMDFAM15H_BDVER4,
+ AMDFAM17H_ZNVER1,
+ INTEL_COREI7_IVYBRIDGE,
+ INTEL_COREI7_HASWELL,
+ INTEL_COREI7_BROADWELL,
+ INTEL_COREI7_SKYLAKE,
+ INTEL_COREI7_SKYLAKE_AVX512,
+ INTEL_COREI7_CANNONLAKE,
+ INTEL_COREI7_ICELAKE_CLIENT,
+ INTEL_COREI7_ICELAKE_SERVER,
+ AMDFAM17H_ZNVER2,
+ INTEL_COREI7_CASCADELAKE,
+ INTEL_COREI7_TIGERLAKE,
+ INTEL_COREI7_COOPERLAKE,
+ CPU_SUBTYPE_MAX
+};
+
+/* ISA Features supported. New features have to be inserted at the end. */
+
+enum processor_features
+{
+ FEATURE_CMOV = 0,
+ FEATURE_MMX,
+ FEATURE_POPCNT,
+ FEATURE_SSE,
+ FEATURE_SSE2,
+ FEATURE_SSE3,
+ FEATURE_SSSE3,
+ FEATURE_SSE4_1,
+ FEATURE_SSE4_2,
+ FEATURE_AVX,
+ FEATURE_AVX2,
+ FEATURE_SSE4_A,
+ FEATURE_FMA4,
+ FEATURE_XOP,
+ FEATURE_FMA,
+ FEATURE_AVX512F,
+ FEATURE_BMI,
+ FEATURE_BMI2,
+ FEATURE_AES,
+ FEATURE_PCLMUL,
+ FEATURE_AVX512VL,
+ FEATURE_AVX512BW,
+ FEATURE_AVX512DQ,
+ FEATURE_AVX512CD,
+ FEATURE_AVX512ER,
+ FEATURE_AVX512PF,
+ FEATURE_AVX512VBMI,
+ FEATURE_AVX512IFMA,
+ FEATURE_AVX5124VNNIW,
+ FEATURE_AVX5124FMAPS,
+ FEATURE_AVX512VPOPCNTDQ,
+ FEATURE_AVX512VBMI2,
+ FEATURE_GFNI,
+ FEATURE_VPCLMULQDQ,
+ FEATURE_AVX512VNNI,
+ FEATURE_AVX512BITALG,
+ FEATURE_AVX512VP2INTERSECT,
+ FEATURE_AVX512BF16,
+ FEATURE_XSAVEOPT,
+ FEATURE_CLWB,
+ FEATURE_CLZERO
+};
+
+extern struct __processor_model
+{
+ unsigned int __cpu_vendor;
+ unsigned int __cpu_type;
+ unsigned int __cpu_subtype;
+ unsigned int __cpu_features[1];
+} __cpu_model;
+extern unsigned int __cpu_features2;
+
+#ifndef CHECK___builtin_cpu_is
+# define CHECK___builtin_cpu_is(cpu)
+#endif
+
+/* Get the specific type of AMD CPU and return AMD CPU name. Return
+ NULL for unknown AMD CPU. */
+
+static inline const char *
+get_amd_cpu (struct __processor_model *cpu_model, unsigned int family,
+ unsigned int model, int has_bmi, int has_avx2, int has_xop,
+ int has_xsaveopt, int has_clwb, int has_clzero)
+{
+ const char *cpu = NULL;
+
+ switch (family)
+ {
+ case 0x10:
+ /* AMD Family 10h. */
+ cpu = "amdfam10";
+ cpu_model->__cpu_type = AMDFAM10H;
+ switch (model)
+ {
+ case 0x2:
+ /* Barcelona. */
+ CHECK___builtin_cpu_is ("amdfam10h");
+ CHECK___builtin_cpu_is ("barcelona");
+ cpu_model->__cpu_subtype = AMDFAM10H_BARCELONA;
+ break;
+ case 0x4:
+ /* Shanghai. */
+ CHECK___builtin_cpu_is ("amdfam10h");
+ CHECK___builtin_cpu_is ("shanghai");
+ cpu_model->__cpu_subtype = AMDFAM10H_SHANGHAI;
+ break;
+ case 0x8:
+ /* Istanbul. */
+ CHECK___builtin_cpu_is ("amdfam10h");
+ CHECK___builtin_cpu_is ("istanbul");
+ cpu_model->__cpu_subtype = AMDFAM10H_ISTANBUL;
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x14:
+ /* AMD Family 14h "btver1". */
+ cpu = "btver1";
+ CHECK___builtin_cpu_is ("btver1");
+ cpu_model->__cpu_type = AMD_BTVER1;
+ break;
+ case 0x15:
+ /* AMD Family 15h "Bulldozer". */
+ cpu_model->__cpu_type = AMDFAM15H;
+ if (model == 0x2)
+ {
+ /* Bulldozer version 2 "Piledriver" */
+ cpu = "bdver2";
+ CHECK___builtin_cpu_is ("bdver2");
+ cpu_model->__cpu_subtype = AMDFAM15H_BDVER2;
+ }
+ else if (model <= 0xf)
+ {
+ /* Bulldozer version 1. */
+ cpu = "bdver1";
+ CHECK___builtin_cpu_is ("bdver1");
+ cpu_model->__cpu_subtype = AMDFAM15H_BDVER1;
+ }
+ else if (model <= 0x2f)
+ {
+ /* Bulldozer version 2 "Piledriver" */
+ cpu = "bdver2";
+ CHECK___builtin_cpu_is ("bdver2");
+ cpu_model->__cpu_subtype = AMDFAM15H_BDVER2;
+ }
+ else if (model <= 0x4f)
+ {
+ /* Bulldozer version 3 "Steamroller" */
+ cpu = "bdver3";
+ CHECK___builtin_cpu_is ("bdver3");
+ cpu_model->__cpu_subtype = AMDFAM15H_BDVER3;
+ }
+ else if (model <= 0x7f)
+ {
+ /* Bulldozer version 4 "Excavator" */
+ cpu = "bdver4";
+ CHECK___builtin_cpu_is ("bdver4");
+ cpu_model->__cpu_subtype = AMDFAM15H_BDVER4;
+ }
+ else if (has_avx2)
+ {
+ cpu = "bdver4";
+ CHECK___builtin_cpu_is ("bdver4");
+ cpu_model->__cpu_subtype = AMDFAM15H_BDVER4;
+ }
+ else if (has_xsaveopt)
+ {
+ cpu = "bdver3";
+ CHECK___builtin_cpu_is ("bdver3");
+ cpu_model->__cpu_subtype = AMDFAM15H_BDVER3;
+ }
+ else if (has_bmi)
+ {
+ cpu = "bdver2";
+ CHECK___builtin_cpu_is ("bdver2");
+ cpu_model->__cpu_subtype = AMDFAM15H_BDVER2;
+ }
+ else if (has_xop)
+ {
+ cpu = "bdver1";
+ CHECK___builtin_cpu_is ("bdver1");
+ cpu_model->__cpu_subtype = AMDFAM15H_BDVER1;
+ }
+ break;
+ case 0x16:
+ /* AMD Family 16h "btver2" */
+ cpu = "btver2";
+ CHECK___builtin_cpu_is ("btver2");
+ cpu_model->__cpu_type = AMD_BTVER2;
+ break;
+ case 0x17:
+ cpu_model->__cpu_type = AMDFAM17H;
+ if (model <= 0x1f)
+ {
+ /* AMD family 17h version 1. */
+ cpu = "znver1";
+ CHECK___builtin_cpu_is ("znver1");
+ cpu_model->__cpu_subtype = AMDFAM17H_ZNVER1;
+ }
+ else if (model >= 0x30)
+ {
+ cpu = "znver2";
+ CHECK___builtin_cpu_is ("znver2");
+ cpu_model->__cpu_subtype = AMDFAM17H_ZNVER2;
+ }
+ else if (has_clwb)
+ {
+ cpu = "znver2";
+ CHECK___builtin_cpu_is ("znver2");
+ cpu_model->__cpu_subtype = AMDFAM17H_ZNVER2;
+ }
+ else if (has_clzero)
+ {
+ cpu = "znver1";
+ CHECK___builtin_cpu_is ("znver1");
+ cpu_model->__cpu_subtype = AMDFAM17H_ZNVER1;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return cpu;
+}
+
+/* Get the specific type of Intel CPU and return Intel CPU name. Return
+ NULL for unknown Intel CPU. */
+
+static inline const char *
+get_intel_cpu (struct __processor_model *cpu_model,
+ unsigned int family, unsigned int model,
+ unsigned int brand_id, int has_avx512vnni)
+{
+ const char *cpu = NULL;
+
+ /* Parse family and model only for brand ID 0 and family 6. */
+ if (brand_id != 0 || family != 0x6)
+ return cpu;
+
+ switch (model)
+ {
+ case 0x1c:
+ case 0x26:
+ /* Bonnell. */
+ cpu = "bonnell";
+ CHECK___builtin_cpu_is ("atom");
+ cpu_model->__cpu_type = INTEL_BONNELL;
+ break;
+ case 0x37:
+ case 0x4a:
+ case 0x4d:
+ case 0x5d:
+ /* Silvermont. */
+ case 0x4c:
+ case 0x5a:
+ case 0x75:
+ /* Airmont. */
+ cpu = "silvermont";
+ CHECK___builtin_cpu_is ("silvermont");
+ cpu_model->__cpu_type = INTEL_SILVERMONT;
+ break;
+ case 0x5c:
+ case 0x5f:
+ /* Goldmont. */
+ cpu = "goldmont";
+ CHECK___builtin_cpu_is ("goldmont");
+ cpu_model->__cpu_type = INTEL_GOLDMONT;
+ break;
+ case 0x7a:
+ /* Goldmont Plus. */
+ cpu = "goldmont-plus";
+ CHECK___builtin_cpu_is ("goldmont-plus");
+ cpu_model->__cpu_type = INTEL_GOLDMONT_PLUS;
+ break;
+ case 0x86:
+ case 0x96:
+ case 0x9c:
+ /* Tremont. */
+ cpu = "tremont";
+ CHECK___builtin_cpu_is ("tremont");
+ cpu_model->__cpu_type = INTEL_TREMONT;
+ break;
+ case 0x57:
+ /* Knights Landing. */
+ cpu = "knl";
+ CHECK___builtin_cpu_is ("knl");
+ cpu_model->__cpu_type = INTEL_KNL;
+ break;
+ case 0x85:
+ /* Knights Mill. */
+ cpu = "knm";
+ CHECK___builtin_cpu_is ("knm");
+ cpu_model->__cpu_type = INTEL_KNM;
+ break;
+ case 0x1a:
+ case 0x1e:
+ case 0x1f:
+ case 0x2e:
+ /* Nehalem. */
+ cpu = "nehalem";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("nehalem");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_NEHALEM;
+ break;
+ case 0x25:
+ case 0x2c:
+ case 0x2f:
+ /* Westmere. */
+ cpu = "westmere";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("westmere");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_WESTMERE;
+ break;
+ case 0x2a:
+ case 0x2d:
+ /* Sandy Bridge. */
+ cpu = "sandybridge";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("sandybridge");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
+ break;
+ case 0x3a:
+ case 0x3e:
+ /* Ivy Bridge. */
+ cpu = "ivybridge";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("ivybridge");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
+ break;
+ case 0x3c:
+ case 0x3f:
+ case 0x45:
+ case 0x46:
+ /* Haswell. */
+ cpu = "haswell";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("haswell");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_HASWELL;
+ break;
+ case 0x3d:
+ case 0x47:
+ case 0x4f:
+ case 0x56:
+ /* Broadwell. */
+ cpu = "broadwell";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("broadwell");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_BROADWELL;
+ break;
+ case 0x4e:
+ case 0x5e:
+ /* Skylake. */
+ case 0x8e:
+ case 0x9e:
+ /* Kaby Lake. */
+ case 0xa5:
+ case 0xa6:
+ /* Comet Lake. */
+ cpu = "skylake";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("skylake");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE;
+ break;
+ case 0x55:
+ CHECK___builtin_cpu_is ("corei7");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ if (has_avx512vnni)
+ {
+ /* Cascade Lake. */
+ cpu = "cascadelake";
+ CHECK___builtin_cpu_is ("cascadelake");
+ cpu_model->__cpu_subtype = INTEL_COREI7_CASCADELAKE;
+ }
+ else
+ {
+ /* Skylake with AVX-512 support. */
+ cpu = "skylake-avx512";
+ CHECK___builtin_cpu_is ("skylake-avx512");
+ cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE_AVX512;
+ }
+ break;
+ case 0x66:
+ /* Cannon Lake. */
+ cpu = "cannonlake";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("cannonlake");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_CANNONLAKE;
+ break;
+ case 0x6a:
+ case 0x6c:
+ /* Ice Lake server. */
+ cpu = "icelake-server";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("icelake-server");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_ICELAKE_SERVER;
+ break;
+ case 0x7e:
+ case 0x7d:
+ case 0x9d:
+ /* Ice Lake client. */
+ cpu = "icelake-client";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("icelake-client");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_ICELAKE_CLIENT;
+ break;
+ case 0x8c:
+ case 0x8d:
+ /* Tiger Lake. */
+ cpu = "tigerlake";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("tigerlake");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_TIGERLAKE;
+ break;
+ case 0x17:
+ case 0x1d:
+ /* Penryn. */
+ case 0x0f:
+ /* Merom. */
+ cpu = "core2";
+ CHECK___builtin_cpu_is ("core2");
+ cpu_model->__cpu_type = INTEL_CORE2;
+ break;
+ default:
+ break;
+ }
+
+ return cpu;
+}
+
+/* Return non-zero if the processor has feature F. */
+
+static inline int
+has_cpu_feature (enum processor_features f)
+{
+ if (f < 32)
+ return __cpu_model.__cpu_features[0] & (1U << (f & 31));
+ else
+ return __cpu_features2 & (1U << ((f - 32) & 31));
+}
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
#include "system.h"
#include "coretypes.h"
#include "tm.h"
+#include "common/config/i386/cpuinfo.h"
const char *host_detect_local_cpu (int argc, const char **argv);
@@ -656,6 +657,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
}
}
+ struct __processor_model cpu_model;
+
if (vendor == signature_AMD_ebx)
{
unsigned int name;
@@ -666,34 +669,21 @@ const char *host_detect_local_cpu (int argc, const char **argv)
else
name = 0;
- if (name == signature_NSC_ebx)
- processor = PROCESSOR_GEODE;
- else if (has_movbe && family == 22)
- processor = PROCESSOR_BTVER2;
- else if (has_clwb)
- processor = PROCESSOR_ZNVER2;
- else if (has_clzero)
- processor = PROCESSOR_ZNVER1;
- else if (has_avx2)
- processor = PROCESSOR_BDVER4;
- else if (has_xsaveopt)
- processor = PROCESSOR_BDVER3;
- else if (has_bmi)
- processor = PROCESSOR_BDVER2;
- else if (has_xop)
- processor = PROCESSOR_BDVER1;
- else if (has_sse4a && has_ssse3)
- processor = PROCESSOR_BTVER1;
- else if (has_sse4a)
- processor = PROCESSOR_AMDFAM10;
- else if (has_sse2 || has_longmode)
- processor = PROCESSOR_K8;
- else if (has_3dnowp && family == 6)
- processor = PROCESSOR_ATHLON;
- else if (has_mmx)
- processor = PROCESSOR_K6;
- else
- processor = PROCESSOR_PENTIUM;
+ cpu = get_amd_cpu (&cpu_model, family, model, has_bmi, has_avx2,
+ has_xop, has_xsaveopt, has_clwb, has_clzero);
+ if (cpu == NULL)
+ {
+ if (name == signature_NSC_ebx)
+ processor = PROCESSOR_GEODE;
+ else if (has_sse2 || has_longmode)
+ processor = PROCESSOR_K8;
+ else if (has_3dnowp && family == 6)
+ processor = PROCESSOR_ATHLON;
+ else if (has_mmx)
+ processor = PROCESSOR_K6;
+ else
+ processor = PROCESSOR_PENTIUM;
+ }
}
else if (vendor == signature_CENTAUR_ebx)
{
@@ -767,145 +757,24 @@ const char *host_detect_local_cpu (int argc, const char **argv)
cpu = "pentium";
break;
case PROCESSOR_PENTIUMPRO:
- switch (model)
+ cpu = get_intel_cpu (&cpu_model, family, model, 0,
+ has_avx512vnni);
+ if (cpu == NULL)
{
- case 0x1c:
- case 0x26:
- /* Bonnell. */
- cpu = "bonnell";
- break;
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5d:
- /* Silvermont. */
- case 0x4c:
- case 0x5a:
- case 0x75:
- /* Airmont. */
- cpu = "silvermont";
- break;
- case 0x5c:
- case 0x5f:
- /* Goldmont. */
- cpu = "goldmont";
- break;
- case 0x7a:
- /* Goldmont Plus. */
- cpu = "goldmont-plus";
- break;
- case 0x86:
- case 0x96:
- case 0x9c:
- /* Tremont. */
- cpu = "tremont";
- break;
- case 0x0f:
- /* Merom. */
- case 0x17:
- case 0x1d:
- /* Penryn. */
- cpu = "core2";
- break;
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x2e:
- /* Nehalem. */
- cpu = "nehalem";
- break;
- case 0x25:
- case 0x2c:
- case 0x2f:
- /* Westmere. */
- cpu = "westmere";
- break;
- case 0x2a:
- case 0x2d:
- /* Sandy Bridge. */
- cpu = "sandybridge";
- break;
- case 0x3a:
- case 0x3e:
- /* Ivy Bridge. */
- cpu = "ivybridge";
- break;
- case 0x3c:
- case 0x3f:
- case 0x45:
- case 0x46:
- /* Haswell. */
- cpu = "haswell";
- break;
- case 0x3d:
- case 0x47:
- case 0x4f:
- case 0x56:
- /* Broadwell. */
- cpu = "broadwell";
- break;
- case 0x4e:
- case 0x5e:
- /* Skylake. */
- case 0x8e:
- case 0x9e:
- /* Kaby Lake. */
- case 0xa5:
- case 0xa6:
- /* Comet Lake. */
- cpu = "skylake";
- break;
- case 0x55:
- if (has_avx512vnni)
- /* Cascade Lake. */
- cpu = "cascadelake";
- else
- /* Skylake with AVX-512. */
- cpu = "skylake-avx512";
- break;
- case 0x6a:
- case 0x6c:
- /* Ice Lake server. */
- cpu = "icelake-server";
- break;
- case 0x7e:
- case 0x7d:
- case 0x9d:
- /* Ice Lake client. */
- cpu = "icelake-client";
- break;
- case 0x8c:
- case 0x8d:
- /* Tiger Lake. */
- cpu = "tigerlake";
- break;
- case 0x57:
- /* Knights Landing. */
- cpu = "knl";
- break;
- case 0x66:
- /* Cannon Lake. */
- cpu = "cannonlake";
- break;
- case 0x85:
- /* Knights Mill. */
- cpu = "knm";
- break;
- default:
if (arch)
{
/* This is unknown family 0x6 CPU. */
if (has_avx)
- {
- /* Assume Tiger Lake */
- if (has_avx512vp2intersect)
- cpu = "tigerlake";
- /* Assume Cooper Lake */
- else if (has_avx512bf16)
- cpu = "cooperlake";
- /* Assume Ice Lake Server. */
- else if (has_wbnoinvd)
- cpu = "icelake-server";
+ {
+ /* Assume Tiger Lake */
+ if (has_avx512vp2intersect)
+ cpu = "tigerlake";
+ /* Assume Cooper Lake */
+ else if (has_avx512bf16)
+ cpu = "cooperlake";
+ /* Assume Ice Lake Server. */
+ else if (has_wbnoinvd)
+ cpu = "icelake-server";
/* Assume Ice Lake. */
else if (has_avx512bitalg)
cpu = "icelake-client";
@@ -1002,7 +871,6 @@ const char *host_detect_local_cpu (int argc, const char **argv)
else
/* For -mtune, we default to -mtune=generic. */
cpu = "generic";
- break;
}
break;
case PROCESSOR_PENTIUM4:
@@ -1058,33 +926,6 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* For -mtune, we default to -mtune=k8 */
cpu = "k8";
break;
- case PROCESSOR_AMDFAM10:
- cpu = "amdfam10";
- break;
- case PROCESSOR_BDVER1:
- cpu = "bdver1";
- break;
- case PROCESSOR_BDVER2:
- cpu = "bdver2";
- break;
- case PROCESSOR_BDVER3:
- cpu = "bdver3";
- break;
- case PROCESSOR_BDVER4:
- cpu = "bdver4";
- break;
- case PROCESSOR_ZNVER1:
- cpu = "znver1";
- break;
- case PROCESSOR_ZNVER2:
- cpu = "znver2";
- break;
- case PROCESSOR_BTVER1:
- cpu = "btver1";
- break;
- case PROCESSOR_BTVER2:
- cpu = "btver2";
- break;
default:
/* Use something reasonable. */
@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
#include "debug.h"
#include "dwarf2out.h"
#include "i386-builtins.h"
+#include "common/config/i386/cpuinfo.h"
#undef BDESC
#undef BDESC_FIRST
@@ -1872,50 +1873,6 @@ enum feature_priority
P_PROC_AVX512F
};
-/* This is the order of bit-fields in __processor_features in cpuinfo.c */
-enum processor_features
-{
- F_CMOV = 0,
- F_MMX,
- F_POPCNT,
- F_SSE,
- F_SSE2,
- F_SSE3,
- F_SSSE3,
- F_SSE4_1,
- F_SSE4_2,
- F_AVX,
- F_AVX2,
- F_SSE4_A,
- F_FMA4,
- F_XOP,
- F_FMA,
- F_AVX512F,
- F_BMI,
- F_BMI2,
- F_AES,
- F_PCLMUL,
- F_AVX512VL,
- F_AVX512BW,
- F_AVX512DQ,
- F_AVX512CD,
- F_AVX512ER,
- F_AVX512PF,
- F_AVX512VBMI,
- F_AVX512IFMA,
- F_AVX5124VNNIW,
- F_AVX5124FMAPS,
- F_AVX512VPOPCNTDQ,
- F_AVX512VBMI2,
- F_GFNI,
- F_VPCLMULQDQ,
- F_AVX512VNNI,
- F_AVX512BITALG,
- F_AVX512VP2INTERSECT,
- F_AVX512BF16,
- F_MAX
-};
-
/* These are the values for vendor types and cpu types and subtypes
in cpuinfo.c. Cpu types and subtypes should be subtracted by
the corresponding start value. */
@@ -1964,6 +1921,85 @@ enum processor_model
M_INTEL_COREI7_COOPERLAKE
};
+/* Check against enum processor_vendor. */
+#define CHECK_processor_vendor(t, e) \
+ static_assert ((int) (t) == (int) (e), "Incorrect " #t)
+CHECK_processor_vendor (M_INTEL, VENDOR_INTEL);
+CHECK_processor_vendor (M_AMD, VENDOR_AMD);
+
+/* Check against enum processor_types. */
+#define CHECK_processor_types(t, e) \
+ static_assert ((int) (t) - (M_CPU_TYPE_START) \
+ == (int) (e), "Incorrect " #t)
+CHECK_processor_types (M_INTEL_BONNELL, INTEL_BONNELL);
+CHECK_processor_types (M_INTEL_CORE2, INTEL_CORE2);
+CHECK_processor_types (M_INTEL_COREI7, INTEL_COREI7);
+CHECK_processor_types (M_AMDFAM10H, AMDFAM10H);
+CHECK_processor_types (M_AMDFAM15H, AMDFAM15H);
+CHECK_processor_types (M_INTEL_SILVERMONT, INTEL_SILVERMONT);
+CHECK_processor_types (M_INTEL_KNL, INTEL_KNL);
+CHECK_processor_types (M_AMD_BTVER1, AMD_BTVER1);
+CHECK_processor_types (M_AMD_BTVER2, AMD_BTVER2);
+CHECK_processor_types (M_AMDFAM17H, AMDFAM17H);
+CHECK_processor_types (M_INTEL_KNM, INTEL_KNM);
+CHECK_processor_types (M_INTEL_GOLDMONT, INTEL_GOLDMONT);
+CHECK_processor_types (M_INTEL_GOLDMONT_PLUS, INTEL_GOLDMONT_PLUS);
+CHECK_processor_types (M_INTEL_TREMONT, INTEL_TREMONT);
+CHECK_processor_types (M_INTEL_TREMONT, (int) CPU_TYPE_MAX - 1);
+
+/* Check against enum processor_subtypes. */
+#define CHECK_processor_subtypes(t, e) \
+ static_assert ((int) (t) - (M_CPU_SUBTYPE_START) \
+ == (int) (e), "Incorrect " #t)
+CHECK_processor_subtypes (M_INTEL_COREI7_NEHALEM,
+ INTEL_COREI7_NEHALEM);
+CHECK_processor_subtypes (M_INTEL_COREI7_WESTMERE,
+ INTEL_COREI7_WESTMERE);
+CHECK_processor_subtypes (M_INTEL_COREI7_SANDYBRIDGE,
+ INTEL_COREI7_SANDYBRIDGE);
+CHECK_processor_subtypes (M_AMDFAM10H_BARCELONA,
+ AMDFAM10H_BARCELONA);
+CHECK_processor_subtypes (M_AMDFAM10H_SHANGHAI,
+ AMDFAM10H_SHANGHAI);
+CHECK_processor_subtypes (M_AMDFAM10H_ISTANBUL,
+ AMDFAM10H_ISTANBUL);
+CHECK_processor_subtypes (M_AMDFAM15H_BDVER1,
+ AMDFAM15H_BDVER1);
+CHECK_processor_subtypes (M_AMDFAM15H_BDVER2,
+ AMDFAM15H_BDVER2);
+CHECK_processor_subtypes (M_AMDFAM15H_BDVER3,
+ AMDFAM15H_BDVER3);
+CHECK_processor_subtypes (M_AMDFAM15H_BDVER4,
+ AMDFAM15H_BDVER4);
+CHECK_processor_subtypes (M_AMDFAM17H_ZNVER1,
+ AMDFAM17H_ZNVER1);
+CHECK_processor_subtypes (M_INTEL_COREI7_IVYBRIDGE,
+ INTEL_COREI7_IVYBRIDGE);
+CHECK_processor_subtypes (M_INTEL_COREI7_HASWELL,
+ INTEL_COREI7_HASWELL);
+CHECK_processor_subtypes (M_INTEL_COREI7_BROADWELL,
+ INTEL_COREI7_BROADWELL);
+CHECK_processor_subtypes (M_INTEL_COREI7_SKYLAKE,
+ INTEL_COREI7_SKYLAKE);
+CHECK_processor_subtypes (M_INTEL_COREI7_SKYLAKE_AVX512,
+ INTEL_COREI7_SKYLAKE_AVX512);
+CHECK_processor_subtypes (M_INTEL_COREI7_CANNONLAKE,
+ INTEL_COREI7_CANNONLAKE);
+CHECK_processor_subtypes (M_INTEL_COREI7_ICELAKE_CLIENT,
+ INTEL_COREI7_ICELAKE_CLIENT);
+CHECK_processor_subtypes (M_INTEL_COREI7_ICELAKE_SERVER,
+ INTEL_COREI7_ICELAKE_SERVER);
+CHECK_processor_subtypes (M_AMDFAM17H_ZNVER2,
+ AMDFAM17H_ZNVER2);
+CHECK_processor_subtypes (M_INTEL_COREI7_CASCADELAKE,
+ INTEL_COREI7_CASCADELAKE);
+CHECK_processor_subtypes (M_INTEL_COREI7_TIGERLAKE,
+ INTEL_COREI7_TIGERLAKE);
+CHECK_processor_subtypes (M_INTEL_COREI7_COOPERLAKE,
+ INTEL_COREI7_COOPERLAKE);
+CHECK_processor_subtypes (M_INTEL_COREI7_COOPERLAKE,
+ (int) CPU_SUBTYPE_MAX - 1);
+
struct _arch_names_table
{
const char *const name;
@@ -2026,44 +2062,47 @@ struct _isa_names_table
static const _isa_names_table isa_names_table[] =
{
- {"cmov", F_CMOV, P_ZERO},
- {"mmx", F_MMX, P_MMX},
- {"popcnt", F_POPCNT, P_POPCNT},
- {"sse", F_SSE, P_SSE},
- {"sse2", F_SSE2, P_SSE2},
- {"sse3", F_SSE3, P_SSE3},
- {"ssse3", F_SSSE3, P_SSSE3},
- {"sse4a", F_SSE4_A, P_SSE4_A},
- {"sse4.1", F_SSE4_1, P_SSE4_1},
- {"sse4.2", F_SSE4_2, P_SSE4_2},
- {"avx", F_AVX, P_AVX},
- {"fma4", F_FMA4, P_FMA4},
- {"xop", F_XOP, P_XOP},
- {"fma", F_FMA, P_FMA},
- {"avx2", F_AVX2, P_AVX2},
- {"avx512f", F_AVX512F, P_AVX512F},
- {"bmi", F_BMI, P_BMI},
- {"bmi2", F_BMI2, P_BMI2},
- {"aes", F_AES, P_AES},
- {"pclmul", F_PCLMUL, P_PCLMUL},
- {"avx512vl",F_AVX512VL, P_ZERO},
- {"avx512bw",F_AVX512BW, P_ZERO},
- {"avx512dq",F_AVX512DQ, P_ZERO},
- {"avx512cd",F_AVX512CD, P_ZERO},
- {"avx512er",F_AVX512ER, P_ZERO},
- {"avx512pf",F_AVX512PF, P_ZERO},
- {"avx512vbmi",F_AVX512VBMI, P_ZERO},
- {"avx512ifma",F_AVX512IFMA, P_ZERO},
- {"avx5124vnniw",F_AVX5124VNNIW, P_ZERO},
- {"avx5124fmaps",F_AVX5124FMAPS, P_ZERO},
- {"avx512vpopcntdq",F_AVX512VPOPCNTDQ, P_ZERO},
- {"avx512vbmi2", F_AVX512VBMI2, P_ZERO},
- {"gfni", F_GFNI, P_ZERO},
- {"vpclmulqdq", F_VPCLMULQDQ, P_ZERO},
- {"avx512vnni", F_AVX512VNNI, P_ZERO},
- {"avx512bitalg", F_AVX512BITALG, P_ZERO},
- {"avx512vp2intersect",F_AVX512VP2INTERSECT, P_ZERO},
- {"avx512bf16", F_AVX512BF16, P_ZERO}
+ {"cmov", FEATURE_CMOV, P_ZERO},
+ {"mmx", FEATURE_MMX, P_MMX},
+ {"popcnt", FEATURE_POPCNT, P_POPCNT},
+ {"sse", FEATURE_SSE, P_SSE},
+ {"sse2", FEATURE_SSE2, P_SSE2},
+ {"sse3", FEATURE_SSE3, P_SSE3},
+ {"ssse3", FEATURE_SSSE3, P_SSSE3},
+ {"sse4a", FEATURE_SSE4_A, P_SSE4_A},
+ {"sse4.1", FEATURE_SSE4_1, P_SSE4_1},
+ {"sse4.2", FEATURE_SSE4_2, P_SSE4_2},
+ {"avx", FEATURE_AVX, P_AVX},
+ {"fma4", FEATURE_FMA4, P_FMA4},
+ {"xop", FEATURE_XOP, P_XOP},
+ {"fma", FEATURE_FMA, P_FMA},
+ {"avx2", FEATURE_AVX2, P_AVX2},
+ {"avx512f", FEATURE_AVX512F, P_AVX512F},
+ {"bmi", FEATURE_BMI, P_BMI},
+ {"bmi2", FEATURE_BMI2, P_BMI2},
+ {"aes", FEATURE_AES, P_AES},
+ {"pclmul", FEATURE_PCLMUL, P_PCLMUL},
+ {"avx512vl",FEATURE_AVX512VL, P_ZERO},
+ {"avx512bw",FEATURE_AVX512BW, P_ZERO},
+ {"avx512dq",FEATURE_AVX512DQ, P_ZERO},
+ {"avx512cd",FEATURE_AVX512CD, P_ZERO},
+ {"avx512er",FEATURE_AVX512ER, P_ZERO},
+ {"avx512pf",FEATURE_AVX512PF, P_ZERO},
+ {"avx512vbmi",FEATURE_AVX512VBMI, P_ZERO},
+ {"avx512ifma",FEATURE_AVX512IFMA, P_ZERO},
+ {"avx5124vnniw",FEATURE_AVX5124VNNIW, P_ZERO},
+ {"avx5124fmaps",FEATURE_AVX5124FMAPS, P_ZERO},
+ {"avx512vpopcntdq",FEATURE_AVX512VPOPCNTDQ, P_ZERO},
+ {"avx512vbmi2", FEATURE_AVX512VBMI2, P_ZERO},
+ {"gfni", FEATURE_GFNI, P_ZERO},
+ {"vpclmulqdq", FEATURE_VPCLMULQDQ, P_ZERO},
+ {"avx512vnni", FEATURE_AVX512VNNI, P_ZERO},
+ {"avx512bitalg", FEATURE_AVX512BITALG, P_ZERO},
+ {"avx512vp2intersect",FEATURE_AVX512VP2INTERSECT, P_ZERO},
+ {"avx512bf16", FEATURE_AVX512BF16, P_ZERO},
+ {"xsaveopt", FEATURE_XSAVEOPT, P_ZERO},
+ {"clwb", FEATURE_CLWB, P_ZERO},
+ {"clzero", FEATURE_CLZERO, P_ZERO}
};
/* This parses the attribute arguments to target in DECL and determines
@@ -7,184 +7,11 @@
/* { dg-do run } */
#include <assert.h>
+#include <stddef.h>
#include "cpuid.h"
-
-/* Check if the Intel CPU model and sub-model are identified. */
-static void
-check_intel_cpu_model (unsigned int family, unsigned int model,
- unsigned int brand_id)
-{
- /* Parse family and model only if brand ID is 0. */
- if (brand_id == 0)
- {
- switch (family)
- {
- case 0x5:
- /* Pentium. */
- break;
- case 0x6:
- switch (model)
- {
- case 0x1c:
- case 0x26:
- /* Atom. */
- assert (__builtin_cpu_is ("atom"));
- break;
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5a:
- case 0x5d:
- /* Silvermont. */
- assert (__builtin_cpu_is ("silvermont"));
- break;
- case 0x5c:
- case 0x5f:
- /* Goldmont. */
- assert (__builtin_cpu_is ("goldmont"));
- break;
- case 0x7a:
- /* Goldmont Plus. */
- assert (__builtin_cpu_is ("goldmont-plus"));
- break;
- case 0x57:
- /* Knights Landing. */
- assert (__builtin_cpu_is ("knl"));
- break;
- case 0x85:
- /* Knights Mill */
- assert (__builtin_cpu_is ("knm"));
- break;
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x2e:
- /* Nehalem. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("nehalem"));
- break;
- case 0x25:
- case 0x2c:
- case 0x2f:
- /* Westmere. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("westmere"));
- break;
- case 0x2a:
- case 0x2d:
- /* Sandy Bridge. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("sandybridge"));
- break;
- case 0x3a:
- case 0x3e:
- /* Ivy Bridge. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("ivybridge"));
- break;
- case 0x3c:
- case 0x3f:
- case 0x45:
- case 0x46:
- /* Haswell. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("haswell"));
- break;
- case 0x3d:
- case 0x47:
- case 0x4f:
- case 0x56:
- /* Broadwell. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("broadwell"));
- break;
- case 0x4e:
- case 0x5e:
- /* Skylake. */
- case 0x8e:
- case 0x9e:
- /* Kaby Lake. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("skylake"));
- break;
- case 0x55:
- {
- unsigned int eax, ebx, ecx, edx;
- __cpuid_count (7, 0, eax, ebx, ecx, edx);
- assert (__builtin_cpu_is ("corei7"));
- if (ecx & bit_AVX512VNNI)
- /* Cascade Lake. */
- assert (__builtin_cpu_is ("cascadelake"));
- else
- /* Skylake with AVX-512 support. */
- assert (__builtin_cpu_is ("skylake-avx512"));
- break;
- }
- case 0x66:
- /* Cannon Lake. */
- assert (__builtin_cpu_is ("cannonlake"));
- break;
- case 0x17:
- case 0x1d:
- /* Penryn. */
- case 0x0f:
- /* Merom. */
- assert (__builtin_cpu_is ("core2"));
- break;
- default:
- break;
- }
- break;
- default:
- /* We have no idea. */
- break;
- }
- }
-}
-
-/* Check if the AMD CPU model and sub-model are identified. */
-static void
-check_amd_cpu_model (unsigned int family, unsigned int model)
-{
- switch (family)
- {
- /* AMD Family 10h. */
- case 0x10:
- switch (model)
- {
- case 0x2:
- /* Barcelona. */
- assert (__builtin_cpu_is ("amdfam10h"));
- assert (__builtin_cpu_is ("barcelona"));
- break;
- case 0x4:
- /* Shanghai. */
- assert (__builtin_cpu_is ("amdfam10h"));
- assert (__builtin_cpu_is ("shanghai"));
- break;
- case 0x8:
- /* Istanbul. */
- assert (__builtin_cpu_is ("amdfam10h"));
- assert (__builtin_cpu_is ("istanbul"));
- break;
- default:
- break;
- }
- break;
- /* AMD Family 15h. */
- case 0x15:
- assert (__builtin_cpu_is ("amdfam15h"));
- /* Bulldozer version 1. */
- if ( model <= 0xf)
- assert (__builtin_cpu_is ("bdver1"));
- /* Bulldozer version 2. */
- if (model >= 0x10 && model <= 0x1f)
- assert (__builtin_cpu_is ("bdver2"));
- break;
- default:
- break;
- }
-}
+#define CHECK___builtin_cpu_is(cpu) assert (__builtin_cpu_is (cpu))
+#define inline
+#include "../../../common/config/i386/cpuinfo.h"
/* Check if the ISA features are identified. */
static void
@@ -305,6 +132,8 @@ check_detailed ()
unsigned int model, family, brand_id;
unsigned int extended_model, extended_family;
+ struct __processor_model cpu_model;
+
/* Assume cpuid insn present. Run in level 0 to get vendor id. */
if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
return 0;
@@ -335,7 +164,8 @@ check_detailed ()
}
else if (family == 0x06)
model += extended_model;
- check_intel_cpu_model (family, model, brand_id);
+ get_intel_cpu (&cpu_model, family, model, brand_id,
+ __builtin_cpu_supports ("avx512vnni"));
check_features (ecx, edx, max_level);
}
else if (vendor == signature_AMD_ebx)
@@ -347,7 +177,13 @@ check_detailed ()
family += extended_family;
model += (extended_model << 4);
}
- check_amd_cpu_model (family, model);
+ get_amd_cpu (&cpu_model, family, model,
+ __builtin_cpu_supports ("bmi"),
+ __builtin_cpu_supports ("avx2"),
+ __builtin_cpu_supports ("xop"),
+ __builtin_cpu_supports ("xsaveopt"),
+ __builtin_cpu_supports ("clwb"),
+ __builtin_cpu_supports ("clzero"));
check_features (ecx, edx, max_level);
}
@@ -26,7 +26,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include "cpuid.h"
#include "tsystem.h"
#include "auto-target.h"
-#include "cpuinfo.h"
+#ifdef SHARED
+static
+#endif
+/* We want to move away from __cpu_model in libgcc_s.so.1 and the
+ size of __cpu_model is part of ABI. So, new features that don't
+ fit into __cpu_model.__cpu_features[0] go into extra variables
+ in libgcc.a only, preferably hidden. NB: Declare __cpu_features2
+ before including "cpuinfo.h" so that has_cpu_feature can be used in
+ libgfortran. */
+unsigned int __cpu_features2;
+#include "common/config/i386/cpuinfo.h"
#ifdef HAVE_INIT_PRIORITY
#define CONSTRUCTOR_PRIORITY (101)
@@ -39,216 +49,7 @@ int __cpu_indicator_init (void)
struct __processor_model __cpu_model = { };
-#ifndef SHARED
-/* We want to move away from __cpu_model in libgcc_s.so.1 and the
- size of __cpu_model is part of ABI. So, new features that don't
- fit into __cpu_model.__cpu_features[0] go into extra variables
- in libgcc.a only, preferrably hidden. */
-unsigned int __cpu_features2;
-#endif
-
-
-/* Get the specific type of AMD CPU. */
-
-static void
-get_amd_cpu (unsigned int family, unsigned int model)
-{
- switch (family)
- {
- /* AMD Family 10h. */
- case 0x10:
- __cpu_model.__cpu_type = AMDFAM10H;
- switch (model)
- {
- case 0x2:
- /* Barcelona. */
- __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA;
- break;
- case 0x4:
- /* Shanghai. */
- __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI;
- break;
- case 0x8:
- /* Istanbul. */
- __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL;
- break;
- default:
- break;
- }
- break;
- /* AMD Family 14h "btver1". */
- case 0x14:
- __cpu_model.__cpu_type = AMD_BTVER1;
- break;
- /* AMD Family 15h "Bulldozer". */
- case 0x15:
- __cpu_model.__cpu_type = AMDFAM15H;
-
- if (model == 0x2)
- __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
- /* Bulldozer version 1. */
- else if (model <= 0xf)
- __cpu_model.__cpu_subtype = AMDFAM15H_BDVER1;
- /* Bulldozer version 2 "Piledriver" */
- else if (model <= 0x2f)
- __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
- /* Bulldozer version 3 "Steamroller" */
- else if (model <= 0x4f)
- __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
- /* Bulldozer version 4 "Excavator" */
- else if (model <= 0x7f)
- __cpu_model.__cpu_subtype = AMDFAM15H_BDVER4;
- break;
- /* AMD Family 16h "btver2" */
- case 0x16:
- __cpu_model.__cpu_type = AMD_BTVER2;
- break;
- case 0x17:
- __cpu_model.__cpu_type = AMDFAM17H;
- /* AMD family 17h version 1. */
- if (model <= 0x1f)
- __cpu_model.__cpu_subtype = AMDFAM17H_ZNVER1;
- if (model >= 0x30)
- __cpu_model.__cpu_subtype = AMDFAM17H_ZNVER2;
- break;
- default:
- break;
- }
-}
-/* Get the specific type of Intel CPU. */
-
-static void
-get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
-{
- /* Parse family and model only if brand ID is 0. */
- if (brand_id == 0)
- {
- switch (family)
- {
- case 0x5:
- /* Pentium. */
- break;
- case 0x6:
- switch (model)
- {
- case 0x1c:
- case 0x26:
- /* Bonnell. */
- __cpu_model.__cpu_type = INTEL_BONNELL;
- break;
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5a:
- case 0x5d:
- /* Silvermont. */
- __cpu_model.__cpu_type = INTEL_SILVERMONT;
- break;
- case 0x5c:
- case 0x5f:
- /* Goldmont. */
- __cpu_model.__cpu_type = INTEL_GOLDMONT;
- break;
- case 0x7a:
- /* Goldmont Plus. */
- __cpu_model.__cpu_type = INTEL_GOLDMONT_PLUS;
- break;
- case 0x57:
- /* Knights Landing. */
- __cpu_model.__cpu_type = INTEL_KNL;
- break;
- case 0x85:
- /* Knights Mill. */
- __cpu_model.__cpu_type = INTEL_KNM;
- break;
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x2e:
- /* Nehalem. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_NEHALEM;
- break;
- case 0x25:
- case 0x2c:
- case 0x2f:
- /* Westmere. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_WESTMERE;
- break;
- case 0x2a:
- case 0x2d:
- /* Sandy Bridge. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
- break;
- case 0x3a:
- case 0x3e:
- /* Ivy Bridge. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
- break;
- case 0x3c:
- case 0x3f:
- case 0x45:
- case 0x46:
- /* Haswell. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_HASWELL;
- break;
- case 0x3d:
- case 0x47:
- case 0x4f:
- case 0x56:
- /* Broadwell. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_BROADWELL;
- break;
- case 0x4e:
- case 0x5e:
- /* Skylake. */
- case 0x8e:
- case 0x9e:
- /* Kaby Lake. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE;
- break;
- case 0x55:
- {
- unsigned int eax, ebx, ecx, edx;
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpuid_count (7, 0, eax, ebx, ecx, edx);
- if (ecx & bit_AVX512VNNI)
- /* Cascade Lake. */
- __cpu_model.__cpu_subtype = INTEL_COREI7_CASCADELAKE;
- else
- /* Skylake with AVX-512 support. */
- __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE_AVX512;
- }
- break;
- case 0x66:
- /* Cannon Lake. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_CANNONLAKE;
- break;
- case 0x17:
- case 0x1d:
- /* Penryn. */
- case 0x0f:
- /* Merom. */
- __cpu_model.__cpu_type = INTEL_CORE2;
- break;
- default:
- break;
- }
- break;
- default:
- /* We have no idea. */
- break;
- }
- }
-}
/* ECX and EDX are output of CPUID at level one. MAX_CPUID_LEVEL is
the max possible level of CPUID insn. */
@@ -349,6 +150,10 @@ get_available_features (unsigned int ecx, unsigned int edx,
}
if (ebx & bit_BMI2)
set_feature (FEATURE_BMI2);
+ if (ecx & bit_GFNI)
+ set_feature (FEATURE_GFNI);
+ if (ebx & bit_CLWB)
+ set_feature (FEATURE_CLWB);
if (avx512_usable)
{
if (ebx & bit_AVX512F)
@@ -371,8 +176,6 @@ get_available_features (unsigned int ecx, unsigned int edx,
set_feature (FEATURE_AVX512VBMI);
if (ecx & bit_AVX512VBMI2)
set_feature (FEATURE_AVX512VBMI2);
- if (ecx & bit_GFNI)
- set_feature (FEATURE_GFNI);
if (ecx & bit_VPCLMULQDQ)
set_feature (FEATURE_VPCLMULQDQ);
if (ecx & bit_AVX512VNNI)
@@ -385,6 +188,8 @@ get_available_features (unsigned int ecx, unsigned int edx,
set_feature (FEATURE_AVX5124VNNIW);
if (edx & bit_AVX5124FMAPS)
set_feature (FEATURE_AVX5124FMAPS);
+ if (edx & bit_AVX512VP2INTERSECT)
+ set_feature (FEATURE_AVX512VP2INTERSECT);
__cpuid_count (7, 1, eax, ebx, ecx, edx);
if (eax & bit_AVX512BF16)
@@ -392,6 +197,14 @@ get_available_features (unsigned int ecx, unsigned int edx,
}
}
+ /* Get Advanced Features at level 13 (eax = 13, ecx = 1). */
+ if (max_cpuid_level >= 13)
+ {
+ __cpuid_count (13, 1, eax, ebx, ecx, edx);
+ if (eax & bit_XSAVEOPT)
+ set_feature (FEATURE_XSAVEOPT);
+ }
+
/* Check cpuid level of extended features. */
__cpuid (0x80000000, ext_level, ebx, ecx, edx);
@@ -410,12 +223,15 @@ get_available_features (unsigned int ecx, unsigned int edx,
}
}
+ if (ext_level >= 0x80000008)
+ {
+ __cpuid (0x80000008, eax, ebx, ecx, edx);
+ if (ebx & bit_CLZERO)
+ set_feature (FEATURE_CLZERO);
+ }
+
__cpu_model.__cpu_features[0] = features;
-#ifndef SHARED
__cpu_features2 = features2;
-#else
- (void) features2;
-#endif
}
/* A constructor function that is sets __cpu_model and __cpu_features with
@@ -477,10 +293,11 @@ __cpu_indicator_init (void)
else if (family == 0x06)
model += extended_model;
- /* Get CPU type. */
- get_intel_cpu (family, model, brand_id);
/* Find available features. */
get_available_features (ecx, edx, max_level);
+ /* Get CPU type. */
+ get_intel_cpu (&__cpu_model, family, model, brand_id,
+ has_cpu_feature (FEATURE_AVX512VNNI));
__cpu_model.__cpu_vendor = VENDOR_INTEL;
}
else if (vendor == signature_AMD_ebx)
@@ -492,10 +309,16 @@ __cpu_indicator_init (void)
model += extended_model;
}
- /* Get CPU type. */
- get_amd_cpu (family, model);
/* Find available features. */
get_available_features (ecx, edx, max_level);
+ /* Get CPU type. */
+ get_amd_cpu (&__cpu_model, family, model,
+ has_cpu_feature (FEATURE_BMI),
+ has_cpu_feature (FEATURE_AVX2),
+ has_cpu_feature (FEATURE_XOP),
+ has_cpu_feature (FEATURE_XSAVEOPT),
+ has_cpu_feature (FEATURE_CLWB),
+ has_cpu_feature (FEATURE_CLZERO));
__cpu_model.__cpu_vendor = VENDOR_AMD;
}
else
deleted file mode 100644
@@ -1,135 +0,0 @@
-/* Get CPU type and Features for x86 processors.
- Copyright (C) 2012-2020 Free Software Foundation, Inc.
- Contributed by Sriraman Tallam (tmsriram@google.com)
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-<http://www.gnu.org/licenses/>. */
-
-/* Processor Vendor and Models. */
-
-enum processor_vendor
-{
- VENDOR_INTEL = 1,
- VENDOR_AMD,
- VENDOR_OTHER,
- VENDOR_MAX
-};
-
-/* Any new types or subtypes have to be inserted at the end. */
-
-enum processor_types
-{
- INTEL_BONNELL = 1,
- INTEL_CORE2,
- INTEL_COREI7,
- AMDFAM10H,
- AMDFAM15H,
- INTEL_SILVERMONT,
- INTEL_KNL,
- AMD_BTVER1,
- AMD_BTVER2,
- AMDFAM17H,
- INTEL_KNM,
- INTEL_GOLDMONT,
- INTEL_GOLDMONT_PLUS,
- INTEL_TREMONT,
- CPU_TYPE_MAX
-};
-
-enum processor_subtypes
-{
- INTEL_COREI7_NEHALEM = 1,
- INTEL_COREI7_WESTMERE,
- INTEL_COREI7_SANDYBRIDGE,
- AMDFAM10H_BARCELONA,
- AMDFAM10H_SHANGHAI,
- AMDFAM10H_ISTANBUL,
- AMDFAM15H_BDVER1,
- AMDFAM15H_BDVER2,
- AMDFAM15H_BDVER3,
- AMDFAM15H_BDVER4,
- AMDFAM17H_ZNVER1,
- INTEL_COREI7_IVYBRIDGE,
- INTEL_COREI7_HASWELL,
- INTEL_COREI7_BROADWELL,
- INTEL_COREI7_SKYLAKE,
- INTEL_COREI7_SKYLAKE_AVX512,
- INTEL_COREI7_CANNONLAKE,
- INTEL_COREI7_ICELAKE_CLIENT,
- INTEL_COREI7_ICELAKE_SERVER,
- AMDFAM17H_ZNVER2,
- INTEL_COREI7_CASCADELAKE,
- INTEL_COREI7_TIGERLAKE,
- INTEL_COREI7_COOPERLAKE,
- CPU_SUBTYPE_MAX
-};
-
-/* ISA Features supported. New features have to be inserted at the end. */
-
-enum processor_features
-{
- FEATURE_CMOV = 0,
- FEATURE_MMX,
- FEATURE_POPCNT,
- FEATURE_SSE,
- FEATURE_SSE2,
- FEATURE_SSE3,
- FEATURE_SSSE3,
- FEATURE_SSE4_1,
- FEATURE_SSE4_2,
- FEATURE_AVX,
- FEATURE_AVX2,
- FEATURE_SSE4_A,
- FEATURE_FMA4,
- FEATURE_XOP,
- FEATURE_FMA,
- FEATURE_AVX512F,
- FEATURE_BMI,
- FEATURE_BMI2,
- FEATURE_AES,
- FEATURE_PCLMUL,
- FEATURE_AVX512VL,
- FEATURE_AVX512BW,
- FEATURE_AVX512DQ,
- FEATURE_AVX512CD,
- FEATURE_AVX512ER,
- FEATURE_AVX512PF,
- FEATURE_AVX512VBMI,
- FEATURE_AVX512IFMA,
- FEATURE_AVX5124VNNIW,
- FEATURE_AVX5124FMAPS,
- FEATURE_AVX512VPOPCNTDQ,
- FEATURE_AVX512VBMI2,
- FEATURE_GFNI,
- FEATURE_VPCLMULQDQ,
- FEATURE_AVX512VNNI,
- FEATURE_AVX512BITALG,
- FEATURE_AVX512BF16
-};
-
-extern struct __processor_model
-{
- unsigned int __cpu_vendor;
- unsigned int __cpu_type;
- unsigned int __cpu_subtype;
- unsigned int __cpu_features[1];
-} __cpu_model;
-extern unsigned int __cpu_features2;
@@ -2367,7 +2367,7 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_c10 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_c10_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c10_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_c10_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c10_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_c10_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_c16 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_c16_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c16_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_c16_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c16_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_c16_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_c4 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_c4_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c4_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_c4_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c4_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_c4_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_c8 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_c8_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c8_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_c8_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c8_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_c8_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i1 (gfc_array_i1 * const restrict retarray,
gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i1_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i1_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i1_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i1_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i1_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i16 (gfc_array_i16 * const restrict retarray,
gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i16_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i16_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i16_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i16_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i16_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i2 (gfc_array_i2 * const restrict retarray,
gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i2_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i2_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i2_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i2_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i2_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i4 (gfc_array_i4 * const restrict retarray,
gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i4_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i4_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i4_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i4_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i4_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i8 (gfc_array_i8 * const restrict retarray,
gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i8_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i8_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i8_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i8_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i8_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_r10 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_r10_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r10_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_r10_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r10_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_r10_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_r16 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_r16_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r16_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_r16_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r16_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_r16_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_r4 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_r4_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r4_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_r4_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r4_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_r4_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_r8 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_r8_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r8_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_r8_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r8_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_r8_avx128_fma4;
goto store;
@@ -134,7 +134,7 @@ internal_proto('matmul_name`);
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_'rtype_code` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -155,7 +155,7 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_'rtype_code`_avx512f;
goto store;
@@ -164,8 +164,8 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_'rtype_code`_avx2;
goto store;
@@ -174,7 +174,7 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_'rtype_code`_avx;
goto store;
@@ -184,16 +184,16 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_'rtype_code`_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_'rtype_code`_avx128_fma4;
goto store;
--
2.26.2