From 3f75ee91b68c196660df21f43c3c2121fb63e69a Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 18 May 2020 05:58:41 -0700
Subject: [PATCH] x86: Move cpuinfo.h from libgcc to common/config/i386
Move cpuinfo.h from libgcc to common/config/i386 so that get_intel_cpu
can be shared by libgcc, GCC driver, gcc.target/i386/builtin_target.c
and libgfortran to detect the specific type of Intel CPU. Update
libgfortran to use has_cpu_feature to detect x86 CPU features.
gcc/
* common/config/i386/cpuinfo.h: Moved from libgcc/config/i386.
(CHECK___builtin_cpu_is): New. Defined as empty if not defined.
(get_intel_cpu): Moved from libgcc/config/i386/cpuinfo.c. Add
an argument for AVX512VNNI. Use CHECK___builtin_cpu_is. Return
Intel CPU name.
(has_cpu_feature): New function.
* config/i386/driver-i386.c: Include
"common/config/i386/cpuinfo.h".
(host_detect_local_cpu): Call get_intel_cpu to get Intel CPU
name.
gcc/testsuite/
* gcc.target/i386/builtin_target.c: Include <stddef.h> and
../../../common/config/i386/cpuinfo.h.
(CHECK___builtin_cpu_is): New.
(inline): New. Defined as empty.
(check_intel_cpu_model): Removed.
(check_detailed): Call get_intel_cpu instead of
check_intel_cpu_model.
libgcc/
* config/i386/cpuinfo.h: Moved to ... gcc/common/config/i386.
* config/i386/cpuinfo.c: Include "common/config/i386/cpuinfo.h".
(__cpu_features2): Make it static in libgcc_s.so.1.
(get_intel_cpu): Moved to ... gcc/common/config/i386/cpuinfo.h.
(get_available_features): Always set __cpu_features2.
(__cpu_indicator_init): Call get_available_features before
calling get_intel_cpu. Pass has_cpu_feature (FEATURE_AVX512VNNI)
to get_intel_cpu.
libgfortran/
* m4/matmul.m4: Include <common/config/i386/cpuinfo.h> instead
of <config/i386/cpuinfo.h>. Use has_cpu_feature.
* generated/matmul_c10.c: Regenerated.
* generated/matmul_c16.c: Likewise.
* generated/matmul_c4.c: Likewise.
* generated/matmul_c8.c: Likewise.
* generated/matmul_i1.c: Likewise.
* generated/matmul_i16.c: Likewise.
* generated/matmul_i2.c: Likewise.
* generated/matmul_i4.c: Likewise.
* generated/matmul_i8.c: Likewise.
* generated/matmul_r10.c: Likewise.
* generated/matmul_r16.c: Likewise.
* generated/matmul_r4.c: Likewise.
* generated/matmul_r8.c: Likewise.
---
gcc/common/config/i386/cpuinfo.h | 365 ++++++++++++++++++
gcc/config/i386/driver-i386.c | 131 +------
.../gcc.target/i386/builtin_target.c | 141 +------
libgcc/config/i386/cpuinfo.c | 162 +-------
libgcc/config/i386/cpuinfo.h | 135 -------
libgfortran/generated/matmul_c10.c | 18 +-
libgfortran/generated/matmul_c16.c | 18 +-
libgfortran/generated/matmul_c4.c | 18 +-
libgfortran/generated/matmul_c8.c | 18 +-
libgfortran/generated/matmul_i1.c | 18 +-
libgfortran/generated/matmul_i16.c | 18 +-
libgfortran/generated/matmul_i2.c | 18 +-
libgfortran/generated/matmul_i4.c | 18 +-
libgfortran/generated/matmul_i8.c | 18 +-
libgfortran/generated/matmul_r10.c | 18 +-
libgfortran/generated/matmul_r16.c | 18 +-
libgfortran/generated/matmul_r4.c | 18 +-
libgfortran/generated/matmul_r8.c | 18 +-
libgfortran/m4/matmul.m4 | 18 +-
19 files changed, 518 insertions(+), 668 deletions(-)
create mode 100644 gcc/common/config/i386/cpuinfo.h
delete mode 100644 libgcc/config/i386/cpuinfo.h
new file mode 100644
@@ -0,0 +1,365 @@
+/* Get CPU type and Features for x86 processors.
+ Copyright (C) 2012-2020 Free Software Foundation, Inc.
+ Contributed by Sriraman Tallam (tmsriram@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Processor Vendor and Models. */
+
+enum processor_vendor
+{
+ VENDOR_INTEL = 1,
+ VENDOR_AMD,
+ VENDOR_OTHER,
+ VENDOR_MAX
+};
+
+/* Any new types or subtypes have to be inserted at the end. */
+
+enum processor_types
+{
+ INTEL_BONNELL = 1,
+ INTEL_CORE2,
+ INTEL_COREI7,
+ AMDFAM10H,
+ AMDFAM15H,
+ INTEL_SILVERMONT,
+ INTEL_KNL,
+ AMD_BTVER1,
+ AMD_BTVER2,
+ AMDFAM17H,
+ INTEL_KNM,
+ INTEL_GOLDMONT,
+ INTEL_GOLDMONT_PLUS,
+ INTEL_TREMONT,
+ CPU_TYPE_MAX
+};
+
+enum processor_subtypes
+{
+ INTEL_COREI7_NEHALEM = 1,
+ INTEL_COREI7_WESTMERE,
+ INTEL_COREI7_SANDYBRIDGE,
+ AMDFAM10H_BARCELONA,
+ AMDFAM10H_SHANGHAI,
+ AMDFAM10H_ISTANBUL,
+ AMDFAM15H_BDVER1,
+ AMDFAM15H_BDVER2,
+ AMDFAM15H_BDVER3,
+ AMDFAM15H_BDVER4,
+ AMDFAM17H_ZNVER1,
+ INTEL_COREI7_IVYBRIDGE,
+ INTEL_COREI7_HASWELL,
+ INTEL_COREI7_BROADWELL,
+ INTEL_COREI7_SKYLAKE,
+ INTEL_COREI7_SKYLAKE_AVX512,
+ INTEL_COREI7_CANNONLAKE,
+ INTEL_COREI7_ICELAKE_CLIENT,
+ INTEL_COREI7_ICELAKE_SERVER,
+ AMDFAM17H_ZNVER2,
+ INTEL_COREI7_CASCADELAKE,
+ INTEL_COREI7_TIGERLAKE,
+ INTEL_COREI7_COOPERLAKE,
+ CPU_SUBTYPE_MAX
+};
+
+/* ISA Features supported. New features have to be inserted at the end. */
+
+enum processor_features
+{
+ FEATURE_CMOV = 0,
+ FEATURE_MMX,
+ FEATURE_POPCNT,
+ FEATURE_SSE,
+ FEATURE_SSE2,
+ FEATURE_SSE3,
+ FEATURE_SSSE3,
+ FEATURE_SSE4_1,
+ FEATURE_SSE4_2,
+ FEATURE_AVX,
+ FEATURE_AVX2,
+ FEATURE_SSE4_A,
+ FEATURE_FMA4,
+ FEATURE_XOP,
+ FEATURE_FMA,
+ FEATURE_AVX512F,
+ FEATURE_BMI,
+ FEATURE_BMI2,
+ FEATURE_AES,
+ FEATURE_PCLMUL,
+ FEATURE_AVX512VL,
+ FEATURE_AVX512BW,
+ FEATURE_AVX512DQ,
+ FEATURE_AVX512CD,
+ FEATURE_AVX512ER,
+ FEATURE_AVX512PF,
+ FEATURE_AVX512VBMI,
+ FEATURE_AVX512IFMA,
+ FEATURE_AVX5124VNNIW,
+ FEATURE_AVX5124FMAPS,
+ FEATURE_AVX512VPOPCNTDQ,
+ FEATURE_AVX512VBMI2,
+ FEATURE_GFNI,
+ FEATURE_VPCLMULQDQ,
+ FEATURE_AVX512VNNI,
+ FEATURE_AVX512BITALG,
+ FEATURE_AVX512BF16
+};
+
+extern struct __processor_model
+{
+ unsigned int __cpu_vendor;
+ unsigned int __cpu_type;
+ unsigned int __cpu_subtype;
+ unsigned int __cpu_features[1];
+} __cpu_model;
+extern unsigned int __cpu_features2;
+
+#ifndef CHECK___builtin_cpu_is
+# define CHECK___builtin_cpu_is(cpu)
+#endif
+
+/* Get the specific type of Intel CPU and return Intel CPU name. Return
+ NULL for unknown Intel CPU. */
+
+static inline const char *
+get_intel_cpu (struct __processor_model *cpu_model,
+ unsigned int family, unsigned int model,
+ unsigned int brand_id, int has_avx512vnni)
+{
+ const char *cpu = NULL;
+
+ /* Parse family and model only for brand ID 0 and model 6. */
+ if (brand_id != 0 || family != 0x6)
+ return cpu;
+
+ switch (model)
+ {
+ case 0x1c:
+ case 0x26:
+ /* Bonnell. */
+ cpu = "bonnell";
+ CHECK___builtin_cpu_is ("atom");
+ cpu_model->__cpu_type = INTEL_BONNELL;
+ break;
+ case 0x37:
+ case 0x4a:
+ case 0x4d:
+ case 0x5d:
+ /* Silvermont. */
+ case 0x4c:
+ case 0x5a:
+ case 0x75:
+ /* Airmont. */
+ cpu = "silvermont";
+ CHECK___builtin_cpu_is ("silvermont");
+ cpu_model->__cpu_type = INTEL_SILVERMONT;
+ break;
+ case 0x5c:
+ case 0x5f:
+ /* Goldmont. */
+ cpu = "goldmont";
+ CHECK___builtin_cpu_is ("goldmont");
+ cpu_model->__cpu_type = INTEL_GOLDMONT;
+ break;
+ case 0x7a:
+ /* Goldmont Plus. */
+ cpu = "goldmont-plus";
+ CHECK___builtin_cpu_is ("goldmont-plus");
+ cpu_model->__cpu_type = INTEL_GOLDMONT_PLUS;
+ break;
+ case 0x86:
+ case 0x96:
+ case 0x9c:
+ /* Tremont. */
+ cpu = "tremont";
+ CHECK___builtin_cpu_is ("tremont");
+ cpu_model->__cpu_type = INTEL_TREMONT;
+ break;
+ case 0x57:
+ /* Knights Landing. */
+ cpu = "knl";
+ CHECK___builtin_cpu_is ("knl");
+ cpu_model->__cpu_type = INTEL_KNL;
+ break;
+ case 0x85:
+ /* Knights Mill. */
+ cpu = "knm";
+ CHECK___builtin_cpu_is ("knm");
+ cpu_model->__cpu_type = INTEL_KNM;
+ break;
+ case 0x1a:
+ case 0x1e:
+ case 0x1f:
+ case 0x2e:
+ /* Nehalem. */
+ cpu = "nehalem";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("nehalem");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_NEHALEM;
+ break;
+ case 0x25:
+ case 0x2c:
+ case 0x2f:
+ /* Westmere. */
+ cpu = "westmere";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("westmere");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_WESTMERE;
+ break;
+ case 0x2a:
+ case 0x2d:
+ /* Sandy Bridge. */
+ cpu = "sandybridge";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("sandybridge");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
+ break;
+ case 0x3a:
+ case 0x3e:
+ /* Ivy Bridge. */
+ cpu = "ivybridge";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("ivybridge");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
+ break;
+ case 0x3c:
+ case 0x3f:
+ case 0x45:
+ case 0x46:
+ /* Haswell. */
+ cpu = "haswell";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("haswell");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_HASWELL;
+ break;
+ case 0x3d:
+ case 0x47:
+ case 0x4f:
+ case 0x56:
+ /* Broadwell. */
+ cpu = "broadwell";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("broadwell");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_BROADWELL;
+ break;
+ case 0x4e:
+ case 0x5e:
+ /* Skylake. */
+ case 0x8e:
+ case 0x9e:
+ /* Kaby Lake. */
+ case 0xa5:
+ case 0xa6:
+ /* Comet Lake. */
+ cpu = "skylake";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("skylake");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE;
+ break;
+ case 0x55:
+ CHECK___builtin_cpu_is ("corei7");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ if (has_avx512vnni)
+ {
+ /* Cascade Lake. */
+ cpu = "cascadelake";
+ CHECK___builtin_cpu_is ("cascadelake");
+ cpu_model->__cpu_subtype = INTEL_COREI7_CASCADELAKE;
+ }
+ else
+ {
+ /* Skylake with AVX-512 support. */
+ cpu = "skylake-avx512";
+ CHECK___builtin_cpu_is ("skylake-avx512");
+ cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE_AVX512;
+ }
+ break;
+ case 0x66:
+ /* Cannon Lake. */
+ cpu = "cannonlake";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("cannonlake");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_CANNONLAKE;
+ break;
+ case 0x6a:
+ case 0x6c:
+ /* Ice Lake server. */
+ cpu = "icelake-server";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("icelake-server");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_ICELAKE_SERVER;
+ break;
+ case 0x7e:
+ case 0x7d:
+ case 0x9d:
+ /* Ice Lake client. */
+ cpu = "icelake-client";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("icelake-client");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_ICELAKE_CLIENT;
+ break;
+ case 0x8c:
+ case 0x8d:
+ /* Tiger Lake. */
+ cpu = "tigerlake";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("tigerlake");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_TIGERLAKE;
+ break;
+ case 0x17:
+ case 0x1d:
+ /* Penryn. */
+ case 0x0f:
+ /* Merom. */
+ cpu = "core2";
+ CHECK___builtin_cpu_is ("core2");
+ cpu_model->__cpu_type = INTEL_CORE2;
+ break;
+ default:
+ break;
+ }
+
+ return cpu;
+}
+
+/* Return non-zero if the processor has feature F. */
+
+static inline int
+has_cpu_feature (enum processor_features f)
+{
+ if (f < 32)
+ return __cpu_model.__cpu_features[0] & (1U << (f & 31));
+ else
+ return __cpu_features2 & (1U << ((f - 32) & 31));
+}
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
#include "system.h"
#include "coretypes.h"
#include "tm.h"
+#include "common/config/i386/cpuinfo.h"
const char *host_detect_local_cpu (int argc, const char **argv);
@@ -741,6 +742,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
}
}
+ struct __processor_model cpu_model;
+
switch (processor)
{
case PROCESSOR_I386:
@@ -767,131 +770,10 @@ const char *host_detect_local_cpu (int argc, const char **argv)
cpu = "pentium";
break;
case PROCESSOR_PENTIUMPRO:
- switch (model)
+ cpu = get_intel_cpu (&cpu_model, family, model, 0,
+ has_avx512vnni);
+ if (cpu == NULL)
{
- case 0x1c:
- case 0x26:
- /* Bonnell. */
- cpu = "bonnell";
- break;
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5d:
- /* Silvermont. */
- case 0x4c:
- case 0x5a:
- case 0x75:
- /* Airmont. */
- cpu = "silvermont";
- break;
- case 0x5c:
- case 0x5f:
- /* Goldmont. */
- cpu = "goldmont";
- break;
- case 0x7a:
- /* Goldmont Plus. */
- cpu = "goldmont-plus";
- break;
- case 0x86:
- case 0x96:
- case 0x9c:
- /* Tremont. */
- cpu = "tremont";
- break;
- case 0x0f:
- /* Merom. */
- case 0x17:
- case 0x1d:
- /* Penryn. */
- cpu = "core2";
- break;
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x2e:
- /* Nehalem. */
- cpu = "nehalem";
- break;
- case 0x25:
- case 0x2c:
- case 0x2f:
- /* Westmere. */
- cpu = "westmere";
- break;
- case 0x2a:
- case 0x2d:
- /* Sandy Bridge. */
- cpu = "sandybridge";
- break;
- case 0x3a:
- case 0x3e:
- /* Ivy Bridge. */
- cpu = "ivybridge";
- break;
- case 0x3c:
- case 0x3f:
- case 0x45:
- case 0x46:
- /* Haswell. */
- cpu = "haswell";
- break;
- case 0x3d:
- case 0x47:
- case 0x4f:
- case 0x56:
- /* Broadwell. */
- cpu = "broadwell";
- break;
- case 0x4e:
- case 0x5e:
- /* Skylake. */
- case 0x8e:
- case 0x9e:
- /* Kaby Lake. */
- case 0xa5:
- case 0xa6:
- /* Comet Lake. */
- cpu = "skylake";
- break;
- case 0x55:
- if (has_avx512vnni)
- /* Cascade Lake. */
- cpu = "cascadelake";
- else
- /* Skylake with AVX-512. */
- cpu = "skylake-avx512";
- break;
- case 0x6a:
- case 0x6c:
- /* Ice Lake server. */
- cpu = "icelake-server";
- break;
- case 0x7e:
- case 0x7d:
- case 0x9d:
- /* Ice Lake client. */
- cpu = "icelake-client";
- break;
- case 0x8c:
- case 0x8d:
- /* Tiger Lake. */
- cpu = "tigerlake";
- break;
- case 0x57:
- /* Knights Landing. */
- cpu = "knl";
- break;
- case 0x66:
- /* Cannon Lake. */
- cpu = "cannonlake";
- break;
- case 0x85:
- /* Knights Mill. */
- cpu = "knm";
- break;
- default:
if (arch)
{
/* This is unknown family 0x6 CPU. */
@@ -1002,7 +884,6 @@ const char *host_detect_local_cpu (int argc, const char **argv)
else
/* For -mtune, we default to -mtune=generic. */
cpu = "generic";
- break;
}
break;
case PROCESSOR_PENTIUM4:
@@ -7,140 +7,11 @@
/* { dg-do run } */
#include <assert.h>
+#include <stddef.h>
#include "cpuid.h"
-
-/* Check if the Intel CPU model and sub-model are identified. */
-static void
-check_intel_cpu_model (unsigned int family, unsigned int model,
- unsigned int brand_id)
-{
- /* Parse family and model only if brand ID is 0. */
- if (brand_id == 0)
- {
- switch (family)
- {
- case 0x5:
- /* Pentium. */
- break;
- case 0x6:
- switch (model)
- {
- case 0x1c:
- case 0x26:
- /* Atom. */
- assert (__builtin_cpu_is ("atom"));
- break;
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5a:
- case 0x5d:
- /* Silvermont. */
- assert (__builtin_cpu_is ("silvermont"));
- break;
- case 0x5c:
- case 0x5f:
- /* Goldmont. */
- assert (__builtin_cpu_is ("goldmont"));
- break;
- case 0x7a:
- /* Goldmont Plus. */
- assert (__builtin_cpu_is ("goldmont-plus"));
- break;
- case 0x57:
- /* Knights Landing. */
- assert (__builtin_cpu_is ("knl"));
- break;
- case 0x85:
- /* Knights Mill */
- assert (__builtin_cpu_is ("knm"));
- break;
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x2e:
- /* Nehalem. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("nehalem"));
- break;
- case 0x25:
- case 0x2c:
- case 0x2f:
- /* Westmere. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("westmere"));
- break;
- case 0x2a:
- case 0x2d:
- /* Sandy Bridge. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("sandybridge"));
- break;
- case 0x3a:
- case 0x3e:
- /* Ivy Bridge. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("ivybridge"));
- break;
- case 0x3c:
- case 0x3f:
- case 0x45:
- case 0x46:
- /* Haswell. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("haswell"));
- break;
- case 0x3d:
- case 0x47:
- case 0x4f:
- case 0x56:
- /* Broadwell. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("broadwell"));
- break;
- case 0x4e:
- case 0x5e:
- /* Skylake. */
- case 0x8e:
- case 0x9e:
- /* Kaby Lake. */
- assert (__builtin_cpu_is ("corei7"));
- assert (__builtin_cpu_is ("skylake"));
- break;
- case 0x55:
- {
- unsigned int eax, ebx, ecx, edx;
- __cpuid_count (7, 0, eax, ebx, ecx, edx);
- assert (__builtin_cpu_is ("corei7"));
- if (ecx & bit_AVX512VNNI)
- /* Cascade Lake. */
- assert (__builtin_cpu_is ("cascadelake"));
- else
- /* Skylake with AVX-512 support. */
- assert (__builtin_cpu_is ("skylake-avx512"));
- break;
- }
- case 0x66:
- /* Cannon Lake. */
- assert (__builtin_cpu_is ("cannonlake"));
- break;
- case 0x17:
- case 0x1d:
- /* Penryn. */
- case 0x0f:
- /* Merom. */
- assert (__builtin_cpu_is ("core2"));
- break;
- default:
- break;
- }
- break;
- default:
- /* We have no idea. */
- break;
- }
- }
-}
+#define CHECK___builtin_cpu_is(cpu) assert (__builtin_cpu_is (cpu))
+#define inline
+#include "../../../common/config/i386/cpuinfo.h"
/* Check if the AMD CPU model and sub-model are identified. */
static void
@@ -326,6 +197,7 @@ check_detailed ()
if (vendor == signature_INTEL_ebx)
{
+ struct __processor_model cpu_model;
assert (__builtin_cpu_is ("intel"));
/* Adjust family and model for Intel CPUs. */
if (family == 0x0f)
@@ -335,7 +207,8 @@ check_detailed ()
}
else if (family == 0x06)
model += extended_model;
- check_intel_cpu_model (family, model, brand_id);
+ get_intel_cpu (&cpu_model, family, model, brand_id,
+ __builtin_cpu_supports ("avx512vnni"));
check_features (ecx, edx, max_level);
}
else if (vendor == signature_AMD_ebx)
@@ -26,7 +26,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include "cpuid.h"
#include "tsystem.h"
#include "auto-target.h"
-#include "cpuinfo.h"
+#ifdef SHARED
+static
+#endif
+/* We want to move away from __cpu_model in libgcc_s.so.1 and the
+ size of __cpu_model is part of ABI. So, new features that don't
+ fit into __cpu_model.__cpu_features[0] go into extra variables
+   in libgcc.a only, preferably hidden.  NB: Declare __cpu_features2
+ before including "cpuinfo.h" so that has_cpu_feature can be used in
+ libgfortran. */
+unsigned int __cpu_features2;
+#include "common/config/i386/cpuinfo.h"
#ifdef HAVE_INIT_PRIORITY
#define CONSTRUCTOR_PRIORITY (101)
@@ -39,13 +49,6 @@ int __cpu_indicator_init (void)
struct __processor_model __cpu_model = { };
-#ifndef SHARED
-/* We want to move away from __cpu_model in libgcc_s.so.1 and the
- size of __cpu_model is part of ABI. So, new features that don't
- fit into __cpu_model.__cpu_features[0] go into extra variables
- in libgcc.a only, preferrably hidden. */
-unsigned int __cpu_features2;
-#endif
/* Get the specific type of AMD CPU. */
@@ -116,140 +119,6 @@ get_amd_cpu (unsigned int family, unsigned int model)
}
}
-/* Get the specific type of Intel CPU. */
-
-static void
-get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
-{
- /* Parse family and model only if brand ID is 0. */
- if (brand_id == 0)
- {
- switch (family)
- {
- case 0x5:
- /* Pentium. */
- break;
- case 0x6:
- switch (model)
- {
- case 0x1c:
- case 0x26:
- /* Bonnell. */
- __cpu_model.__cpu_type = INTEL_BONNELL;
- break;
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5a:
- case 0x5d:
- /* Silvermont. */
- __cpu_model.__cpu_type = INTEL_SILVERMONT;
- break;
- case 0x5c:
- case 0x5f:
- /* Goldmont. */
- __cpu_model.__cpu_type = INTEL_GOLDMONT;
- break;
- case 0x7a:
- /* Goldmont Plus. */
- __cpu_model.__cpu_type = INTEL_GOLDMONT_PLUS;
- break;
- case 0x57:
- /* Knights Landing. */
- __cpu_model.__cpu_type = INTEL_KNL;
- break;
- case 0x85:
- /* Knights Mill. */
- __cpu_model.__cpu_type = INTEL_KNM;
- break;
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x2e:
- /* Nehalem. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_NEHALEM;
- break;
- case 0x25:
- case 0x2c:
- case 0x2f:
- /* Westmere. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_WESTMERE;
- break;
- case 0x2a:
- case 0x2d:
- /* Sandy Bridge. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
- break;
- case 0x3a:
- case 0x3e:
- /* Ivy Bridge. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
- break;
- case 0x3c:
- case 0x3f:
- case 0x45:
- case 0x46:
- /* Haswell. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_HASWELL;
- break;
- case 0x3d:
- case 0x47:
- case 0x4f:
- case 0x56:
- /* Broadwell. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_BROADWELL;
- break;
- case 0x4e:
- case 0x5e:
- /* Skylake. */
- case 0x8e:
- case 0x9e:
- /* Kaby Lake. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE;
- break;
- case 0x55:
- {
- unsigned int eax, ebx, ecx, edx;
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpuid_count (7, 0, eax, ebx, ecx, edx);
- if (ecx & bit_AVX512VNNI)
- /* Cascade Lake. */
- __cpu_model.__cpu_subtype = INTEL_COREI7_CASCADELAKE;
- else
- /* Skylake with AVX-512 support. */
- __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE_AVX512;
- }
- break;
- case 0x66:
- /* Cannon Lake. */
- __cpu_model.__cpu_type = INTEL_COREI7;
- __cpu_model.__cpu_subtype = INTEL_COREI7_CANNONLAKE;
- break;
- case 0x17:
- case 0x1d:
- /* Penryn. */
- case 0x0f:
- /* Merom. */
- __cpu_model.__cpu_type = INTEL_CORE2;
- break;
- default:
- break;
- }
- break;
- default:
- /* We have no idea. */
- break;
- }
- }
-}
-
/* ECX and EDX are output of CPUID at level one. MAX_CPUID_LEVEL is
the max possible level of CPUID insn. */
static void
@@ -411,11 +280,7 @@ get_available_features (unsigned int ecx, unsigned int edx,
}
__cpu_model.__cpu_features[0] = features;
-#ifndef SHARED
__cpu_features2 = features2;
-#else
- (void) features2;
-#endif
}
/* A constructor function that is sets __cpu_model and __cpu_features with
@@ -477,10 +342,11 @@ __cpu_indicator_init (void)
else if (family == 0x06)
model += extended_model;
- /* Get CPU type. */
- get_intel_cpu (family, model, brand_id);
/* Find available features. */
get_available_features (ecx, edx, max_level);
+ /* Get CPU type. */
+ get_intel_cpu (&__cpu_model, family, model, brand_id,
+ has_cpu_feature (FEATURE_AVX512VNNI));
__cpu_model.__cpu_vendor = VENDOR_INTEL;
}
else if (vendor == signature_AMD_ebx)
deleted file mode 100644
@@ -1,135 +0,0 @@
-/* Get CPU type and Features for x86 processors.
- Copyright (C) 2012-2020 Free Software Foundation, Inc.
- Contributed by Sriraman Tallam (tmsriram@google.com)
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-<http://www.gnu.org/licenses/>. */
-
-/* Processor Vendor and Models. */
-
-enum processor_vendor
-{
- VENDOR_INTEL = 1,
- VENDOR_AMD,
- VENDOR_OTHER,
- VENDOR_MAX
-};
-
-/* Any new types or subtypes have to be inserted at the end. */
-
-enum processor_types
-{
- INTEL_BONNELL = 1,
- INTEL_CORE2,
- INTEL_COREI7,
- AMDFAM10H,
- AMDFAM15H,
- INTEL_SILVERMONT,
- INTEL_KNL,
- AMD_BTVER1,
- AMD_BTVER2,
- AMDFAM17H,
- INTEL_KNM,
- INTEL_GOLDMONT,
- INTEL_GOLDMONT_PLUS,
- INTEL_TREMONT,
- CPU_TYPE_MAX
-};
-
-enum processor_subtypes
-{
- INTEL_COREI7_NEHALEM = 1,
- INTEL_COREI7_WESTMERE,
- INTEL_COREI7_SANDYBRIDGE,
- AMDFAM10H_BARCELONA,
- AMDFAM10H_SHANGHAI,
- AMDFAM10H_ISTANBUL,
- AMDFAM15H_BDVER1,
- AMDFAM15H_BDVER2,
- AMDFAM15H_BDVER3,
- AMDFAM15H_BDVER4,
- AMDFAM17H_ZNVER1,
- INTEL_COREI7_IVYBRIDGE,
- INTEL_COREI7_HASWELL,
- INTEL_COREI7_BROADWELL,
- INTEL_COREI7_SKYLAKE,
- INTEL_COREI7_SKYLAKE_AVX512,
- INTEL_COREI7_CANNONLAKE,
- INTEL_COREI7_ICELAKE_CLIENT,
- INTEL_COREI7_ICELAKE_SERVER,
- AMDFAM17H_ZNVER2,
- INTEL_COREI7_CASCADELAKE,
- INTEL_COREI7_TIGERLAKE,
- INTEL_COREI7_COOPERLAKE,
- CPU_SUBTYPE_MAX
-};
-
-/* ISA Features supported. New features have to be inserted at the end. */
-
-enum processor_features
-{
- FEATURE_CMOV = 0,
- FEATURE_MMX,
- FEATURE_POPCNT,
- FEATURE_SSE,
- FEATURE_SSE2,
- FEATURE_SSE3,
- FEATURE_SSSE3,
- FEATURE_SSE4_1,
- FEATURE_SSE4_2,
- FEATURE_AVX,
- FEATURE_AVX2,
- FEATURE_SSE4_A,
- FEATURE_FMA4,
- FEATURE_XOP,
- FEATURE_FMA,
- FEATURE_AVX512F,
- FEATURE_BMI,
- FEATURE_BMI2,
- FEATURE_AES,
- FEATURE_PCLMUL,
- FEATURE_AVX512VL,
- FEATURE_AVX512BW,
- FEATURE_AVX512DQ,
- FEATURE_AVX512CD,
- FEATURE_AVX512ER,
- FEATURE_AVX512PF,
- FEATURE_AVX512VBMI,
- FEATURE_AVX512IFMA,
- FEATURE_AVX5124VNNIW,
- FEATURE_AVX5124FMAPS,
- FEATURE_AVX512VPOPCNTDQ,
- FEATURE_AVX512VBMI2,
- FEATURE_GFNI,
- FEATURE_VPCLMULQDQ,
- FEATURE_AVX512VNNI,
- FEATURE_AVX512BITALG,
- FEATURE_AVX512BF16
-};
-
-extern struct __processor_model
-{
- unsigned int __cpu_vendor;
- unsigned int __cpu_type;
- unsigned int __cpu_subtype;
- unsigned int __cpu_features[1];
-} __cpu_model;
-extern unsigned int __cpu_features2;
@@ -2367,7 +2367,7 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_c10 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_c10_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c10_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_c10_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c10_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_c10_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_c16 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_c16_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c16_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_c16_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c16_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_c16_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_c4 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_c4_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c4_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_c4_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c4_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_c4_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_c8 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_c8_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c8_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_c8_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_c8_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_c8_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i1 (gfc_array_i1 * const restrict retarray,
gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i1_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i1_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i1_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i1_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i1_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i16 (gfc_array_i16 * const restrict retarray,
gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i16_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i16_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i16_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i16_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i16_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i2 (gfc_array_i2 * const restrict retarray,
gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i2_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i2_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i2_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i2_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i2_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i4 (gfc_array_i4 * const restrict retarray,
gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i4_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i4_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i4_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i4_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i4_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_i8 (gfc_array_i8 * const restrict retarray,
gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_i8_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i8_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_i8_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_i8_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_i8_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_r10 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_r10_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r10_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_r10_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r10_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_r10_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_r16 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_r16_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r16_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_r16_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r16_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_r16_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_r4 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_r4_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r4_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_r4_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r4_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_r4_avx128_fma4;
goto store;
@@ -2367,7 +2367,7 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_r8 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -2388,7 +2388,7 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_r8_avx512f;
goto store;
@@ -2397,8 +2397,8 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r8_avx2;
goto store;
@@ -2407,7 +2407,7 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_r8_avx;
goto store;
@@ -2417,16 +2417,16 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_r8_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_r8_avx128_fma4;
goto store;
@@ -134,7 +134,7 @@ internal_proto('matmul_name`);
/* Currently, this is i386 only. Adjust for other architectures. */
-#include <config/i386/cpuinfo.h>
+#include <common/config/i386/cpuinfo.h>
void matmul_'rtype_code` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
@@ -155,7 +155,7 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
{
/* Run down the available processors in order of preference. */
#ifdef HAVE_AVX512F
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+ if (has_cpu_feature (FEATURE_AVX512F))
{
matmul_fn = matmul_'rtype_code`_avx512f;
goto store;
@@ -164,8 +164,8 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX2)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_'rtype_code`_avx2;
goto store;
@@ -174,7 +174,7 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
#endif
#ifdef HAVE_AVX
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ if (has_cpu_feature (FEATURE_AVX))
{
matmul_fn = matmul_'rtype_code`_avx;
goto store;
@@ -184,16 +184,16 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
{
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA))
{
matmul_fn = matmul_'rtype_code`_avx128_fma3;
goto store;
}
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
- if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
- && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ if (has_cpu_feature (FEATURE_AVX)
+ && has_cpu_feature (FEATURE_FMA4))
{
matmul_fn = matmul_'rtype_code`_avx128_fma4;
goto store;
--
2.26.2