diff mbox

[part,libgcc] Add AVX-specific matmul

Message ID 31a0faab-ab00-c6e4-705d-872fda554ca9@netcologne.de
State New
Headers show

Commit Message

Thomas Koenig Nov. 30, 2016, 7:17 a.m. UTC
Hello world,

the patch at https://gcc.gnu.org/ml/fortran/2016-11/msg00246.html
(the one going to gcc-patches was rejected due to size of
regenerated files) contains one libgcc change, which exposes
the __cpu_model interface for i386 to libgfortran.

The Fortran bits are OKd, but I need an approval from a libgcc
maintainer (or some hint how to do this better :-).

I have attached the libgcc-specific part of the patch.

OK for trunk?

Regards

	Thomas

2016-11-27  Thomas Koenig  <tkoenig@gcc.gnu.org>

         PR fortran/78379
         * config/i386/cpuinfo.c:  Move enums for processor vendors,
         processor type, processor subtypes and declaration of
         struct __processor_model into
         * config/i386/cpuinfo.h:  New header file.
         * Makefile.am:  Add dependence of m4/matmul_internal.m4 to
         matmul files.
         * Makefile.in:  Regenerated.
         * acinclude.m4:  Check for AVX, AVX2 and AVX512F.
         * config.h.in:  Add HAVE_AVX, HAVE_AVX2 and HAVE_AVX512F.
         * configure:  Regenerated.
         * configure.ac:  Use checks for AVX, AVX2 and AVX512F.
         * m4/matmul_internal.m4:  New file. working part of matmul.m4.
         * m4/matmul.m4:  Implement architecture-specific switching
         for AVX, AVX2 and AVX512F by including matmul_internal.m4
         multiple times.
         * generated/matmul_c10.c: Regenerated.
         * generated/matmul_c16.c: Regenerated.
         * generated/matmul_c4.c: Regenerated.
         * generated/matmul_c8.c: Regenerated.
         * generated/matmul_i1.c: Regenerated.
         * generated/matmul_i16.c: Regenerated.
         * generated/matmul_i2.c: Regenerated.
         * generated/matmul_i4.c: Regenerated.
         * generated/matmul_i8.c: Regenerated.
         * generated/matmul_r10.c: Regenerated.
         * generated/matmul_r16.c: Regenerated.
         * generated/matmul_r4.c: Regenerated.
         * generated/matmul_r8.c: Regenerated.

Comments

Markus Trippelsdorf Dec. 5, 2016, 3:55 p.m. UTC | #1
On 2016.11.30 at 08:17 +0100, Thomas Koenig wrote:
> Hello world,
> 
> the patch at https://gcc.gnu.org/ml/fortran/2016-11/msg00246.html
> (the one going to gcc-patches was rejected due to size of
> regenerated files) contains one libgcc change, which exposes
> the __cpu_model interface for i386 to libgfortran.
> 
> The Fortran bits are OKd, but I need an approval from a libgcc
> maintainer (or some hint how to do this better :-).
> 
> I have attached the libgcc-specific part of the patch.

FYI this gives nice additional speedups for 178.galgel:
http://gcc.opensuse.org/SPEC/CFP/sb-czerny-head-64/178_galgel_recent_big.png
diff mbox

Patch

Index: config/i386/cpuinfo.c
===================================================================
--- config/i386/cpuinfo.c	(Revision 242477)
+++ config/i386/cpuinfo.c	(Arbeitskopie)
@@ -26,6 +26,7 @@  see the files COPYING3 and COPYING.RUNTIME respect
 #include "cpuid.h"
 #include "tsystem.h"
 #include "auto-target.h"
+#include "cpuinfo.h"
 
 #ifdef HAVE_INIT_PRIORITY
 #define CONSTRUCTOR_PRIORITY (101)
@@ -36,97 +37,9 @@  see the files COPYING3 and COPYING.RUNTIME respect
 int __cpu_indicator_init (void)
   __attribute__ ((constructor CONSTRUCTOR_PRIORITY));
 
-/* Processor Vendor and Models. */
+struct __processor_model __cpu_model = { };
 
-enum processor_vendor
-{
-  VENDOR_INTEL = 1,
-  VENDOR_AMD,
-  VENDOR_OTHER,
-  VENDOR_MAX
-};
 
-/* Any new types or subtypes have to be inserted at the end. */
-
-enum processor_types
-{
-  INTEL_BONNELL = 1,
-  INTEL_CORE2,
-  INTEL_COREI7,
-  AMDFAM10H,
-  AMDFAM15H,
-  INTEL_SILVERMONT,
-  INTEL_KNL,
-  AMD_BTVER1,
-  AMD_BTVER2,  
-  AMDFAM17H,
-  CPU_TYPE_MAX
-};
-
-enum processor_subtypes
-{
-  INTEL_COREI7_NEHALEM = 1,
-  INTEL_COREI7_WESTMERE,
-  INTEL_COREI7_SANDYBRIDGE,
-  AMDFAM10H_BARCELONA,
-  AMDFAM10H_SHANGHAI,
-  AMDFAM10H_ISTANBUL,
-  AMDFAM15H_BDVER1,
-  AMDFAM15H_BDVER2,
-  AMDFAM15H_BDVER3,
-  AMDFAM15H_BDVER4,
-  AMDFAM17H_ZNVER1,
-  INTEL_COREI7_IVYBRIDGE,
-  INTEL_COREI7_HASWELL,
-  INTEL_COREI7_BROADWELL,
-  INTEL_COREI7_SKYLAKE,
-  INTEL_COREI7_SKYLAKE_AVX512,
-  CPU_SUBTYPE_MAX
-};
-
-/* ISA Features supported. New features have to be inserted at the end.  */
-
-enum processor_features
-{
-  FEATURE_CMOV = 0,
-  FEATURE_MMX,
-  FEATURE_POPCNT,
-  FEATURE_SSE,
-  FEATURE_SSE2,
-  FEATURE_SSE3,
-  FEATURE_SSSE3,
-  FEATURE_SSE4_1,
-  FEATURE_SSE4_2,
-  FEATURE_AVX,
-  FEATURE_AVX2,
-  FEATURE_SSE4_A,
-  FEATURE_FMA4,
-  FEATURE_XOP,
-  FEATURE_FMA,
-  FEATURE_AVX512F,
-  FEATURE_BMI,
-  FEATURE_BMI2,
-  FEATURE_AES,
-  FEATURE_PCLMUL,
-  FEATURE_AVX512VL,
-  FEATURE_AVX512BW,
-  FEATURE_AVX512DQ,
-  FEATURE_AVX512CD,
-  FEATURE_AVX512ER,
-  FEATURE_AVX512PF,
-  FEATURE_AVX512VBMI,
-  FEATURE_AVX512IFMA
-};
-
-struct __processor_model
-{
-  unsigned int __cpu_vendor;
-  unsigned int __cpu_type;
-  unsigned int __cpu_subtype;
-  unsigned int __cpu_features[1];
-} __cpu_model = { };
-
-
 /* Get the specific type of AMD CPU.  */
 
 static void
Index: config/i386/cpuinfo.h
===================================================================
--- config/i386/cpuinfo.h	(Revision 0)
+++ config/i386/cpuinfo.h	(Arbeitskopie)
@@ -0,0 +1,90 @@ 
+
+/* Processor Vendor and Models. */
+
+enum processor_vendor
+{
+  VENDOR_INTEL = 1,
+  VENDOR_AMD,
+  VENDOR_OTHER,
+  VENDOR_MAX
+};
+
+/* Any new types or subtypes have to be inserted at the end. */
+
+enum processor_types
+{
+  INTEL_BONNELL = 1,
+  INTEL_CORE2,
+  INTEL_COREI7,
+  AMDFAM10H,
+  AMDFAM15H,
+  INTEL_SILVERMONT,
+  INTEL_KNL,
+  AMD_BTVER1,
+  AMD_BTVER2,  
+  AMDFAM17H,
+  CPU_TYPE_MAX
+};
+
+enum processor_subtypes
+{
+  INTEL_COREI7_NEHALEM = 1,
+  INTEL_COREI7_WESTMERE,
+  INTEL_COREI7_SANDYBRIDGE,
+  AMDFAM10H_BARCELONA,
+  AMDFAM10H_SHANGHAI,
+  AMDFAM10H_ISTANBUL,
+  AMDFAM15H_BDVER1,
+  AMDFAM15H_BDVER2,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  AMDFAM17H_ZNVER1,
+  INTEL_COREI7_IVYBRIDGE,
+  INTEL_COREI7_HASWELL,
+  INTEL_COREI7_BROADWELL,
+  INTEL_COREI7_SKYLAKE,
+  INTEL_COREI7_SKYLAKE_AVX512,
+  CPU_SUBTYPE_MAX
+};
+
+/* ISA Features supported. New features have to be inserted at the end.  */
+
+enum processor_features
+{
+  FEATURE_CMOV = 0,
+  FEATURE_MMX,
+  FEATURE_POPCNT,
+  FEATURE_SSE,
+  FEATURE_SSE2,
+  FEATURE_SSE3,
+  FEATURE_SSSE3,
+  FEATURE_SSE4_1,
+  FEATURE_SSE4_2,
+  FEATURE_AVX,
+  FEATURE_AVX2,
+  FEATURE_SSE4_A,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
+  FEATURE_AVX512F,
+  FEATURE_BMI,
+  FEATURE_BMI2,
+  FEATURE_AES,
+  FEATURE_PCLMUL,
+  FEATURE_AVX512VL,
+  FEATURE_AVX512BW,
+  FEATURE_AVX512DQ,
+  FEATURE_AVX512CD,
+  FEATURE_AVX512ER,
+  FEATURE_AVX512PF,
+  FEATURE_AVX512VBMI,
+  FEATURE_AVX512IFMA
+};
+
+extern struct __processor_model
+{
+  unsigned int __cpu_vendor;
+  unsigned int __cpu_type;
+  unsigned int __cpu_subtype;
+  unsigned int __cpu_features[1];
+} __cpu_model;