diff mbox

[i386] PR 59422 - Support more targets for function multi versioning

Message ID EB4625145972F94C9680D8CADD6516155E7998F0@satlexdag03.amd.com
State New
Headers show

Commit Message

Gopalasubramanian, Ganesh Dec. 19, 2013, 10:13 a.m. UTC
> Sorry, I must have been looking at an older version, but as I said I already did enable it in the latest patch. (see http://gcc.gnu.org/ml/gcc-patches/2013-12/msg01577.html )

Sorry for causing another revision but we would like to stick with "btver1" and "btver2" rather than "BOBCAT" or "JAGUAR".
Therefore the changes would be like

Regards
Ganesh

Comments

Allan Sandfeld Jensen Dec. 19, 2013, 10:20 a.m. UTC | #1
On Thursday 19 December 2013, Gopalasubramanian, Ganesh wrote:
> > Sorry, I must have been looking at an older version, but as I said I
> > already did enable it in the latest patch. (see
> > http://gcc.gnu.org/ml/gcc-patches/2013-12/msg01577.html )
> 
> Sorry for causing another revision but we would like to stick with "btver1"
> and "btver2" rather than "BOBCAT" or "JAGUAR". Therefore the changes would
> be like
> 
I will need to make an updated patch to move the new ISAs to the end of the 
list anyway. I will send it in a few days to give AMD or Intel developers time 
to comment on the current version.

`Allan
Jakub Jelinek Dec. 19, 2013, 10:33 a.m. UTC | #2
Hi!

On Thu, Dec 19, 2013 at 10:13:17AM +0000, Gopalasubramanian, Ganesh wrote:
> @@ -30044,25 +30053,49 @@
>  	      break;
>              case PROCESSOR_COREI7_AVX:
>                arg_str = "corei7-avx";
> -              priority = P_PROC_SSE4_2;
> +              priority = P_PROC_AVX;
>                break;
> +            case PROCESSOR_HASWELL:
> +              arg_str = "core-avx2";
> +              priority = P_PROC_AVX2;
> +              break;

...
Please check formatting, in lots of places you are using spaces instead
of tabs.

> @@ -30922,9 +30955,13 @@
>      F_SSE2,
>      F_SSE3,
>      F_SSSE3,
> +    F_SSE4_a,
>      F_SSE4_1,
>      F_SSE4_2,
>      F_AVX,
> +    F_FMA4,
> +    F_XOP,
> +    F_FMA,
>      F_AVX2,
>      F_MAX
>    };

I think the F_* constants, like the differences of M_* constants
from the preceding M_*_START value, are part of the ABI, so you can't
reorder them.  Also, if the enum names are all capital letters, using
F_SSE4_a instead of F_SSE4_A looks inconsistent.

> @@ -30943,6 +30980,10 @@
>      M_AMDFAM10H,
>      M_AMDFAM15H,
>      M_INTEL_SLM,
> +    M_INTEL_COREI7_AVX,
> +    M_INTEL_CORE_AVX2,
> +    M_AMD_BTVER1,
> +    M_AMD_BTVER2,
>      M_CPU_SUBTYPE_START,
>      M_INTEL_COREI7_NEHALEM,
>      M_INTEL_COREI7_WESTMERE,
> @@ -30953,7 +30994,9 @@
>      M_AMDFAM15H_BDVER1,
>      M_AMDFAM15H_BDVER2,
>      M_AMDFAM15H_BDVER3,
> -    M_AMDFAM15H_BDVER4
> +    M_AMDFAM15H_BDVER4,
> +    M_INTEL_COREI7_IVYBRIDGE,
> +    M_INTEL_CORE_HASWELL
>    };
>  
>    static struct _arch_names_table
> @@ -89,9 +97,13 @@
>    FEATURE_SSE2,
>    FEATURE_SSE3,
>    FEATURE_SSSE3,
> +  FEATURE_SSE4_a,
>    FEATURE_SSE4_1,
>    FEATURE_SSE4_2,
>    FEATURE_AVX,
> +  FEATURE_FMA4,
> +  FEATURE_XOP,
> +  FEATURE_FMA,
>    FEATURE_AVX2
>  };
>  

Ditto here.

	Jakub
diff mbox

Patch

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 206065)
+++ gcc/config/i386/i386.c	(working copy)
@@ -29965,9 +29965,14 @@ 
     P_PROC_SSE4_2,
     P_POPCNT,
     P_AVX,
+    P_PROC_AVX,
+    P_FMA4,
+    P_XOP,
+    P_PROC_XOP,
+    P_FMA,    
+    P_PROC_FMA,
     P_AVX2,
-    P_FMA,
-    P_PROC_FMA
+    P_PROC_AVX2
   };
 
  enum feature_priority priority = P_ZERO;
@@ -29986,11 +29991,15 @@ 
       {"sse", P_SSE},
       {"sse2", P_SSE2},
       {"sse3", P_SSE3},
+      {"sse4a", P_SSE4_a},
       {"ssse3", P_SSSE3},
       {"sse4.1", P_SSE4_1},
       {"sse4.2", P_SSE4_2},
       {"popcnt", P_POPCNT},
       {"avx", P_AVX},
+      {"fma4", P_FMA4},
+      {"xop", P_XOP},
+      {"fma", P_FMA},
       {"avx2", P_AVX2}
     };
 
@@ -30044,25 +30053,49 @@ 
 	      break;
             case PROCESSOR_COREI7_AVX:
               arg_str = "corei7-avx";
-              priority = P_PROC_SSE4_2;
+              priority = P_PROC_AVX;
               break;
+            case PROCESSOR_HASWELL:
+              arg_str = "core-avx2";
+              priority = P_PROC_AVX2;
+              break;
 	    case PROCESSOR_ATOM:
 	      arg_str = "atom";
 	      priority = P_PROC_SSSE3;
 	      break;
+            case PROCESSOR_SLM:
+              arg_str = "slm";
+              priority = P_PROC_SSE4_2;
+              break;
 	    case PROCESSOR_AMDFAM10:
 	      arg_str = "amdfam10h";
 	      priority = P_PROC_SSE4_a;
 	      break;
+            case PROCESSOR_BTVER1:
+              arg_str = "btver1";
+              priority = P_PROC_SSE4_a;
+              break;
+            case PROCESSOR_BTVER2:
+              arg_str = "btver2";
+              priority = P_PROC_AVX;
+              break;
 	    case PROCESSOR_BDVER1:
 	      arg_str = "bdver1";
-	      priority = P_PROC_FMA;
+	      priority = P_PROC_XOP;
 	      break;
 	    case PROCESSOR_BDVER2:
 	      arg_str = "bdver2";
 	      priority = P_PROC_FMA;
 	      break;
-	    }  
+            case PROCESSOR_BDVER3:
+              arg_str = "bdver3";
+              priority = P_PROC_FMA;
+              break;
+            case PROCESSOR_BDVER4:
+              arg_str = "bdver4";
+              priority = P_PROC_AVX2;
+              break;
+            }  
 	}    
     
       cl_target_option_restore (&global_options, &cur_target);
@@ -30922,9 +30955,13 @@ 
     F_SSE2,
     F_SSE3,
     F_SSSE3,
+    F_SSE4_a,
     F_SSE4_1,
     F_SSE4_2,
     F_AVX,
+    F_FMA4,
+    F_XOP,
+    F_FMA,
     F_AVX2,
     F_MAX
   };
@@ -30943,6 +30980,10 @@ 
     M_AMDFAM10H,
     M_AMDFAM15H,
     M_INTEL_SLM,
+    M_INTEL_COREI7_AVX,
+    M_INTEL_CORE_AVX2,
+    M_AMD_BTVER1,
+    M_AMD_BTVER2,
     M_CPU_SUBTYPE_START,
     M_INTEL_COREI7_NEHALEM,
     M_INTEL_COREI7_WESTMERE,
@@ -30953,7 +30994,9 @@ 
     M_AMDFAM15H_BDVER1,
     M_AMDFAM15H_BDVER2,
     M_AMDFAM15H_BDVER3,
-    M_AMDFAM15H_BDVER4
+    M_AMDFAM15H_BDVER4,
+    M_INTEL_COREI7_IVYBRIDGE,
+    M_INTEL_CORE_HASWELL
   };
 
   static struct _arch_names_table
@@ -30971,11 +31014,17 @@ 
       {"corei7", M_INTEL_COREI7},
       {"nehalem", M_INTEL_COREI7_NEHALEM},
       {"westmere", M_INTEL_COREI7_WESTMERE},
+      {"corei7-avx", M_INTEL_COREI7_AVX},
       {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
+      {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
+      {"core-avx2", M_INTEL_CORE_AVX2},
+      {"haswell", M_INTEL_CORE_HASWELL},
       {"amdfam10h", M_AMDFAM10H},
       {"barcelona", M_AMDFAM10H_BARCELONA},
       {"shanghai", M_AMDFAM10H_SHANGHAI},
       {"istanbul", M_AMDFAM10H_ISTANBUL},
+      {"btver1", M_AMD_BTVER1},
+      {"btver2", M_AMD_BTVER2},
       {"amdfam15h", M_AMDFAM15H},
       {"bdver1", M_AMDFAM15H_BDVER1},
       {"bdver2", M_AMDFAM15H_BDVER2},
@@ -30997,9 +31046,13 @@ 
       {"sse2",   F_SSE2},
       {"sse3",   F_SSE3},
       {"ssse3",  F_SSSE3},
+      {"sse4a",  F_SSE4_a},
       {"sse4.1", F_SSE4_1},
       {"sse4.2", F_SSE4_2},
       {"avx",    F_AVX},
+      {"fma4",   F_FMA4},
+      {"xop",    F_XOP},
+      {"fma",    F_FMA},
       {"avx2",   F_AVX2}
     };

Index: libgcc/config/i386/cpuinfo.c
===================================================================
--- libgcc/config/i386/cpuinfo.c	(revision 206065)
+++ libgcc/config/i386/cpuinfo.c	(working copy)
@@ -62,6 +62,10 @@ 
   AMDFAM10H,
   AMDFAM15H,
   INTEL_SLM,
+  INTEL_COREI7_AVX,
+  INTEL_CORE_AVX2,
+  AMD_BTVER1,
+  AMD_BTVER2,
   CPU_TYPE_MAX
 };
 
@@ -75,6 +79,10 @@ 
   AMDFAM10H_ISTANBUL,
   AMDFAM15H_BDVER1,
   AMDFAM15H_BDVER2,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  INTEL_COREI7_IVYBRIDGE,
+  INTEL_CORE_HASWELL,
   CPU_SUBTYPE_MAX
 };
 
@@ -89,9 +97,13 @@ 
   FEATURE_SSE2,
   FEATURE_SSE3,
   FEATURE_SSSE3,
+  FEATURE_SSE4_a,
   FEATURE_SSE4_1,
   FEATURE_SSE4_2,
   FEATURE_AVX,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
   FEATURE_AVX2
 };
 
@@ -113,37 +125,46 @@ 
     {
     /* AMD Family 10h.  */
     case 0x10:
+      __cpu_model.__cpu_type = AMDFAM10H;
       switch (model)
 	{
 	case 0x2:
 	  /* Barcelona.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA;
 	  break;
 	case 0x4:
 	  /* Shanghai.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI;
 	  break;
 	case 0x8:
 	  /* Istanbul.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL;
 	  break;
 	default:
 	  break;
 	}
       break;
-    /* AMD Family 15h.  */
+    /* AMD Family 14h "btver1". */
+    case 0x14:
+      __cpu_model.__cpu_type = AMD_BTVER1;
+      break;
+    /* AMD Family 15h "Bulldozer".  */
     case 0x15:
       __cpu_model.__cpu_type = AMDFAM15H;
       /* Bulldozer version 1.  */
       if ( model <= 0xf)
 	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER1;
-      /* Bulldozer version 2.  */
-      if (model >= 0x10 && model <= 0x1f)
-	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
+      /* Bulldozer version 2 "Piledriver" */
+      if (model >= 0x10 && model <= 0x2f)
+	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;      
+      /* Bulldozer version 3 "Steamroller"  */
+      if (model >= 0x30 && model <= 0x4f)
+        __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
       break;
+    /* AMD Family 16h "btver2". */
+    case 0x16:
+      __cpu_model.__cpu_type = AMD_BTVER2;
+      break;
     default:
       break;
     }
@@ -193,9 +214,21 @@ 
 	    case 0x2a:
 	    case 0x2d:
 	      /* Sandy Bridge.  */
-	      __cpu_model.__cpu_type = INTEL_COREI7;
+	      __cpu_model.__cpu_type = INTEL_COREI7_AVX;
 	      __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
 	      break;
+            case 0x3a:
+            case 0x3e:
+              /* Ivy Bridge.  */
+              __cpu_model.__cpu_type = INTEL_COREI7_AVX;
+              __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
+            case 0x3c:
+            case 0x3f:
+            case 0x45:
+            case 0x46:
+              /* Haswell.  */
+              __cpu_model.__cpu_type = INTEL_CORE_AVX2;
+              __cpu_model.__cpu_subtype = INTEL_CORE_HASWELL;
 	    case 0x17:
 	    case 0x1d:
 	      /* Penryn.  */
@@ -242,6 +275,8 @@ 
     features |= (1 << FEATURE_SSE4_2);
   if (ecx & bit_AVX)
     features |= (1 << FEATURE_AVX);
+  if (ecx & bit_FMA)
+    features |= (1 << FEATURE_FMA);
 
   /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
   if (max_cpuid_level >= 7)
@@ -252,6 +287,23 @@ 
 	features |= (1 << FEATURE_AVX2);
     }
 
+  unsigned int ext_level;
+  unsigned int eax, ebx;
+  /* Check cpuid level of extended features.  */
+  __cpuid (0x80000000, ext_level, ebx, ecx, edx);
+
+  if (ext_level > 0x80000000)
+    {
+      __cpuid (0x80000001, eax, ebx, ecx, edx);
+
+      if (ecx & bit_SSE4a)
+        features |= (1 << FEATURE_SSE4_a);
+      if (ecx & bit_FMA4)
+        features |= (1 << FEATURE_FMA4);
+      if (ecx & bit_XOP)
+        features |= (1 << FEATURE_XOP);
+    }
+    
   __cpu_model.__cpu_features[0] = features;
 }