Patchwork 0002-Fix-32-bit-alignment for Core i7

login
register
mail settings
Submitter Maxim Kuvyrkov
Date Oct. 19, 2010, 11:52 a.m.
Message ID <4CBD8680.5010302@codesourcery.com>
Download mbox | patch
Permalink /patch/68326/
State New
Headers show

Comments

Maxim Kuvyrkov - Oct. 19, 2010, 11:52 a.m.
This patch adjusts alignment for Core i7 32-bit ABI.  This speeds up 
SPECfp by 0.4% and SPECint by 0.2% in 32-bit mode.

Tested by bootstrapping on i686-pc-linux-gnu.

OK to commit?

Thank you,
Richard Henderson - Oct. 19, 2010, 5:21 p.m.
> +  PROCESSOR_COREI7_32,
> +  PROCESSOR_COREI7_64,

You had different tunings for 32 and 64-bit to come, right?
You've done it this way instead of 

  TARGET_COREI7 && TARGET_32BIT

because of the indicies into processor_target_table?

> -#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
> -#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
> +#define m_GENERIC32 (1<<PROCESSOR_GENERIC32 | m_COREI7_32)
> +#define m_GENERIC64 (1<<PROCESSOR_GENERIC64 | m_COREI7_64)

This bit I don't understand.  Why does GENERIC include COREI7?


r~
Jan Hubicka - Oct. 20, 2010, 2:17 a.m.
> This patch adjusts alignment for Core i7 32-bit ABI.  This speeds up  
> SPECfp by 0.4% and SPECint by 0.2% in 32-bit mode.
>
> Tested by bootstrapping on i686-pc-linux-gnu.
>
> OK to commit?
>
> Thank you,
>
> -- 
> Maxim Kuvyrkov
> CodeSourcery
> maxim@codesourcery.com
> (650) 331-3385 x724

> 2010-10-19  Bernd Schmidt  <bernds@codesourcery.com>
> 	    Maxim Kuvyrkov  <maxim@codesourcery.com>
> 
> 	Tune alignment for Intel Core i7
> 
> 	* config/i386.h (TARGET_COREI7{_32,_64,}): New macros.
> 	(enum processor_type): Update comment.  Add entries for Core i7.
> 	* config/i386-c.c (ix86_target_macros_internal): Update.
> 	* config/i386.c (m_COREI7{_32,_64}): New macros.
> 	(m_GENERIC32, m_GENERIC64): Use generic tuning for Core i7.
> 	(processor_target_table): Tune alignment for Core i7.
> 
> 	* doc/invoke.texi: Document "corei7" option value.

I am bit confused with the split of patches. I would expect the __corei7__
define and manual to come in the previous patch..  The alignment changes are
OK.

I would like to see the naming issue Andi pointed out discussed a bit.
Traditionally naming x86 CPUs is a major mess.  Otherwise the first patch
seems sane, too.

Thanks for breaking this up!
Honza


> >From f41da54a0f33975b4a08acb40f797328bb0aa443 Mon Sep 17 00:00:00 2001
> From: Maxim Kuvyrkov <maxim@codesourcery.com>
> Date: Tue, 19 Oct 2010 04:28:13 -0700
> Subject: [PATCH] Fix 32-bit alignment
> 
> ---
>  gcc/config/i386/i386-c.c |    9 +++++++++
>  gcc/config/i386/i386.c   |   10 ++++++++--
>  gcc/config/i386/i386.h   |    8 ++++++--
>  gcc/doc/invoke.texi      |    3 +++
>  4 files changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> index 1846efb..3b4409e 100644
> --- a/gcc/config/i386/i386-c.c
> +++ b/gcc/config/i386/i386-c.c
> @@ -122,6 +122,11 @@ ix86_target_macros_internal (int isa_flag,
>        def_or_undef (parse_in, "__core2");
>        def_or_undef (parse_in, "__core2__");
>        break;
> +    case PROCESSOR_COREI7_32:
> +    case PROCESSOR_COREI7_64:
> +      def_or_undef (parse_in, "__corei7");
> +      def_or_undef (parse_in, "__corei7__");
> +      break;
>      case PROCESSOR_ATOM:
>        def_or_undef (parse_in, "__atom");
>        def_or_undef (parse_in, "__atom__");
> @@ -197,6 +202,10 @@ ix86_target_macros_internal (int isa_flag,
>      case PROCESSOR_CORE2:
>        def_or_undef (parse_in, "__tune_core2__");
>        break;
> +    case PROCESSOR_COREI7_32:
> +    case PROCESSOR_COREI7_64:
> +      def_or_undef (parse_in, "__tune_corei7__");
> +      break;
>      case PROCESSOR_ATOM:
>        def_or_undef (parse_in, "__tune_atom__");
>        break;
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index d04c20e..00d37a1 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -1355,6 +1355,8 @@ const struct processor_costs *ix86_cost = &pentium_cost;
>  #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
>  #define m_NOCONA  (1<<PROCESSOR_NOCONA)
>  #define m_CORE2  (1<<PROCESSOR_CORE2)
> +#define m_COREI7_32  (1<<PROCESSOR_COREI7_32)
> +#define m_COREI7_64  (1<<PROCESSOR_COREI7_64)
>  #define m_ATOM  (1<<PROCESSOR_ATOM)
>  
>  #define m_GEODE  (1<<PROCESSOR_GEODE)
> @@ -1367,8 +1369,8 @@ const struct processor_costs *ix86_cost = &pentium_cost;
>  #define m_BDVER1  (1<<PROCESSOR_BDVER1)
>  #define m_AMD_MULTIPLE  (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
>  
> -#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
> -#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
> +#define m_GENERIC32 (1<<PROCESSOR_GENERIC32 | m_COREI7_32)
> +#define m_GENERIC64 (1<<PROCESSOR_GENERIC64 | m_COREI7_64)
>  
>  /* Generic instruction choice should be common subset of supported CPUs
>     (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
> @@ -2173,6 +2175,10 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
>    {&k8_cost, 16, 7, 16, 7, 16},
>    {&nocona_cost, 0, 0, 0, 0, 0},
>    {&core2_cost, 16, 10, 16, 10, 16},
> +  /* Core i7 32-bit.  */
> +  {&generic32_cost, 16, 10, 16, 10, 16},
> +  /* Core i7 64-bit.  */
> +  {&generic64_cost, 16, 10, 16, 10, 16},
>    {&generic32_cost, 16, 7, 16, 7, 16},
>    {&generic64_cost, 16, 10, 16, 10, 16},
>    {&amdfam10_cost, 32, 24, 32, 7, 32},
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 62f35ca..e0c7260 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -239,6 +239,9 @@ extern const struct processor_costs ix86_size_cost;
>  #define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
>  #define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
>  #define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2)
> +#define TARGET_COREI7_32 (ix86_tune == PROCESSOR_COREI7_32)
> +#define TARGET_COREI7_64 (ix86_tune == PROCESSOR_COREI7_64)
> +#define TARGET_COREI7 (TARGET_COREI7_32 || TARGET_COREI7_64)
>  #define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
>  #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
>  #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
> @@ -2057,8 +2060,7 @@ do {									\
>  	"call " CRT_MKSTR(__USER_LABEL_PREFIX__) #FUNC "\n"	\
>  	TEXT_SECTION_ASM_OP);
>  
> -/* Which processor to schedule for. The cpu attribute defines a list that
> -   mirrors this list, so changes to i386.md must be made at the same time.  */
> +/* Which processor to tune code generation for.  */
>  
>  enum processor_type
>  {
> @@ -2073,6 +2075,8 @@ enum processor_type
>    PROCESSOR_K8,
>    PROCESSOR_NOCONA,
>    PROCESSOR_CORE2,
> +  PROCESSOR_COREI7_32,
> +  PROCESSOR_COREI7_64,
>    PROCESSOR_GENERIC32,
>    PROCESSOR_GENERIC64,
>    PROCESSOR_AMDFAM10,
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 776fdd0..24a8e87 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -12051,6 +12051,9 @@ SSE2 and SSE3 instruction set support.
>  @item core2
>  Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3
>  instruction set support.
> +@item corei7
> +Intel Core i7 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1
> +and SSE4.2 instruction set support.
>  @item atom
>  Intel Atom CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3
>  instruction set support.
> -- 
> 1.6.2.4
>
H.J. Lu - Oct. 20, 2010, 3:28 a.m.
On Tue, Oct 19, 2010 at 7:17 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> This patch adjusts alignment for Core i7 32-bit ABI.  This speeds up
>> SPECfp by 0.4% and SPECint by 0.2% in 32-bit mode.
>>
>> Tested by bootstrapping on i686-pc-linux-gnu.
>>
>> OK to commit?
>>
>> Thank you,
>>
>> --
>> Maxim Kuvyrkov
>> CodeSourcery
>> maxim@codesourcery.com
>> (650) 331-3385 x724
>
>> 2010-10-19  Bernd Schmidt  <bernds@codesourcery.com>
>>           Maxim Kuvyrkov  <maxim@codesourcery.com>
>>
>>       Tune alignment for Intel Core i7
>>
>>       * config/i386.h (TARGET_COREI7{_32,_64,}): New macros.
>>       (enum processor_type): Update comment.  Add entries for Core i7.
>>       * config/i386-c.c (ix86_target_macros_internal): Update.
>>       * config/i386.c (m_COREI7{_32,_64}): New macros.
>>       (m_GENERIC32, m_GENERIC64): Use generic tuning for Core i7.
>>       (processor_target_table): Tune alignment for Core i7.
>>
>>       * doc/invoke.texi: Document "corei7" option value.
>
> I am bit confused with the split of patches. I would expect the __corei7__
> define and manual to come in the previous patch..  The alignment changes are
> OK.
>
> I would like to see the naming issue Andi pointed out discussed a bit.
> Traditionally naming x86 CPUs is a major mess.  Otherwise the first patch
> seems sane, too.
>

There is no good solution for this. -mtune=corei7 is as good as
we can get.
Maxim Kuvyrkov - Oct. 20, 2010, 8:24 p.m.
On 10/20/10 6:17 AM, Jan Hubicka wrote:

>> 	* doc/invoke.texi: Document "corei7" option value.
>
> I am bit confused with the split of patches. I would expect the __corei7__
> define and manual to come in the previous patch..  The alignment changes are
> OK.

That is because the documentation was initially in a different patch.  I 
will check in the first few approved patches one after the other, so the 
artifacts of patch splitting will not be noticeable to a naked eye.

>
> I would like to see the naming issue Andi pointed out discussed a bit.
> Traditionally naming x86 CPUs is a major mess.  Otherwise the first patch
> seems sane, too.

Though I don't have a strong preference to one naming scheme other the 
other, differentiation provided by -mtune=core2 and -mtune=corei7 seems 
just enough.  As H.J. mentioned -mtune=native provides an average user a 
way to tune to a specific CPU revision including cache size, etc.  More 
sophisticated users can use the params to specify such low-level tuning 
details.

Thanks,

Patch

diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 1846efb..3b4409e 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -122,6 +122,11 @@  ix86_target_macros_internal (int isa_flag,
       def_or_undef (parse_in, "__core2");
       def_or_undef (parse_in, "__core2__");
       break;
+    case PROCESSOR_COREI7_32:
+    case PROCESSOR_COREI7_64:
+      def_or_undef (parse_in, "__corei7");
+      def_or_undef (parse_in, "__corei7__");
+      break;
     case PROCESSOR_ATOM:
       def_or_undef (parse_in, "__atom");
       def_or_undef (parse_in, "__atom__");
@@ -197,6 +202,10 @@  ix86_target_macros_internal (int isa_flag,
     case PROCESSOR_CORE2:
       def_or_undef (parse_in, "__tune_core2__");
       break;
+    case PROCESSOR_COREI7_32:
+    case PROCESSOR_COREI7_64:
+      def_or_undef (parse_in, "__tune_corei7__");
+      break;
     case PROCESSOR_ATOM:
       def_or_undef (parse_in, "__tune_atom__");
       break;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d04c20e..00d37a1 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1355,6 +1355,8 @@  const struct processor_costs *ix86_cost = &pentium_cost;
 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
 #define m_CORE2  (1<<PROCESSOR_CORE2)
+#define m_COREI7_32  (1<<PROCESSOR_COREI7_32)
+#define m_COREI7_64  (1<<PROCESSOR_COREI7_64)
 #define m_ATOM  (1<<PROCESSOR_ATOM)
 
 #define m_GEODE  (1<<PROCESSOR_GEODE)
@@ -1367,8 +1369,8 @@  const struct processor_costs *ix86_cost = &pentium_cost;
 #define m_BDVER1  (1<<PROCESSOR_BDVER1)
 #define m_AMD_MULTIPLE  (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
 
-#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
-#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
+#define m_GENERIC32 (1<<PROCESSOR_GENERIC32 | m_COREI7_32)
+#define m_GENERIC64 (1<<PROCESSOR_GENERIC64 | m_COREI7_64)
 
 /* Generic instruction choice should be common subset of supported CPUs
    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
@@ -2173,6 +2175,10 @@  static const struct ptt processor_target_table[PROCESSOR_max] =
   {&k8_cost, 16, 7, 16, 7, 16},
   {&nocona_cost, 0, 0, 0, 0, 0},
   {&core2_cost, 16, 10, 16, 10, 16},
+  /* Core i7 32-bit.  */
+  {&generic32_cost, 16, 10, 16, 10, 16},
+  /* Core i7 64-bit.  */
+  {&generic64_cost, 16, 10, 16, 10, 16},
   {&generic32_cost, 16, 7, 16, 7, 16},
   {&generic64_cost, 16, 10, 16, 10, 16},
   {&amdfam10_cost, 32, 24, 32, 7, 32},
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 62f35ca..e0c7260 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -239,6 +239,9 @@  extern const struct processor_costs ix86_size_cost;
 #define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
 #define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
 #define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2)
+#define TARGET_COREI7_32 (ix86_tune == PROCESSOR_COREI7_32)
+#define TARGET_COREI7_64 (ix86_tune == PROCESSOR_COREI7_64)
+#define TARGET_COREI7 (TARGET_COREI7_32 || TARGET_COREI7_64)
 #define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
 #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
 #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
@@ -2057,8 +2060,7 @@  do {									\
 	"call " CRT_MKSTR(__USER_LABEL_PREFIX__) #FUNC "\n"	\
 	TEXT_SECTION_ASM_OP);
 
-/* Which processor to schedule for. The cpu attribute defines a list that
-   mirrors this list, so changes to i386.md must be made at the same time.  */
+/* Which processor to tune code generation for.  */
 
 enum processor_type
 {
@@ -2073,6 +2075,8 @@  enum processor_type
   PROCESSOR_K8,
   PROCESSOR_NOCONA,
   PROCESSOR_CORE2,
+  PROCESSOR_COREI7_32,
+  PROCESSOR_COREI7_64,
   PROCESSOR_GENERIC32,
   PROCESSOR_GENERIC64,
   PROCESSOR_AMDFAM10,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 776fdd0..24a8e87 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12051,6 +12051,9 @@  SSE2 and SSE3 instruction set support.
 @item core2
 Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3
 instruction set support.
+@item corei7
+Intel Core i7 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1
+and SSE4.2 instruction set support.
 @item atom
 Intel Atom CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3
 instruction set support.