Message ID | 4CBD8680.5010302@codesourcery.com |
---|---|
State | New |
Headers | show |
> + PROCESSOR_COREI7_32, > + PROCESSOR_COREI7_64, You had different tunings for 32 and 64-bit to come, right? You've done it this way instead of TARGET_COREI7 && TARGET_32BIT because of the indicies into processor_target_table? > -#define m_GENERIC32 (1<<PROCESSOR_GENERIC32) > -#define m_GENERIC64 (1<<PROCESSOR_GENERIC64) > +#define m_GENERIC32 (1<<PROCESSOR_GENERIC32 | m_COREI7_32) > +#define m_GENERIC64 (1<<PROCESSOR_GENERIC64 | m_COREI7_64) This bit I don't understand. Why does GENERIC include COREI7? r~
> This patch adjusts alignment for Core i7 32-bit ABI. This speeds up > SPECfp by 0.4% and SPECint by 0.2% in 32-bit mode. > > Tested by bootstrapping on i686-pc-linux-gnu. > > OK to commit? > > Thank you, > > -- > Maxim Kuvyrkov > CodeSourcery > maxim@codesourcery.com > (650) 331-3385 x724 > 2010-10-19 Bernd Schmidt <bernds@codesourcery.com> > Maxim Kuvyrkov <maxim@codesourcery.com> > > Tune alignment for Intel Core i7 > > * config/i386.h (TARGET_COREI7{_32,_64,}): New macros. > (enum processor_type): Update comment. Add entries for Core i7. > * config/i386-c.c (ix86_target_macros_internal): Update. > * config/i386.c (m_COREI7{_32,_64}): New macros. > (m_GENERIC32, m_GENERIC64): Use generic tuning for Core i7. > (processor_target_table): Tune alignment for Core i7. > > * doc/invoke.texi: Document "corei7" option value. I am bit confused with the split of patches. I would expect the __corei7__ define and manual to come in the previous patch.. The alignment changes are OK. I would like to see the naming issue Andi pointed out discussed a bit. Traditionally naming x86 CPUs is a major mess. Otherwise the first patch seems sane, too. Thanks for breaking this up! Honza > >From f41da54a0f33975b4a08acb40f797328bb0aa443 Mon Sep 17 00:00:00 2001 > From: Maxim Kuvyrkov <maxim@codesourcery.com> > Date: Tue, 19 Oct 2010 04:28:13 -0700 > Subject: [PATCH] Fix 32-bit alignment > > --- > gcc/config/i386/i386-c.c | 9 +++++++++ > gcc/config/i386/i386.c | 10 ++++++++-- > gcc/config/i386/i386.h | 8 ++++++-- > gcc/doc/invoke.texi | 3 +++ > 4 files changed, 26 insertions(+), 4 deletions(-) > > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c > index 1846efb..3b4409e 100644 > --- a/gcc/config/i386/i386-c.c > +++ b/gcc/config/i386/i386-c.c > @@ -122,6 +122,11 @@ ix86_target_macros_internal (int isa_flag, > def_or_undef (parse_in, "__core2"); > def_or_undef (parse_in, "__core2__"); > break; > + case PROCESSOR_COREI7_32: > + case PROCESSOR_COREI7_64: > + def_or_undef (parse_in, "__corei7"); > + def_or_undef (parse_in, "__corei7__"); > + break; > case PROCESSOR_ATOM: > def_or_undef (parse_in, "__atom"); > def_or_undef (parse_in, "__atom__"); > @@ -197,6 +202,10 @@ ix86_target_macros_internal (int isa_flag, > case PROCESSOR_CORE2: > def_or_undef (parse_in, "__tune_core2__"); > break; > + case PROCESSOR_COREI7_32: > + case PROCESSOR_COREI7_64: > + def_or_undef (parse_in, "__tune_corei7__"); > + break; > case PROCESSOR_ATOM: > def_or_undef (parse_in, "__tune_atom__"); > break; > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index d04c20e..00d37a1 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -1355,6 +1355,8 @@ const struct processor_costs *ix86_cost = &pentium_cost; > #define m_PENT4 (1<<PROCESSOR_PENTIUM4) > #define m_NOCONA (1<<PROCESSOR_NOCONA) > #define m_CORE2 (1<<PROCESSOR_CORE2) > +#define m_COREI7_32 (1<<PROCESSOR_COREI7_32) > +#define m_COREI7_64 (1<<PROCESSOR_COREI7_64) > #define m_ATOM (1<<PROCESSOR_ATOM) > > #define m_GEODE (1<<PROCESSOR_GEODE) > @@ -1367,8 +1369,8 @@ const struct processor_costs *ix86_cost = &pentium_cost; > #define m_BDVER1 (1<<PROCESSOR_BDVER1) > #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1) > > -#define m_GENERIC32 (1<<PROCESSOR_GENERIC32) > -#define m_GENERIC64 (1<<PROCESSOR_GENERIC64) > +#define m_GENERIC32 (1<<PROCESSOR_GENERIC32 | m_COREI7_32) > +#define m_GENERIC64 (1<<PROCESSOR_GENERIC64 | m_COREI7_64) > > /* Generic instruction choice should be common subset of supported CPUs > (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */ > @@ -2173,6 +2175,10 @@ static const struct ptt processor_target_table[PROCESSOR_max] = > {&k8_cost, 16, 7, 16, 7, 16}, > {&nocona_cost, 0, 0, 0, 0, 0}, > {&core2_cost, 16, 10, 16, 10, 16}, > + /* Core i7 32-bit. */ > + {&generic32_cost, 16, 10, 16, 10, 16}, > + /* Core i7 64-bit. */ > + {&generic64_cost, 16, 10, 16, 10, 16}, > {&generic32_cost, 16, 7, 16, 7, 16}, > {&generic64_cost, 16, 10, 16, 10, 16}, > {&amdfam10_cost, 32, 24, 32, 7, 32}, > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 62f35ca..e0c7260 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -239,6 +239,9 @@ extern const struct processor_costs ix86_size_cost; > #define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON) > #define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA) > #define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2) > +#define TARGET_COREI7_32 (ix86_tune == PROCESSOR_COREI7_32) > +#define TARGET_COREI7_64 (ix86_tune == PROCESSOR_COREI7_64) > +#define TARGET_COREI7 (TARGET_COREI7_32 || TARGET_COREI7_64) > #define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32) > #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64) > #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64) > @@ -2057,8 +2060,7 @@ do { \ > "call " CRT_MKSTR(__USER_LABEL_PREFIX__) #FUNC "\n" \ > TEXT_SECTION_ASM_OP); > > -/* Which processor to schedule for. The cpu attribute defines a list that > - mirrors this list, so changes to i386.md must be made at the same time. */ > +/* Which processor to tune code generation for. */ > > enum processor_type > { > @@ -2073,6 +2075,8 @@ enum processor_type > PROCESSOR_K8, > PROCESSOR_NOCONA, > PROCESSOR_CORE2, > + PROCESSOR_COREI7_32, > + PROCESSOR_COREI7_64, > PROCESSOR_GENERIC32, > PROCESSOR_GENERIC64, > PROCESSOR_AMDFAM10, > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 776fdd0..24a8e87 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -12051,6 +12051,9 @@ SSE2 and SSE3 instruction set support. > @item core2 > Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 > instruction set support. > +@item corei7 > +Intel Core i7 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 > +and SSE4.2 instruction set support. > @item atom > Intel Atom CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 > instruction set support. > -- > 1.6.2.4 >
On Tue, Oct 19, 2010 at 7:17 PM, Jan Hubicka <hubicka@ucw.cz> wrote: >> This patch adjusts alignment for Core i7 32-bit ABI. This speeds up >> SPECfp by 0.4% and SPECint by 0.2% in 32-bit mode. >> >> Tested by bootstrapping on i686-pc-linux-gnu. >> >> OK to commit? >> >> Thank you, >> >> -- >> Maxim Kuvyrkov >> CodeSourcery >> maxim@codesourcery.com >> (650) 331-3385 x724 > >> 2010-10-19 Bernd Schmidt <bernds@codesourcery.com> >> Maxim Kuvyrkov <maxim@codesourcery.com> >> >> Tune alignment for Intel Core i7 >> >> * config/i386.h (TARGET_COREI7{_32,_64,}): New macros. >> (enum processor_type): Update comment. Add entries for Core i7. >> * config/i386-c.c (ix86_target_macros_internal): Update. >> * config/i386.c (m_COREI7{_32,_64}): New macros. >> (m_GENERIC32, m_GENERIC64): Use generic tuning for Core i7. >> (processor_target_table): Tune alignment for Core i7. >> >> * doc/invoke.texi: Document "corei7" option value. > > I am bit confused with the split of patches. I would expect the __corei7__ > define and manual to come in the previous patch.. The alignment changes are > OK. > > I would like to see the naming issue Andi pointed out discussed a bit. > Traditionally naming x86 CPUs is a major mess. Otherwise the first patch > seems sane, too. > There is no good solution for this. -mtune=corei7 is as good as we can get.
On 10/20/10 6:17 AM, Jan Hubicka wrote: >> * doc/invoke.texi: Document "corei7" option value. > > I am bit confused with the split of patches. I would expect the __corei7__ > define and manual to come in the previous patch.. The alignment changes are > OK. That is because the documentation was initially in a different patch. I will check in the first few approved patches one after the other, so the artifacts of patch splitting will not be noticeable to a naked eye. > > I would like to see the naming issue Andi pointed out discussed a bit. > Traditionally naming x86 CPUs is a major mess. Otherwise the first patch > seems sane, too. Though I don't have a strong preference to one naming scheme other the other, differentiation provided by -mtune=core2 and -mtune=corei7 seems just enough. As H.J. mentioned -mtune=native provides an average user a way to tune to a specific CPU revision including cache size, etc. More sophisticated users can use the params to specify such low-level tuning details. Thanks,
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 1846efb..3b4409e 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -122,6 +122,11 @@ ix86_target_macros_internal (int isa_flag, def_or_undef (parse_in, "__core2"); def_or_undef (parse_in, "__core2__"); break; + case PROCESSOR_COREI7_32: + case PROCESSOR_COREI7_64: + def_or_undef (parse_in, "__corei7"); + def_or_undef (parse_in, "__corei7__"); + break; case PROCESSOR_ATOM: def_or_undef (parse_in, "__atom"); def_or_undef (parse_in, "__atom__"); @@ -197,6 +202,10 @@ ix86_target_macros_internal (int isa_flag, case PROCESSOR_CORE2: def_or_undef (parse_in, "__tune_core2__"); break; + case PROCESSOR_COREI7_32: + case PROCESSOR_COREI7_64: + def_or_undef (parse_in, "__tune_corei7__"); + break; case PROCESSOR_ATOM: def_or_undef (parse_in, "__tune_atom__"); break; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d04c20e..00d37a1 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1355,6 +1355,8 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_PENT4 (1<<PROCESSOR_PENTIUM4) #define m_NOCONA (1<<PROCESSOR_NOCONA) #define m_CORE2 (1<<PROCESSOR_CORE2) +#define m_COREI7_32 (1<<PROCESSOR_COREI7_32) +#define m_COREI7_64 (1<<PROCESSOR_COREI7_64) #define m_ATOM (1<<PROCESSOR_ATOM) #define m_GEODE (1<<PROCESSOR_GEODE) @@ -1367,8 +1369,8 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_BDVER1 (1<<PROCESSOR_BDVER1) #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1) -#define m_GENERIC32 (1<<PROCESSOR_GENERIC32) -#define m_GENERIC64 (1<<PROCESSOR_GENERIC64) +#define m_GENERIC32 (1<<PROCESSOR_GENERIC32 | m_COREI7_32) +#define m_GENERIC64 (1<<PROCESSOR_GENERIC64 | m_COREI7_64) /* Generic instruction choice should be common subset of supported CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */ @@ -2173,6 +2175,10 @@ static const struct ptt processor_target_table[PROCESSOR_max] = {&k8_cost, 16, 7, 16, 7, 16}, {&nocona_cost, 0, 0, 0, 0, 0}, {&core2_cost, 16, 10, 16, 10, 16}, + /* Core i7 32-bit. */ + {&generic32_cost, 16, 10, 16, 10, 16}, + /* Core i7 64-bit. */ + {&generic64_cost, 16, 10, 16, 10, 16}, {&generic32_cost, 16, 7, 16, 7, 16}, {&generic64_cost, 16, 10, 16, 10, 16}, {&amdfam10_cost, 32, 24, 32, 7, 32}, diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 62f35ca..e0c7260 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -239,6 +239,9 @@ extern const struct processor_costs ix86_size_cost; #define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON) #define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA) #define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2) +#define TARGET_COREI7_32 (ix86_tune == PROCESSOR_COREI7_32) +#define TARGET_COREI7_64 (ix86_tune == PROCESSOR_COREI7_64) +#define TARGET_COREI7 (TARGET_COREI7_32 || TARGET_COREI7_64) #define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32) #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64) #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64) @@ -2057,8 +2060,7 @@ do { \ "call " CRT_MKSTR(__USER_LABEL_PREFIX__) #FUNC "\n" \ TEXT_SECTION_ASM_OP); -/* Which processor to schedule for. The cpu attribute defines a list that - mirrors this list, so changes to i386.md must be made at the same time. */ +/* Which processor to tune code generation for. */ enum processor_type { @@ -2073,6 +2075,8 @@ enum processor_type PROCESSOR_K8, PROCESSOR_NOCONA, PROCESSOR_CORE2, + PROCESSOR_COREI7_32, + PROCESSOR_COREI7_64, PROCESSOR_GENERIC32, PROCESSOR_GENERIC64, PROCESSOR_AMDFAM10, diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 776fdd0..24a8e87 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12051,6 +12051,9 @@ SSE2 and SSE3 instruction set support. @item core2 Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 instruction set support. +@item corei7 +Intel Core i7 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 +and SSE4.2 instruction set support. @item atom Intel Atom CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 instruction set support.