Message ID | 20230626022913.32556-1-hongyu.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | i386: Sync tune_string with arch_string for target attribute arch=* | expand |
On Mon, Jun 26, 2023 at 4:31 AM Hongyu Wang <hongyu.wang@intel.com> wrote: > > Hi, > > For function with target attribute arch=*, current logic will set its > tune to -mtune from command line so all target_clones will get same > tuning flags which would affect the performance for each clone. Override > tune with arch if tune was not explicitly specified to get proper tuning > flags for target_clones. > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} > > Ok for trunk and backport to active release branches? > > gcc/ChangeLog: > > * config/i386/i386-options.cc (ix86_valid_target_attribute_tree): > Override tune_string with arch_string if tune_string is not > explicitly specified. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/mvc17.c: New test. LGTM. Thanks, Uros. > --- > gcc/config/i386/i386-options.cc | 6 +++++- > gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++ > 2 files changed, 16 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c > > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc > index 2cb0bddcd35..7f593cebe76 100644 > --- a/gcc/config/i386/i386-options.cc > +++ b/gcc/config/i386/i386-options.cc > @@ -1400,7 +1400,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, > if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) > opts->x_ix86_tune_string > = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); > - else if (orig_tune_defaulted) > + /* If we have explicit arch string and no tune string specified, set > + tune_string to NULL and later it will be overriden by arch_string > + so target clones can get proper optimization. */ > + else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH] > + || orig_tune_defaulted) > opts->x_ix86_tune_string = NULL; > > /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ > diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c > new file mode 100644 > index 00000000000..2c7cc2fdace > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/mvc17.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-require-ifunc "" } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-times "rep mov" 1 } } */ > + > +__attribute__((target_clones("default","arch=icelake-server"))) > +void > +foo (char *a, char *b, int size) > +{ > + __builtin_memcpy (a, b, size & 0x7F); > +} > -- > 2.31.1 >
Thanks, I'll backport it down to GCC10 after this passed all bootstrap/regtest. Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> 于2023年6月26日周一 14:05写道: > > On Mon, Jun 26, 2023 at 4:31 AM Hongyu Wang <hongyu.wang@intel.com> wrote: > > > > Hi, > > > > For function with target attribute arch=*, current logic will set its > > tune to -mtune from command line so all target_clones will get same > > tuning flags which would affect the performance for each clone. Override > > tune with arch if tune was not explicitly specified to get proper tuning > > flags for target_clones. > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} > > > > Ok for trunk and backport to active release branches? > > > > gcc/ChangeLog: > > > > * config/i386/i386-options.cc (ix86_valid_target_attribute_tree): > > Override tune_string with arch_string if tune_string is not > > explicitly specified. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/mvc17.c: New test. > > LGTM. > > Thanks, > Uros. > > > --- > > gcc/config/i386/i386-options.cc | 6 +++++- > > gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++ > > 2 files changed, 16 insertions(+), 1 deletion(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c > > > > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc > > index 2cb0bddcd35..7f593cebe76 100644 > > --- a/gcc/config/i386/i386-options.cc > > +++ b/gcc/config/i386/i386-options.cc > > @@ -1400,7 +1400,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, > > if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) > > opts->x_ix86_tune_string > > = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); > > - else if (orig_tune_defaulted) > > + /* If we have explicit arch string and no tune string specified, set > > + tune_string to NULL and later it will be overriden by arch_string > > + so target clones can get proper optimization. */ > > + else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH] > > + || orig_tune_defaulted) > > opts->x_ix86_tune_string = NULL; > > > > /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ > > diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c > > new file mode 100644 > > index 00000000000..2c7cc2fdace > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/mvc17.c > > @@ -0,0 +1,11 @@ > > +/* { dg-do compile } */ > > +/* { dg-require-ifunc "" } */ > > +/* { dg-options "-O2" } */ > > +/* { dg-final { scan-assembler-times "rep mov" 1 } } */ > > + > > +__attribute__((target_clones("default","arch=icelake-server"))) > > +void > > +foo (char *a, char *b, int size) > > +{ > > + __builtin_memcpy (a, b, size & 0x7F); > > +} > > -- > > 2.31.1 > >
The testcase fails with --with-arch=native build on cascadelake, here is the patch to adjust it gcc/testsuite/ChangeLog: * gcc.target/i386/mvc17.c: Add -march=x86-64 to dg-options. --- gcc/testsuite/gcc.target/i386/mvc17.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c index 2c7cc2fdace..8b83c1aecb3 100644 --- a/gcc/testsuite/gcc.target/i386/mvc17.c +++ b/gcc/testsuite/gcc.target/i386/mvc17.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-ifunc "" } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -march=x86-64" } */ /* { dg-final { scan-assembler-times "rep mov" 1 } } */ __attribute__((target_clones("default","arch=icelake-server"))) --- Will push it as an obvious fix, also will apply to the pending backports. Hongyu Wang <wwwhhhyyy333@gmail.com> 于2023年6月27日周二 13:43写道: > > Thanks, I'll backport it down to GCC10 after this passed all bootstrap/regtest. > > Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> 于2023年6月26日周一 14:05写道: > > > > On Mon, Jun 26, 2023 at 4:31 AM Hongyu Wang <hongyu.wang@intel.com> wrote: > > > > > > Hi, > > > > > > For function with target attribute arch=*, current logic will set its > > > tune to -mtune from command line so all target_clones will get same > > > tuning flags which would affect the performance for each clone. Override > > > tune with arch if tune was not explicitly specified to get proper tuning > > > flags for target_clones. > > > > > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} > > > > > > Ok for trunk and backport to active release branches? > > > > > > gcc/ChangeLog: > > > > > > * config/i386/i386-options.cc (ix86_valid_target_attribute_tree): > > > Override tune_string with arch_string if tune_string is not > > > explicitly specified. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.target/i386/mvc17.c: New test. > > > > LGTM. > > > > Thanks, > > Uros. > > > > > --- > > > gcc/config/i386/i386-options.cc | 6 +++++- > > > gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++ > > > 2 files changed, 16 insertions(+), 1 deletion(-) > > > create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c > > > > > > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc > > > index 2cb0bddcd35..7f593cebe76 100644 > > > --- a/gcc/config/i386/i386-options.cc > > > +++ b/gcc/config/i386/i386-options.cc > > > @@ -1400,7 +1400,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, > > > if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) > > > opts->x_ix86_tune_string > > > = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); > > > - else if (orig_tune_defaulted) > > > + /* If we have explicit arch string and no tune string specified, set > > > + tune_string to NULL and later it will be overriden by arch_string > > > + so target clones can get proper optimization. */ > > > + else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH] > > > + || orig_tune_defaulted) > > > opts->x_ix86_tune_string = NULL; > > > > > > /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ > > > diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c > > > new file mode 100644 > > > index 00000000000..2c7cc2fdace > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/mvc17.c > > > @@ -0,0 +1,11 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-require-ifunc "" } */ > > > +/* { dg-options "-O2" } */ > > > +/* { dg-final { scan-assembler-times "rep mov" 1 } } */ > > > + > > > +__attribute__((target_clones("default","arch=icelake-server"))) > > > +void > > > +foo (char *a, char *b, int size) > > > +{ > > > + __builtin_memcpy (a, b, size & 0x7F); > > > +} > > > -- > > > 2.31.1 > > >
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 2cb0bddcd35..7f593cebe76 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -1400,7 +1400,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) opts->x_ix86_tune_string = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); - else if (orig_tune_defaulted) + /* If we have explicit arch string and no tune string specified, set + tune_string to NULL and later it will be overriden by arch_string + so target clones can get proper optimization. */ + else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH] + || orig_tune_defaulted) opts->x_ix86_tune_string = NULL; /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c new file mode 100644 index 00000000000..2c7cc2fdace --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mvc17.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "rep mov" 1 } } */ + +__attribute__((target_clones("default","arch=icelake-server"))) +void +foo (char *a, char *b, int size) +{ + __builtin_memcpy (a, b, size & 0x7F); +}