Message ID | 5657d636-a35d-d4f8-d264-ebdbb606a0e4@suse.cz |
---|---|
State | New |
Headers | show |
Series | Make redirection only for target_clones (PR ipa/85329). | expand |
On 04/11/2018 03:12 PM, Martin Liška wrote: > Hi. > > Following restricts cgraph redirection done in multiple_target.c just > to clones that are created in the IPA pass. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? > Martin > > gcc/ChangeLog: > > 2018-04-11 Martin Liska <mliska@suse.cz> > > PR ipa/85329 > * multiple_target.c (create_dispatcher_calls): Rename to ... > (redirect_target_clone_callers): ... this. > (expand_target_clones): Record created clones. > (ipa_target_clone): Call redirect_target_clone_callers only > for these clones. > --- > gcc/multiple_target.c | 25 +++++++++++++++---------- > 1 file changed, 15 insertions(+), 10 deletions(-) > > So the patch is not sufficient for the original test-case. I'm reducing that and will enhance the patch. Martin
Hi. I'm sending V2. The patch adjusts: - make redirection just for target_clones, done simply by recording nodes where expand_target_clones return true - reset various DECL_* flags on default version, needed for ipa-visibility assert I've seen - handle empty string in target_clones: __attribute__((target_clones("",.. I saw that during reduction of the ICE. Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed? Martin
Forgot to add the patch.
Martin
From fb1bbf142af6668eeb1bdfeec96920de2f0edb21 Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Thu, 12 Apr 2018 12:15:17 +0200
Subject: [PATCH] Make redirection only for target_clones: V2 (PR ipa/85329).
gcc/ChangeLog:
2018-04-12 Martin Liska <mliska@suse.cz>
PR ipa/85329
* multiple_target.c (create_dispatcher_calls): Set apostrophes
for target_clone error message.
(separate_attrs): Add new argument and check for an emptry
string.
(expand_target_clones): Handle it.
(ipa_target_clone): Make redirection just for target_clones
functions.
gcc/testsuite/ChangeLog:
2018-04-12 Martin Liska <mliska@suse.cz>
PR ipa/85329
* g++.dg/ext/pr85329.C: New test.
* gcc.target/i386/mvc12.c: New test.
---
gcc/multiple_target.c | 43 ++++++++++++++++++++++++-----------
gcc/testsuite/g++.dg/ext/pr85329.C | 19 ++++++++++++++++
gcc/testsuite/gcc.target/i386/mvc12.c | 11 +++++++++
3 files changed, 60 insertions(+), 13 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/ext/pr85329.C
create mode 100644 gcc/testsuite/gcc.target/i386/mvc12.c
diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
index b006a5ab6ec..2357e458ec8 100644
--- a/gcc/multiple_target.c
+++ b/gcc/multiple_target.c
@@ -88,7 +88,7 @@ create_dispatcher_calls (struct cgraph_node *node)
if (!idecl)
{
error_at (DECL_SOURCE_LOCATION (node->decl),
- "default target_clones attribute was not set");
+ "default %<target_clones%> attribute was not set");
return;
}
@@ -216,26 +216,30 @@ get_attr_str (tree arglist, char *attr_str)
}
/* Return number of attributes separated by comma and put them into ARGS.
- If there is no DEFAULT attribute return -1. */
+ If there is no DEFAULT attribute return -1. If there is an empty
+ string in attribute return -2. */
static int
-separate_attrs (char *attr_str, char **attrs)
+separate_attrs (char *attr_str, char **attrs, int attrnum)
{
int i = 0;
- bool has_default = false;
+ int default_count = 0;
for (char *attr = strtok (attr_str, ",");
attr != NULL; attr = strtok (NULL, ","))
{
if (strcmp (attr, "default") == 0)
{
- has_default = true;
+ default_count++;
continue;
}
attrs[i++] = attr;
}
- if (!has_default)
+ if (default_count == 0)
return -1;
+ else if (i + default_count < attrnum)
+ return -2;
+
return i;
}
@@ -321,7 +325,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
{
warning_at (DECL_SOURCE_LOCATION (node->decl),
0,
- "single target_clones attribute is ignored");
+ "single %<target_clones%> attribute is ignored");
return false;
}
@@ -345,7 +349,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
int attrnum = get_attr_str (arglist, attr_str);
char **attrs = XNEWVEC (char *, attrnum);
- attrnum = separate_attrs (attr_str, attrs);
+ attrnum = separate_attrs (attr_str, attrs, attrnum);
if (attrnum == -1)
{
error_at (DECL_SOURCE_LOCATION (node->decl),
@@ -354,6 +358,14 @@ expand_target_clones (struct cgraph_node *node, bool definition)
XDELETEVEC (attr_str);
return false;
}
+ else if (attrnum == -2)
+ {
+ error_at (DECL_SOURCE_LOCATION (node->decl),
+ "an empty string cannot be in %<target_clones%> attribute");
+ XDELETEVEC (attrs);
+ XDELETEVEC (attr_str);
+ return false;
+ }
cgraph_function_version_info *decl1_v = NULL;
cgraph_function_version_info *decl2_v = NULL;
@@ -382,6 +394,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
DECL_ATTRIBUTES (new_node->decl) = attributes;
location_t saved_loc = input_location;
input_location = DECL_SOURCE_LOCATION (node->decl);
+
if (!targetm.target_option.valid_attribute_p (new_node->decl, NULL,
TREE_VALUE (attributes),
0))
@@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition)
tree attributes = make_attribute ("target", "default",
DECL_ATTRIBUTES (node->decl));
DECL_ATTRIBUTES (node->decl) = attributes;
+ DECL_COMDAT (node->decl) = 0;
+ DECL_WEAK (node->decl) = 0;
+ DECL_ARTIFICIAL (node->decl) = 1;
node->local.local = false;
+ node->set_comdat_group (NULL);
location_t saved_loc = input_location;
input_location = DECL_SOURCE_LOCATION (node->decl);
bool ret
@@ -427,14 +444,14 @@ static unsigned int
ipa_target_clone (void)
{
struct cgraph_node *node;
+ auto_vec<cgraph_node *> to_dispatch;
- bool target_clone_pass = false;
FOR_EACH_FUNCTION (node)
- target_clone_pass |= expand_target_clones (node, node->definition);
+ if (expand_target_clones (node, node->definition))
+ to_dispatch.safe_push (node);
- if (target_clone_pass)
- FOR_EACH_FUNCTION (node)
- create_dispatcher_calls (node);
+ for (unsigned i = 0; i < to_dispatch.length (); i++)
+ create_dispatcher_calls (to_dispatch[i]);
return 0;
}
diff --git a/gcc/testsuite/g++.dg/ext/pr85329.C b/gcc/testsuite/g++.dg/ext/pr85329.C
new file mode 100644
index 00000000000..fb77e42cd78
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/pr85329.C
@@ -0,0 +1,19 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-ifunc "" } */
+
+struct a
+{
+ __attribute__((target_clones("sse", "default"))) void operator^=(a) {}
+} * b;
+
+class c {
+public:
+ a *d();
+};
+
+class f {
+ void g();
+ c e;
+};
+
+void f::g() { *e.d() ^= b[0]; }
diff --git a/gcc/testsuite/gcc.target/i386/mvc12.c b/gcc/testsuite/gcc.target/i386/mvc12.c
new file mode 100644
index 00000000000..f42ae8080e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mvc12.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+
+__attribute__((target_clones("","arch=slm","arch=core-avx2", "default")))
+int foo (); /* { dg-error "an empty string cannot be in .target_clones. attribute" } */
+
+int
+bar ()
+{
+ return foo();
+}
> 2018-04-12 Martin Liska <mliska@suse.cz> > > PR ipa/85329 > * multiple_target.c (create_dispatcher_calls): Set apostrophes > for target_clone error message. > (separate_attrs): Add new argument and check for an emptry > string. > (expand_target_clones): Handle it. > (ipa_target_clone): Make redirection just for target_clones > functions. > > gcc/testsuite/ChangeLog: > > 2018-04-12 Martin Liska <mliska@suse.cz> > > PR ipa/85329 > * g++.dg/ext/pr85329.C: New test. > * gcc.target/i386/mvc12.c: New test. > @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition) > tree attributes = make_attribute ("target", "default", > DECL_ATTRIBUTES (node->decl)); > DECL_ATTRIBUTES (node->decl) = attributes; > + DECL_COMDAT (node->decl) = 0; > + DECL_WEAK (node->decl) = 0; > + DECL_ARTIFICIAL (node->decl) = 1; > node->local.local = false; > + node->set_comdat_group (NULL); If you make C++ inline and get the idea to use target cloning attribute on this, this will likely lead to link error if you compile multiple files because you turn comdat to non-comdat. For comdats this woudl effectivly need to become C++ abi extension and we would need to define comdat sections for these. Perhaps easiest way is to simply reject the attribute on comdats and probaby also extern functions? Otherwise patch looks OK. Honza
On Thu, Apr 12, 2018 at 03:46:26PM +0200, Jan Hubicka wrote: > If you make C++ inline and get the idea to use target cloning attribute on this, > this will likely lead to link error if you compile multiple files because you > turn comdat to non-comdat. > > For comdats this woudl effectivly need to become C++ abi extension and we would > need to define comdat sections for these. Perhaps easiest way is to simply > reject the attribute on comdats and probaby also extern functions? I'm not really sure we can do that, various packages in the wild are already using this. What is the problem with comdats and multi-versioning? The question is what comdat groups we should use for the comdat resolver and the versioned functions, shall the ifunc symbol be the original mangling of the method (or other comdat) and the other entrypoints just be .local non-weak symbols inside of the same section? Jakub
On Thu, Apr 12, 2018 at 07:53:35PM +0200, Jakub Jelinek wrote: > On Thu, Apr 12, 2018 at 03:46:26PM +0200, Jan Hubicka wrote: > > If you make C++ inline and get the idea to use target cloning attribute on this, > > this will likely lead to link error if you compile multiple files because you > > turn comdat to non-comdat. > > > > For comdats this woudl effectivly need to become C++ abi extension and we would > > need to define comdat sections for these. Perhaps easiest way is to simply > > reject the attribute on comdats and probaby also extern functions? > > I'm not really sure we can do that, various packages in the wild are already > using this. > What is the problem with comdats and multi-versioning? > The question is what comdat groups we should use for the comdat resolver and > the versioned functions, shall the ifunc symbol be the original mangling of > the method (or other comdat) and the other entrypoints just be .local > non-weak symbols inside of the same section? Ah, but we emit the resolver only if we see a use of it. That sounds quite broken, resolver in each TU that uses it? Better to have one at each definition... Jakub
On 04/12/2018 07:59 PM, Jakub Jelinek wrote: > On Thu, Apr 12, 2018 at 07:53:35PM +0200, Jakub Jelinek wrote: >> On Thu, Apr 12, 2018 at 03:46:26PM +0200, Jan Hubicka wrote: >>> If you make C++ inline and get the idea to use target cloning attribute on this, >>> this will likely lead to link error if you compile multiple files because you >>> turn comdat to non-comdat. >>> >>> For comdats this woudl effectivly need to become C++ abi extension and we would >>> need to define comdat sections for these. Perhaps easiest way is to simply >>> reject the attribute on comdats and probaby also extern functions? >> >> I'm not really sure we can do that, various packages in the wild are already >> using this. >> What is the problem with comdats and multi-versioning? >> The question is what comdat groups we should use for the comdat resolver and >> the versioned functions, shall the ifunc symbol be the original mangling of >> the method (or other comdat) and the other entrypoints just be .local >> non-weak symbols inside of the same section? > > Ah, but we emit the resolver only if we see a use of it. That sounds quite > broken, resolver in each TU that uses it? Better to have one at each > definition... > > Jakub > So after quite some time I would need some brainstorming as I'm not sure how to fix that properly. Let's start how 'target' attribute works and I believe it should behave same as target_clones attribute: $ cat mv.h int foo (void); void test (); $ cat mv.cpp #include "mv.h" __attribute__ ((target ("default"))) int foo () { return 0; } __attribute__ ((target ("sse4.2"))) int foo () { return 1; } int main () { __builtin_printf ("in main: %d\n", foo ()); test (); return 0; } $ cat mv2.cpp #include "mv.h" void test() { int (*f) (void) = &foo; __builtin_printf ("value: %d\n", foo ()); } $ gcc -fdump-ipa-cgraph=/dev/stdout mv.cpp -c _Z13_Z3foov.ifuncv/2 (int _Z3foov.ifunc()) @0x7ffff67182e0 Type: function definition analyzed alias cpp_implicit_alias Visibility: externally_visible asm_written public comdat comdat_group:_Z3foov.resolver one_only section:.text._Z3foov.resolver (implicit_section) artificial Same comdat group as: _Z3foov.resolver/6 References: _Z3foov.resolver/6 (alias) Referring: Availability: overwritable First run: 0 Version info: next: _Z3foov/0 dispatcher: _Z3foov.resolver Function flags: Called by: Calls: _Z3foov.sse4.2/1 (int foo()) @0x7ffff6718170 Type: function definition analyzed Visibility: force_output externally_visible asm_written public Address is taken. References: Referring: Availability: available First run: 0 Version info: prev: _Z3foov/0 dispatcher: int _Z3foov.ifunc() Function flags: Called by: Calls: _Z3foov/0 (int foo()) @0x7ffff6718000 Type: function definition analyzed Visibility: force_output externally_visible asm_written public Address is taken. References: Referring: Availability: available First run: 0 Version info: next: _Z3foov.sse4.2/1 dispatcher: int _Z3foov.ifunc() Function flags: Called by: Calls: _Z3foov.resolver/6 (_Z3foov.resolver) @0x7ffff67188a0 Type: function definition analyzed Visibility: externally_visible asm_written public weak comdat comdat_group:_Z3foov.resolver one_only section:.text._Z3foov.resolver (implicit_section) artificial Same comdat group as: _Z13_Z3foov.ifuncv/2 References: Referring: _Z13_Z3foov.ifuncv/2 (alias) Availability: available First run: 0 Function flags: Called by: Calls: So note that resolver and ifunc live in a unique comdat_group. And as opposed to target_clones, default implementation has not appended '.default' suffix. That's problem because an address taken returns default implementation as seen here: $ gcc mv.cpp mv2.cpp && ./a.out in main: 1 value: 0 Which is the same problem is fixed for target_clones in this release cycle. So it's broken. Apart from that, following is broken for target attribute: cat Crypto-target.ii struct aaa { static __attribute__((target("avx512f"))) void foo() { __builtin_printf ("avx512f\n"); } static __attribute__((target("sse"))) void foo() {__builtin_printf ("sse\n");} static __attribute__((target("default"))) void foo() {__builtin_printf ("default\n");} }; void (*initializer) (void) = { &aaa::foo }; int main() { initializer (); // aaa::foo (); } $ g++ Crypto-target.ii && ./a.out /tmp/ccJMMaDc.o:(.data+0x0): undefined reference to `_ZN3aaa3fooEv.ifunc()' collect2: error: ld returned 1 exit status For target_clones, I have a patch candidate that works for the test-case in PR85329: $ cat ~/Programming/testcases/Crypto.ii struct a { __attribute__((target_clones("sse", "default"))) void operator^=(a) {} } * b; class c { public: a *d(); }; class f { void g(); c e; }; void f::g() { *e.d() ^= b[0]; } $ g++ ~/Programming/testcases/Crypto.ii -c -fdump-ipa-cgraph=/dev/stdout _ZN1aeOES_.resolver/6 (_ZN1aeOES_.resolver) @0x7ffff690d730 Type: function definition analyzed Visibility: force_output externally_visible public weak comdat comdat_group:_ZN1aeOES_ one_only artificial Same comdat group as: _ZN1aeOES_/5 References: _ZN1aeOES_.sse.0/4 (addr)_ZN1aeOES_.default.1/0 (addr) Referring: _ZN1aeOES_/5 (alias) Availability: available First run: 0 Function flags: body Called by: Calls: int __builtin_cpu_supports(const char*)/8 (can throw external) int __builtin_cpu_init()/7 (can throw external) _ZN1aeOES_/5 (_ZN1aeOES_) @0x7ffff690d5c0 Type: function definition analyzed alias cpp_implicit_alias target:_ZN1aeOES_.resolver Visibility: externally_visible public comdat comdat_group:_ZN1aeOES_ one_only artificial Same comdat group as: _ZN1aeOES_.resolver/6 References: _ZN1aeOES_.resolver/6 (alias) Referring: Availability: overwritable First run: 0 Version info: next: _ZN1aeOES_.default.1/0 dispatcher: _ZN1aeOES_.resolver Function flags: Called by: void f::g()/2 Calls: _ZN1aeOES_.sse.0/4 (void a::_ZN1aeOES_.sse.0(a)) @0x7ffff690d450 Type: function definition analyzed Visibility: force_output no_reorder prevailing_def_ironly artificial Address is taken. References: Referring: _ZN1aeOES_.resolver/6 (addr) Availability: available First run: 0 Version info: prev: _ZN1aeOES_.default.1/0 dispatcher: _ZN1aeOES_ Function flags: body Called by: Calls: _ZN1aeOES_.default.1/0 (void a::operator^=(a)) @0x7ffff690d000 Type: function definition analyzed Visibility: force_output no_reorder prevailing_def_ironly artificial Address is taken. References: Referring: _ZN1aeOES_.resolver/6 (addr) Availability: available First run: 0 Version info: next: _ZN1aeOES_.sse.0/4 dispatcher: _ZN1aeOES_ Function flags: body Called by: Calls: It's not ideal because _ZN1aeOES_.sse.0/4 and _ZN1aeOES_.default.1/0 don't share the same comdat group. But it should not break anything now, am I right? Honza? I'll carry on during weekend after I'll get some feedback. Thanks, Martin
On Fri, Apr 13, 2018 at 04:33:40PM +0200, Martin Liška wrote: > > Ah, but we emit the resolver only if we see a use of it. That sounds quite > > broken, resolver in each TU that uses it? Better to have one at each > > definition... > > > > Jakub > > > > So after quite some time I would need some brainstorming as I'm not sure how to > fix that properly. Let's start how 'target' attribute works and I believe it should > behave same as target_clones attribute: I think the target_clones behavior if you put the <function>.resolver into .text.<function> section in <function> comdat rather than .text.<function>.resolver and <function>.resolver comdat is reasonable. The thing is that both the <function>.resolver and <function> symbols are defined in the section in which the resolver is defined. Maybe it would be nice if the resolver was made not exported out of the TU, and eventually, not necessarily now for GCC8, turn the specializations into non-exported symbols too and put them into the same section or different sections of the same comdat group. For ctors and dtors we need extra care about the aliases, not sure if we can have an alias to ifunc symbol, or if we need to emit two ifunc symbols with the same resolver or what exactly. And yes, it would be nice if the target attribute multiversioning worked similarly to this. It would change behavior for it in ABI incompatible way, the question is how much work it would be as well. What you can do right now with the target attribute multiversioning and couldn't do after the change would be e.g. mv.h: __attribute__((target ("default"))) void foo (); __attribute__((target ("avx"))) void foo (); mv1.C: #include "mv.h" __attribute__((target ("default"))) void foo () {} mv2.C: #include "mv.h" __attribute__((target ("avx"))) void foo () {} mv3.C: #include "mv.h" void bar () { foo (); } You couldn't this in the semantics closer to target_clones, all the definitions would need to be done in the same file which is where you'd also get the ifunc symbol. IMHO it is worth changing the semantics anyway, because the current one isn't very well thought out, it doesn't really work well with comdats, can have too many resolvers with the associated costs, etc. Honza, do you agree? Jakub
> On Fri, Apr 13, 2018 at 04:33:40PM +0200, Martin Liška wrote: > > > Ah, but we emit the resolver only if we see a use of it. That sounds quite > > > broken, resolver in each TU that uses it? Better to have one at each > > > definition... > > > > > > Jakub > > > > > > > So after quite some time I would need some brainstorming as I'm not sure how to > > fix that properly. Let's start how 'target' attribute works and I believe it should > > behave same as target_clones attribute: > > I think the target_clones behavior if you put the <function>.resolver into > .text.<function> section in <function> comdat rather than > .text.<function>.resolver and <function>.resolver comdat is reasonable. > > The thing is that both the <function>.resolver and <function> symbols are > defined in the section in which the resolver is defined. > Maybe it would be nice if the resolver was made not exported out of the TU, > and eventually, not necessarily now for GCC8, turn the specializations into > non-exported symbols too and put them into the same section or different > sections of the same comdat group. > > For ctors and dtors we need extra care about the aliases, not sure if we can > have an alias to ifunc symbol, or if we need to emit two ifunc symbols with > the same resolver or what exactly. > > And yes, it would be nice if the target attribute multiversioning worked > similarly to this. It would change behavior for it in ABI incompatible way, > the question is how much work it would be as well. > > What you can do right now with the target attribute multiversioning and > couldn't do after the change would be e.g. > mv.h: > __attribute__((target ("default"))) void foo (); > __attribute__((target ("avx"))) void foo (); > mv1.C: > #include "mv.h" > __attribute__((target ("default"))) void foo () {} > mv2.C: > #include "mv.h" > __attribute__((target ("avx"))) void foo () {} > mv3.C: > #include "mv.h" > void bar () { foo (); } > You couldn't this in the semantics closer to target_clones, all the > definitions would need to be done in the same file which is where you'd also > get the ifunc symbol. > > IMHO it is worth changing the semantics anyway, because the current one > isn't very well thought out, it doesn't really work well with comdats, can > have too many resolvers with the associated costs, etc. > > Honza, do you agree? Yes, it looks reasonable. I will give it bit more tought today (I am on a trip with only sporadic internet access) Honza > > Jakub
On 04/12/2018 03:46 PM, Jan Hubicka wrote: >> 2018-04-12 Martin Liska <mliska@suse.cz> >> >> PR ipa/85329 >> * multiple_target.c (create_dispatcher_calls): Set apostrophes >> for target_clone error message. >> (separate_attrs): Add new argument and check for an emptry >> string. >> (expand_target_clones): Handle it. >> (ipa_target_clone): Make redirection just for target_clones >> functions. >> >> gcc/testsuite/ChangeLog: >> >> 2018-04-12 Martin Liska <mliska@suse.cz> >> >> PR ipa/85329 >> * g++.dg/ext/pr85329.C: New test. >> * gcc.target/i386/mvc12.c: New test. >> @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition) >> tree attributes = make_attribute ("target", "default", >> DECL_ATTRIBUTES (node->decl)); >> DECL_ATTRIBUTES (node->decl) = attributes; >> + DECL_COMDAT (node->decl) = 0; >> + DECL_WEAK (node->decl) = 0; >> + DECL_ARTIFICIAL (node->decl) = 1; >> node->local.local = false; >> + node->set_comdat_group (NULL); > > If you make C++ inline and get the idea to use target cloning attribute on this, > this will likely lead to link error if you compile multiple files because you > turn comdat to non-comdat. Can you please explain this in more detail? Ideally showing a test-case that would fail? Martin > > For comdats this woudl effectivly need to become C++ abi extension and we would > need to define comdat sections for these. Perhaps easiest way is to simply > reject the attribute on comdats and probaby also extern functions? > > Otherwise patch looks OK. > Honza >
On 04/12/2018 03:46 PM, Jan Hubicka wrote: >> 2018-04-12 Martin Liska <mliska@suse.cz> >> >> PR ipa/85329 >> * multiple_target.c (create_dispatcher_calls): Set apostrophes >> for target_clone error message. >> (separate_attrs): Add new argument and check for an emptry >> string. >> (expand_target_clones): Handle it. >> (ipa_target_clone): Make redirection just for target_clones >> functions. >> >> gcc/testsuite/ChangeLog: >> >> 2018-04-12 Martin Liska <mliska@suse.cz> >> >> PR ipa/85329 >> * g++.dg/ext/pr85329.C: New test. >> * gcc.target/i386/mvc12.c: New test. >> @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition) >> tree attributes = make_attribute ("target", "default", >> DECL_ATTRIBUTES (node->decl)); >> DECL_ATTRIBUTES (node->decl) = attributes; >> + DECL_COMDAT (node->decl) = 0; >> + DECL_WEAK (node->decl) = 0; >> + DECL_ARTIFICIAL (node->decl) = 1; >> node->local.local = false; >> + node->set_comdat_group (NULL); > > If you make C++ inline and get the idea to use target cloning attribute on this, > this will likely lead to link error if you compile multiple files because you > turn comdat to non-comdat. > > For comdats this woudl effectivly need to become C++ abi extension and we would > need to define comdat sections for these. Perhaps easiest way is to simply > reject the attribute on comdats and probaby also extern functions? > > Otherwise patch looks OK. > Honza > And Honza also asked me for description what has changes in between GCC 7.x and current trunk. There are test-cases that illustrate that: ./gcc/testsuite/gcc.target/i386/mvc10.c ./gcc/testsuite/gcc.target/i386/mvc11.c ./gcc/testsuite/gcc.target/i386/mvc5.c ./gcc/testsuite/gcc.target/i386/mvc7.c ./gcc/testsuite/gcc.target/i386/pr80732.c ./gcc/testsuite/gcc.target/i386/pr81214.c Martin
On Sat, Apr 14, 2018 at 11:53:51AM +0200, Martin Liška wrote: > > > 2018-04-12 Martin Liska <mliska@suse.cz> > > > > > > PR ipa/85329 > > > * g++.dg/ext/pr85329.C: New test. > > > * gcc.target/i386/mvc12.c: New test. > > > @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition) > > > tree attributes = make_attribute ("target", "default", > > > DECL_ATTRIBUTES (node->decl)); > > > DECL_ATTRIBUTES (node->decl) = attributes; > > > + DECL_COMDAT (node->decl) = 0; > > > + DECL_WEAK (node->decl) = 0; > > > + DECL_ARTIFICIAL (node->decl) = 1; > > > node->local.local = false; > > > + node->set_comdat_group (NULL); > > > > If you make C++ inline and get the idea to use target cloning attribute on this, > > this will likely lead to link error if you compile multiple files because you > > turn comdat to non-comdat. > > Can you please explain this in more detail? Ideally showing a test-case that would fail? I'd guess something along the lines of typedef void (*F) (); __attribute__((target ("default"))) inline void foo () {} __attribute__((target ("avx"))) inline void foo () {} __attribute__((target_clones ("default", "avx"))) inline void bar () {} #ifdef SRC_A F pa = foo; F qa = bar; #else F pb = foo; F qb = bar; extern F pa, qa; int main () { asm volatile ("" : "+g" (pa), "+g" (pb), "+g" (qa), "+g" (qb)); pa (); pb (); qa (); qb (); } #endif and g++ -O2 -c -DSRC_A test.C -o testa.o g++ -O2 -o test{,a.o,.C} This doesn't actually fail, because the bar symbol, even when it doesn't have STB_WEAK (which it should), is in the comdat group and so only one is picked up. Jakub
Hi.
I'm sending V3 which we did with Honza. It's similar to V2, but properly makes
FUNCTION_DECL local for default implementation.
Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
Tests on ppc64le are running.
Ready to be installed?
Martin
From 77b48cfad59dd24a5c068bfb32b1059535ae1f75 Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Sat, 14 Apr 2018 09:55:35 +0200
Subject: [PATCH] Make redirection only for target_clones: V3 (PR ipa/85329).
2018-04-16 Martin Liska <mliska@suse.cz>
* multiple_target.c (create_dispatcher_calls): Set apostrophes
for target_clone error message. Make default implementation
clone to be a local declaration.
(separate_attrs): Add new argument and check for an emptry
string.
(expand_target_clones): Handle it.
(ipa_target_clone): Make redirection just for target_clones
functions.
gcc/testsuite/ChangeLog:
2018-04-16 Martin Liska <mliska@suse.cz>
* g++.dg/ext/pr85329-2.C: New test.
* g++.dg/ext/pr85329.C: New test.
* gcc.target/i386/mvc12.c: New test.
---
gcc/multiple_target.c | 55 ++++++++++++++++++++++++++---------
gcc/testsuite/g++.dg/ext/pr85329-2.C | 22 ++++++++++++++
gcc/testsuite/g++.dg/ext/pr85329.C | 19 ++++++++++++
gcc/testsuite/gcc.target/i386/mvc12.c | 11 +++++++
4 files changed, 93 insertions(+), 14 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/ext/pr85329-2.C
create mode 100644 gcc/testsuite/g++.dg/ext/pr85329.C
create mode 100644 gcc/testsuite/gcc.target/i386/mvc12.c
diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
index b006a5ab6ec..a1fe09a5983 100644
--- a/gcc/multiple_target.c
+++ b/gcc/multiple_target.c
@@ -88,7 +88,7 @@ create_dispatcher_calls (struct cgraph_node *node)
if (!idecl)
{
error_at (DECL_SOURCE_LOCATION (node->decl),
- "default target_clones attribute was not set");
+ "default %<target_clones%> attribute was not set");
return;
}
@@ -161,10 +161,25 @@ create_dispatcher_calls (struct cgraph_node *node)
}
}
- TREE_PUBLIC (node->decl) = 0;
symtab->change_decl_assembler_name (node->decl,
clone_function_name (node->decl,
"default"));
+
+ /* FIXME: copy of cgraph_node::make_local that should be cleaned up
+ in next stage1. */
+ node->make_decl_local ();
+ node->set_section (NULL);
+ node->set_comdat_group (NULL);
+ node->externally_visible = false;
+ node->forced_by_abi = false;
+ node->set_section (NULL);
+ node->unique_name = ((node->resolution == LDPR_PREVAILING_DEF_IRONLY
+ || node->resolution == LDPR_PREVAILING_DEF_IRONLY_EXP)
+ && !flag_incremental_link);
+ node->resolution = LDPR_PREVAILING_DEF_IRONLY;
+
+ DECL_ARTIFICIAL (node->decl) = 1;
+ node->force_output = true;
}
/* Return length of attribute names string,
@@ -216,26 +231,30 @@ get_attr_str (tree arglist, char *attr_str)
}
/* Return number of attributes separated by comma and put them into ARGS.
- If there is no DEFAULT attribute return -1. */
+ If there is no DEFAULT attribute return -1. If there is an empty
+ string in attribute return -2. */
static int
-separate_attrs (char *attr_str, char **attrs)
+separate_attrs (char *attr_str, char **attrs, int attrnum)
{
int i = 0;
- bool has_default = false;
+ int default_count = 0;
for (char *attr = strtok (attr_str, ",");
attr != NULL; attr = strtok (NULL, ","))
{
if (strcmp (attr, "default") == 0)
{
- has_default = true;
+ default_count++;
continue;
}
attrs[i++] = attr;
}
- if (!has_default)
+ if (default_count == 0)
return -1;
+ else if (i + default_count < attrnum)
+ return -2;
+
return i;
}
@@ -321,7 +340,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
{
warning_at (DECL_SOURCE_LOCATION (node->decl),
0,
- "single target_clones attribute is ignored");
+ "single %<target_clones%> attribute is ignored");
return false;
}
@@ -345,7 +364,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
int attrnum = get_attr_str (arglist, attr_str);
char **attrs = XNEWVEC (char *, attrnum);
- attrnum = separate_attrs (attr_str, attrs);
+ attrnum = separate_attrs (attr_str, attrs, attrnum);
if (attrnum == -1)
{
error_at (DECL_SOURCE_LOCATION (node->decl),
@@ -354,6 +373,14 @@ expand_target_clones (struct cgraph_node *node, bool definition)
XDELETEVEC (attr_str);
return false;
}
+ else if (attrnum == -2)
+ {
+ error_at (DECL_SOURCE_LOCATION (node->decl),
+ "an empty string cannot be in %<target_clones%> attribute");
+ XDELETEVEC (attrs);
+ XDELETEVEC (attr_str);
+ return false;
+ }
cgraph_function_version_info *decl1_v = NULL;
cgraph_function_version_info *decl2_v = NULL;
@@ -427,14 +454,14 @@ static unsigned int
ipa_target_clone (void)
{
struct cgraph_node *node;
+ auto_vec<cgraph_node *> to_dispatch;
- bool target_clone_pass = false;
FOR_EACH_FUNCTION (node)
- target_clone_pass |= expand_target_clones (node, node->definition);
+ if (expand_target_clones (node, node->definition))
+ to_dispatch.safe_push (node);
- if (target_clone_pass)
- FOR_EACH_FUNCTION (node)
- create_dispatcher_calls (node);
+ for (unsigned i = 0; i < to_dispatch.length (); i++)
+ create_dispatcher_calls (to_dispatch[i]);
return 0;
}
diff --git a/gcc/testsuite/g++.dg/ext/pr85329-2.C b/gcc/testsuite/g++.dg/ext/pr85329-2.C
new file mode 100644
index 00000000000..24622d404f7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/pr85329-2.C
@@ -0,0 +1,22 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-ifunc "" } */
+
+class b
+{
+public:
+ __attribute__ ((target ("aes"))) b () {}
+ __attribute__ ((target ("default"))) b () {}
+};
+class c
+{
+ b d;
+};
+void
+fn1 ()
+{
+ c a;
+}
+__attribute__ ((target_clones ("sse", "default"))) void
+e ()
+{
+}
diff --git a/gcc/testsuite/g++.dg/ext/pr85329.C b/gcc/testsuite/g++.dg/ext/pr85329.C
new file mode 100644
index 00000000000..fb77e42cd78
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/pr85329.C
@@ -0,0 +1,19 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-ifunc "" } */
+
+struct a
+{
+ __attribute__((target_clones("sse", "default"))) void operator^=(a) {}
+} * b;
+
+class c {
+public:
+ a *d();
+};
+
+class f {
+ void g();
+ c e;
+};
+
+void f::g() { *e.d() ^= b[0]; }
diff --git a/gcc/testsuite/gcc.target/i386/mvc12.c b/gcc/testsuite/gcc.target/i386/mvc12.c
new file mode 100644
index 00000000000..f42ae8080e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mvc12.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+
+__attribute__((target_clones("","arch=slm","arch=core-avx2", "default")))
+int foo (); /* { dg-error "an empty string cannot be in .target_clones. attribute" } */
+
+int
+bar ()
+{
+ return foo();
+}
> 2018-04-16 Martin Liska <mliska@suse.cz> > > * multiple_target.c (create_dispatcher_calls): Set apostrophes > for target_clone error message. Make default implementation > clone to be a local declaration. > (separate_attrs): Add new argument and check for an emptry > string. > (expand_target_clones): Handle it. > (ipa_target_clone): Make redirection just for target_clones > functions. > > gcc/testsuite/ChangeLog: > > 2018-04-16 Martin Liska <mliska@suse.cz> > > * g++.dg/ext/pr85329-2.C: New test. > * g++.dg/ext/pr85329.C: New test. > * gcc.target/i386/mvc12.c: New test. > --- > gcc/multiple_target.c | 55 ++++++++++++++++++++++++++--------- > gcc/testsuite/g++.dg/ext/pr85329-2.C | 22 ++++++++++++++ > gcc/testsuite/g++.dg/ext/pr85329.C | 19 ++++++++++++ > gcc/testsuite/gcc.target/i386/mvc12.c | 11 +++++++ > 4 files changed, 93 insertions(+), 14 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/ext/pr85329-2.C > create mode 100644 gcc/testsuite/g++.dg/ext/pr85329.C > create mode 100644 gcc/testsuite/gcc.target/i386/mvc12.c > > diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c > index b006a5ab6ec..a1fe09a5983 100644 > --- a/gcc/multiple_target.c > +++ b/gcc/multiple_target.c > @@ -88,7 +88,7 @@ create_dispatcher_calls (struct cgraph_node *node) > if (!idecl) > { > error_at (DECL_SOURCE_LOCATION (node->decl), > - "default target_clones attribute was not set"); > + "default %<target_clones%> attribute was not set"); > return; > } > > @@ -161,10 +161,25 @@ create_dispatcher_calls (struct cgraph_node *node) > } > } > > - TREE_PUBLIC (node->decl) = 0; > symtab->change_decl_assembler_name (node->decl, > clone_function_name (node->decl, > "default")); > + > + /* FIXME: copy of cgraph_node::make_local that should be cleaned up > + in next stage1. */ > + node->make_decl_local (); > + node->set_section (NULL); > + node->set_comdat_group (NULL); > + node->externally_visible = false; > + node->forced_by_abi = false; > + node->set_section (NULL); Probably no need to do this twice (next stage1 we can fix that in make_local as well) > + node->unique_name = ((node->resolution == LDPR_PREVAILING_DEF_IRONLY > + || node->resolution == LDPR_PREVAILING_DEF_IRONLY_EXP) > + && !flag_incremental_link); > + node->resolution = LDPR_PREVAILING_DEF_IRONLY; > + > + DECL_ARTIFICIAL (node->decl) = 1; > + node->force_output = true; And we don't really need force_output, but that is for next stage1 as well. OK and thanks for working on this! Honza
On Mon, Apr 16, 2018 at 04:01:18PM +0200, Martin Liška wrote: > >From 77b48cfad59dd24a5c068bfb32b1059535ae1f75 Mon Sep 17 00:00:00 2001 > From: marxin <mliska@suse.cz> > Date: Sat, 14 Apr 2018 09:55:35 +0200 > Subject: [PATCH] Make redirection only for target_clones: V3 (PR ipa/85329). > > 2018-04-16 Martin Liska <mliska@suse.cz> > Missing PR line. > * multiple_target.c (create_dispatcher_calls): Set apostrophes > for target_clone error message. Make default implementation > clone to be a local declaration. > (separate_attrs): Add new argument and check for an emptry s/emptry/empty/ > string. > (expand_target_clones): Handle it. > (ipa_target_clone): Make redirection just for target_clones > functions. Jakub
diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c index b006a5ab6ec..5bd88fbd83c 100644 --- a/gcc/multiple_target.c +++ b/gcc/multiple_target.c @@ -58,11 +58,11 @@ replace_function_decl (tree *op, int *walk_subtrees, void *data) return NULL; } -/* If the call in NODE has multiple target attribute with multiple fields, - replace it with dispatcher call and create dispatcher (once). */ +/* If the call in NODE is a target_clone attribute clone, redirect all + edges and references that point to cgraph NODE. */ static void -create_dispatcher_calls (struct cgraph_node *node) +redirect_target_clone_callers (struct cgraph_node *node) { ipa_ref *ref; @@ -300,10 +300,12 @@ create_target_clone (cgraph_node *node, bool definition, char *name) } /* If the function in NODE has multiple target attributes - create the appropriate clone for each valid target attribute. */ + create the appropriate clone for each valid target attribute. + TO_REDIRECT is a vector where we place all newly created clones. */ static bool -expand_target_clones (struct cgraph_node *node, bool definition) +expand_target_clones (struct cgraph_node *node, bool definition, + auto_vec <cgraph_node *> &to_redirect) { int i; /* Parsing target attributes separated by comma. */ @@ -404,6 +406,8 @@ expand_target_clones (struct cgraph_node *node, bool definition) before->next = after; after->prev = before; DECL_FUNCTION_VERSIONED (new_node->decl) = 1; + + to_redirect.safe_push (new_node); } XDELETEVEC (attrs); @@ -420,6 +424,8 @@ expand_target_clones (struct cgraph_node *node, bool definition) = targetm.target_option.valid_attribute_p (node->decl, NULL, TREE_VALUE (attributes), 0); input_location = saved_loc; + to_redirect.safe_push (node); + return ret; } @@ -428,13 +434,12 @@ ipa_target_clone (void) { struct cgraph_node *node; - bool target_clone_pass = false; + auto_vec <cgraph_node *> nodes_to_redirect; FOR_EACH_FUNCTION (node) - target_clone_pass |= expand_target_clones (node, node->definition); + expand_target_clones (node, node->definition, nodes_to_redirect); - if (target_clone_pass) - FOR_EACH_FUNCTION (node) - create_dispatcher_calls (node); + for (unsigned i = 0; i < nodes_to_redirect.length (); i++) + redirect_target_clone_callers (nodes_to_redirect[i]); return 0; }