diff mbox series

Make redirection only for target_clones (PR ipa/85329).

Message ID 5657d636-a35d-d4f8-d264-ebdbb606a0e4@suse.cz
State New
Headers show
Series Make redirection only for target_clones (PR ipa/85329). | expand

Commit Message

Martin Liška April 11, 2018, 1:12 p.m. UTC
Hi.

Following restricts cgraph redirection done in multiple_target.c just
to clones that are created in the IPA pass.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Martin

gcc/ChangeLog:

2018-04-11  Martin Liska  <mliska@suse.cz>

	PR ipa/85329
	* multiple_target.c (create_dispatcher_calls): Rename to ...
	(redirect_target_clone_callers): ... this.
	(expand_target_clones): Record created clones.
	(ipa_target_clone): Call redirect_target_clone_callers only
	for these clones.
---
 gcc/multiple_target.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

Comments

Martin Liška April 11, 2018, 1:42 p.m. UTC | #1
On 04/11/2018 03:12 PM, Martin Liška wrote:
> Hi.
> 
> Following restricts cgraph redirection done in multiple_target.c just
> to clones that are created in the IPA pass.
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?
> Martin
> 
> gcc/ChangeLog:
> 
> 2018-04-11  Martin Liska  <mliska@suse.cz>
> 
> 	PR ipa/85329
> 	* multiple_target.c (create_dispatcher_calls): Rename to ...
> 	(redirect_target_clone_callers): ... this.
> 	(expand_target_clones): Record created clones.
> 	(ipa_target_clone): Call redirect_target_clone_callers only
> 	for these clones.
> ---
>  gcc/multiple_target.c | 25 +++++++++++++++----------
>  1 file changed, 15 insertions(+), 10 deletions(-)
> 
> 

So the patch is not sufficient for the original test-case. I'm reducing that
and will enhance the patch.

Martin
Martin Liška April 12, 2018, 1:25 p.m. UTC | #2
Hi.

I'm sending V2. The patch adjusts:
- make redirection just for target_clones, done simply by recording nodes
  where expand_target_clones return true
- reset various DECL_* flags on default version, needed for ipa-visibility assert I've seen
- handle empty string in target_clones: __attribute__((target_clones("",..
  I saw that during reduction of the ICE.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Martin
Martin Liška April 12, 2018, 1:26 p.m. UTC | #3
Forgot to add the patch.

Martin
From fb1bbf142af6668eeb1bdfeec96920de2f0edb21 Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Thu, 12 Apr 2018 12:15:17 +0200
Subject: [PATCH] Make redirection only for target_clones: V2 (PR ipa/85329).

gcc/ChangeLog:

2018-04-12  Martin Liska  <mliska@suse.cz>

	PR ipa/85329
	* multiple_target.c (create_dispatcher_calls): Set apostrophes
	for target_clone error message.
	(separate_attrs): Add new argument and check for an emptry
	string.
	(expand_target_clones): Handle it.
	(ipa_target_clone): Make redirection just for target_clones
	functions.

gcc/testsuite/ChangeLog:

2018-04-12  Martin Liska  <mliska@suse.cz>

	PR ipa/85329
	* g++.dg/ext/pr85329.C: New test.
	* gcc.target/i386/mvc12.c: New test.
---
 gcc/multiple_target.c                 | 43 ++++++++++++++++++++++++-----------
 gcc/testsuite/g++.dg/ext/pr85329.C    | 19 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/mvc12.c | 11 +++++++++
 3 files changed, 60 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/pr85329.C
 create mode 100644 gcc/testsuite/gcc.target/i386/mvc12.c

diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
index b006a5ab6ec..2357e458ec8 100644
--- a/gcc/multiple_target.c
+++ b/gcc/multiple_target.c
@@ -88,7 +88,7 @@ create_dispatcher_calls (struct cgraph_node *node)
   if (!idecl)
     {
       error_at (DECL_SOURCE_LOCATION (node->decl),
-		"default target_clones attribute was not set");
+		"default %<target_clones%> attribute was not set");
       return;
     }
 
@@ -216,26 +216,30 @@ get_attr_str (tree arglist, char *attr_str)
 }
 
 /* Return number of attributes separated by comma and put them into ARGS.
-   If there is no DEFAULT attribute return -1.  */
+   If there is no DEFAULT attribute return -1.  If there is an empty
+   string in attribute return -2.  */
 
 static int
-separate_attrs (char *attr_str, char **attrs)
+separate_attrs (char *attr_str, char **attrs, int attrnum)
 {
   int i = 0;
-  bool has_default = false;
+  int default_count = 0;
 
   for (char *attr = strtok (attr_str, ",");
        attr != NULL; attr = strtok (NULL, ","))
     {
       if (strcmp (attr, "default") == 0)
 	{
-	  has_default = true;
+	  default_count++;
 	  continue;
 	}
       attrs[i++] = attr;
     }
-  if (!has_default)
+  if (default_count == 0)
     return -1;
+  else if (i + default_count < attrnum)
+    return -2;
+
   return i;
 }
 
@@ -321,7 +325,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
     {
       warning_at (DECL_SOURCE_LOCATION (node->decl),
 		  0,
-		  "single target_clones attribute is ignored");
+		  "single %<target_clones%> attribute is ignored");
       return false;
     }
 
@@ -345,7 +349,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
   int attrnum = get_attr_str (arglist, attr_str);
   char **attrs = XNEWVEC (char *, attrnum);
 
-  attrnum = separate_attrs (attr_str, attrs);
+  attrnum = separate_attrs (attr_str, attrs, attrnum);
   if (attrnum == -1)
     {
       error_at (DECL_SOURCE_LOCATION (node->decl),
@@ -354,6 +358,14 @@ expand_target_clones (struct cgraph_node *node, bool definition)
       XDELETEVEC (attr_str);
       return false;
     }
+  else if (attrnum == -2)
+    {
+      error_at (DECL_SOURCE_LOCATION (node->decl),
+		"an empty string cannot be in %<target_clones%> attribute");
+      XDELETEVEC (attrs);
+      XDELETEVEC (attr_str);
+      return false;
+    }
 
   cgraph_function_version_info *decl1_v = NULL;
   cgraph_function_version_info *decl2_v = NULL;
@@ -382,6 +394,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
       DECL_ATTRIBUTES (new_node->decl) = attributes;
       location_t saved_loc = input_location;
       input_location = DECL_SOURCE_LOCATION (node->decl);
+
       if (!targetm.target_option.valid_attribute_p (new_node->decl, NULL,
 						    TREE_VALUE (attributes),
 						    0))
@@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition)
   tree attributes = make_attribute ("target", "default",
 				    DECL_ATTRIBUTES (node->decl));
   DECL_ATTRIBUTES (node->decl) = attributes;
+  DECL_COMDAT (node->decl) = 0;
+  DECL_WEAK (node->decl) = 0;
+  DECL_ARTIFICIAL (node->decl) = 1;
   node->local.local = false;
+  node->set_comdat_group (NULL);
   location_t saved_loc = input_location;
   input_location = DECL_SOURCE_LOCATION (node->decl);
   bool ret
@@ -427,14 +444,14 @@ static unsigned int
 ipa_target_clone (void)
 {
   struct cgraph_node *node;
+  auto_vec<cgraph_node *> to_dispatch;
 
-  bool target_clone_pass = false;
   FOR_EACH_FUNCTION (node)
-    target_clone_pass |= expand_target_clones (node, node->definition);
+    if (expand_target_clones (node, node->definition))
+      to_dispatch.safe_push (node);
 
-  if (target_clone_pass)
-    FOR_EACH_FUNCTION (node)
-      create_dispatcher_calls (node);
+  for (unsigned i = 0; i < to_dispatch.length (); i++)
+    create_dispatcher_calls (to_dispatch[i]);
 
   return 0;
 }
diff --git a/gcc/testsuite/g++.dg/ext/pr85329.C b/gcc/testsuite/g++.dg/ext/pr85329.C
new file mode 100644
index 00000000000..fb77e42cd78
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/pr85329.C
@@ -0,0 +1,19 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-ifunc "" } */
+
+struct a
+{
+  __attribute__((target_clones("sse", "default"))) void operator^=(a) {}
+} * b;
+
+class c {
+public:
+  a *d();
+};
+
+class f {
+  void g();
+  c e;
+};
+
+void f::g() { *e.d() ^= b[0]; }
diff --git a/gcc/testsuite/gcc.target/i386/mvc12.c b/gcc/testsuite/gcc.target/i386/mvc12.c
new file mode 100644
index 00000000000..f42ae8080e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mvc12.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+
+__attribute__((target_clones("","arch=slm","arch=core-avx2", "default")))
+int foo (); /* { dg-error "an empty string cannot be in .target_clones. attribute" } */
+
+int
+bar ()
+{
+  return foo();
+}
Jan Hubicka April 12, 2018, 1:46 p.m. UTC | #4
> 2018-04-12  Martin Liska  <mliska@suse.cz>
> 
> 	PR ipa/85329
> 	* multiple_target.c (create_dispatcher_calls): Set apostrophes
> 	for target_clone error message.
> 	(separate_attrs): Add new argument and check for an emptry
> 	string.
> 	(expand_target_clones): Handle it.
> 	(ipa_target_clone): Make redirection just for target_clones
> 	functions.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-04-12  Martin Liska  <mliska@suse.cz>
> 
> 	PR ipa/85329
> 	* g++.dg/ext/pr85329.C: New test.
> 	* gcc.target/i386/mvc12.c: New test.
> @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition)
>    tree attributes = make_attribute ("target", "default",
>  				    DECL_ATTRIBUTES (node->decl));
>    DECL_ATTRIBUTES (node->decl) = attributes;
> +  DECL_COMDAT (node->decl) = 0;
> +  DECL_WEAK (node->decl) = 0;
> +  DECL_ARTIFICIAL (node->decl) = 1;
>    node->local.local = false;
> +  node->set_comdat_group (NULL);

If you make C++ inline and get the idea to use target cloning attribute on this,
this will likely lead to link error if you compile multiple files because you
turn comdat to non-comdat.

For comdats this woudl effectivly need to become C++ abi extension and we would
need to define comdat sections for these.  Perhaps easiest way is to simply
reject the attribute on comdats and probaby also extern functions?

Otherwise patch looks OK.
Honza
Jakub Jelinek April 12, 2018, 5:53 p.m. UTC | #5
On Thu, Apr 12, 2018 at 03:46:26PM +0200, Jan Hubicka wrote:
> If you make C++ inline and get the idea to use target cloning attribute on this,
> this will likely lead to link error if you compile multiple files because you
> turn comdat to non-comdat.
> 
> For comdats this woudl effectivly need to become C++ abi extension and we would
> need to define comdat sections for these.  Perhaps easiest way is to simply
> reject the attribute on comdats and probaby also extern functions?

I'm not really sure we can do that, various packages in the wild are already
using this.
What is the problem with comdats and multi-versioning?
The question is what comdat groups we should use for the comdat resolver and
the versioned functions, shall the ifunc symbol be the original mangling of
the method (or other comdat) and the other entrypoints just be .local
non-weak symbols inside of the same section?

	Jakub
Jakub Jelinek April 12, 2018, 5:59 p.m. UTC | #6
On Thu, Apr 12, 2018 at 07:53:35PM +0200, Jakub Jelinek wrote:
> On Thu, Apr 12, 2018 at 03:46:26PM +0200, Jan Hubicka wrote:
> > If you make C++ inline and get the idea to use target cloning attribute on this,
> > this will likely lead to link error if you compile multiple files because you
> > turn comdat to non-comdat.
> > 
> > For comdats this woudl effectivly need to become C++ abi extension and we would
> > need to define comdat sections for these.  Perhaps easiest way is to simply
> > reject the attribute on comdats and probaby also extern functions?
> 
> I'm not really sure we can do that, various packages in the wild are already
> using this.
> What is the problem with comdats and multi-versioning?
> The question is what comdat groups we should use for the comdat resolver and
> the versioned functions, shall the ifunc symbol be the original mangling of
> the method (or other comdat) and the other entrypoints just be .local
> non-weak symbols inside of the same section?

Ah, but we emit the resolver only if we see a use of it.  That sounds quite
broken, resolver in each TU that uses it?  Better to have one at each
definition...

	Jakub
Martin Liška April 13, 2018, 2:33 p.m. UTC | #7
On 04/12/2018 07:59 PM, Jakub Jelinek wrote:
> On Thu, Apr 12, 2018 at 07:53:35PM +0200, Jakub Jelinek wrote:
>> On Thu, Apr 12, 2018 at 03:46:26PM +0200, Jan Hubicka wrote:
>>> If you make C++ inline and get the idea to use target cloning attribute on this,
>>> this will likely lead to link error if you compile multiple files because you
>>> turn comdat to non-comdat.
>>>
>>> For comdats this woudl effectivly need to become C++ abi extension and we would
>>> need to define comdat sections for these.  Perhaps easiest way is to simply
>>> reject the attribute on comdats and probaby also extern functions?
>>
>> I'm not really sure we can do that, various packages in the wild are already
>> using this.
>> What is the problem with comdats and multi-versioning?
>> The question is what comdat groups we should use for the comdat resolver and
>> the versioned functions, shall the ifunc symbol be the original mangling of
>> the method (or other comdat) and the other entrypoints just be .local
>> non-weak symbols inside of the same section?
> 
> Ah, but we emit the resolver only if we see a use of it.  That sounds quite
> broken, resolver in each TU that uses it?  Better to have one at each
> definition...
> 
> 	Jakub
> 

So after quite some time I would need some brainstorming as I'm not sure how to
fix that properly. Let's start how 'target' attribute works and I believe it should
behave same as target_clones attribute:

$ cat mv.h
int foo (void);

void test ();

$ cat mv.cpp 
#include "mv.h"
  
  __attribute__ ((target ("default")))
int foo ()
{
  return 0;
}

__attribute__ ((target ("sse4.2")))
int foo ()
{
  return 1;
}

int main ()
{
  __builtin_printf ("in main: %d\n", foo ());
  test ();

  return 0;
}

$ cat mv2.cpp 
#include "mv.h"

void test()
{
  int (*f) (void) = &foo;
  __builtin_printf ("value: %d\n", foo ());
}

$ gcc -fdump-ipa-cgraph=/dev/stdout mv.cpp -c

_Z13_Z3foov.ifuncv/2 (int _Z3foov.ifunc()) @0x7ffff67182e0
  Type: function definition analyzed alias cpp_implicit_alias
  Visibility: externally_visible asm_written public comdat comdat_group:_Z3foov.resolver one_only section:.text._Z3foov.resolver (implicit_section) artificial
  Same comdat group as: _Z3foov.resolver/6
  References: _Z3foov.resolver/6 (alias)
  Referring: 
  Availability: overwritable
  First run: 0
  Version info: next: _Z3foov/0 dispatcher: _Z3foov.resolver
  Function flags:
  Called by: 
  Calls: 
_Z3foov.sse4.2/1 (int foo()) @0x7ffff6718170
  Type: function definition analyzed
  Visibility: force_output externally_visible asm_written public
  Address is taken.
  References: 
  Referring: 
  Availability: available
  First run: 0
  Version info: prev: _Z3foov/0 dispatcher: int _Z3foov.ifunc()
  Function flags:
  Called by: 
  Calls: 
_Z3foov/0 (int foo()) @0x7ffff6718000
  Type: function definition analyzed
  Visibility: force_output externally_visible asm_written public
  Address is taken.
  References: 
  Referring: 
  Availability: available
  First run: 0
  Version info: next: _Z3foov.sse4.2/1 dispatcher: int _Z3foov.ifunc()
  Function flags:
  Called by: 
  Calls: 
_Z3foov.resolver/6 (_Z3foov.resolver) @0x7ffff67188a0
  Type: function definition analyzed
  Visibility: externally_visible asm_written public weak comdat comdat_group:_Z3foov.resolver one_only section:.text._Z3foov.resolver (implicit_section) artificial
  Same comdat group as: _Z13_Z3foov.ifuncv/2
  References: 
  Referring: _Z13_Z3foov.ifuncv/2 (alias)
  Availability: available
  First run: 0
  Function flags:
  Called by: 
  Calls: 

So note that resolver and ifunc live in a unique comdat_group. And as opposed to target_clones, default implementation has not appended '.default' suffix. That's
problem because an address taken returns default implementation as seen here:

$  gcc mv.cpp mv2.cpp && ./a.out 
in main: 1
value: 0

Which is the same problem is fixed for target_clones in this release cycle. So it's broken.

Apart from that, following is broken for target attribute:
cat Crypto-target.ii
struct aaa
{
  static __attribute__((target("avx512f"))) void foo() { __builtin_printf ("avx512f\n"); }
  static __attribute__((target("sse"))) void foo() {__builtin_printf ("sse\n");}
  static __attribute__((target("default"))) void foo() {__builtin_printf ("default\n");}
};

void (*initializer) (void) = { &aaa::foo };

int main()
{
  initializer ();
//  aaa::foo ();
}

$ g++ Crypto-target.ii && ./a.out
/tmp/ccJMMaDc.o:(.data+0x0): undefined reference to `_ZN3aaa3fooEv.ifunc()'
collect2: error: ld returned 1 exit status

For target_clones, I have a patch candidate that works for the test-case in PR85329:

$ cat ~/Programming/testcases/Crypto.ii
struct a
{
  __attribute__((target_clones("sse", "default"))) void operator^=(a) {}
} * b;

class c {
public:
  a *d();
};

class f {
  void g();
  c e;
};

void f::g() { *e.d() ^= b[0]; }

$ g++ ~/Programming/testcases/Crypto.ii -c -fdump-ipa-cgraph=/dev/stdout

_ZN1aeOES_.resolver/6 (_ZN1aeOES_.resolver) @0x7ffff690d730
  Type: function definition analyzed
  Visibility: force_output externally_visible public weak comdat comdat_group:_ZN1aeOES_ one_only artificial
  Same comdat group as: _ZN1aeOES_/5
  References: _ZN1aeOES_.sse.0/4 (addr)_ZN1aeOES_.default.1/0 (addr)
  Referring: _ZN1aeOES_/5 (alias)
  Availability: available
  First run: 0
  Function flags: body
  Called by: 
  Calls: int __builtin_cpu_supports(const char*)/8 (can throw external) int __builtin_cpu_init()/7 (can throw external) 
_ZN1aeOES_/5 (_ZN1aeOES_) @0x7ffff690d5c0
  Type: function definition analyzed alias cpp_implicit_alias target:_ZN1aeOES_.resolver
  Visibility: externally_visible public comdat comdat_group:_ZN1aeOES_ one_only artificial
  Same comdat group as: _ZN1aeOES_.resolver/6
  References: _ZN1aeOES_.resolver/6 (alias)
  Referring: 
  Availability: overwritable
  First run: 0
  Version info: next: _ZN1aeOES_.default.1/0 dispatcher: _ZN1aeOES_.resolver
  Function flags:
  Called by: void f::g()/2 
  Calls: 
_ZN1aeOES_.sse.0/4 (void a::_ZN1aeOES_.sse.0(a)) @0x7ffff690d450
  Type: function definition analyzed
  Visibility: force_output no_reorder prevailing_def_ironly artificial
  Address is taken.
  References: 
  Referring: _ZN1aeOES_.resolver/6 (addr)
  Availability: available
  First run: 0
  Version info: prev: _ZN1aeOES_.default.1/0 dispatcher: _ZN1aeOES_
  Function flags: body
  Called by: 
  Calls: 
_ZN1aeOES_.default.1/0 (void a::operator^=(a)) @0x7ffff690d000
  Type: function definition analyzed
  Visibility: force_output no_reorder prevailing_def_ironly artificial
  Address is taken.
  References: 
  Referring: _ZN1aeOES_.resolver/6 (addr)
  Availability: available
  First run: 0
  Version info: next: _ZN1aeOES_.sse.0/4 dispatcher: _ZN1aeOES_
  Function flags: body
  Called by: 
  Calls: 

It's not ideal because _ZN1aeOES_.sse.0/4 and _ZN1aeOES_.default.1/0 don't share the same comdat group.
But it should not break anything now, am I right? Honza?

I'll carry on during weekend after I'll get some feedback.

Thanks,
Martin
Jakub Jelinek April 13, 2018, 3:20 p.m. UTC | #8
On Fri, Apr 13, 2018 at 04:33:40PM +0200, Martin Liška wrote:
> > Ah, but we emit the resolver only if we see a use of it.  That sounds quite
> > broken, resolver in each TU that uses it?  Better to have one at each
> > definition...
> > 
> > 	Jakub
> > 
> 
> So after quite some time I would need some brainstorming as I'm not sure how to
> fix that properly. Let's start how 'target' attribute works and I believe it should
> behave same as target_clones attribute:

I think the target_clones behavior if you put the <function>.resolver into
.text.<function> section in <function> comdat rather than
.text.<function>.resolver and <function>.resolver comdat is reasonable.

The thing is that both the <function>.resolver and <function> symbols are
defined in the section in which the resolver is defined.
Maybe it would be nice if the resolver was made not exported out of the TU,
and eventually, not necessarily now for GCC8, turn the specializations into
non-exported symbols too and put them into the same section or different
sections of the same comdat group.

For ctors and dtors we need extra care about the aliases, not sure if we can
have an alias to ifunc symbol, or if we need to emit two ifunc symbols with
the same resolver or what exactly.

And yes, it would be nice if the target attribute multiversioning worked
similarly to this.  It would change behavior for it in ABI incompatible way,
the question is how much work it would be as well.

What you can do right now with the target attribute multiversioning and
couldn't do after the change would be e.g.
mv.h:
__attribute__((target ("default"))) void foo ();
__attribute__((target ("avx"))) void foo ();
mv1.C:
#include "mv.h"
__attribute__((target ("default"))) void foo () {}
mv2.C:
#include "mv.h"
__attribute__((target ("avx"))) void foo () {}
mv3.C:
#include "mv.h"
void bar () { foo (); }
You couldn't this in the semantics closer to target_clones, all the
definitions would need to be done in the same file which is where you'd also
get the ifunc symbol.

IMHO it is worth changing the semantics anyway, because the current one
isn't very well thought out, it doesn't really work well with comdats, can
have too many resolvers with the associated costs, etc.

Honza, do you agree?

	Jakub
Jan Hubicka April 14, 2018, 9:48 a.m. UTC | #9
> On Fri, Apr 13, 2018 at 04:33:40PM +0200, Martin Liška wrote:
> > > Ah, but we emit the resolver only if we see a use of it.  That sounds quite
> > > broken, resolver in each TU that uses it?  Better to have one at each
> > > definition...
> > > 
> > > 	Jakub
> > > 
> > 
> > So after quite some time I would need some brainstorming as I'm not sure how to
> > fix that properly. Let's start how 'target' attribute works and I believe it should
> > behave same as target_clones attribute:
> 
> I think the target_clones behavior if you put the <function>.resolver into
> .text.<function> section in <function> comdat rather than
> .text.<function>.resolver and <function>.resolver comdat is reasonable.
> 
> The thing is that both the <function>.resolver and <function> symbols are
> defined in the section in which the resolver is defined.
> Maybe it would be nice if the resolver was made not exported out of the TU,
> and eventually, not necessarily now for GCC8, turn the specializations into
> non-exported symbols too and put them into the same section or different
> sections of the same comdat group.
> 
> For ctors and dtors we need extra care about the aliases, not sure if we can
> have an alias to ifunc symbol, or if we need to emit two ifunc symbols with
> the same resolver or what exactly.
> 
> And yes, it would be nice if the target attribute multiversioning worked
> similarly to this.  It would change behavior for it in ABI incompatible way,
> the question is how much work it would be as well.
> 
> What you can do right now with the target attribute multiversioning and
> couldn't do after the change would be e.g.
> mv.h:
> __attribute__((target ("default"))) void foo ();
> __attribute__((target ("avx"))) void foo ();
> mv1.C:
> #include "mv.h"
> __attribute__((target ("default"))) void foo () {}
> mv2.C:
> #include "mv.h"
> __attribute__((target ("avx"))) void foo () {}
> mv3.C:
> #include "mv.h"
> void bar () { foo (); }
> You couldn't this in the semantics closer to target_clones, all the
> definitions would need to be done in the same file which is where you'd also
> get the ifunc symbol.
> 
> IMHO it is worth changing the semantics anyway, because the current one
> isn't very well thought out, it doesn't really work well with comdats, can
> have too many resolvers with the associated costs, etc.
> 
> Honza, do you agree?

Yes, it looks reasonable.  I will give it bit more tought today (I am on a trip
with only sporadic internet access)

Honza
> 
> 	Jakub
Martin Liška April 14, 2018, 9:53 a.m. UTC | #10
On 04/12/2018 03:46 PM, Jan Hubicka wrote:
>> 2018-04-12  Martin Liska  <mliska@suse.cz>
>>
>> 	PR ipa/85329
>> 	* multiple_target.c (create_dispatcher_calls): Set apostrophes
>> 	for target_clone error message.
>> 	(separate_attrs): Add new argument and check for an emptry
>> 	string.
>> 	(expand_target_clones): Handle it.
>> 	(ipa_target_clone): Make redirection just for target_clones
>> 	functions.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2018-04-12  Martin Liska  <mliska@suse.cz>
>>
>> 	PR ipa/85329
>> 	* g++.dg/ext/pr85329.C: New test.
>> 	* gcc.target/i386/mvc12.c: New test.
>> @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition)
>>     tree attributes = make_attribute ("target", "default",
>>   				    DECL_ATTRIBUTES (node->decl));
>>     DECL_ATTRIBUTES (node->decl) = attributes;
>> +  DECL_COMDAT (node->decl) = 0;
>> +  DECL_WEAK (node->decl) = 0;
>> +  DECL_ARTIFICIAL (node->decl) = 1;
>>     node->local.local = false;
>> +  node->set_comdat_group (NULL);
> 
> If you make C++ inline and get the idea to use target cloning attribute on this,
> this will likely lead to link error if you compile multiple files because you
> turn comdat to non-comdat.

Can you please explain this in more detail? Ideally showing a test-case that would fail?

Martin

> 
> For comdats this woudl effectivly need to become C++ abi extension and we would
> need to define comdat sections for these.  Perhaps easiest way is to simply
> reject the attribute on comdats and probaby also extern functions?
> 
> Otherwise patch looks OK.
> Honza
>
Martin Liška April 14, 2018, 9:58 a.m. UTC | #11
On 04/12/2018 03:46 PM, Jan Hubicka wrote:
>> 2018-04-12  Martin Liska  <mliska@suse.cz>
>>
>> 	PR ipa/85329
>> 	* multiple_target.c (create_dispatcher_calls): Set apostrophes
>> 	for target_clone error message.
>> 	(separate_attrs): Add new argument and check for an emptry
>> 	string.
>> 	(expand_target_clones): Handle it.
>> 	(ipa_target_clone): Make redirection just for target_clones
>> 	functions.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2018-04-12  Martin Liska  <mliska@suse.cz>
>>
>> 	PR ipa/85329
>> 	* g++.dg/ext/pr85329.C: New test.
>> 	* gcc.target/i386/mvc12.c: New test.
>> @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition)
>>     tree attributes = make_attribute ("target", "default",
>>   				    DECL_ATTRIBUTES (node->decl));
>>     DECL_ATTRIBUTES (node->decl) = attributes;
>> +  DECL_COMDAT (node->decl) = 0;
>> +  DECL_WEAK (node->decl) = 0;
>> +  DECL_ARTIFICIAL (node->decl) = 1;
>>     node->local.local = false;
>> +  node->set_comdat_group (NULL);
> 
> If you make C++ inline and get the idea to use target cloning attribute on this,
> this will likely lead to link error if you compile multiple files because you
> turn comdat to non-comdat.
> 
> For comdats this woudl effectivly need to become C++ abi extension and we would
> need to define comdat sections for these.  Perhaps easiest way is to simply
> reject the attribute on comdats and probaby also extern functions?
> 
> Otherwise patch looks OK.
> Honza
> 

And Honza also asked me for description what has changes in between GCC 7.x and current trunk.
There are test-cases that illustrate that:

./gcc/testsuite/gcc.target/i386/mvc10.c
./gcc/testsuite/gcc.target/i386/mvc11.c
./gcc/testsuite/gcc.target/i386/mvc5.c
./gcc/testsuite/gcc.target/i386/mvc7.c
./gcc/testsuite/gcc.target/i386/pr80732.c
./gcc/testsuite/gcc.target/i386/pr81214.c

Martin
Jakub Jelinek April 14, 2018, 10:24 a.m. UTC | #12
On Sat, Apr 14, 2018 at 11:53:51AM +0200, Martin Liška wrote:
> > > 2018-04-12  Martin Liska  <mliska@suse.cz>
> > > 
> > > 	PR ipa/85329
> > > 	* g++.dg/ext/pr85329.C: New test.
> > > 	* gcc.target/i386/mvc12.c: New test.
> > > @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition)
> > >     tree attributes = make_attribute ("target", "default",
> > >   				    DECL_ATTRIBUTES (node->decl));
> > >     DECL_ATTRIBUTES (node->decl) = attributes;
> > > +  DECL_COMDAT (node->decl) = 0;
> > > +  DECL_WEAK (node->decl) = 0;
> > > +  DECL_ARTIFICIAL (node->decl) = 1;
> > >     node->local.local = false;
> > > +  node->set_comdat_group (NULL);
> > 
> > If you make C++ inline and get the idea to use target cloning attribute on this,
> > this will likely lead to link error if you compile multiple files because you
> > turn comdat to non-comdat.
> 
> Can you please explain this in more detail? Ideally showing a test-case that would fail?

I'd guess something along the lines of
typedef void (*F) ();
__attribute__((target ("default"))) inline void foo () {}
__attribute__((target ("avx"))) inline void foo () {}
__attribute__((target_clones ("default", "avx"))) inline void bar () {}
#ifdef SRC_A
F pa = foo;
F qa = bar;
#else
F pb = foo;
F qb = bar;
extern F pa, qa;
int
main ()
{
  asm volatile ("" : "+g" (pa), "+g" (pb), "+g" (qa), "+g" (qb));
  pa ();
  pb ();
  qa ();
  qb ();
}
#endif
and
g++ -O2 -c -DSRC_A test.C -o testa.o
g++ -O2 -o test{,a.o,.C}

This doesn't actually fail, because the bar symbol, even when it doesn't
have STB_WEAK (which it should), is in the comdat group and so only one is
picked up.

	Jakub
Martin Liška April 16, 2018, 2:01 p.m. UTC | #13
Hi.

I'm sending V3 which we did with Honza. It's similar to V2, but properly makes
FUNCTION_DECL local for default implementation.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
Tests on ppc64le are running.

Ready to be installed?
Martin
From 77b48cfad59dd24a5c068bfb32b1059535ae1f75 Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Sat, 14 Apr 2018 09:55:35 +0200
Subject: [PATCH] Make redirection only for target_clones: V3 (PR ipa/85329).

2018-04-16  Martin Liska  <mliska@suse.cz>

	* multiple_target.c (create_dispatcher_calls): Set apostrophes
	for target_clone error message.  Make default implementation
        clone to be a local declaration.
	(separate_attrs): Add new argument and check for an emptry
	string.
	(expand_target_clones): Handle it.
	(ipa_target_clone): Make redirection just for target_clones
	functions.

gcc/testsuite/ChangeLog:

2018-04-16  Martin Liska  <mliska@suse.cz>

	* g++.dg/ext/pr85329-2.C: New test.
	* g++.dg/ext/pr85329.C: New test.
	* gcc.target/i386/mvc12.c: New test.
---
 gcc/multiple_target.c                 | 55 ++++++++++++++++++++++++++---------
 gcc/testsuite/g++.dg/ext/pr85329-2.C  | 22 ++++++++++++++
 gcc/testsuite/g++.dg/ext/pr85329.C    | 19 ++++++++++++
 gcc/testsuite/gcc.target/i386/mvc12.c | 11 +++++++
 4 files changed, 93 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/pr85329-2.C
 create mode 100644 gcc/testsuite/g++.dg/ext/pr85329.C
 create mode 100644 gcc/testsuite/gcc.target/i386/mvc12.c

diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
index b006a5ab6ec..a1fe09a5983 100644
--- a/gcc/multiple_target.c
+++ b/gcc/multiple_target.c
@@ -88,7 +88,7 @@ create_dispatcher_calls (struct cgraph_node *node)
   if (!idecl)
     {
       error_at (DECL_SOURCE_LOCATION (node->decl),
-		"default target_clones attribute was not set");
+		"default %<target_clones%> attribute was not set");
       return;
     }
 
@@ -161,10 +161,25 @@ create_dispatcher_calls (struct cgraph_node *node)
 	}
     }
 
-  TREE_PUBLIC (node->decl) = 0;
   symtab->change_decl_assembler_name (node->decl,
 				      clone_function_name (node->decl,
 							   "default"));
+
+  /* FIXME: copy of cgraph_node::make_local that should be cleaned up
+	    in next stage1.  */
+  node->make_decl_local ();
+  node->set_section (NULL);
+  node->set_comdat_group (NULL);
+  node->externally_visible = false;
+  node->forced_by_abi = false;
+  node->set_section (NULL);
+  node->unique_name = ((node->resolution == LDPR_PREVAILING_DEF_IRONLY
+			|| node->resolution == LDPR_PREVAILING_DEF_IRONLY_EXP)
+		       && !flag_incremental_link);
+  node->resolution = LDPR_PREVAILING_DEF_IRONLY;
+
+  DECL_ARTIFICIAL (node->decl) = 1;
+  node->force_output = true;
 }
 
 /* Return length of attribute names string,
@@ -216,26 +231,30 @@ get_attr_str (tree arglist, char *attr_str)
 }
 
 /* Return number of attributes separated by comma and put them into ARGS.
-   If there is no DEFAULT attribute return -1.  */
+   If there is no DEFAULT attribute return -1.  If there is an empty
+   string in attribute return -2.  */
 
 static int
-separate_attrs (char *attr_str, char **attrs)
+separate_attrs (char *attr_str, char **attrs, int attrnum)
 {
   int i = 0;
-  bool has_default = false;
+  int default_count = 0;
 
   for (char *attr = strtok (attr_str, ",");
        attr != NULL; attr = strtok (NULL, ","))
     {
       if (strcmp (attr, "default") == 0)
 	{
-	  has_default = true;
+	  default_count++;
 	  continue;
 	}
       attrs[i++] = attr;
     }
-  if (!has_default)
+  if (default_count == 0)
     return -1;
+  else if (i + default_count < attrnum)
+    return -2;
+
   return i;
 }
 
@@ -321,7 +340,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
     {
       warning_at (DECL_SOURCE_LOCATION (node->decl),
 		  0,
-		  "single target_clones attribute is ignored");
+		  "single %<target_clones%> attribute is ignored");
       return false;
     }
 
@@ -345,7 +364,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
   int attrnum = get_attr_str (arglist, attr_str);
   char **attrs = XNEWVEC (char *, attrnum);
 
-  attrnum = separate_attrs (attr_str, attrs);
+  attrnum = separate_attrs (attr_str, attrs, attrnum);
   if (attrnum == -1)
     {
       error_at (DECL_SOURCE_LOCATION (node->decl),
@@ -354,6 +373,14 @@ expand_target_clones (struct cgraph_node *node, bool definition)
       XDELETEVEC (attr_str);
       return false;
     }
+  else if (attrnum == -2)
+    {
+      error_at (DECL_SOURCE_LOCATION (node->decl),
+		"an empty string cannot be in %<target_clones%> attribute");
+      XDELETEVEC (attrs);
+      XDELETEVEC (attr_str);
+      return false;
+    }
 
   cgraph_function_version_info *decl1_v = NULL;
   cgraph_function_version_info *decl2_v = NULL;
@@ -427,14 +454,14 @@ static unsigned int
 ipa_target_clone (void)
 {
   struct cgraph_node *node;
+  auto_vec<cgraph_node *> to_dispatch;
 
-  bool target_clone_pass = false;
   FOR_EACH_FUNCTION (node)
-    target_clone_pass |= expand_target_clones (node, node->definition);
+    if (expand_target_clones (node, node->definition))
+      to_dispatch.safe_push (node);
 
-  if (target_clone_pass)
-    FOR_EACH_FUNCTION (node)
-      create_dispatcher_calls (node);
+  for (unsigned i = 0; i < to_dispatch.length (); i++)
+    create_dispatcher_calls (to_dispatch[i]);
 
   return 0;
 }
diff --git a/gcc/testsuite/g++.dg/ext/pr85329-2.C b/gcc/testsuite/g++.dg/ext/pr85329-2.C
new file mode 100644
index 00000000000..24622d404f7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/pr85329-2.C
@@ -0,0 +1,22 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-ifunc "" } */
+
+class b
+{
+public:
+  __attribute__ ((target ("aes"))) b () {}
+  __attribute__ ((target ("default"))) b () {}
+};
+class c
+{
+  b d;
+};
+void
+fn1 ()
+{
+  c a;
+}
+__attribute__ ((target_clones ("sse", "default"))) void
+e ()
+{
+}
diff --git a/gcc/testsuite/g++.dg/ext/pr85329.C b/gcc/testsuite/g++.dg/ext/pr85329.C
new file mode 100644
index 00000000000..fb77e42cd78
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/pr85329.C
@@ -0,0 +1,19 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-ifunc "" } */
+
+struct a
+{
+  __attribute__((target_clones("sse", "default"))) void operator^=(a) {}
+} * b;
+
+class c {
+public:
+  a *d();
+};
+
+class f {
+  void g();
+  c e;
+};
+
+void f::g() { *e.d() ^= b[0]; }
diff --git a/gcc/testsuite/gcc.target/i386/mvc12.c b/gcc/testsuite/gcc.target/i386/mvc12.c
new file mode 100644
index 00000000000..f42ae8080e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mvc12.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+
+__attribute__((target_clones("","arch=slm","arch=core-avx2", "default")))
+int foo (); /* { dg-error "an empty string cannot be in .target_clones. attribute" } */
+
+int
+bar ()
+{
+  return foo();
+}
Jan Hubicka April 16, 2018, 2:08 p.m. UTC | #14
> 2018-04-16  Martin Liska  <mliska@suse.cz>
> 
> 	* multiple_target.c (create_dispatcher_calls): Set apostrophes
> 	for target_clone error message.  Make default implementation
>         clone to be a local declaration.
> 	(separate_attrs): Add new argument and check for an emptry
> 	string.
> 	(expand_target_clones): Handle it.
> 	(ipa_target_clone): Make redirection just for target_clones
> 	functions.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-04-16  Martin Liska  <mliska@suse.cz>
> 
> 	* g++.dg/ext/pr85329-2.C: New test.
> 	* g++.dg/ext/pr85329.C: New test.
> 	* gcc.target/i386/mvc12.c: New test.
> ---
>  gcc/multiple_target.c                 | 55 ++++++++++++++++++++++++++---------
>  gcc/testsuite/g++.dg/ext/pr85329-2.C  | 22 ++++++++++++++
>  gcc/testsuite/g++.dg/ext/pr85329.C    | 19 ++++++++++++
>  gcc/testsuite/gcc.target/i386/mvc12.c | 11 +++++++
>  4 files changed, 93 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/ext/pr85329-2.C
>  create mode 100644 gcc/testsuite/g++.dg/ext/pr85329.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/mvc12.c
> 
> diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
> index b006a5ab6ec..a1fe09a5983 100644
> --- a/gcc/multiple_target.c
> +++ b/gcc/multiple_target.c
> @@ -88,7 +88,7 @@ create_dispatcher_calls (struct cgraph_node *node)
>    if (!idecl)
>      {
>        error_at (DECL_SOURCE_LOCATION (node->decl),
> -		"default target_clones attribute was not set");
> +		"default %<target_clones%> attribute was not set");
>        return;
>      }
>  
> @@ -161,10 +161,25 @@ create_dispatcher_calls (struct cgraph_node *node)
>  	}
>      }
>  
> -  TREE_PUBLIC (node->decl) = 0;
>    symtab->change_decl_assembler_name (node->decl,
>  				      clone_function_name (node->decl,
>  							   "default"));
> +
> +  /* FIXME: copy of cgraph_node::make_local that should be cleaned up
> +	    in next stage1.  */
> +  node->make_decl_local ();
> +  node->set_section (NULL);
> +  node->set_comdat_group (NULL);
> +  node->externally_visible = false;
> +  node->forced_by_abi = false;
> +  node->set_section (NULL);

Probably no need to do this twice (next stage1 we can fix that in make_local as well)

> +  node->unique_name = ((node->resolution == LDPR_PREVAILING_DEF_IRONLY
> +			|| node->resolution == LDPR_PREVAILING_DEF_IRONLY_EXP)
> +		       && !flag_incremental_link);
> +  node->resolution = LDPR_PREVAILING_DEF_IRONLY;
> +
> +  DECL_ARTIFICIAL (node->decl) = 1;
> +  node->force_output = true;

And we don't really need force_output, but that is for next stage1 as well.

OK and thanks for working on this!
Honza
Jakub Jelinek April 16, 2018, 2:46 p.m. UTC | #15
On Mon, Apr 16, 2018 at 04:01:18PM +0200, Martin Liška wrote:
> >From 77b48cfad59dd24a5c068bfb32b1059535ae1f75 Mon Sep 17 00:00:00 2001
> From: marxin <mliska@suse.cz>
> Date: Sat, 14 Apr 2018 09:55:35 +0200
> Subject: [PATCH] Make redirection only for target_clones: V3 (PR ipa/85329).
> 
> 2018-04-16  Martin Liska  <mliska@suse.cz>
> 

Missing PR line.

> 	* multiple_target.c (create_dispatcher_calls): Set apostrophes
> 	for target_clone error message.  Make default implementation
>         clone to be a local declaration.
> 	(separate_attrs): Add new argument and check for an emptry

s/emptry/empty/

> 	string.
> 	(expand_target_clones): Handle it.
> 	(ipa_target_clone): Make redirection just for target_clones
> 	functions.

	Jakub
diff mbox series

Patch

diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
index b006a5ab6ec..5bd88fbd83c 100644
--- a/gcc/multiple_target.c
+++ b/gcc/multiple_target.c
@@ -58,11 +58,11 @@  replace_function_decl (tree *op, int *walk_subtrees, void *data)
   return NULL;
 }
 
-/* If the call in NODE has multiple target attribute with multiple fields,
-   replace it with dispatcher call and create dispatcher (once).  */
+/* If the call in NODE is a target_clone attribute clone, redirect all
+   edges and references that point to cgraph NODE.  */
 
 static void
-create_dispatcher_calls (struct cgraph_node *node)
+redirect_target_clone_callers (struct cgraph_node *node)
 {
   ipa_ref *ref;
 
@@ -300,10 +300,12 @@  create_target_clone (cgraph_node *node, bool definition, char *name)
 }
 
 /* If the function in NODE has multiple target attributes
-   create the appropriate clone for each valid target attribute.  */
+   create the appropriate clone for each valid target attribute.
+   TO_REDIRECT is a vector where we place all newly created clones.  */
 
 static bool
-expand_target_clones (struct cgraph_node *node, bool definition)
+expand_target_clones (struct cgraph_node *node, bool definition,
+		      auto_vec <cgraph_node *> &to_redirect)
 {
   int i;
   /* Parsing target attributes separated by comma.  */
@@ -404,6 +406,8 @@  expand_target_clones (struct cgraph_node *node, bool definition)
       before->next = after;
       after->prev = before;
       DECL_FUNCTION_VERSIONED (new_node->decl) = 1;
+
+      to_redirect.safe_push (new_node);
     }
 
   XDELETEVEC (attrs);
@@ -420,6 +424,8 @@  expand_target_clones (struct cgraph_node *node, bool definition)
     = targetm.target_option.valid_attribute_p (node->decl, NULL,
 					       TREE_VALUE (attributes), 0);
   input_location = saved_loc;
+  to_redirect.safe_push (node);
+
   return ret;
 }
 
@@ -428,13 +434,12 @@  ipa_target_clone (void)
 {
   struct cgraph_node *node;
 
-  bool target_clone_pass = false;
+  auto_vec <cgraph_node *> nodes_to_redirect;
   FOR_EACH_FUNCTION (node)
-    target_clone_pass |= expand_target_clones (node, node->definition);
+    expand_target_clones (node, node->definition, nodes_to_redirect);
 
-  if (target_clone_pass)
-    FOR_EACH_FUNCTION (node)
-      create_dispatcher_calls (node);
+  for (unsigned i = 0; i < nodes_to_redirect.length (); i++)
+    redirect_target_clone_callers (nodes_to_redirect[i]);
 
   return 0;
 }