diff mbox

SIMD clones LTO fixes part 2 (PR lto/59326)

Message ID 20131128230027.GS892@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Nov. 28, 2013, 11 p.m. UTC
Hi!

And here is the second part of the fixes.  Still, the vect-simd-clone-12.c
testcase fails with -flto -flto-partition=1to1, so there is further work to
do, but at least all current tests succeed and actually use SIMD elementals
when they should.  Bootstrapped/regtested on x86_64-linux and i686-linux,
ok for trunk?

2013-11-28  Jakub Jelinek  <jakub@redhat.com>
	    Richard Biener  <rguenther@suse.de>

	PR lto/59326
	* omp-low.c (simd_clone_create): Return NULL if for definition
	!cgraph_function_with_gimple_body_p (old_node).  Call cgraph_get_body
	before calling cgraph_function_versioning.
	(expand_simd_clones): Look for "omp declare simd" attribute first.
	Don't check targetm.simd_clone.compute_vecsize_and_simdlen here.
	Punt if node->global.inlined_to.
	(pass_omp_simd_clone::gate): Also enable if flag_ltrans.  Disable
	pass if targetm.simd_clone.compute_vecsize_and_simdlen is NULL.
	* lto-streamer-out.c (hash_tree): Handle OMP_CLAUSE.
lto/
	* lto.c (compare_tree_sccs_1): Handle OMP_CLAUSE.
testsuite/
	* gcc.dg/vect/vect-simd-clone-12.c: New test.
	* gcc.dg/vect/vect-simd-clone-12a.c: New test.
	* gcc.dg/vect/vect-simd-clone-10a.c: Remove extern keywords.


	Jakub

Comments

Richard Biener Nov. 29, 2013, 8:58 a.m. UTC | #1
On Fri, 29 Nov 2013, Jakub Jelinek wrote:

> Hi!
> 
> And here is second part of the fixes.  Still, the vect-simd-clone-12.c
> testcase fails with -flto -flto-partition=1to1, so there is further work to
> do, but at least all current test succeed and actually use SIMD elementals
> when they should.  Bootstrapped/regtested on x86_64-linux and i686-linux,
> ok for trunk?

Ok.  Still my hack

> -  bool gate () { return flag_openmp || flag_openmp_simd
> -                     || flag_enable_cilkplus; }
> +  bool gate () { return ((flag_openmp || flag_openmp_simd
> +                       || flag_enable_cilkplus || flag_ltrans)
> +                      && (targetm.simd_clone.compute_vecsize_and_simdlen
> +                          != NULL)); }

won't work for -flto-partition=none.  Instead of flag_ltrans you
probably want to test (in_lto_p && !flag_wpa).

Richard.

> 2013-11-28  Jakub Jelinek  <jakub@redhat.com>
> 	    Richard Biener  <rguenther@suse.de>
> 
> 	PR lto/59326
> 	* omp-low.c (simd_clone_create): Return NULL if for definition
> 	!cgraph_function_with_gimple_body_p (old_node).  Call cgraph_get_body
> 	before calling cgraph_function_versioning.
> 	(expand_simd_clones): Look for "omp declare simd" attribute first.
> 	Don't check targetm.simd_clone.compute_vecsize_and_simdlen here.
> 	Punt if node->global.inlined_to.
> 	(pass_omp_simd_clone::gate): Also enable if flag_ltrans.  Disable
> 	pass if targetm.simd_clone.compute_vecsize_and_simdlen is NULL.
> 	* lto-streamer-out.c (hash_tree): Handle OMP_CLAUSE.
> lto/
> 	* lto.c (compare_tree_sccs_1): Handle OMP_CLAUSE.
> testsuite/
> 	* gcc.dg/vect/vect-simd-clone-12.c: New test.
> 	* gcc.dg/vect/vect-simd-clone-12a.c: New test.
> 	* gcc.dg/vect/vect-simd-clone-10a.c: Remove extern keywords.
> 
> --- gcc/omp-low.c.jj	2013-11-27 12:15:13.000000000 +0100
> +++ gcc/omp-low.c	2013-11-28 16:53:49.388242468 +0100
> @@ -10912,8 +10912,13 @@ simd_clone_create (struct cgraph_node *o
>  {
>    struct cgraph_node *new_node;
>    if (old_node->definition)
> -    new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL, false,
> -					   NULL, NULL, "simdclone");
> +    {
> +      if (!cgraph_function_with_gimple_body_p (old_node))
> +	return NULL;
> +      cgraph_get_body (old_node);
> +      new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL,
> +					     false, NULL, NULL, "simdclone");
> +    }
>    else
>      {
>        tree old_decl = old_node->decl;
> @@ -11622,13 +11627,13 @@ simd_clone_adjust (struct cgraph_node *n
>  static void
>  expand_simd_clones (struct cgraph_node *node)
>  {
> -  if (lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
> -    return;
> -
>    tree attr = lookup_attribute ("omp declare simd",
>  				DECL_ATTRIBUTES (node->decl));
> -  if (!attr || targetm.simd_clone.compute_vecsize_and_simdlen == NULL)
> +  if (attr == NULL_TREE
> +      || node->global.inlined_to
> +      || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
>      return;
> +
>    /* Ignore
>       #pragma omp declare simd
>       extern int foo ();
> @@ -11764,8 +11769,10 @@ public:
>    {}
>  
>    /* opt_pass methods: */
> -  bool gate () { return flag_openmp || flag_openmp_simd
> -			|| flag_enable_cilkplus; }
> +  bool gate () { return ((flag_openmp || flag_openmp_simd
> +			  || flag_enable_cilkplus || flag_ltrans)
> +			 && (targetm.simd_clone.compute_vecsize_and_simdlen
> +			     != NULL)); }
>    unsigned int execute () { return ipa_omp_simd_clone (); }
>  };
>  
> --- gcc/lto/lto.c.jj	2013-11-28 16:02:36.000000000 +0100
> +++ gcc/lto/lto.c	2013-11-28 16:27:04.164663085 +0100
> @@ -1410,6 +1410,36 @@ compare_tree_sccs_1 (tree t1, tree t2, t
>  		   TREE_STRING_LENGTH (t1)) != 0)
>        return false;
>  
> +  if (code == OMP_CLAUSE)
> +    {
> +      compare_values (OMP_CLAUSE_CODE);
> +      switch (OMP_CLAUSE_CODE (t1))
> +	{
> +	case OMP_CLAUSE_DEFAULT:
> +	  compare_values (OMP_CLAUSE_DEFAULT_KIND);
> +	  break;
> +	case OMP_CLAUSE_SCHEDULE:
> +	  compare_values (OMP_CLAUSE_SCHEDULE_KIND);
> +	  break;
> +	case OMP_CLAUSE_DEPEND:
> +	  compare_values (OMP_CLAUSE_DEPEND_KIND);
> +	  break;
> +	case OMP_CLAUSE_MAP:
> +	  compare_values (OMP_CLAUSE_MAP_KIND);
> +	  break;
> +	case OMP_CLAUSE_PROC_BIND:
> +	  compare_values (OMP_CLAUSE_PROC_BIND_KIND);
> +	  break;
> +	case OMP_CLAUSE_REDUCTION:
> +	  compare_values (OMP_CLAUSE_REDUCTION_CODE);
> +	  compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_INIT);
> +	  compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_MERGE);
> +	  break;
> +	default:
> +	  break;
> +	}
> +    }
> +
>  #undef compare_values
>  
>  
> @@ -1633,6 +1663,16 @@ compare_tree_sccs_1 (tree t1, tree t2, t
>  	}
>      }
>  
> +  if (code == OMP_CLAUSE)
> +    {
> +      int i;
> +
> +      for (i = 0; i < omp_clause_num_ops[OMP_CLAUSE_CODE (t1)]; i++)
> +	compare_tree_edges (OMP_CLAUSE_OPERAND (t1, i),
> +			    OMP_CLAUSE_OPERAND (t2, i));
> +      compare_tree_edges (OMP_CLAUSE_CHAIN (t1), OMP_CLAUSE_CHAIN (t2));
> +    }
> +
>  #undef compare_tree_edges
>  
>    return true;
> --- gcc/lto-streamer-out.c.jj	2013-11-28 16:02:36.000000000 +0100
> +++ gcc/lto-streamer-out.c	2013-11-28 16:26:42.059776312 +0100
> @@ -1060,6 +1060,39 @@ hash_tree (struct streamer_tree_cache_d
>  	}
>      }
>  
> +  if (code == OMP_CLAUSE)
> +    {
> +      int i;
> +
> +      v = iterative_hash_host_wide_int (OMP_CLAUSE_CODE (t), v);
> +      switch (OMP_CLAUSE_CODE (t))
> +	{
> +	case OMP_CLAUSE_DEFAULT:
> +	  v = iterative_hash_host_wide_int (OMP_CLAUSE_DEFAULT_KIND (t), v);
> +	  break;
> +	case OMP_CLAUSE_SCHEDULE:
> +	  v = iterative_hash_host_wide_int (OMP_CLAUSE_SCHEDULE_KIND (t), v);
> +	  break;
> +	case OMP_CLAUSE_DEPEND:
> +	  v = iterative_hash_host_wide_int (OMP_CLAUSE_DEPEND_KIND (t), v);
> +	  break;
> +	case OMP_CLAUSE_MAP:
> +	  v = iterative_hash_host_wide_int (OMP_CLAUSE_MAP_KIND (t), v);
> +	  break;
> +	case OMP_CLAUSE_PROC_BIND:
> +	  v = iterative_hash_host_wide_int (OMP_CLAUSE_PROC_BIND_KIND (t), v);
> +	  break;
> +	case OMP_CLAUSE_REDUCTION:
> +	  v = iterative_hash_host_wide_int (OMP_CLAUSE_REDUCTION_CODE (t), v);
> +	  break;
> +	default:
> +	  break;
> +	}
> +      for (i = 0; i < omp_clause_num_ops[OMP_CLAUSE_CODE (t)]; i++)
> +	visit (OMP_CLAUSE_OPERAND (t, i));
> +      visit (OMP_CLAUSE_CHAIN (t));
> +    }
> +
>    return v;
>  
>  #undef visit
> --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-12.c.jj	2013-11-28 16:43:28.779493462 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-12.c	2013-11-28 16:43:48.797396410 +0100
> @@ -0,0 +1,7 @@
> +/* { dg-additional-options "-fopenmp-simd" } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +/* { dg-additional-sources vect-simd-clone-12a.c } */
> +
> +#include "vect-simd-clone-10.c"
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-12a.c.jj	2013-11-28 16:44:01.061323339 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-12a.c	2013-11-28 16:42:36.000000000 +0100
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +
> +#include "vect-simd-clone-10.h"
> +
> +#pragma omp declare simd notinbranch
> +__attribute__((noinline)) int
> +foo (long int a, int b, int c)
> +{
> +  return a + b + c;
> +}
> +
> +#pragma omp declare simd notinbranch
> +__attribute__((noinline)) long int
> +bar (int a, int b, long int c)
> +{
> +  return a + b + c;
> +}
> --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-10a.c.jj	2013-11-27 12:15:14.000000000 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-10a.c	2013-11-28 16:41:20.000000000 +0100
> @@ -3,14 +3,14 @@
>  #include "vect-simd-clone-10.h"
>  
>  #pragma omp declare simd notinbranch
> -extern int
> +int
>  foo (long int a, int b, int c)
>  {
>    return a + b + c;
>  }
>  
>  #pragma omp declare simd notinbranch
> -extern long int
> +long int
>  bar (int a, int b, long int c)
>  {
>    return a + b + c;
> 
> 	Jakub
> 
>
Jeff Law Dec. 3, 2013, 8:12 p.m. UTC | #2
On 11/28/13 16:00, Jakub Jelinek wrote:
> Hi!
>
> And here is second part of the fixes.  Still, the vect-simd-clone-12.c
> testcase fails with -flto -flto-partition=1to1, so there is further work to
> do, but at least all current test succeed and actually use SIMD elementals
> when they should.  Bootstrapped/regtested on x86_64-linux and i686-linux,
> ok for trunk?
>
> 2013-11-28  Jakub Jelinek  <jakub@redhat.com>
> 	    Richard Biener  <rguenther@suse.de>
>
> 	PR lto/59326
> 	* omp-low.c (simd_clone_create): Return NULL if for definition
> 	!cgraph_function_with_gimple_body_p (old_node).  Call cgraph_get_body
> 	before calling cgraph_function_versioning.
> 	(expand_simd_clones): Look for "omp declare simd" attribute first.
> 	Don't check targetm.simd_clone.compute_vecsize_and_simdlen here.
> 	Punt if node->global.inlined_to.
> 	(pass_omp_simd_clone::gate): Also enable if flag_ltrans.  Disable
> 	pass if targetm.simd_clone.compute_vecsize_and_simdlen is NULL.
> 	* lto-streamer-out.c (hash_tree): Handle OMP_CLAUSE.
> lto/
> 	* lto.c (compare_tree_sccs_1): Handle OMP_CLAUSE.
> testsuite/
> 	* gcc.dg/vect/vect-simd-clone-12.c: New test.
> 	* gcc.dg/vect/vect-simd-clone-12a.c: New test.
> 	* gcc.dg/vect/vect-simd-clone-10a.c: Remove extern keywords.
OK.

Jeff
diff mbox

Patch

--- gcc/omp-low.c.jj	2013-11-27 12:15:13.000000000 +0100
+++ gcc/omp-low.c	2013-11-28 16:53:49.388242468 +0100
@@ -10912,8 +10912,13 @@  simd_clone_create (struct cgraph_node *o
 {
   struct cgraph_node *new_node;
   if (old_node->definition)
-    new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL, false,
-					   NULL, NULL, "simdclone");
+    {
+      if (!cgraph_function_with_gimple_body_p (old_node))
+	return NULL;
+      cgraph_get_body (old_node);
+      new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL,
+					     false, NULL, NULL, "simdclone");
+    }
   else
     {
       tree old_decl = old_node->decl;
@@ -11622,13 +11627,13 @@  simd_clone_adjust (struct cgraph_node *n
 static void
 expand_simd_clones (struct cgraph_node *node)
 {
-  if (lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
-    return;
-
   tree attr = lookup_attribute ("omp declare simd",
 				DECL_ATTRIBUTES (node->decl));
-  if (!attr || targetm.simd_clone.compute_vecsize_and_simdlen == NULL)
+  if (attr == NULL_TREE
+      || node->global.inlined_to
+      || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
     return;
+
   /* Ignore
      #pragma omp declare simd
      extern int foo ();
@@ -11764,8 +11769,10 @@  public:
   {}
 
   /* opt_pass methods: */
-  bool gate () { return flag_openmp || flag_openmp_simd
-			|| flag_enable_cilkplus; }
+  bool gate () { return ((flag_openmp || flag_openmp_simd
+			  || flag_enable_cilkplus || flag_ltrans)
+			 && (targetm.simd_clone.compute_vecsize_and_simdlen
+			     != NULL)); }
   unsigned int execute () { return ipa_omp_simd_clone (); }
 };
 
--- gcc/lto/lto.c.jj	2013-11-28 16:02:36.000000000 +0100
+++ gcc/lto/lto.c	2013-11-28 16:27:04.164663085 +0100
@@ -1410,6 +1410,36 @@  compare_tree_sccs_1 (tree t1, tree t2, t
 		   TREE_STRING_LENGTH (t1)) != 0)
       return false;
 
+  if (code == OMP_CLAUSE)
+    {
+      compare_values (OMP_CLAUSE_CODE);
+      switch (OMP_CLAUSE_CODE (t1))
+	{
+	case OMP_CLAUSE_DEFAULT:
+	  compare_values (OMP_CLAUSE_DEFAULT_KIND);
+	  break;
+	case OMP_CLAUSE_SCHEDULE:
+	  compare_values (OMP_CLAUSE_SCHEDULE_KIND);
+	  break;
+	case OMP_CLAUSE_DEPEND:
+	  compare_values (OMP_CLAUSE_DEPEND_KIND);
+	  break;
+	case OMP_CLAUSE_MAP:
+	  compare_values (OMP_CLAUSE_MAP_KIND);
+	  break;
+	case OMP_CLAUSE_PROC_BIND:
+	  compare_values (OMP_CLAUSE_PROC_BIND_KIND);
+	  break;
+	case OMP_CLAUSE_REDUCTION:
+	  compare_values (OMP_CLAUSE_REDUCTION_CODE);
+	  compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_INIT);
+	  compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_MERGE);
+	  break;
+	default:
+	  break;
+	}
+    }
+
 #undef compare_values
 
 
@@ -1633,6 +1663,16 @@  compare_tree_sccs_1 (tree t1, tree t2, t
 	}
     }
 
+  if (code == OMP_CLAUSE)
+    {
+      int i;
+
+      for (i = 0; i < omp_clause_num_ops[OMP_CLAUSE_CODE (t1)]; i++)
+	compare_tree_edges (OMP_CLAUSE_OPERAND (t1, i),
+			    OMP_CLAUSE_OPERAND (t2, i));
+      compare_tree_edges (OMP_CLAUSE_CHAIN (t1), OMP_CLAUSE_CHAIN (t2));
+    }
+
 #undef compare_tree_edges
 
   return true;
--- gcc/lto-streamer-out.c.jj	2013-11-28 16:02:36.000000000 +0100
+++ gcc/lto-streamer-out.c	2013-11-28 16:26:42.059776312 +0100
@@ -1060,6 +1060,39 @@  hash_tree (struct streamer_tree_cache_d
 	}
     }
 
+  if (code == OMP_CLAUSE)
+    {
+      int i;
+
+      v = iterative_hash_host_wide_int (OMP_CLAUSE_CODE (t), v);
+      switch (OMP_CLAUSE_CODE (t))
+	{
+	case OMP_CLAUSE_DEFAULT:
+	  v = iterative_hash_host_wide_int (OMP_CLAUSE_DEFAULT_KIND (t), v);
+	  break;
+	case OMP_CLAUSE_SCHEDULE:
+	  v = iterative_hash_host_wide_int (OMP_CLAUSE_SCHEDULE_KIND (t), v);
+	  break;
+	case OMP_CLAUSE_DEPEND:
+	  v = iterative_hash_host_wide_int (OMP_CLAUSE_DEPEND_KIND (t), v);
+	  break;
+	case OMP_CLAUSE_MAP:
+	  v = iterative_hash_host_wide_int (OMP_CLAUSE_MAP_KIND (t), v);
+	  break;
+	case OMP_CLAUSE_PROC_BIND:
+	  v = iterative_hash_host_wide_int (OMP_CLAUSE_PROC_BIND_KIND (t), v);
+	  break;
+	case OMP_CLAUSE_REDUCTION:
+	  v = iterative_hash_host_wide_int (OMP_CLAUSE_REDUCTION_CODE (t), v);
+	  break;
+	default:
+	  break;
+	}
+      for (i = 0; i < omp_clause_num_ops[OMP_CLAUSE_CODE (t)]; i++)
+	visit (OMP_CLAUSE_OPERAND (t, i));
+      visit (OMP_CLAUSE_CHAIN (t));
+    }
+
   return v;
 
 #undef visit
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-12.c.jj	2013-11-28 16:43:28.779493462 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-12.c	2013-11-28 16:43:48.797396410 +0100
@@ -0,0 +1,7 @@ 
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-additional-sources vect-simd-clone-12a.c } */
+
+#include "vect-simd-clone-10.c"
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-12a.c.jj	2013-11-28 16:44:01.061323339 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-12a.c	2013-11-28 16:42:36.000000000 +0100
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+
+#include "vect-simd-clone-10.h"
+
+#pragma omp declare simd notinbranch
+__attribute__((noinline)) int
+foo (long int a, int b, int c)
+{
+  return a + b + c;
+}
+
+#pragma omp declare simd notinbranch
+__attribute__((noinline)) long int
+bar (int a, int b, long int c)
+{
+  return a + b + c;
+}
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-10a.c.jj	2013-11-27 12:15:14.000000000 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-10a.c	2013-11-28 16:41:20.000000000 +0100
@@ -3,14 +3,14 @@ 
 #include "vect-simd-clone-10.h"
 
 #pragma omp declare simd notinbranch
-extern int
+int
 foo (long int a, int b, int c)
 {
   return a + b + c;
 }
 
 #pragma omp declare simd notinbranch
-extern long int
+long int
 bar (int a, int b, long int c)
 {
   return a + b + c;