diff mbox series

Fix ICE with exp.simdclone.0 (PR tree-optimization/84687)

Message ID 20180305204028.GR5867@tucnak
State New
Headers show
Series Fix ICE with exp.simdclone.0 (PR tree-optimization/84687) | expand

Commit Message

Jakub Jelinek March 5, 2018, 8:40 p.m. UTC
Hi!

This patch clears DECL_BUILT_IN on simd clones, similarly to how
cgraphclones.c does:
  /* When signature changes, we need to clear builtin info.  */
  if (DECL_BUILT_IN (new_decl)
      && args_to_skip
      && !bitmap_empty_p (args_to_skip))
    {
      DECL_BUILT_IN_CLASS (new_decl) = NOT_BUILT_IN;
      DECL_FUNCTION_CODE (new_decl) = (enum built_in_function) 0;
    }
because simd clones are always signature changes (it would be nice if we
had some way to optimize these later, but it seems it wouldn't be easy).

In order not to regress optimization-wise, the patch also adds an early
optimization in match.pd.  We currently have the folding of pow(C,x) to
exp(log(C)*x) deferred till late, and we also have the folding of
exp(x)*exp(y) to exp(x+y).  If we already have one exp (or exp2 or exp10
etc.) call in the multiplication, there is no reason to defer it any
longer: all we do is trade one pow call, one exp{,2,10} call and one
multiplication for that exp{,2,10} call plus one multiplication and one
addition.  The addition will surely be less expensive than pow, and I
think the precision should not be that bad either (and it is -ffast-math
guarded anyway).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-03-05  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/84687
	* omp-simd-clone.c (simd_clone_create): Clear DECL_BUILT_IN_CLASS
	on new_node->decl.
	* match.pd (pow(C,x)*expN(y) -> expN(logN(C)*x+y)): New optimization.

	* gcc.dg/pr84687.c: New test.


	Jakub

Comments

Richard Biener March 6, 2018, 6:27 a.m. UTC | #1
On March 5, 2018 9:40:28 PM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>This patch clears DECL_BUILT_IN on simd clones, similarly how
>cgraphclones.c
>does:
>  /* When signature changes, we need to clear builtin info.  */
>  if (DECL_BUILT_IN (new_decl)
>      && args_to_skip
>      && !bitmap_empty_p (args_to_skip))
>    {
>      DECL_BUILT_IN_CLASS (new_decl) = NOT_BUILT_IN;
>      DECL_FUNCTION_CODE (new_decl) = (enum built_in_function) 0;
>    }
>because simd clones are always signature changes (would be nice if we
>had
>some way to optimize these later, but seems it wouldn't be easy),
>and in order not to regress optimization-wise, also an early
>optimization
>in match.pd - we have this now deferred till late folding of pow(C,x)
>to exp(log(C)*x), and also exp(x)*exp(y) folding to exp(x+y), if we
>already have one exp (or exp2 or exp10 etc.) call in the
>multiplication,
>there is no reason to defer it any longer, all we do is trade the one
>pow call and one exp{,2,10} call and one multiplication for that
>exp{,2,10} call plus one multiplication and one addition, the addition
>surely will be less expensive than pow and I think precision should not
>be
>that bad either (and it is -ffast-math guarded anyway).
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK. 

Thanks, 
Richard. 

>2018-03-05  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/84687
>	* omp-simd-clone.c (simd_clone_create): Clear DECL_BUILT_IN_CLASS
>	on new_node->decl.
>	* match.pd (pow(C,x)*expN(y) -> expN(logN(C)*x+y)): New optimization.
>
>	* gcc.dg/pr84687.c: New test.
>
>--- gcc/omp-simd-clone.c.jj	2018-02-13 09:33:31.107560174 +0100
>+++ gcc/omp-simd-clone.c	2018-03-05 16:47:56.943365091 +0100
>@@ -456,6 +456,8 @@ simd_clone_create (struct cgraph_node *o
>   if (new_node == NULL)
>     return new_node;
> 
>+  DECL_BUILT_IN_CLASS (new_node->decl) = NOT_BUILT_IN;
>+  DECL_FUNCTION_CODE (new_node->decl) = (enum built_in_function) 0;
>   TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
>   DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
>   DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
>--- gcc/match.pd.jj	2018-02-20 14:55:28.988215213 +0100
>+++ gcc/match.pd	2018-03-05 16:52:11.486487079 +0100
>@@ -4030,6 +4030,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>       (exps (mult (logs @0) @1))
>       (exp2s (mult (log2s @0) @1)))))))
> 
>+ /* pow(C,x)*expN(y) -> expN(logN(C)*x+y) if C > 0.  */
>+ (for pows (POW)
>+      exps (EXP EXP2 EXP10 POW10)
>+      logs (LOG LOG2 LOG10 LOG10)
>+  (simplify
>+   (mult:c (pows:s REAL_CST@0 @1) (exps:s @2))
>+   (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0)
>+	&& real_isfinite (TREE_REAL_CST_PTR (@0)))
>+    (exps (plus (mult (logs @0) @1) @2)))))
>+
>  (for sqrts (SQRT)
>       cbrts (CBRT)
>       pows (POW)
>--- gcc/testsuite/gcc.dg/pr84687.c.jj	2018-03-05 16:45:57.020307612
>+0100
>+++ gcc/testsuite/gcc.dg/pr84687.c	2018-03-05 16:45:41.977300398 +0100
>@@ -0,0 +1,19 @@
>+/* PR tree-optimization/84687 */
>+/* { dg-do compile } */
>+/* { dg-options "-Ofast" } */
>+
>+int a[64], b;
>+double pow (double, double);
>+__attribute__((__simd__)) double exp (double);
>+
>+void
>+foo (double x)
>+{
>+  int i;
>+  double c = exp (x);
>+  for (i = 0; i < 64; i++)
>+    {
>+      b = i;
>+      a[i] = pow (12.0, b) * pow (c, i);
>+    }
>+}
>
>	Jakub
diff mbox series

Patch

--- gcc/omp-simd-clone.c.jj	2018-02-13 09:33:31.107560174 +0100
+++ gcc/omp-simd-clone.c	2018-03-05 16:47:56.943365091 +0100
@@ -456,6 +456,8 @@  simd_clone_create (struct cgraph_node *o
   if (new_node == NULL)
     return new_node;
 
+  DECL_BUILT_IN_CLASS (new_node->decl) = NOT_BUILT_IN;
+  DECL_FUNCTION_CODE (new_node->decl) = (enum built_in_function) 0;
   TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
   DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
   DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
--- gcc/match.pd.jj	2018-02-20 14:55:28.988215213 +0100
+++ gcc/match.pd	2018-03-05 16:52:11.486487079 +0100
@@ -4030,6 +4030,16 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (exps (mult (logs @0) @1))
       (exp2s (mult (log2s @0) @1)))))))
 
+ /* pow(C,x)*expN(y) -> expN(logN(C)*x+y) if C > 0.  */
+ (for pows (POW)
+      exps (EXP EXP2 EXP10 POW10)
+      logs (LOG LOG2 LOG10 LOG10)
+  (simplify
+   (mult:c (pows:s REAL_CST@0 @1) (exps:s @2))
+   (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0)
+	&& real_isfinite (TREE_REAL_CST_PTR (@0)))
+    (exps (plus (mult (logs @0) @1) @2)))))
+
  (for sqrts (SQRT)
       cbrts (CBRT)
       pows (POW)
--- gcc/testsuite/gcc.dg/pr84687.c.jj	2018-03-05 16:45:57.020307612 +0100
+++ gcc/testsuite/gcc.dg/pr84687.c	2018-03-05 16:45:41.977300398 +0100
@@ -0,0 +1,19 @@ 
+/* PR tree-optimization/84687 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+int a[64], b;
+double pow (double, double);
+__attribute__((__simd__)) double exp (double);
+
+void
+foo (double x)
+{
+  int i;
+  double c = exp (x);
+  for (i = 0; i < 64; i++)
+    {
+      b = i;
+      a[i] = pow (12.0, b) * pow (c, i);
+    }
+}