From patchwork Wed Sep 10 11:55:21 2014
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Richard Biener <rguenther@suse.de>
X-Patchwork-Id: 387698
Return-Path: 
 <gcc-patches-return-377319-incoming=patchwork.ozlabs.org@gcc.gnu.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256
	bits)) (No client certificate requested)
	by ozlabs.org (Postfix) with ESMTPS id F34441400AF
	for <incoming@patchwork.ozlabs.org>;
	Wed, 10 Sep 2014 21:59:04 +1000 (EST)
DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender:date
	:from:to:subject:message-id:mime-version:content-type; q=dns; s=
	default; b=d8ASB0aY7e2m0LoJbTPviBqZz+UrYwN9zjoqX5N1JCJ5Kel4dXopf
	YP/TUqGW09s7/1rEw6K19DCO+lLZDqvrPlD8ubLCzAQA+0IRah7LbwwCmUYENwgn
	SA1eiYT09R/rHJI6cbnHDNgL8zv/uZWkM2V0cyuOdfT4mzvKYDLvuA=
DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender:date
	:from:to:subject:message-id:mime-version:content-type; s=
	default; bh=geC+yhN/AWkjlYzt2pLIxdS7LkU=; b=aytK9JkmbY0QP97hSonm
	NmfHdBf1js8ka99zhtil8hfH0xb2KreAPTkG78sR7AD1wQXq7P/S10W5f5gIC1BL
	XUf7NcqMBZV89QDfX8zVcwZDnF+xaahgIzyAkD8pEyyP7cnu30kypezE89pmMd8C
	UByYvw92IWlNya7jk+RFBOI=
Received: (qmail 8092 invoked by alias); 10 Sep 2014 11:58:57 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Unsubscribe: 
 <mailto:gcc-patches-unsubscribe-incoming=patchwork.ozlabs.org@gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org
Received: (qmail 8082 invoked by uid 89); 10 Sep 2014 11:58:57 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-4.5 required=5.0 tests=AWL, BAYES_00,
	RP_MATCHES_RCVD autolearn=ham version=3.3.2
X-HELO: mx2.suse.de
Received: from cantor2.suse.de (HELO mx2.suse.de) (195.135.220.15) by
	sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with
	(CAMELLIA256-SHA encrypted) ESMTPS; Wed, 10 Sep 2014 11:58:55 +0000
Received: from relay1.suse.de (charybdis-ext.suse.de [195.135.220.254])	by
	mx2.suse.de (Postfix) with ESMTP id 4BA85AB13	for
	<gcc-patches@gcc.gnu.org>; Wed, 10 Sep 2014 11:58:52 +0000 (UTC)
Date: Wed, 10 Sep 2014 13:55:21 +0200 (CEST)
From: Richard Biener <rguenther@suse.de>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH][match-and-simplify] More flexible 'for',
	polish match-builtins.pd
Message-ID: <alpine.LSU.2.11.1409101350130.20733@zhemvz.fhfr.qr>
User-Agent: Alpine 2.11 (LSU 23 2013-08-11)
MIME-Version: 1.0

This completes match-builtin.pd by handling all builtin variants
in all present patterns.  That requires a little more flexibility
in the 'for' handling to support, for example,

 /* Optimize sqrt(expN(x)) = expN(x*0.5).  */
 (for expfn (BUILT_IN_EXP10F BUILT_IN_EXP10 BUILT_IN_EXP10L
             BUILT_IN_POW10F BUILT_IN_POW10 BUILT_IN_POW10L
             BUILT_IN_EXPF BUILT_IN_EXP BUILT_IN_EXPL
             BUILT_IN_EXP2F BUILT_IN_EXP2 BUILT_IN_EXP2L)
      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
  (simplify
   (SQRT (expfn @0))
   (expfn (mult @0 { build_real (type, dconsthalf); }))))

without having to repeat the SQRT substitutions four times
to match the expfn substitution length.  Now we just repeat
the shorter substitution vectors as needed (but require every
length to be a multiple of the smallest one, as a sanity check).

The patch also fixes the cabs() patterns and completes them
from fold_builtin_cabs.  Similarly, it fixes the
x * pow(x, c) -> pow(x, c+1) pattern to match the one in
fold-const.c, but additionally requires -fno-math-errno (errno
differences can occur - this seems to be a common issue with all
math builtin foldings that are conditionalized only on
flag_unsafe_math_optimizations...).

Committed.

Richard.

2014-09-10  Richard Biener  <rguenther@suse.de>

	* genmatch.c (parse_for): Allow more flexible replacement counts.
	* match-builtin.pd: Fix initial patterns and complete them
	from the source functions.  Properly handle all builtin variants.

Index: gcc/genmatch.c
===================================================================
--- gcc/genmatch.c	(revision 215057)
+++ gcc/genmatch.c	(working copy)
@@ -2414,7 +2414,7 @@ parse_for (cpp_reader *r, source_locatio
   vec<const char *> user_ids = vNULL;
   vec< vec<const char *> > opers_vec = vNULL;
   const cpp_token *token;
-  unsigned n_opers = 0;
+  unsigned min_n_opers = 0, max_n_opers = 0;
 
   while (1)
     {
@@ -2445,13 +2445,29 @@ parse_for (cpp_reader *r, source_locatio
 	  
 	  opers.safe_push (oper);
 	}
+      token = expect (r, CPP_CLOSE_PAREN);
+      if (opers.length () == 0)
+	fatal_at (token, "A user-defined identifier must have at least one substitution");
+      if (opers_vec.length () == 0)
+	{
+	  min_n_opers = opers.length ();
+	  max_n_opers = opers.length ();
+	}
+      else
+	{
+	  if (opers.length () % min_n_opers != 0
+	      && min_n_opers % opers.length () != 0)
+	    fatal_at (token, "All user-defined identifiers must have a "
+		      "multiple number of operator substitutions of the "
+		      "smallest number of substitutions");
+	  if (opers.length () < min_n_opers)
+	    min_n_opers = opers.length ();
+	  else if (opers.length () > max_n_opers)
+	    max_n_opers = opers.length ();
+	}
+
       opers_vec.safe_push (opers);
-      if (n_opers == 0)
-	n_opers = opers.length ();
-      else if (n_opers != opers.length ())
-	fatal_at (token, "All user-defined identifiers must have same number of operator substitutions");
-      eat_token (r, CPP_CLOSE_PAREN);
-    }	  
+    }
 
   if (user_ids.length () == 0)
     fatal_at (token, "for requires at least one user-defined identifier");
@@ -2474,12 +2490,8 @@ parse_for (cpp_reader *r, source_locatio
     {
       simplify *s = for_simplifiers[ix];
 
-      for (unsigned j = 0; j < n_opers; ++j)
+      for (unsigned j = 0; j < max_n_opers; ++j)
 	{
-	  vec<const char *> opers = vNULL;
-	  for (unsigned i = 0; i < opers_vec.length (); ++i)
-	    opers.safe_push (opers_vec[i][j]);
-	  
 	  operand *match_op = s->match;
 	  operand *result_op = s->result;
 	  vec<if_or_with> ifexpr_vec = vNULL;
@@ -2489,16 +2501,17 @@ parse_for (cpp_reader *r, source_locatio
 
 	  for (unsigned i = 0; i < n_ids; ++i)
 	    {
-	      match_op = replace_id (match_op, user_ids[i], opers[i]);
-	      result_op = replace_id (result_op, user_ids[i], opers[i]);
+	      const char *oper = opers_vec[i][j % opers_vec[i].length ()];
+	      match_op = replace_id (match_op, user_ids[i], oper);
+	      result_op = replace_id (result_op, user_ids[i], oper);
 
 	      for (unsigned k = 0; k < s->ifexpr_vec.length (); ++k)
-		ifexpr_vec[k].cexpr = replace_id (ifexpr_vec[k].cexpr, user_ids[i], opers[i]);
+		ifexpr_vec[k].cexpr = replace_id (ifexpr_vec[k].cexpr, user_ids[i], oper);
 
 	    }
-	    simplify *ns = new simplify (s->name, match_op, s->match_location,
-					 result_op, s->result_location, ifexpr_vec);
-	    simplifiers.safe_push (ns);
+	  simplify *ns = new simplify (s->name, match_op, s->match_location,
+				       result_op, s->result_location, ifexpr_vec);
+	  simplifiers.safe_push (ns);
 	}
     }
 } 
Index: gcc/match-builtin.pd
===================================================================
--- gcc/match-builtin.pd	(revision 215011)
+++ gcc/match-builtin.pd	(working copy)
@@ -18,29 +18,44 @@ along with GCC; see the file COPYING3.
 <http://www.gnu.org/licenses/>.  */
 
 
-/* ???  For math builtins we fail to properly repeat patterns for
-   all FP type kinds (sqrtf, sqrt, sqrtl).  And we fail to provide
-   a mechanism to iterate two ops in lock-step like
-   (for fn1 in sqrt sqrtf sqrtl and fn2 in pow powf powl ...)
-   if we were to do that repetition semi-manually.
-   We could also automagically use the type of the expr to
-   always do mathfn_built_in at code-gen time and always
-   automagically iterate over kinds (but that's bogus for
-   things like (convert (BUILT_IN_SQRT @0)) -> (BUILT_IN_SQRTF @0).  */
-
-
-/* One builtin function to builtin function.  */
-(simplify
-  (BUILT_IN_CABS (complex:c @0 real_zerop))
+/* From fold_builtin_cabs.  */
+/* If either part is zero, cabs is fabs of the other.  */
+(for CABS (BUILT_IN_CABSF BUILT_IN_CABS BUILT_IN_CABSL)
+ (simplify
+  (CABS (complex:c @0 real_zerop))
   (abs @0))
-/* One builtin function to expr.  */
-(simplify
-  (BUILT_IN_CABS (complex @0 @0))
-  (mult (abs @0) { build_real (TREE_TYPE (@0), real_value_truncate (TYPE_MODE (TREE_TYPE (@0)), dconst_sqrt2 ())); }))
-/* One nested fn.  */
-(simplify
-  (mult:c (BUILT_IN_POW @0 @1) @0)
-  (BUILT_IN_POW @0 (PLUS_EXPR @1 { build_one_cst (TREE_TYPE (@1)); })))
+ /* cabs(x+xi) -> fabs(x)*sqrt(2).  */
+ (if (flag_unsafe_math_optimizations)
+  (simplify
+   (CABS (complex @0 @0))
+   (mult (abs @0) { build_real (TREE_TYPE (@0), real_value_truncate (TYPE_MODE (TREE_TYPE (@0)), dconst_sqrt2 ())); })))
+ /* Optimize cabs(-z) and cabs(conj(z)) as cabs(z).  */
+ (for op (negate conj)
+  (simplify
+   (CABS (op @0))
+   (CABS @0))))
+/* Don't do this when optimizing for size.  */
+(if (flag_unsafe_math_optimizations && optimize_function_for_speed_p (cfun))
+ (for CABS (BUILT_IN_CABSF BUILT_IN_CABS BUILT_IN_CABSL)
+      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
+  (simplify
+   (CABS @0)
+   (SQRT (plus
+	  /* ???  There is no way to CSE here.  We'd need to support
+	     expression captures here, like with
+	      (mult (realpart@1 @0) @1) */
+	  (mult (realpart @0) (realpart @0))
+	  (mult (imagpart @0) (imagpart @0)))))))
+
+/* From fold_binary.  */
+/* Optimize x*pow(x,c) as pow(x,c+1).  */
+(if (flag_unsafe_math_optimizations
+     /* ???  fold-const.c does not check for flag_errno_math.  */
+     && !flag_errno_math)
+ (for POW (BUILT_IN_POW BUILT_IN_POWF BUILT_IN_POWL)
+  (simplify
+   (mult:c (POW @0 REAL_CST@1) @0)
+   (POW @0 (plus @1 { build_one_cst (TREE_TYPE (@1)); })))))
 
 /* From fold_builtin_fabs and fold_builtin_abs.  */
 /* Fold a call to fabs, fabsf or fabsl, to abs, labs, llabs or imaxabs.  */
@@ -50,14 +65,14 @@ along with GCC; see the file COPYING3.
   (abs @0)))
 
 /* From fold_builtin_pow.  */
-/* Optimize pow(1.0,y) = 1.0.  */
-(simplify
- (BUILT_IN_POW real_onep@0 @1)
- @0)
-
 (for POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL)
      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
      CBRT (BUILT_IN_CBRTF BUILT_IN_CBRT BUILT_IN_CBRTL)
+ /* Optimize pow(1.0,y) = 1.0.  */
+ (simplify
+  (POW real_onep@0 @1)
+  @0)
+
 (simplify
  (POW @0 REAL_CST@1)
  (with { REAL_VALUE_TYPE c = TREE_REAL_CST (@1); }
@@ -80,14 +95,14 @@ along with GCC; see the file COPYING3.
         = real_value_truncate (TYPE_MODE (type), dconst_third ()); }
     (if (flag_unsafe_math_optimizations
 	 && REAL_VALUES_EQUAL (c, dconstroot))
-     (CBRT @0))))))
+     (CBRT @0)))))
 
-/* Strip sign ops from even integer powers.
-   ???  The code in builtins.c manages to perform this recursively
-   through the whole expression in arg0 of pow.  */
-(for sgnop (abs negate)
+ /* Strip sign ops from even integer powers.
+    ???  The code in builtins.c manages to perform this recursively
+    through the whole expression in arg0 of pow.  */
+ (for sgnop (abs negate)
   (simplify
-    (BUILT_IN_POW (sgnop @0) REAL_CST@1)
+    (POW (sgnop @0) REAL_CST@1)
     (with
       { 
 	REAL_VALUE_TYPE c = TREE_REAL_CST (@1);
@@ -98,19 +113,26 @@ along with GCC; see the file COPYING3.
       (if (real_identical (&c, &cint)
 	   && (n & 1) == 0
 	   && flag_unsafe_math_optimizations)
-       (BUILT_IN_POW @0 @1)))))
+       (POW @0 @1))))))
 
 /* From fold_builtin_sqrt.  */
 (if (flag_unsafe_math_optimizations)
  /* Optimize sqrt(expN(x)) = expN(x*0.5).  */
- (for expfn (BUILT_IN_EXP10 BUILT_IN_POW10 BUILT_IN_EXP BUILT_IN_EXP2)
+ (for expfn (BUILT_IN_EXP10F BUILT_IN_EXP10 BUILT_IN_EXP10L
+	     BUILT_IN_POW10F BUILT_IN_POW10 BUILT_IN_POW10L
+	     BUILT_IN_EXPF BUILT_IN_EXP BUILT_IN_EXPL
+	     BUILT_IN_EXP2F BUILT_IN_EXP2 BUILT_IN_EXP2L)
+      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
   (simplify
-   (BUILT_IN_SQRT (expfn @0))
+   (SQRT (expfn @0))
    (expfn (mult @0 { build_real (type, dconsthalf); }))))
  /* Optimize sqrt(Nroot(x)) -> pow(x,1/(2*N)).  */
- (for rootfn (BUILT_IN_SQRT BUILT_IN_CBRT)
+ (for rootfn (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL
+	      BUILT_IN_CBRTF BUILT_IN_CBRT BUILT_IN_CBRTL)
+      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
+      POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL)
   (simplify
-   (BUILT_IN_SQRT (rootfn @0))
+   (SQRT (rootfn @0))
    (with
     { REAL_VALUE_TYPE dconstroot;
       if (BUILTIN_SQRT_P (rootfn)) dconstroot = dconsthalf;
@@ -118,8 +140,10 @@ along with GCC; see the file COPYING3.
       /* Adjust for the outer root.  */
       SET_REAL_EXP (&dconstroot, REAL_EXP (&dconstroot) - 1);
       dconstroot = real_value_truncate (TYPE_MODE (type), dconstroot); }
-    (BUILT_IN_POW @0 { build_real (type, dconstroot); }))))
+    (POW @0 { build_real (type, dconstroot); }))))
  /* Optimize sqrt(pow(x,y)) = pow(|x|,y*0.5).  */
- (simplify
-  (BUILT_IN_SQRT (BUILT_IN_POW @0 @1))
-  (BUILT_IN_POW (abs @0) (mult @1 { build_real (TREE_TYPE (@1), dconsthalf); }))))
+ (for SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
+      POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL)
+  (simplify
+   (SQRT (POW @0 @1))
+   (POW (abs @0) (mult @1 { build_real (TREE_TYPE (@1), dconsthalf); })))))