From patchwork Wed May 25 17:43:32 2011
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Bill Schmidt
X-Patchwork-Id: 97390
Return-Path:
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 7AFF8B704E for ; Thu, 26 May 2011 03:44:43 +1000 (EST)
Received: (qmail 25923 invoked by alias); 25 May 2011 17:44:39 -0000
Received: (qmail 25860 invoked by uid 22791); 25 May 2011 17:44:38 -0000
X-SWARE-Spam-Status: No, hits=-1.2 required=5.0 tests=AWL, BAYES_00, MAY_BE_FORGED, TW_HF, TW_TM, T_RP_MATCHES_RCVD
X-Spam-Check-By: sourceware.org
Received: from e1.ny.us.ibm.com (HELO e1.ny.us.ibm.com) (32.97.182.141) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Wed, 25 May 2011 17:44:23 +0000
Received: from d01relay07.pok.ibm.com (d01relay07.pok.ibm.com [9.56.227.147]) by e1.ny.us.ibm.com (8.14.4/8.13.1) with ESMTP id p4PHX1UU004332 for ; Wed, 25 May 2011 13:33:01 -0400
Received: from d03av06.boulder.ibm.com (d03av06.boulder.ibm.com [9.17.195.245]) by d01relay07.pok.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id p4PHiLf51409182 for ; Wed, 25 May 2011 13:44:21 -0400
Received: from d03av06.boulder.ibm.com (loopback [127.0.0.1]) by d03av06.boulder.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id p4PHmgUM008185 for ; Wed, 25 May 2011 11:48:42 -0600
Received: from [9.10.86.209] (tepot-pc.rchland.ibm.com [9.10.86.209] (may be forged)) by d03av06.boulder.ibm.com (8.14.4/8.13.1/NCO v10.0 AVin) with ESMTP id p4PHmgMf008079; Wed, 25 May 2011 11:48:42 -0600
Subject: [PATCH] More pow(x, c) expansions in cse_sincos pass (PR46728, patch 3)
From: "William J. 
 Schmidt"
To: gcc-patches@gcc.gnu.org
Cc: richard.guenther@gmail.com
Date: Wed, 25 May 2011 12:43:32 -0500
Message-Id: <1306345412.4821.56.camel@L3G5336.ibm.com>
Mime-Version: 1.0
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id:
List-Unsubscribe:
List-Archive:
List-Post:
List-Help:
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org

This patch adds logic to gimple_expand_builtin_pow () to optimize
pow(x,y), where y is one of 0.5, 0.25, 0.75, 1./3., or 1./6.  I noticed
that there were two missing calls to gimple_set_location () in my
previous patch, so I've corrected those here as well.

There's one TODO comment in this patch.  I don't believe the test for
TREE_SIDE_EFFECTS (arg0) should be necessary; but I'm not convinced it
was necessary in the code whence I copied it, either, so I left it in
for comment in case I'm misunderstanding something.

2011-05-25  Bill Schmidt

	PR tree-optimization/46728
	* tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
	(powi_as_mults): Add gimple_set_location.
	(build_and_insert_call): New.
	(gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
	0.5, 0.25, 0.75, 1./3., or 1./6.

Index: gcc/tree-ssa-math-opts.c =================================================================== --- gcc/tree-ssa-math-opts.c (revision 174199) +++ gcc/tree-ssa-math-opts.c (working copy) @@ -965,6 +965,7 @@ powi_as_mults_1 (gimple_stmt_iterator *gsi, locati } mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1); + gimple_set_location (mult_stmt, loc); gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT); return ssa_target; @@ -999,6 +1000,7 @@ powi_as_mults (gimple_stmt_iterator *gsi, location div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target, build_real (type, dconst1), result); + gimple_set_location (div_stmt, loc); gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT); return target; @@ -1024,6 +1026,34 @@ gimple_expand_builtin_powi (gimple_stmt_iterator * return NULL_TREE; } +/* Build a gimple call statement that calls FN with argument ARG. + Set the lhs of the call statement to a fresh SSA name for + variable VAR. If VAR is NULL, first allocate it. Insert the + statement prior to GSI's current position, and return the fresh + SSA name. */ + +static tree +build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg, + tree *var, location_t loc) +{ + gimple call_stmt; + tree ssa_target; + + if (!*var) + { + *var = create_tmp_var (TREE_TYPE (arg), "powroot"); + add_referenced_var (*var); + } + + call_stmt = gimple_build_call (fn, 1, arg); + ssa_target = make_ssa_name (*var, NULL); + gimple_set_lhs (call_stmt, ssa_target); + gimple_set_location (call_stmt, loc); + gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT); + + return ssa_target; +} + /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI with location info LOC. 
If possible, create an equivalent and less expensive sequence of statements prior to GSI, and return an @@ -1035,6 +1065,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g { REAL_VALUE_TYPE c, cint; HOST_WIDE_INT n; + tree type, sqrtfn, target = NULL_TREE; + enum machine_mode mode; /* If the exponent isn't a constant, there's nothing of interest to be done. */ @@ -1054,6 +1086,108 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g && powi_cost (n) <= POWI_MAX_MULTS))) return gimple_expand_builtin_powi (gsi, loc, arg0, n); + /* Attempt various optimizations using sqrt and cbrt. */ + type = TREE_TYPE (arg0); + mode = TYPE_MODE (type); + sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); + + if (flag_unsafe_math_optimizations && sqrtfn != NULL_TREE) + { + REAL_VALUE_TYPE dconst1_4, dconst3_4; + tree cbrtfn; + bool hw_sqrt_exists; + + /* Optimize pow(x,0.5) = sqrt(x). */ + if (REAL_VALUES_EQUAL (c, dconsthalf)) + return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc); + + /* Optimize pow(x,0.25) = sqrt(sqrt(x)). */ + dconst1_4 = dconst1; + SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); + hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing; + + if (REAL_VALUES_EQUAL (c, dconst1_4) && hw_sqrt_exists) + { + tree sqrt_arg0; + + /* sqrt(x) */ + sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc); + + /* sqrt(sqrt(x)) */ + return build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc); + } + + /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)). */ + real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0); + SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2); + + if (optimize_function_for_speed_p (cfun) + && !TREE_SIDE_EFFECTS (arg0) /* TODO: is this needed? 
*/ + && REAL_VALUES_EQUAL (c, dconst3_4) + && hw_sqrt_exists) + { + tree sqrt_arg0, sqrt_sqrt, ssa_target; + gimple mult_stmt; + + /* sqrt(x) */ + sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc); + + /* sqrt(sqrt(x)) */ + sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0, + &target, loc); + /* sqrt(x) * sqrt(sqrt(x)) */ + ssa_target = make_ssa_name (target, NULL); + mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, + sqrt_arg0, sqrt_sqrt); + gimple_set_location (mult_stmt, loc); + gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT); + + return ssa_target; + } + + /* Optimize pow(x,1./3.) = cbrt(x), and pow(x,1./6.) = cbrt(sqrt(x)). */ + cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT); + + if (cbrtfn != NULL_TREE) + { + /* pow(x,1./3.) => cbrt(x). */ + const REAL_VALUE_TYPE dconst1_3 + = real_value_truncate (mode, dconst_third ()); + + if (REAL_VALUES_EQUAL (c, dconst1_3)) + return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc); + + /* pow(x,1./6.) => cbrt(sqrt(x)). */ + if (optimize_function_for_speed_p (cfun) && hw_sqrt_exists) + { + REAL_VALUE_TYPE dconst1_6 = dconst1_3; + SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1); + + if (REAL_VALUES_EQUAL (c, dconst1_6)) + { + tree sqrt_arg0; + + /* sqrt(x) */ + sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, + &target, loc); + /* cbrt(sqrt(x)) */ + return build_and_insert_call (gsi, cbrtfn, sqrt_arg0, + &target, loc); + } + } + } + } + + /* We allow one transformation when flag_unsafe_math_optimizations is + false. Replacing pow(x,0.5) with sqrt(x) is safe, provided signed + zeros must not be maintained. For x = -0, the former produces +0, + and the latter produces -0. */ + else if (sqrtfn != NULL_TREE + && !HONOR_SIGNED_ZEROS (mode) + && REAL_VALUES_EQUAL (c, dconsthalf)) + + return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc); + return NULL_TREE; }