From patchwork Tue May 31 14:00:15 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bill Schmidt X-Patchwork-Id: 98040 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id C63D1B6F6B for ; Wed, 1 Jun 2011 00:01:16 +1000 (EST) Received: (qmail 26720 invoked by alias); 31 May 2011 14:01:13 -0000 Received: (qmail 26676 invoked by uid 22791); 31 May 2011 14:01:09 -0000 X-SWARE-Spam-Status: No, hits=0.1 required=5.0 tests=AWL, BAYES_50, MAY_BE_FORGED, TW_HF, T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from e39.co.us.ibm.com (HELO e39.co.us.ibm.com) (32.97.110.160) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 31 May 2011 14:00:50 +0000 Received: from d03relay05.boulder.ibm.com (d03relay05.boulder.ibm.com [9.17.195.107]) by e39.co.us.ibm.com (8.14.4/8.13.1) with ESMTP id p4VDkedr013827 for ; Tue, 31 May 2011 07:46:40 -0600 Received: from d03av05.boulder.ibm.com (d03av05.boulder.ibm.com [9.17.195.85]) by d03relay05.boulder.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id p4VE0QGn335776 for ; Tue, 31 May 2011 08:00:31 -0600 Received: from d03av05.boulder.ibm.com (loopback [127.0.0.1]) by d03av05.boulder.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id p4VE0JS2025313 for ; Tue, 31 May 2011 08:00:20 -0600 Received: from [9.10.86.209] (tepot-pc.rchland.ibm.com [9.10.86.209] (may be forged)) by d03av05.boulder.ibm.com (8.14.4/8.13.1/NCO v10.0 AVin) with ESMTP id p4VE0HTw024953; Tue, 31 May 2011 08:00:18 -0600 Subject: [PATCH] Remove pow/powi expanders (PR46728 patch 6) From: "William J. Schmidt" To: gcc-patches@gcc.gnu.org Date: Tue, 31 May 2011 09:00:15 -0500 Message-Id: <1306850415.5243.6.camel@L3G5336.ibm.com> Mime-Version: 1.0 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org This patch removes the now-redundant support for pow and powi builtins in the expand phase. My concerns about -O0 regressions were unfounded. There are code gen differences for the unlikely combination of -O0 and -ffast-math, but that's obviously nothing to be concerned about. Bootstrapped and regtested for powerpc64-linux with no regressions. OK for trunk? 2011-05-31 Bill Schmidt PR tree-optimization/46728 * builtins.c (powi_table): Remove. (powi_lookup_cost): Remove. (powi_cost): Remove. (expand_powi_1): Remove. (expand_powi): Remove. (expand_builtin_pow_root): Remove. (expand_builtin_pow): Remove. (expand_builtin_powi): Eliminate handling of constant exponent. (expand_builtin): Use expand_builtin_mathfn_2 for BUILT_IN_POW. Index: gcc/builtins.c =================================================================== --- gcc/builtins.c (revision 174447) +++ gcc/builtins.c (working copy) @@ -2847,451 +2847,6 @@ expand_builtin_int_roundingfn_2 (tree exp, rtx tar return target; } -/* To evaluate powi(x,n), the floating point value x raised to the - constant integer exponent n, we use a hybrid algorithm that - combines the "window method" with look-up tables. For an - introduction to exponentiation algorithms and "addition chains", - see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth, - "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming", - 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation - Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */ - -/* Provide a default value for POWI_MAX_MULTS, the maximum number of - multiplications to inline before calling the system library's pow - function. powi(x,n) requires at worst 2*bits(n)-2 multiplications, - so this default never requires calling pow, powf or powl. */ - -#ifndef POWI_MAX_MULTS -#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2) -#endif - -/* The size of the "optimal power tree" lookup table. All - exponents less than this value are simply looked up in the - powi_table below. This threshold is also used to size the - cache of pseudo registers that hold intermediate results. */ -#define POWI_TABLE_SIZE 256 - -/* The size, in bits of the window, used in the "window method" - exponentiation algorithm. This is equivalent to a radix of - (1<= POWI_TABLE_SIZE) - { - if (val & 1) - { - digit = val & ((1 << POWI_WINDOW_SIZE) - 1); - result += powi_lookup_cost (digit, cache) - + POWI_WINDOW_SIZE + 1; - val >>= POWI_WINDOW_SIZE; - } - else - { - val >>= 1; - result++; - } - } - - return result + powi_lookup_cost (val, cache); -} - -/* Recursive subroutine of expand_powi. This function takes the array, - CACHE, of already calculated exponents and an exponent N and returns - an RTX that corresponds to CACHE[1]**N, as calculated in mode MODE. */ - -static rtx -expand_powi_1 (enum machine_mode mode, unsigned HOST_WIDE_INT n, rtx *cache) -{ - unsigned HOST_WIDE_INT digit; - rtx target, result; - rtx op0, op1; - - if (n < POWI_TABLE_SIZE) - { - if (cache[n]) - return cache[n]; - - target = gen_reg_rtx (mode); - cache[n] = target; - - op0 = expand_powi_1 (mode, n - powi_table[n], cache); - op1 = expand_powi_1 (mode, powi_table[n], cache); - } - else if (n & 1) - { - target = gen_reg_rtx (mode); - digit = n & ((1 << POWI_WINDOW_SIZE) - 1); - op0 = expand_powi_1 (mode, n - digit, cache); - op1 = expand_powi_1 (mode, digit, cache); - } - else - { - target = gen_reg_rtx (mode); - op0 = expand_powi_1 (mode, n >> 1, cache); - op1 = op0; - } - - result = expand_mult (mode, op0, op1, target, 0); - if (result != target) - emit_move_insn (target, result); - return target; -} - -/* Expand the RTL to evaluate powi(x,n) in mode MODE. X is the - floating point operand in mode MODE, and N is the exponent. This - function needs to be kept in sync with powi_cost above. */ - -static rtx -expand_powi (rtx x, enum machine_mode mode, HOST_WIDE_INT n) -{ - rtx cache[POWI_TABLE_SIZE]; - rtx result; - - if (n == 0) - return CONST1_RTX (mode); - - memset (cache, 0, sizeof (cache)); - cache[1] = x; - - result = expand_powi_1 (mode, (n < 0) ? -n : n, cache); - - /* If the original exponent was negative, reciprocate the result. */ - if (n < 0) - result = expand_binop (mode, sdiv_optab, CONST1_RTX (mode), - result, NULL_RTX, 0, OPTAB_LIB_WIDEN); - - return result; -} - -/* Fold a builtin function call to pow, powf, or powl into a series of sqrts or - cbrts. Return NULL_RTX if no simplification can be made or expand the tree - if we can simplify it. */ -static rtx -expand_builtin_pow_root (location_t loc, tree arg0, tree arg1, tree type, - rtx subtarget) -{ - if (TREE_CODE (arg1) == REAL_CST - && !TREE_OVERFLOW (arg1) - && flag_unsafe_math_optimizations) - { - enum machine_mode mode = TYPE_MODE (type); - tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); - tree cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT); - REAL_VALUE_TYPE c = TREE_REAL_CST (arg1); - tree op = NULL_TREE; - - if (sqrtfn) - { - /* Optimize pow (x, 0.5) into sqrt. */ - if (REAL_VALUES_EQUAL (c, dconsthalf)) - op = build_call_nofold_loc (loc, sqrtfn, 1, arg0); - - /* Don't do this optimization if we don't have a sqrt insn. */ - else if (optab_handler (sqrt_optab, mode) != CODE_FOR_nothing) - { - REAL_VALUE_TYPE dconst1_4 = dconst1; - REAL_VALUE_TYPE dconst3_4; - SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); - - real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0); - SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2); - - /* Optimize pow (x, 0.25) into sqrt (sqrt (x)). Assume on most - machines that a builtin sqrt instruction is smaller than a - call to pow with 0.25, so do this optimization even if - -Os. */ - if (REAL_VALUES_EQUAL (c, dconst1_4)) - { - op = build_call_nofold_loc (loc, sqrtfn, 1, arg0); - op = build_call_nofold_loc (loc, sqrtfn, 1, op); - } - - /* Optimize pow (x, 0.75) = sqrt (x) * sqrt (sqrt (x)) unless we - are optimizing for space. */ - else if (optimize_insn_for_speed_p () - && !TREE_SIDE_EFFECTS (arg0) - && REAL_VALUES_EQUAL (c, dconst3_4)) - { - tree sqrt1 = build_call_expr_loc (loc, sqrtfn, 1, arg0); - tree sqrt2 = builtin_save_expr (sqrt1); - tree sqrt3 = build_call_expr_loc (loc, sqrtfn, 1, sqrt1); - op = fold_build2_loc (loc, MULT_EXPR, type, sqrt2, sqrt3); - } - } - } - - /* Check whether we can do cbrt insstead of pow (x, 1./3.) and - cbrt/sqrts instead of pow (x, 1./6.). */ - if (cbrtfn && ! op - && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))) - { - /* First try 1/3. */ - REAL_VALUE_TYPE dconst1_3 - = real_value_truncate (mode, dconst_third ()); - - if (REAL_VALUES_EQUAL (c, dconst1_3)) - op = build_call_nofold_loc (loc, cbrtfn, 1, arg0); - - /* Now try 1/6. */ - else if (optimize_insn_for_speed_p () - && optab_handler (sqrt_optab, mode) != CODE_FOR_nothing) - { - REAL_VALUE_TYPE dconst1_6 = dconst1_3; - SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1); - - if (REAL_VALUES_EQUAL (c, dconst1_6)) - { - op = build_call_nofold_loc (loc, sqrtfn, 1, arg0); - op = build_call_nofold_loc (loc, cbrtfn, 1, op); - } - } - } - - if (op) - return expand_expr (op, subtarget, mode, EXPAND_NORMAL); - } - - return NULL_RTX; -} - -/* Expand a call to the pow built-in mathematical function. Return NULL_RTX if - a normal call should be emitted rather than expanding the function - in-line. EXP is the expression that is a call to the builtin - function; if convenient, the result should be placed in TARGET. */ - -static rtx -expand_builtin_pow (tree exp, rtx target, rtx subtarget) -{ - tree arg0, arg1; - tree fn, narg0; - tree type = TREE_TYPE (exp); - REAL_VALUE_TYPE cint, c, c2; - HOST_WIDE_INT n; - rtx op, op2; - enum machine_mode mode = TYPE_MODE (type); - - if (! validate_arglist (exp, REAL_TYPE, REAL_TYPE, VOID_TYPE)) - return NULL_RTX; - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - - if (TREE_CODE (arg1) != REAL_CST - || TREE_OVERFLOW (arg1)) - return expand_builtin_mathfn_2 (exp, target, subtarget); - - /* Handle constant exponents. */ - - /* For integer valued exponents we can expand to an optimal multiplication - sequence using expand_powi. */ - c = TREE_REAL_CST (arg1); - n = real_to_integer (&c); - real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0); - if (real_identical (&c, &cint) - && ((n >= -1 && n <= 2) - || (flag_unsafe_math_optimizations - && optimize_insn_for_speed_p () - && powi_cost (n) <= POWI_MAX_MULTS))) - { - op = expand_expr (arg0, subtarget, VOIDmode, EXPAND_NORMAL); - if (n != 1) - { - op = force_reg (mode, op); - op = expand_powi (op, mode, n); - } - return op; - } - - narg0 = builtin_save_expr (arg0); - - /* If the exponent is not integer valued, check if it is half of an integer. - In this case we can expand to sqrt (x) * x**(n/2). */ - fn = mathfn_built_in (type, BUILT_IN_SQRT); - if (fn != NULL_TREE) - { - real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); - n = real_to_integer (&c2); - real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0); - if (real_identical (&c2, &cint) - && ((flag_unsafe_math_optimizations - && optimize_insn_for_speed_p () - && powi_cost (n/2) <= POWI_MAX_MULTS) - /* Even the c == 0.5 case cannot be done unconditionally - when we need to preserve signed zeros, as - pow (-0, 0.5) is +0, while sqrt(-0) is -0. */ - || (!HONOR_SIGNED_ZEROS (mode) && n == 1) - /* For c == 1.5 we can assume that x * sqrt (x) is always - smaller than pow (x, 1.5) if sqrt will not be expanded - as a call. */ - || (n == 3 - && optab_handler (sqrt_optab, mode) != CODE_FOR_nothing))) - { - tree call_expr = build_call_nofold_loc (EXPR_LOCATION (exp), fn, 1, - narg0); - /* Use expand_expr in case the newly built call expression - was folded to a non-call. */ - op = expand_expr (call_expr, subtarget, mode, EXPAND_NORMAL); - if (n != 1) - { - op2 = expand_expr (narg0, subtarget, VOIDmode, EXPAND_NORMAL); - op2 = force_reg (mode, op2); - op2 = expand_powi (op2, mode, abs (n / 2)); - op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX, - 0, OPTAB_LIB_WIDEN); - /* If the original exponent was negative, reciprocate the - result. */ - if (n < 0) - op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode), - op, NULL_RTX, 0, OPTAB_LIB_WIDEN); - } - return op; - } - } - - /* Check whether we can do a series of sqrt or cbrt's instead of the pow - call. */ - op = expand_builtin_pow_root (EXPR_LOCATION (exp), arg0, arg1, type, - subtarget); - if (op) - return op; - - /* Try if the exponent is a third of an integer. In this case - we can expand to x**(n/3) * cbrt(x)**(n%3). As cbrt (x) is - different from pow (x, 1./3.) due to rounding and behavior - with negative x we need to constrain this transformation to - unsafe math and positive x or finite math. */ - fn = mathfn_built_in (type, BUILT_IN_CBRT); - if (fn != NULL_TREE - && flag_unsafe_math_optimizations - && (tree_expr_nonnegative_p (arg0) - || !HONOR_NANS (mode))) - { - REAL_VALUE_TYPE dconst3; - real_from_integer (&dconst3, VOIDmode, 3, 0, 0); - real_arithmetic (&c2, MULT_EXPR, &c, &dconst3); - real_round (&c2, mode, &c2); - n = real_to_integer (&c2); - real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0); - real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3); - real_convert (&c2, mode, &c2); - if (real_identical (&c2, &c) - && ((optimize_insn_for_speed_p () - && powi_cost (n/3) <= POWI_MAX_MULTS) - || n == 1)) - { - tree call_expr = build_call_nofold_loc (EXPR_LOCATION (exp), fn, 1, - narg0); - op = expand_builtin (call_expr, NULL_RTX, subtarget, mode, 0); - if (abs (n) % 3 == 2) - op = expand_simple_binop (mode, MULT, op, op, op, - 0, OPTAB_LIB_WIDEN); - if (n != 1) - { - op2 = expand_expr (narg0, subtarget, VOIDmode, EXPAND_NORMAL); - op2 = force_reg (mode, op2); - op2 = expand_powi (op2, mode, abs (n / 3)); - op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX, - 0, OPTAB_LIB_WIDEN); - /* If the original exponent was negative, reciprocate the - result. */ - if (n < 0) - op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode), - op, NULL_RTX, 0, OPTAB_LIB_WIDEN); - } - return op; - } - } - - /* Fall back to optab expansion. */ - return expand_builtin_mathfn_2 (exp, target, subtarget); -} - /* Expand a call to the powi built-in mathematical function. Return NULL_RTX if a normal call should be emitted rather than expanding the function in-line. EXP is the expression that is a call to the builtin @@ -3312,27 +2867,6 @@ expand_builtin_powi (tree exp, rtx target) arg1 = CALL_EXPR_ARG (exp, 1); mode = TYPE_MODE (TREE_TYPE (exp)); - /* Handle constant power. */ - - if (TREE_CODE (arg1) == INTEGER_CST - && !TREE_OVERFLOW (arg1)) - { - HOST_WIDE_INT n = TREE_INT_CST_LOW (arg1); - - /* If the exponent is -1, 0, 1 or 2, then expand_powi is exact. - Otherwise, check the number of multiplications required. */ - if ((TREE_INT_CST_HIGH (arg1) == 0 - || TREE_INT_CST_HIGH (arg1) == -1) - && ((n >= -1 && n <= 2) - || (optimize_insn_for_speed_p () - && powi_cost (n) <= POWI_MAX_MULTS))) - { - op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); - op0 = force_reg (mode, op0); - return expand_powi (op0, mode, n); - } - } - /* Emit a libcall to libgcc. */ /* Mode of the 2nd argument must match that of an int. */ @@ -5886,12 +5420,6 @@ expand_builtin (tree exp, rtx target, rtx subtarge return target; break; - CASE_FLT_FN (BUILT_IN_POW): - target = expand_builtin_pow (exp, target, subtarget); - if (target) - return target; - break; - CASE_FLT_FN (BUILT_IN_POWI): target = expand_builtin_powi (exp, target); if (target) @@ -5909,6 +5437,7 @@ expand_builtin (tree exp, rtx target, rtx subtarge CASE_FLT_FN (BUILT_IN_FMOD): CASE_FLT_FN (BUILT_IN_REMAINDER): CASE_FLT_FN (BUILT_IN_DREM): + CASE_FLT_FN (BUILT_IN_POW): target = expand_builtin_mathfn_2 (exp, target, subtarget); if (target) return target;