diff mbox

rtl expansion without zero/sign extension based on VRP

Message ID 519061CA.9020408@linaro.org
State New
Headers show

Commit Message

Kugan Vivekanandarajah May 13, 2013, 3:45 a.m. UTC
Hi,

This patch removes some of the redundant sign/zero
extensions using value ranges informations generated by VRP.

When GIMPLE_ASSIGN stmts with LHS type smaller than word is expanded to
rtl, if we can prove that RHS expression value can always fit in LHS
type and there is no sign conversion, truncation and extension to fit
the type is redundant. Subreg and Zero/sign extensions are therefore
redundant.

For example, when an expression is evaluated and it's value is assigned
to variable of type short, the generated rtl would look something like
the following.

(set (reg:SI 110)
           (zero_extend:SI (subreg:HI (reg:SI 117) 0)))

However, if during value range propagation, if we can say for certain
that the value of the expression which is present in register 117 is
within the limits of short and there is no sign conversion, we don’t
need to perform the subreg and zero_extend; instead we can generate the
following rtl.

(set (reg:SI 110)
           (reg:SI 117)))

Attached patch adds value range information to gimple stmts during value
range propagation pass and expands rtl without subreg and zero/sign
extension if the value range is within the limits of it's type.

This change improve the geomean of spec2k int benchmark with ref by 
about ~3.5% on an arm chromebook.

Tested  on X86_64 and ARM.

I would like review comments on this.

Thanks,
Kugan


2013-05-09  Kugan Vivekanandarajah  <kuganv@linaro.org>

     * gcc/gimple.h (gimple_is_exp_fit_lhs, gimple_set_exp_fit_lhs): New
function.
     * gcc/tree-vrp.c (process_stmt_for_ext_elimination): Likewise.
     * (is_msb_set, range_fits_type): Likewise.
     * (vrp_finalize): Call process_stmt_for_ext_elimination.
     * gcc/dojump.c (do_compare_and_jump): generates rtl without
zero/sign extension
     if process_stmt_for_ext_elimination tells so.
     * gcc/cfgexpand.c (expand_gimple_stmt_1): Likewise.
diff mbox

Patch

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index c187273..c43468b 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2279,6 +2279,7 @@  expand_gimple_stmt_1 (gimple stmt)
 	    bool nontemporal = gimple_assign_nontemporal_move_p (stmt);
 	    struct separate_ops ops;
 	    bool promoted = false;
+        bool ext_redundant = false;
 
 	    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
 	    if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
@@ -2309,8 +2310,33 @@  expand_gimple_stmt_1 (gimple stmt)
 	    temp = expand_expr_real_2 (&ops, temp, GET_MODE (target),
 				       EXPAND_NORMAL);
 
+        /* During VRP if we detected extension as rdundant
+           and can be removed.  */
+        if (promoted && gimple_is_exp_fit_lhs (stmt)
+                && is_gimple_assign (stmt)
+                && INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
+          {
+            rtx src = (GET_CODE (temp) == SUBREG)
+                ? SUBREG_REG (temp) : temp;
+            rtx dst = (GET_CODE (target) == SUBREG)
+                ? SUBREG_REG (target) : target;
+            if (GET_MODE (src) == GET_MODE (dst))
+              {
+                promoted = false;
+                ext_redundant = true;
+              }
+          }
+
 	    if (temp == target)
 	      ;
+        else if (ext_redundant)
+          {
+              rtx src = (GET_CODE (temp) == SUBREG)
+                  ? SUBREG_REG (temp) : temp;
+              rtx dst = (GET_CODE (target) == SUBREG)
+                  ? SUBREG_REG (target) : target;
+              emit_move_insn (dst, src);
+          }
 	    else if (promoted)
 	      {
 		int unsignedp = SUBREG_PROMOTED_UNSIGNED_P (target);
diff --git a/gcc/dojump.c b/gcc/dojump.c
index 3f04eac..3336ff6 100644
--- a/gcc/dojump.c
+++ b/gcc/dojump.c
@@ -34,6 +34,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "ggc.h"
 #include "basic-block.h"
 #include "tm_p.h"
+#include "gimple.h"
 
 static bool prefer_and_bit_test (enum machine_mode, int);
 static void do_jump_by_parts_greater (tree, tree, int, rtx, rtx, int);
@@ -1118,6 +1119,73 @@  do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       type = TREE_TYPE (treeop1);
       mode = TYPE_MODE (type);
     }
+
+  /* Is zero/sign extension redundant for op1 as per VRP.  */
+  bool promoted1 = false;
+  if (GET_CODE (op0) == SUBREG && SUBREG_PROMOTED_VAR_P (op0))
+      promoted1 = true;
+  if (promoted1)
+    {
+      gimple stmt = SSA_NAME_DEF_STMT (treeop0);
+      if (!gimple_is_exp_fit_lhs (stmt))
+        {
+          promoted1 = false;
+        }
+    }
+
+  /* Is zero/sign extension redundant for op2 as per VRP.  */
+  bool promoted2 = false;
+  if (GET_CODE (op1) == SUBREG && SUBREG_PROMOTED_VAR_P (op1))
+      promoted2 = true;
+  if (promoted2)
+    {
+      gimple stmt = SSA_NAME_DEF_STMT (treeop1);
+      if (!gimple_is_exp_fit_lhs (stmt))
+        {
+          promoted2 = false;
+        }
+    }
+
+  /* If zero/sign extension is redundant, generate RTL
+     for operands without zero/sign extension.  */
+  if ((promoted1 || TREE_CODE (treeop0) == INTEGER_CST)
+      && (promoted2 || TREE_CODE (treeop1) == INTEGER_CST))
+    {
+      /* First operand is constant.  */
+      if ((TREE_CODE (treeop1) == INTEGER_CST)
+         && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (treeop1))) <= UNITS_PER_WORD))
+        {
+          rtx new_op0 = gen_reg_rtx (GET_MODE (SUBREG_REG (op0)));
+          emit_move_insn (new_op0, SUBREG_REG (op0));
+          op0 = new_op0;
+        }
+
+      /* Other operand is constant.  */
+      else if ((TREE_CODE (treeop0) == INTEGER_CST)
+          && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (treeop0)))
+              <= UNITS_PER_WORD))
+        {
+          rtx new_op1 = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
+          emit_move_insn (new_op1, SUBREG_REG (op1));
+          op1 = new_op1;
+        }
+
+      /* both operands are not constant.  */
+      else if ((TREE_CODE (treeop0) != INTEGER_CST)
+             && (TREE_CODE (treeop1) != INTEGER_CST))
+        {
+          rtx new_op0 = gen_reg_rtx (GET_MODE (SUBREG_REG (op0)));
+          emit_move_insn (new_op0, SUBREG_REG (op0));
+          rtx new_op1 = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
+          emit_move_insn (new_op1, SUBREG_REG (op1));
+          if (GET_MODE (new_op0) == GET_MODE (new_op1))
+          {
+            op0 = new_op0;
+            op1 = new_op1;
+          }
+        }
+    }
+
   unsignedp = TYPE_UNSIGNED (type);
   code = unsignedp ? unsigned_code : signed_code;
 
diff --git a/gcc/gimple.h b/gcc/gimple.h
index b4de403..f0d5998 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -191,6 +191,11 @@  struct GTY((chain_next ("%h.next"))) gimple_statement_base {
      in there.  */
   unsigned int subcode		: 16;
 
+  /* if an assignment gimple statement has RHS expression that can fit
+     LHS type, zero/sign extension to truncate is redundant.
+     Set this if we detect extension as redundant during VRP.  */
+  unsigned sign_zero_ext_redundant : 1;
+
   /* UID of this statement.  This is used by passes that want to
      assign IDs to statements.  It must be assigned and used by each
      pass.  By default it should be assumed to contain garbage.  */
@@ -1208,6 +1213,21 @@  set_bb_seq (basic_block bb, gimple_seq seq)
   bb->il.gimple.seq = seq;
 }
 
+/* returns the sign/zero extension redundant flag.  */
+
+static inline bool
+gimple_is_exp_fit_lhs (gimple stmt)
+{
+  return stmt->gsbase.sign_zero_ext_redundant;
+}
+
+/* Sets the sign/zero extension redundant flag.  */
+
+static inline void
+gimple_set_exp_fit_lhs (gimple stmt, bool val_p)
+{
+  stmt->gsbase.sign_zero_ext_redundant = val_p;
+}
 
 /* Return the code for GIMPLE statement G.  */
 
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index b5de683..08f203b 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -9095,6 +9095,197 @@  simplify_stmt_for_jump_threading (gimple stmt, gimple within_stmt)
 				   gimple_cond_rhs (stmt), within_stmt);
 }
 
+/* Check to see if the value range can be safely contained in
+   with the type.  i.e.  zero/sign extension to truncate is redyndant.  */
+static bool
+range_fits_type (value_range_t *vr, tree type)
+{
+    tree type_min = TYPE_MINVAL (type);
+    tree type_max = TYPE_MAXVAL (type);
+
+    if (vr->type != VR_RANGE
+            || is_overflow_infinity (vr->min)
+            || is_overflow_infinity (vr->max)
+            || is_negative_overflow_infinity (vr->min)
+            || is_negative_overflow_infinity (vr->max))
+      {
+        /* if value range overflows.  */
+        return false;
+      }
+
+    bool unsigned_cmp = false;
+
+    /* Unsigged type and value range is positive.  */
+    if (TYPE_UNSIGNED (type)
+        && value_range_nonnegative_p (vr))
+      {
+        unsigned_cmp = true;
+      }
+
+    if (unsigned_cmp)
+      {
+        /* unsigned type and positive value range.  */
+        return INT_CST_LT_UNSIGNED (vr->max, type_max)
+            || operand_equal_p (vr->max, type_max, 0);
+      }
+    else
+      {
+        bool min_ok, max_ok;
+        /* signed comparision.  */
+        min_ok = INT_CST_LT (type_min, vr->min)
+            || operand_equal_p (type_min, vr->min, 0);
+        max_ok = INT_CST_LT (vr->max, type_max)
+            || operand_equal_p (vr->max, type_max, 0);
+
+        return (min_ok && max_ok);
+      }
+}
+
+/* Check if the most significant bit in the value for the type is set.
+   if we are sure that msb is not set, return false.
+   If in doubt return true.  */
+
+static bool
+is_msb_set (tree val, tree type)
+{
+    tree msb;
+
+    if (TREE_CODE (val) != INTEGER_CST || TYPE_SIZE (type) == NULL_TREE)
+        return true;
+
+    switch (GET_MODE_SIZE (TYPE_MODE (type)))
+      {
+    case 1:
+        msb = build_int_cstu (integer_type_node, 0x80);
+        break;
+    case 2:
+        msb = build_int_cstu (integer_type_node, 0x8000);
+        break;
+    case 4:
+        msb = build_int_cstu (integer_type_node, 0x80000000);
+        break;
+    default:
+        return true;
+      }
+
+    if (operand_less_p (val, msb) == 1)
+        return false;
+
+    return true;
+}
+
+
+/* process gimple assign stmts and see if the sign/zero extensions are
+   redundant.  i.e.  if an assignment gimple statement has RHS expression
+   value that can fit in LHS type, truncation is redundant.  Zero/sign
+   extensions in this case can be removed.
+
+   Example, if the register 117 in the following rtl has a value range
+   that can fit in HImode, zero_extend here is redundant.
+    (set (reg:SI 110)
+          (zero_extend:SI (subreg:HI (reg:SI 117) 0)))
+
+   Therefore the zero_extend can be droped and we can generate the following
+   rtl instead
+    (set (reg:SI 110)
+          (reg:SI 117))
+
+   process_stmt_for_ext_elimination process GIMPLE_ASSIGN stmts based on the
+   VRP information and adds this information to the stmt.  Later, when the
+   gimple is expanded to RTL, zero_extend/sign_extends are dropped if it is
+   redundant.  */
+
+static void
+process_stmt_for_ext_elimination ()
+{
+    basic_block bb;
+    FOR_EACH_BB (bb)
+      {
+        gimple_stmt_iterator si;
+
+        for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+          {
+            gimple stmt = gsi_stmt (si);
+            /* Only interested in GIMPLE_ASSIGN.  */
+            if (!stmt
+                || gimple_is_exp_fit_lhs (stmt)
+                || !is_gimple_assign (stmt))
+                continue;
+
+            enum tree_code stmt_code = gimple_assign_rhs_code (stmt);
+            tree lhs = gimple_get_lhs (stmt);
+            tree rhs1 = gimple_assign_rhs1 (stmt);
+            tree rhs2 = gimple_assign_rhs2 (stmt);
+
+            /* Skip if the lhs is not integeral.  */
+            if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+                || CONSTANT_CLASS_P (lhs))
+                continue;
+
+            /* Process unary assign stmt.  */
+            if (TREE_CODE_CLASS (stmt_code) == tcc_unary)
+              {
+                value_range_t vr = VR_INITIALIZER;
+                value_range_t *p_vr = &vr;
+                if ((stmt_code == NOP_EXPR || stmt_code == CONVERT_EXPR)
+                        && (TREE_CODE (rhs1) == SSA_NAME))
+                  {
+                    p_vr = get_value_range (rhs1);
+                  }
+                else
+                  {
+                    /* Get the value range for the expression.  */
+                    extract_range_from_unary_expr (p_vr,
+                                                gimple_assign_rhs_code (stmt),
+                                                TREE_TYPE (rhs1), rhs1);
+                  }
+                if (range_int_cst_p (p_vr))
+                  {
+                    if (range_fits_type (p_vr, TREE_TYPE (lhs)))
+                      {
+                        gimple_set_exp_fit_lhs (stmt, true);
+                        gsi_set_stmt (&si, stmt);
+                      }
+                  }
+              }
+
+            /* Process binary assign stmt.  */
+            else if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
+                     == tcc_binary)
+              {
+                value_range_t vr = VR_INITIALIZER;
+                gcc_assert (rhs1);
+                gcc_assert (rhs2);
+
+                /* If MSB is set for the constant operand, RTL will
+                   sign extend even when it is positive.
+                   Skip such stmts for zero/sign extension processing.  */
+
+                if ((CONSTANT_CLASS_P (rhs1)
+                        && is_msb_set (rhs1, TREE_TYPE (lhs)))
+                    || (CONSTANT_CLASS_P(rhs2)
+                        && is_msb_set (rhs2, TREE_TYPE (lhs))))
+                  {
+                    continue;
+                  }
+
+                /* Get the value range for the expression.  */
+                extract_range_from_binary_expr (&vr,
+                                                gimple_assign_rhs_code (stmt),
+                                                TREE_TYPE (rhs1), rhs1, rhs2);
+                if (range_int_cst_p (&vr))
+                  {
+                    if (range_fits_type (&vr, TREE_TYPE (lhs)))
+                      {
+                        gimple_set_exp_fit_lhs (stmt, true);
+                        gsi_set_stmt (&si, stmt);
+                      }
+                  }
+              }
+          }
+      }
+}
+
 /* Blocks which have more than one predecessor and more than
    one successor present jump threading opportunities, i.e.,
    when the block is reached from a specific predecessor, we
@@ -9243,6 +9434,8 @@  vrp_finalize (void)
   if (warn_array_bounds)
     check_all_array_refs ();
 
+  process_stmt_for_ext_elimination ();
+
   /* We must identify jump threading opportunities before we release
      the datastructures built by VRP.  */
   identify_jump_threads ();