diff mbox

[RFC] Elimination of zext/sext - type promotion pass

Message ID 55C154DE.1050506@linaro.org
State New
Headers show

Commit Message

Kugan Vivekanandarajah Aug. 5, 2015, 12:12 a.m. UTC
> You indeed need to use CONVERT_EXPR here, maybe you can elaborate
> on the optimization issues.
>
>> 2. for inline asm (a reduced test case that might not make much as a
>> stand alone test-case, but I ran into similar cases with valid programmes)
>>
>> ;; Function fn1 (fn1, funcdef_no=0, decl_uid=4220, cgraph_uid=0,
>> symbol_order=0)
>>
>> fn1 (short int p1)
>> {
>>    <bb 2>:
>>    __asm__("" : "=r" p1_2 : "0" p1_1(D));
>>    return;
>>
>> }
>>
>>
>> I am generating something like the following which ICEs. What is the
>> expected out?
>>
>> ;; Function fn1 (fn1, funcdef_no=0, decl_uid=4220, cgraph_uid=0,
>> symbol_order=0)
>>
>> fn1 (short int p1)
>> {
>>    int _1;
>>    int _2;
>>    short int _5;
>>
>>    <bb 2>:
>>    _1 = (int) p1_4(D);
>>    _5 = (short int) _1;
>>    __asm__("" : "=r" p1_6 : "0" _5);
>>    _2 = (int) p1_6;
>>    return;
>>
>> }
>
> Parameters are indeed "interesting" to handle ;)  As we now see on ARM
> the incoming parameter (the default def) and later assignments to it
> can require different promotions (well, different extensions for ARM).
>
> The only sensible way to deal with promoting parameters is to
> promote them by changing the function signature.  Thus reflect the
> targets ABI for parameters in the GIMPLE representation (which
> includes TYPE_ARG_TYPES and DECL_ARGUMENTS).
> IMHO we should do this during gimplification of parameters / call
> arguments already.
>
> So for your example you'd end up with
>
> fn1 (int p1)
> {
>    __asm__("" : "=r" p1_6 : "0" p1_4(D));
>    return;
> }
>
> that is, promotions also apply to asm inputs/outputs (no?)


Thanks for the review and answers. For the time being, I am handling 
gimple_asm as one that has to be handled in original type. I Will look 
into improving it after getting the basic framework right.

As it is, attached patch bootstraps on x86_64-linux-gnu, arm-linux-gnu 
and aarch64-linux-gnu. There are few regressions to look into (Please 
see below).

There are cases it is working well. There are cases where it can be 
improved. I am attaching couple test cases (and their results). I am 
seeing some BIT_AND_EXPR which are inserted by promotion are not being 
optimized when they are redundant. This is especially the case when I 
invalidate the VRP range into from VRP1 during the type promotion. I am 
looking into it.

Please note that attached patch still needs to address:
* Adding gimple_debug stmts.
* Address review comment for expr.c handling SEXT_EXPR.
* Address regression failures

Based on the feedback, I will address the above and split the patch into 
logical patch set for easy detailed review.

Here are the outputs for the testcases.




Testsuite regression for x86_64-unknown-linux-gnu:
Tests that now fail, but worked before:
gfortran.dg/graphite/pr42393-1.f90   -O  (test for excess errors)


Testsuite regression for  arm-linux-gnu:
Tests that now fail, but worked before:
arm-sim: gcc.dg/fixed-point/convert-sat.c execution test
arm-sim: gcc.dg/tree-ssa/20030729-1.c scan-tree-dump-times dom2 "\\(unsigned
int\\)" 0
arm-sim: gcc.dg/tree-ssa/pr54245.c scan-tree-dump-times slsr "Inserting
initializer" 0
arm-sim: gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-not optimized
"\\(int\\)"
arm-sim: gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-times optimized
"\\(unsigned char\\)" 8
arm-sim: gcc.target/arm/mla-2.c scan-assembler smlalbb
arm-sim: gcc.target/arm/unsigned-extend-2.c scan-assembler ands
arm-sim: gcc.target/arm/wmul-1.c scan-assembler-times smlabb 2
arm-sim: gcc.target/arm/wmul-2.c scan-assembler-times smulbb 1
arm-sim: gcc.target/arm/wmul-3.c scan-assembler-times smulbb 2
arm-sim: gcc.target/arm/wmul-9.c scan-assembler smlalbb
arm-sim: gfortran.dg/graphite/pr42393-1.f90   -O  (test for excess errors)

Tests that now work, but didn't before:
arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile
"Read tp_first_run: 0" 2
arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile
"Read tp_first_run: 2" 1
arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile
"Read tp_first_run: 3" 1
arm-sim: gcc.target/arm/builtin-bswap-1.c scan-assembler-times rev16ne\\t 1
arm-sim: gcc.target/arm/builtin-bswap-1.c scan-assembler-times revshne\\t 1
arm-sim: gcc.target/arm/smlaltb-1.c scan-assembler smlaltb\\t
arm-sim: gcc.target/arm/smlaltt-1.c scan-assembler smlaltt\\t


Testsuite regression for  aarch64-linux-gnu:
Tests that now fail, but worked before:
c-c++-common/torture/vector-compare-1.c   -O3 -g  (test for excess errors)
c-c++-common/torture/vector-compare-1.c   -O3 -g  (test for excess errors)
gcc.dg/tree-ssa/20030729-1.c scan-tree-dump-times dom2 "\\(unsigned int\\)"
0
gcc.dg/tree-ssa/pr54245.c scan-tree-dump-times slsr "Inserting initializer"
0
gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-not optimized "\\(int\\)"
gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-times optimized "\\(unsigned
char\\)" 8

Thanks,
Kugan
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 16d5582..63c9dd2 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1493,6 +1493,7 @@ OBJS = \
 	tree-vect-slp.o \
 	tree-vectorizer.o \
 	tree-vrp.o \
+	gimple-ssa-type-promote.o \
 	tree.o \
 	valtrack.o \
 	value-prof.o \
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 0b19953..6642c01 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -4972,6 +4972,10 @@ expand_debug_expr (tree exp)
     case FMA_EXPR:
       return simplify_gen_ternary (FMA, mode, inner_mode, op0, op1, op2);
 
+    case SEXT_EXPR:
+      return op0;
+
+
     default:
     flag_unsupported:
 #ifdef ENABLE_CHECKING
diff --git a/gcc/common.opt b/gcc/common.opt
index 6d47e94..5afda05 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2374,6 +2374,10 @@ ftree-vrp
 Common Report Var(flag_tree_vrp) Init(0) Optimization
 Perform Value Range Propagation on trees
 
+ftree-type-promote
+Common Report Var(flag_tree_type_promote) Init(1) Optimization
+Perform Type Promotion on trees
+
 funit-at-a-time
 Common Report Var(flag_unit_at_a_time) Init(1)
 Compile whole compilation unit at a time
diff --git a/gcc/expr.c b/gcc/expr.c
index d601129..7483950 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9194,6 +9194,22 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
       return target;
 
+    case SEXT_EXPR:
+	{
+	  rtx op0 = expand_normal (treeop0);
+	  rtx temp;
+	  if (!target)
+	    target = gen_reg_rtx (TYPE_MODE (TREE_TYPE (treeop0)));
+
+	  machine_mode inner_mode
+	    = smallest_mode_for_size (tree_to_shwi (treeop1),
+				      MODE_INT);
+	  temp = convert_modes (inner_mode,
+				TYPE_MODE (TREE_TYPE (treeop0)), op0, 0);
+	  convert_move (target, temp, 0);
+	  return target;
+	}
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 886922f..bac899c 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -984,6 +984,10 @@ int_const_binop_1 (enum tree_code code, const_tree arg1, const_tree parg2,
       res = wi::bit_and (arg1, arg2);
       break;
 
+    case SEXT_EXPR:
+      res = wi::sext (arg1, arg2.to_uhwi ());
+      break;
+
     case RSHIFT_EXPR:
     case LSHIFT_EXPR:
       if (wi::neg_p (arg2))
diff --git a/gcc/gimple-ssa-type-promote.c b/gcc/gimple-ssa-type-promote.c
index e69de29..b5b69cc 100644
--- a/gcc/gimple-ssa-type-promote.c
+++ b/gcc/gimple-ssa-type-promote.c
@@ -0,0 +1,815 @@
+/* Type promotion of SSA names to minimise redundant zero/sign extension.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "flags.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "vec.h"
+#include "double-int.h"
+#include "input.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
+#include "tree.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "predict.h"
+#include "hard-reg-set.h"
+#include "function.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "gimple-fold.h"
+#include "tree-eh.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "tree-pass.h"
+#include "gimple-pretty-print.h"
+#include "langhooks.h"
+#include "sbitmap.h"
+#include "domwalk.h"
+
+#include "tree-dfa.h"
+
+/* This pass applies type promotion to SSA names in the function and
+   inserts appropriate truncations.  Idea of this pass is to promote operations
+   such a way that we can minimise generation of subreg in RTL,
+   that intern results in removal of redundant zero/sign extensions.  This pass
+   will run prior to The VRP and DOM such that they will be able to optimise
+   redundant truncations and extensions.  This is based on the discussion from
+   https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00472.html.
+
+*/
+
+static unsigned n_ssa_val;
+static sbitmap ssa_to_be_promoted_bitmap;
+static sbitmap ssa_sets_higher_bits_bitmap;
+static hash_map <tree, tree>  *original_type_map;
+
+/* Return the promoted type for TYPE.  */
+static tree
+get_promoted_type (tree type)
+{
+  tree promoted_type;
+  enum machine_mode mode;
+  int uns;
+  if (POINTER_TYPE_P (type)
+      || !INTEGRAL_TYPE_P (type)
+      || TYPE_PRECISION (type) % 8 != 0)
+    return type;
+  mode = TYPE_MODE (type);
+#ifdef PROMOTE_MODE
+  uns = TYPE_SIGN (type);
+  PROMOTE_MODE (mode, uns, type);
+#endif
+  uns = TYPE_SIGN (type);
+  promoted_type = lang_hooks.types.type_for_mode (mode, uns);
+  if (promoted_type
+      && (TYPE_PRECISION (promoted_type) > TYPE_PRECISION (type)))
+    type = promoted_type;
+  return type;
+}
+
+/* Return true if ssa NAME is already considered for promotion.  */
+static bool
+ssa_promoted_p (tree name)
+{
+  if (TREE_CODE (name) == SSA_NAME)
+    {
+      unsigned int index = SSA_NAME_VERSION (name);
+      if (index < n_ssa_val)
+	return bitmap_bit_p (ssa_to_be_promoted_bitmap, index);
+    }
+  return true;
+}
+
+
+/* Set ssa NAME to be already considered for promotion.  */
+static void
+set_ssa_promoted (tree name)
+{
+  if (TREE_CODE (name) == SSA_NAME)
+    {
+      unsigned int index = SSA_NAME_VERSION (name);
+      if (index < n_ssa_val)
+	bitmap_set_bit (ssa_to_be_promoted_bitmap, index);
+    }
+}
+
+/* Insert COPY_STMT along the edge from STMT to its successor.  */
+static void
+insert_stmt_on_edge (gimple stmt, gimple copy_stmt)
+{
+  edge_iterator ei;
+  edge e, edge = NULL;
+  basic_block bb = gimple_bb (stmt);
+
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    if (!(e->flags & EDGE_EH))
+      {
+	gcc_assert (edge == NULL);
+	edge = e;
+      }
+
+  gcc_assert (edge);
+  gsi_insert_on_edge_immediate (edge, copy_stmt);
+}
+
+/* Return true if it is safe to promote the defined SSA_NAME in the STMT
+   itself.  */
+static bool
+safe_to_promote_def_p (gimple stmt)
+{
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  if (gimple_vuse (stmt) != NULL_TREE
+      || gimple_vdef (stmt) != NULL_TREE
+      || code == ARRAY_REF
+      || code == LROTATE_EXPR
+      || code == RROTATE_EXPR
+      || code == VIEW_CONVERT_EXPR
+      || code == BIT_FIELD_REF
+      || code == REALPART_EXPR
+      || code == IMAGPART_EXPR
+      || code == REDUC_MAX_EXPR
+      || code == REDUC_PLUS_EXPR
+      || code == REDUC_MIN_EXPR)
+    return false;
+  return true;
+}
+
+/* Return true if it is safe to promote the use in the STMT.  */
+static bool
+safe_to_promote_use_p (gimple stmt)
+{
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  tree lhs = gimple_assign_lhs (stmt);
+
+  if (gimple_vuse (stmt) != NULL_TREE
+      || gimple_vdef (stmt) != NULL_TREE
+      ||code == VIEW_CONVERT_EXPR
+      || code == LROTATE_EXPR
+      || code == RROTATE_EXPR
+      || code == CONSTRUCTOR
+      || code == BIT_FIELD_REF
+      || code == COMPLEX_EXPR
+      || code == ASM_EXPR
+      || VECTOR_TYPE_P (TREE_TYPE (lhs)))
+    return false;
+  return true;
+}
+
+/* Return true if the SSA_NAME has to be truncated to preserve the
+   semantics.  */
+static bool
+truncate_use_p (gimple stmt)
+{
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  if (TREE_CODE_CLASS (code)
+      == tcc_comparison
+      || code == TRUNC_DIV_EXPR
+      || code == CEIL_DIV_EXPR
+      || code == FLOOR_DIV_EXPR
+      || code == ROUND_DIV_EXPR
+      || code == TRUNC_MOD_EXPR
+      || code == CEIL_MOD_EXPR
+      || code == FLOOR_MOD_EXPR
+      || code == ROUND_MOD_EXPR
+      || code == LSHIFT_EXPR
+      || code == RSHIFT_EXPR)
+    return true;
+  return false;
+}
+
+/* Return true if LHS will be promoted later.  */
+static bool
+tobe_promoted_p (tree lhs)
+{
+  if (TREE_CODE (lhs) == SSA_NAME
+      && !POINTER_TYPE_P (TREE_TYPE (lhs))
+      && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+      && !VECTOR_TYPE_P (TREE_TYPE (lhs))
+      && !ssa_promoted_p (lhs)
+      && (get_promoted_type (TREE_TYPE (lhs))
+	  != TREE_TYPE (lhs)))
+    return true;
+  else
+    return false;
+}
+
+/* Convert constant CST to TYPE.  */
+static tree
+convert_int_cst (tree type, tree cst, signop sign = SIGNED)
+{
+  wide_int wi_cons = fold_convert (type, cst);
+  wi_cons = wi::ext (wi_cons, TYPE_PRECISION (TREE_TYPE (cst)), sign);
+  return wide_int_to_tree (type, wi_cons);
+}
+
+/* Promote constants in STMT to TYPE.  If PROMOTE_COND_EXPR is true,
+   promote only the constants in conditions part of the COND_EXPR.  */
+static void
+promote_cst_in_stmt (gimple stmt, tree type, bool promote_cond = false)
+{
+  tree op;
+  ssa_op_iter iter;
+  use_operand_p oprnd;
+  int index;
+  tree op0, op1;
+  signop sign = SIGNED;
+
+  switch (gimple_code (stmt))
+    {
+    case GIMPLE_ASSIGN:
+      if (promote_cond
+	  && gimple_assign_rhs_code (stmt) == COND_EXPR)
+	{
+	  /* Promote INTEGER_CST that are tcc_compare arguments.  */
+	  sign = TYPE_SIGN (type);
+	  op = gimple_assign_rhs1 (stmt);
+	  op0 = TREE_OPERAND (op, 0);
+	  op1 = TREE_OPERAND (op, 1);
+	  if (TREE_CODE (op0) == INTEGER_CST)
+	    op0 = convert_int_cst (type, op0, sign);
+	  if (TREE_CODE (op1) == INTEGER_CST)
+	    op1 = convert_int_cst (type, op1, sign);
+	  tree new_op = build2 (TREE_CODE (op), type, op0, op1);
+	  gimple_assign_set_rhs1 (stmt, new_op);
+	}
+      else
+	{
+	  /* Promote INTEGER_CST in GIMPLE_ASSIGN.  */
+	  op = gimple_assign_rhs3 (stmt);
+	  if (op && TREE_CODE (op) == INTEGER_CST)
+	    gimple_assign_set_rhs3 (stmt, convert_int_cst (type, op, sign));
+	  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
+	      == tcc_comparison)
+	    sign = TYPE_SIGN (type);
+	  op = gimple_assign_rhs1 (stmt);
+	  if (op && TREE_CODE (op) == INTEGER_CST)
+	    gimple_assign_set_rhs1 (stmt, convert_int_cst (type, op, sign));
+	  op = gimple_assign_rhs2 (stmt);
+	  if (op && TREE_CODE (op) == INTEGER_CST)
+	    gimple_assign_set_rhs2 (stmt, convert_int_cst (type, op, sign));
+	}
+      break;
+
+    case GIMPLE_PHI:
+	{
+	  /* Promote INTEGER_CST arguments to GIMPLE_PHI.  */
+	  gphi *phi = as_a <gphi *> (stmt);
+	  FOR_EACH_PHI_ARG (oprnd, phi, iter, SSA_OP_USE)
+	    {
+	      op = USE_FROM_PTR (oprnd);
+	      index = PHI_ARG_INDEX_FROM_USE (oprnd);
+	      if (TREE_CODE (op) == INTEGER_CST)
+		SET_PHI_ARG_DEF (phi, index, convert_int_cst (type, op, sign));
+	    }
+	}
+      break;
+
+    case GIMPLE_COND:
+	{
+	  /* Promote INTEGER_CST that are GIMPLE_COND arguments.  */
+	  gcond *cond = as_a <gcond *> (stmt);
+	  op = gimple_cond_lhs (cond);
+	  sign = TYPE_SIGN (type);
+
+	  if (op && TREE_CODE (op) == INTEGER_CST)
+	    gimple_cond_set_lhs (cond, convert_int_cst (type, op, sign));
+	  op = gimple_cond_rhs (cond);
+
+	  if (op && TREE_CODE (op) == INTEGER_CST)
+	    gimple_cond_set_rhs (cond, convert_int_cst (type, op, sign));
+	}
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Create an ssa with TYPE to copy ssa VAR.  */
+static tree
+make_promoted_copy (tree var, gimple def_stmt, tree type)
+{
+  tree new_lhs = make_ssa_name (type, def_stmt);
+  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (var))
+    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_lhs) = 1;
+  return new_lhs;
+}
+
+/* Zero/sign extend (depending on type) VAR and truncate to WIDTH bits.
+   Assign the zero/sign extended value in NEW_VAR.  gimple statement
+   that performs the zero/sign extension is returned.  */
+static gimple
+zero_sign_extend_stmt (tree new_var, tree var, int width)
+{
+  gcc_assert (TYPE_PRECISION (TREE_TYPE (var))
+	      == TYPE_PRECISION (TREE_TYPE (new_var)));
+  gcc_assert (TYPE_PRECISION (TREE_TYPE (var)) > width);
+  gimple stmt;
+
+  if (TYPE_UNSIGNED (TREE_TYPE (new_var)))
+    /* Zero extend.  */
+    stmt = gimple_build_assign (new_var,
+				BIT_AND_EXPR,
+				var, build_int_cst (TREE_TYPE (var),
+						    ((1ULL << width) - 1)));
+  else
+    /* Sign extend.  */
+    stmt = gimple_build_assign (new_var,
+				SEXT_EXPR,
+				var, build_int_cst (TREE_TYPE (var), width));
+  return stmt;
+}
+
+
+void duplicate_default_ssa (tree to, tree from)
+{
+  SET_SSA_NAME_VAR_OR_IDENTIFIER (to, SSA_NAME_VAR (from));
+  SSA_NAME_IS_DEFAULT_DEF (to) = SSA_NAME_IS_DEFAULT_DEF (from);
+  SSA_NAME_DEF_STMT (to) = SSA_NAME_DEF_STMT (from);
+  SET_SSA_NAME_VAR_OR_IDENTIFIER (from, NULL_TREE);
+  SSA_NAME_IS_DEFAULT_DEF (to) = 1;
+  SSA_NAME_IS_DEFAULT_DEF (from) = 0;
+}
+
+/* Promote definition DEF to PROMOTED_TYPE. If the stmt that defines def
+   is def_stmt, make the type of def promoted_type. If the stmt is such
+   that, result of the def_stmt cannot be of promoted_type, create a new_def
+   of the original_type and make the def_stmt assign its value to newdef.
+   Then, create a CONVERT_EXPR to convert new_def to def of promoted type.
+
+   For example, for stmt with original_type char and promoted_type int:
+		char _1 = mem;
+	becomes:
+		char _2 = mem;
+		int _1 = (int)_2;
+
+   If the def_stmt allows def to be promoted, promote def in-place
+   (and its arguments when needed).
+
+   For example:
+		char _3 = _1 + _2;
+	becomes:
+		int _3 = _1 + _2;
+   Here, _1 and _2 will also be promoted. */
+
+static void
+promote_definition (tree def,
+		    tree promoted_type)
+{
+  gimple def_stmt = SSA_NAME_DEF_STMT (def);
+  gimple copy_stmt = NULL;
+  basic_block bb;
+  gimple_stmt_iterator gsi;
+  tree original_type = TREE_TYPE (def);
+  tree new_def;
+  bool do_not_promote = false;
+
+  switch (gimple_code (def_stmt))
+    {
+    case GIMPLE_PHI:
+	{
+	  /* Promote def by fixing its type and make def anonymous.  */
+	  TREE_TYPE (def) = promoted_type;
+	  SET_SSA_NAME_VAR_OR_IDENTIFIER (def, NULL_TREE);
+	  promote_cst_in_stmt (def_stmt, promoted_type);
+	  /* TODO: If def doesnt have a !DECL_IGNORED_P, insert a debug stmt.  */
+	  break;
+	}
+
+    case GIMPLE_ASM:
+	{
+	  gasm *asm_stmt = as_a <gasm *> (def_stmt);
+	  for (unsigned int i = 0; i < gimple_asm_noutputs (asm_stmt); ++i)
+	    {
+	      /* Promote def and copy (i.e. convert) the value defined
+		 by asm to def.  */
+	      tree link = gimple_asm_output_op (asm_stmt, i);
+	      tree op = TREE_VALUE (link);
+	      if (op == def)
+		{
+		  new_def = copy_ssa_name (def);
+		  set_ssa_promoted (new_def);
+		  duplicate_default_ssa (new_def, def);
+		  TREE_VALUE (link) = new_def;
+		  gimple_asm_set_output_op (asm_stmt, i, link);
+
+		  TREE_TYPE (def) = promoted_type;
+		  copy_stmt = gimple_build_assign (def, CONVERT_EXPR,
+						   new_def, NULL_TREE);
+		  gsi = gsi_for_stmt (def_stmt);
+		  SSA_NAME_IS_DEFAULT_DEF (new_def) = 0;
+		  gsi_insert_after (&gsi, copy_stmt, GSI_NEW_STMT);
+		  break;
+		}
+	    }
+	  break;
+	}
+
+    case GIMPLE_NOP:
+	{
+	  if (SSA_NAME_VAR (def) == NULL)
+	    {
+	      /* Promote def by fixing its type for anonymous def.  */
+	      TREE_TYPE (def) = promoted_type;
+	    }
+	  else
+	    {
+	      /* Create a promoted copy of parameters.  */
+	      bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+	      gcc_assert (bb);
+	      gsi = gsi_after_labels (bb);
+	      new_def = copy_ssa_name (def);
+	      set_ssa_promoted (new_def);
+	      set_ssa_default_def (cfun, SSA_NAME_VAR (def), new_def);
+	      duplicate_default_ssa (new_def, def);
+	      TREE_TYPE (def) = promoted_type;
+	      copy_stmt = gimple_build_assign (def, CONVERT_EXPR,
+					       new_def, NULL_TREE);
+	      SSA_NAME_DEF_STMT (def) = copy_stmt;
+	      gsi_insert_before (&gsi, copy_stmt, GSI_NEW_STMT);
+	    }
+	  break;
+	}
+
+    case GIMPLE_ASSIGN:
+	{
+	  enum tree_code code = gimple_assign_rhs_code (def_stmt);
+	  if (!safe_to_promote_def_p (def_stmt))
+	    {
+	      do_not_promote = true;
+	    }
+	  else if (CONVERT_EXPR_CODE_P (code))
+	    {
+	      tree rhs = gimple_assign_rhs1 (def_stmt);
+	      if (types_compatible_p (TREE_TYPE (rhs), promoted_type))
+		{
+		  /* As we travel statements in dominated order, arguments
+		     of def_stmt will be visited before visiting def.  If RHS
+		     is already promoted and type is compatible, we can convert
+		     them into ZERO/SIGN EXTEND stmt.  */
+		  tree &type = original_type_map->get_or_insert (rhs);
+		  if (type == NULL_TREE)
+		    type = TREE_TYPE (rhs);
+		  if (TYPE_PRECISION (original_type) < TYPE_PRECISION (type))
+		    type = original_type;
+		  gcc_assert (type != NULL_TREE);
+		  TREE_TYPE (def) = promoted_type;
+		  gimple copy_stmt =
+		    zero_sign_extend_stmt (def, rhs,
+					   TYPE_PRECISION (type));
+		  SET_SSA_NAME_VAR_OR_IDENTIFIER (def, NULL_TREE);
+		  gsi = gsi_for_stmt (def_stmt);
+		  gsi_replace (&gsi, copy_stmt, false);
+		}
+	      else
+		{
+		  /* If RHS is not promoted OR their types are not
+		     compatible, create CONVERT_EXPR that converts
+		     RHS to  promoted DEF type and perform a
+		     ZERO/SIGN EXTEND to get the required value
+		     from RHS.  */
+		  tree s = (TYPE_PRECISION (TREE_TYPE (def))
+			    < TYPE_PRECISION (TREE_TYPE (rhs)))
+		    ? TREE_TYPE (def) : TREE_TYPE (rhs);
+		  new_def = copy_ssa_name (def);
+		  set_ssa_promoted (new_def);
+		  TREE_TYPE (def) = promoted_type;
+		  TREE_TYPE (new_def) = promoted_type;
+		  SET_SSA_NAME_VAR_OR_IDENTIFIER (def, NULL_TREE);
+		  SET_SSA_NAME_VAR_OR_IDENTIFIER (new_def, NULL_TREE);
+		  gimple_set_lhs (def_stmt, new_def);
+		  gimple copy_stmt =
+		    zero_sign_extend_stmt (def, new_def,
+					   TYPE_PRECISION (s));
+		  gsi = gsi_for_stmt (def_stmt);
+		  if (lookup_stmt_eh_lp (def_stmt) > 0)
+		    insert_stmt_on_edge (def_stmt, copy_stmt);
+		  else
+		    gsi_insert_after (&gsi, copy_stmt, GSI_NEW_STMT);
+		}
+	    }
+	  else
+	    {
+	      /* Promote def by fixing its type and make def anonymous.  */
+	      SET_SSA_NAME_VAR_OR_IDENTIFIER (def, NULL_TREE);
+	      promote_cst_in_stmt (def_stmt, promoted_type);
+	      TREE_TYPE (def) = promoted_type;
+	    }
+	  break;
+	}
+
+    default:
+      do_not_promote = true;
+      break;
+    }
+
+  if (do_not_promote)
+    {
+      /* Promote def and copy (i.e. convert) the value defined
+	 by the stmt that cannot be promoted.  */
+      new_def = copy_ssa_name (def);
+      set_ssa_promoted (new_def);
+      SET_SSA_NAME_VAR_OR_IDENTIFIER (def, NULL_TREE);
+      TREE_TYPE (def) = promoted_type;
+      gimple_set_lhs (def_stmt, new_def);
+      copy_stmt = gimple_build_assign (def, CONVERT_EXPR,
+				       new_def, NULL_TREE);
+      gsi = gsi_for_stmt (def_stmt);
+      if (lookup_stmt_eh_lp (def_stmt) > 0)
+	insert_stmt_on_edge (def_stmt, copy_stmt);
+      else
+	gsi_insert_after (&gsi, copy_stmt, GSI_NEW_STMT);
+    }
+  else
+    {
+      /* Type is now promoted.  Due to this, some of the value ranges computed by
+	 VRP1 will is invalid.  TODO: We can be intelligent in deciding
+	 which ranges to be invalidated instead of invalidating everything.  */
+      SSA_NAME_RANGE_INFO (def) = NULL;
+    }
+}
+
+/* Fix the (promoted) USE in stmts where USE cannot be be promoted.  */
+static unsigned int
+fixup_uses (tree use, tree promoted_type, tree old_type)
+{
+  gimple stmt;
+  imm_use_iterator ui;
+  gimple_stmt_iterator gsi;
+  use_operand_p op;
+
+  FOR_EACH_IMM_USE_STMT (stmt, ui, use)
+    {
+      bool do_not_promote = false;
+      switch (gimple_code (stmt))
+	{
+	case GIMPLE_DEBUG:
+	    {
+	      gsi = gsi_for_stmt (stmt);
+	      gsi_remove (&gsi, true);
+	      break;
+	    }
+
+	case GIMPLE_ASM:
+	case GIMPLE_CALL:
+	case GIMPLE_RETURN:
+	    {
+	      /* USE cannot be promoted here.  */
+	      do_not_promote = true;
+	      break;
+	    }
+
+	case GIMPLE_ASSIGN:
+	    {
+	      enum tree_code code = gimple_assign_rhs_code (stmt);
+	      tree lhs = gimple_assign_lhs (stmt);
+	      if (!safe_to_promote_use_p (stmt))
+		{
+		  do_not_promote = true;
+		}
+	      else if (truncate_use_p (stmt))
+		{
+		  /* In some stmts, value in USE has to be ZERO/SIGN
+		     Extended based on the original type for correct
+		     result.  */
+		  tree temp = make_promoted_copy (use, NULL, TREE_TYPE (use));
+		  gimple copy_stmt =
+		    zero_sign_extend_stmt (temp, use,
+					   TYPE_PRECISION (old_type));
+		  gsi = gsi_for_stmt (stmt);
+		  gsi_insert_before (&gsi, copy_stmt, GSI_NEW_STMT);
+
+		  FOR_EACH_IMM_USE_ON_STMT (op, ui)
+		    SET_USE (op, temp);
+		  if (TREE_CODE_CLASS (code)
+		      == tcc_comparison)
+		    promote_cst_in_stmt (stmt, promoted_type, true);
+		  update_stmt (stmt);
+		}
+	      else if (CONVERT_EXPR_CODE_P (code))
+		{
+		  if (types_compatible_p (TREE_TYPE (lhs), promoted_type))
+		    {
+		      /* Type of LHS and promoted RHS are compatible, we can
+			 convert this into ZERO/SIGN EXTEND stmt.  */
+		      gimple copy_stmt =
+			zero_sign_extend_stmt (lhs, use,
+					       TYPE_PRECISION (old_type));
+		      gsi = gsi_for_stmt (stmt);
+		      set_ssa_promoted (lhs);
+		      gsi_replace (&gsi, copy_stmt, false);
+		    }
+		  else if (tobe_promoted_p (lhs))
+		    {
+		      /* If LHS will be promoted later, store the original
+			 type of RHS so that we can convert it to ZERO/SIGN
+			 EXTEND when LHS is promoted.  */
+		      tree rhs = gimple_assign_rhs1 (stmt);
+		      tree &type = original_type_map->get_or_insert (rhs);
+		      type = TREE_TYPE (old_type);
+		    }
+		  else
+		    {
+		      do_not_promote = true;
+		    }
+		}
+	      break;
+	    }
+
+	case GIMPLE_COND:
+	    {
+	      /* In GIMPLE_COND, value in USE has to be ZERO/SIGN
+		 Extended based on the original type for correct
+		 result.  */
+	      tree temp = make_promoted_copy (use, NULL, TREE_TYPE (use));
+	      gimple copy_stmt =
+		zero_sign_extend_stmt (temp, use,
+				       TYPE_PRECISION (old_type));
+	      gsi = gsi_for_stmt (stmt);
+	      gsi_insert_before (&gsi, copy_stmt, GSI_NEW_STMT);
+
+	      FOR_EACH_IMM_USE_ON_STMT (op, ui)
+		SET_USE (op, temp);
+	      promote_cst_in_stmt (stmt, promoted_type, true);
+	      update_stmt (stmt);
+	      break;
+	    }
+
+	default:
+	  break;
+	}
+
+      if (do_not_promote)
+	{
+	  /* FOR stmts where USE canoot be promoted, create an
+	     original type copy.  */
+	  tree temp;
+	  temp = copy_ssa_name (use);
+	  set_ssa_promoted (temp);
+	  TREE_TYPE (temp) = old_type;
+	  gimple copy_stmt = gimple_build_assign (temp, CONVERT_EXPR,
+						  use, NULL_TREE);
+	  gsi = gsi_for_stmt (stmt);
+	  gsi_insert_before (&gsi, copy_stmt, GSI_NEW_STMT);
+	  FOR_EACH_IMM_USE_ON_STMT (op, ui)
+	    SET_USE (op, temp);
+	  update_stmt (stmt);
+	}
+    }
+  return 0;
+}
+
+/* Promote definition of NAME and adjust its uses if necessary.  */
+static unsigned int
+promote_def_and_uses (tree name)
+{
+  tree type;
+  if (tobe_promoted_p (name))
+    {
+      type = get_promoted_type (TREE_TYPE (name));
+      tree old_type = TREE_TYPE (name);
+      promote_definition (name, type);
+      fixup_uses (name, type, old_type);
+      set_ssa_promoted (name);
+    }
+  return 0;
+}
+
+/* Promote all the stmts in the basic block.  */
+static void
+promote_all_stmts (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  ssa_op_iter iter;
+  tree def;
+
+  for (gphi_iterator gpi = gsi_start_phis (bb);
+       !gsi_end_p (gpi); gsi_next (&gpi))
+    {
+      gphi *phi = gpi.phi ();
+      use_operand_p op;
+
+      FOR_EACH_PHI_ARG (op, phi, iter, SSA_OP_USE)
+	{
+	  def = USE_FROM_PTR (op);
+	  promote_def_and_uses (def);
+	}
+      def = PHI_RESULT (phi);
+      promote_def_and_uses (def);
+    }
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+
+      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE | SSA_OP_DEF)
+	promote_def_and_uses (def);
+    }
+}
+
+
+class type_promotion_dom_walker : public dom_walker
+{
+public:
+  type_promotion_dom_walker (cdi_direction direction)
+    : dom_walker (direction) {}
+  virtual void before_dom_children (basic_block bb)
+    {
+      promote_all_stmts (bb);
+    }
+};
+
+/* Main entry point to the pass.  */
+static unsigned int
+execute_type_promotion (void)
+{
+  n_ssa_val = num_ssa_names;
+  original_type_map = new hash_map<tree, tree>;
+  ssa_to_be_promoted_bitmap = sbitmap_alloc (n_ssa_val);
+  bitmap_clear (ssa_to_be_promoted_bitmap);
+  ssa_sets_higher_bits_bitmap = sbitmap_alloc (n_ssa_val);
+  bitmap_clear (ssa_sets_higher_bits_bitmap);
+
+  calculate_dominance_info (CDI_DOMINATORS);
+  /* Walk the CFG in dominator order.  */
+  type_promotion_dom_walker (CDI_DOMINATORS)
+    .walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+
+  sbitmap_free (ssa_to_be_promoted_bitmap);
+  sbitmap_free (ssa_sets_higher_bits_bitmap);
+  free_dominance_info (CDI_DOMINATORS);
+  delete original_type_map;
+  return 0;
+}
+
+namespace {
+const pass_data pass_data_type_promotion =
+{
+  GIMPLE_PASS, /* type */
+  "promotion", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_TREE_TYPE_PROMOTE, /* tv_id */
+  PROP_ssa, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  (TODO_cleanup_cfg | TODO_update_ssa | TODO_verify_all),
+};
+
+class pass_type_promotion : public gimple_opt_pass
+{
+public:
+  pass_type_promotion (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_type_promotion, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  opt_pass * clone () { return new pass_type_promotion (m_ctxt); }
+  virtual bool gate (function *) { return flag_tree_type_promote != 0; }
+  virtual unsigned int execute (function *)
+    {
+      return execute_type_promotion ();
+    }
+
+}; // class pass_type_promotion
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_type_promote (gcc::context *ctxt)
+{
+  return new pass_type_promotion (ctxt);
+}
+
diff --git a/gcc/passes.def b/gcc/passes.def
index 64fc4d9..254496b 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -270,6 +270,7 @@ along with GCC; see the file COPYING3.  If not see
       POP_INSERT_PASSES ()
       NEXT_PASS (pass_simduid_cleanup);
       NEXT_PASS (pass_lower_vector_ssa);
+      NEXT_PASS (pass_type_promote);
       NEXT_PASS (pass_cse_reciprocals);
       NEXT_PASS (pass_reassoc);
       NEXT_PASS (pass_strength_reduction);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index aee36e6..38b8d7d 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -276,6 +276,7 @@ DEFTIMEVAR (TV_VTABLE_VERIFICATION   , "vtable verification")
 DEFTIMEVAR (TV_TREE_UBSAN            , "tree ubsan")
 DEFTIMEVAR (TV_INITIALIZE_RTL        , "initialize rtl")
 DEFTIMEVAR (TV_GIMPLE_LADDRESS       , "address lowering")
+DEFTIMEVAR (TV_TREE_TYPE_PROMOTE     , "tree type promote")
 
 /* Everything else in rest_of_compilation not included above.  */
 DEFTIMEVAR (TV_EARLY_LOCAL	     , "early local passes")
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 66f999e..1db888d 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3748,6 +3748,18 @@ verify_gimple_assign_binary (gassign *stmt)
         return false;
       }
 
+    case SEXT_EXPR:
+      {
+	if (!INTEGRAL_TYPE_P (lhs_type)
+	    || !INTEGRAL_TYPE_P (rhs1_type)
+	    || TREE_CODE (rhs2) != INTEGER_CST)
+	  {
+	    error ("invalid operands in sext expr");
+	    return true;
+	  }
+	return false;
+      }
+
     case VEC_WIDEN_LSHIFT_HI_EXPR:
     case VEC_WIDEN_LSHIFT_LO_EXPR:
       {
@@ -5192,6 +5204,7 @@ gimple_verify_flow_info (void)
 
 	  if (found_ctrl_stmt)
 	    {
+	      dump_bb (stderr, gimple_bb (stmt), 0, 0);
 	      error ("control flow in the middle of basic block %d",
 		     bb->index);
 	      err = 1;
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index e1ceea4..272c409 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3884,6 +3884,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
     case BIT_XOR_EXPR:
     case BIT_AND_EXPR:
     case BIT_NOT_EXPR:
+    case SEXT_EXPR:
 
     case TRUTH_ANDIF_EXPR:
     case TRUTH_ORIF_EXPR:
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 7b66a1c..7ddb55c 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -431,6 +431,7 @@ extern gimple_opt_pass *make_pass_fre (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_check_data_deps (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_copy_prop (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_isolate_erroneous_paths (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_type_promote (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_vrp (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_uncprop (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_return_slot (gcc::context *ctxt);
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 7cd1fe7..04f6777 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -1794,6 +1794,14 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, int flags,
       }
       break;
 
+    case SEXT_EXPR:
+      pp_string (pp, "SEXT_EXPR <");
+      dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
+      pp_string (pp, ", ");
+      dump_generic_node (pp, TREE_OPERAND (node, 1), spc, flags, false);
+      pp_greater (pp);
+      break;
+
     case MODIFY_EXPR:
     case INIT_EXPR:
       dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags,
@@ -3414,6 +3422,9 @@ op_symbol_code (enum tree_code code)
     case MIN_EXPR:
       return "min";
 
+    case SEXT_EXPR:
+      return "sext from bit";
+
     default:
       return "<<< ??? >>>";
     }
diff --git a/gcc/tree-ssanames.c b/gcc/tree-ssanames.c
index 910cb19..19aa918 100644
--- a/gcc/tree-ssanames.c
+++ b/gcc/tree-ssanames.c
@@ -190,7 +190,8 @@ set_range_info (tree name, enum value_range_type range_type,
   unsigned int precision = TYPE_PRECISION (TREE_TYPE (name));
 
   /* Allocate if not available.  */
-  if (ri == NULL)
+  if (ri == NULL
+      || (precision != ri->get_min ().get_precision ()))
     {
       size_t size = (sizeof (range_info_def)
 		     + trailing_wide_ints <3>::extra_size (precision));
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index d962683..dee8f6f 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -2389,6 +2389,7 @@ extract_range_from_binary_expr_1 (value_range_t *vr,
       && code != LSHIFT_EXPR
       && code != MIN_EXPR
       && code != MAX_EXPR
+      && code != SEXT_EXPR
       && code != BIT_AND_EXPR
       && code != BIT_IOR_EXPR
       && code != BIT_XOR_EXPR)
@@ -2949,6 +2950,57 @@ extract_range_from_binary_expr_1 (value_range_t *vr,
       extract_range_from_multiplicative_op_1 (vr, code, &vr0, &vr1);
       return;
     }
+  else if (code == SEXT_EXPR)
+    {
+      gcc_assert (range_int_cst_p (&vr1));
+      unsigned int prec = tree_to_uhwi (vr1.min);
+      type = vr0.type;
+      wide_int tmin, tmax;
+      wide_int type_min, type_max;
+      wide_int may_be_nonzero, must_be_nonzero;
+      wide_int mask = wi::shwi (((1 << (prec - 1)) - 1),
+				TYPE_PRECISION (TREE_TYPE (vr0.max)));
+
+      gcc_assert (!TYPE_UNSIGNED (expr_type));
+      type_min = wi::shwi (1 << (prec - 1),
+			   TYPE_PRECISION (TREE_TYPE (vr0.min)));
+      type_max = wi::shwi (((1 << (prec - 1)) - 1),
+			   TYPE_PRECISION (TREE_TYPE (vr0.max)));
+      if (zero_nonzero_bits_from_vr (expr_type, &vr0,
+				     &may_be_nonzero,
+				     &must_be_nonzero))
+	{
+	  HOST_WIDE_INT int_may_be_nonzero = may_be_nonzero.to_uhwi ();
+	  HOST_WIDE_INT int_must_be_nonzero = must_be_nonzero.to_uhwi ();
+
+	  if (int_must_be_nonzero & (1 << (prec - 1)))
+	    {
+	      /* If to-be-extended sign bit is one.  */
+	      tmin = type_min;
+	      tmax = may_be_nonzero & mask;
+	    }
+	  else if ((int_may_be_nonzero & (1 << (prec - 1))) == 0)
+	    {
+	      /* If to-be-extended sign bit is zero.  */
+	      tmin = must_be_nonzero & mask;
+	      tmax = may_be_nonzero & mask;
+	    }
+	  else
+	    {
+	      tmin = type_min;
+	      tmax = type_max;
+	    }
+	}
+      else
+	{
+	  tmin = type_min;
+	  tmax = type_max;
+	}
+      tmin = wi::sext (tmin, prec);
+      tmax = wi::sext (tmax, prec);
+      min = wide_int_to_tree (expr_type, tmin);
+      max = wide_int_to_tree (expr_type, tmax);
+    }
   else if (code == RSHIFT_EXPR
 	   || code == LSHIFT_EXPR)
     {
@@ -9279,6 +9331,30 @@ simplify_bit_ops_using_ranges (gimple_stmt_iterator *gsi, gimple stmt)
 	  break;
 	}
       break;
+    case SEXT_EXPR:
+	{
+	  gcc_assert (is_gimple_min_invariant (op1));
+	  unsigned int prec = tree_to_uhwi (op1);
+	  wide_int mask;
+	  HOST_WIDE_INT may_be_nonzero = may_be_nonzero0.to_uhwi ();
+	  HOST_WIDE_INT must_be_nonzero = must_be_nonzero0.to_uhwi ();
+	  mask = wi::shwi (((1 << (prec - 1)) - 1),
+			   TYPE_PRECISION (TREE_TYPE (vr0.max)));
+	  mask = wi::bit_not (mask);
+	  if (must_be_nonzero & (1 << (prec - 1)))
+	    {
+	      /* If to-be-extended sign bit is one.  */
+	      if (wi::bit_and (must_be_nonzero0, mask) == mask)
+		op = op0;
+	    }
+	  else if ((may_be_nonzero & (1 << (prec - 1))) == 0)
+	    {
+	      /* If to-be-extended sign bit is zero.  */
+	      if (wi::bit_and (may_be_nonzero0, mask) == 0)
+		op = op0;
+	    }
+	}
+      break;
     default:
       gcc_unreachable ();
     }
@@ -9980,6 +10056,7 @@ simplify_stmt_using_ranges (gimple_stmt_iterator *gsi)
 
 	case BIT_AND_EXPR:
 	case BIT_IOR_EXPR:
+	case SEXT_EXPR:
 	  /* Optimize away BIT_AND_EXPR and BIT_IOR_EXPR
 	     if all the bits being cleared are already cleared or
 	     all the bits being set are already set.  */
diff --git a/gcc/tree.def b/gcc/tree.def
index 56580af..d98c999 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -752,6 +752,10 @@ DEFTREECODE (BIT_XOR_EXPR, "bit_xor_expr", tcc_binary, 2)
 DEFTREECODE (BIT_AND_EXPR, "bit_and_expr", tcc_binary, 2)
 DEFTREECODE (BIT_NOT_EXPR, "bit_not_expr", tcc_unary, 1)
 
+/*  Sign-extend operation. It will sign extend first operand from
+ the sign bit specified by the second operand. */
+DEFTREECODE (SEXT_EXPR, "sext_expr", tcc_binary, 2)
+
 /* ANDIF and ORIF allow the second operand not to be computed if the
    value of the expression is determined from the first operand.  AND,
    OR, and XOR always compute the second operand whether its value is
gcc/ChangeLog:

2015-07-05  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* Makefile.in: Add gimple-ssa-type-promote.o.
	* cfgexpand.c (expand_debug_expr): Handle SEXT_EXPR.
	* common.opt: New option -ftree-type-promote.
	* expr.c (expand_expr_real_2): Handle SEXT_EXPR.
	* fold-const.c (int_const_binop_1):
	* gimple-ssa-type-promote.c: New file.
	* passes.def: Define new pass_type_promote.
	* timevar.def: Define new TV_TREE_TYPE_PROMOTE.
	* tree-cfg.c (verify_gimple_assign_binary): Handle SEXT_EXPR.
	* tree-inline.c (estimate_operator_cost):
	* tree-pass.h (make_pass_type_promote): New.
	* tree-pretty-print.c (dump_generic_node): Handle SEXT_EXPR.
	(op_symbol_code): Likewise.
	* tree-vrp.c (extract_range_from_binary_expr_1): Likewise.
	(simplify_bit_ops_using_ranges): Likewise.
	* tree.def: Define new SEXT_EXPR.

Comments

Richard Biener Aug. 5, 2015, 9:10 a.m. UTC | #1
On Wed, Aug 5, 2015 at 2:12 AM, kugan <kugan.vivekanandarajah@linaro.org> wrote:
>
>> You indeed need to use CONVERT_EXPR here, maybe you can elaborate
>> on the optimization issues.
>>
>>> 2. for inline asm (a reduced test case that might not make much as a
>>> stand alone test-case, but I ran into similar cases with valid
>>> programmes)
>>>
>>> ;; Function fn1 (fn1, funcdef_no=0, decl_uid=4220, cgraph_uid=0,
>>> symbol_order=0)
>>>
>>> fn1 (short int p1)
>>> {
>>>    <bb 2>:
>>>    __asm__("" : "=r" p1_2 : "0" p1_1(D));
>>>    return;
>>>
>>> }
>>>
>>>
>>> I am generating something like the following which ICEs. What is the
>>> expected out?
>>>
>>> ;; Function fn1 (fn1, funcdef_no=0, decl_uid=4220, cgraph_uid=0,
>>> symbol_order=0)
>>>
>>> fn1 (short int p1)
>>> {
>>>    int _1;
>>>    int _2;
>>>    short int _5;
>>>
>>>    <bb 2>:
>>>    _1 = (int) p1_4(D);
>>>    _5 = (short int) _1;
>>>    __asm__("" : "=r" p1_6 : "0" _5);
>>>    _2 = (int) p1_6;
>>>    return;
>>>
>>> }
>>
>>
>> Parameters are indeed "interesting" to handle ;)  As we now see on ARM
>> the incoming parameter (the default def) and later assignments to it
>> can require different promotions (well, different extensions for ARM).
>>
>> The only sensible way to deal with promoting parameters is to
>> promote them by changing the function signature.  Thus reflect the
>> targets ABI for parameters in the GIMPLE representation (which
>> includes TYPE_ARG_TYPES and DECL_ARGUMENTS).
>> IMHO we should do this during gimplification of parameters / call
>> arguments already.
>>
>> So for your example you'd end up with
>>
>> fn1 (int p1)
>> {
>>    __asm__("" : "=r" p1_6 : "0" p1_4(D));
>>    return;
>> }
>>
>> that is, promotions also apply to asm inputs/outputs (no?)
>
>
>
> Thanks for the review and answers. For the time being, I am handling
> gimple_asm as one that has to be handled in original type. I Will look into
> improving it after getting the basic framework right.

Yeah, that's always a possibility.  I also see from the dumps that we probably
want to promote function arguments and results on GIMPLE as well.  Possibly
very early during gimplification or as an early IPA pass (as it needs to adjust
the IL for calls as well, exposing ABI required promotions / extensions).

> As it is, attached patch bootstraps on x86_64-linux-gnu, arm-linux-gnu and
> aarch64-linux-gnu. There are few regressions to look into (Please see
> below).
>
> There are cases it is working well. There are cases where it can be
> improved. I am attaching couple test cases (and their results). I am seeing
> some BIT_AND_EXPR which are inserted by promotion are not being optimized
> when they are redundant. This is especially the case when I invalidate the
> VRP range into from VRP1 during the type promotion. I am looking into it.
>
> Please note that attached patch still needs to address:
> * Adding gimple_debug stmts.
> * Address review comment for expr.c handling SEXT_EXPR.
> * Address regression failures
>
> Based on the feedback, I will address the above and split the patch into
> logical patch set for easy detailed review.
>
> Here are the outputs for the testcases.
>
> --- c5.c.142t.veclower21        2015-08-05 08:50:11.367135339 +1000
> +++ c5.c.143t.promotion 2015-08-05 08:50:11.367135339 +1000
> @@ -1,34 +1,45 @@
>
>  ;; Function unPack (unPack, funcdef_no=0, decl_uid=4145, cgraph_uid=0,
> symbol_order=0)
>
>  unPack (unsigned char c)
>  {
> -  short int _1;
> -  unsigned short _4;
> -  unsigned short _5;
> -  short int _6;
> -  short int _7;
> +  int _1;
> +  unsigned int _2;
> +  unsigned int _3;
> +  unsigned int _4;
> +  unsigned int _5;
> +  int _6;
> +  int _7;
> +  unsigned int _9;
> +  int _11;
> +  int _12;
> +  short int _13;
>
>    <bb 2>:
> -  c_3 = c_2(D) & 15;
> -  if (c_3 > 7)
> +  _2 = (unsigned int) c_10(D);
> +  _3 = _2 & 15;
> +  _9 = _3 & 255;
> +  if (_9 > 7)
>      goto <bb 3>;
>    else
>      goto <bb 4>;
>
>    <bb 3>:
> -  _4 = (unsigned short) c_3;
> -  _5 = _4 + 65531;
> -  _6 = (short int) _5;
> +  _4 = _3 & 65535;
> +  _5 = _4 + 4294967291;
> +  _11 = (int) _5;
> +  _6 = (_11) sext from bit (16);

Ok, so in GIMPLE we still have sign-changing conversions.  Another
thing we might want to lower at some stage ... ;)

>    goto <bb 5>;
>
>    <bb 4>:
> -  _7 = (short int) c_3;
> +  _12 = (int) _3;
> +  _7 = (_12) sext from bit (16);
>
>    <bb 5>:
>    # _1 = PHI <_6(3), _7(4)>
> -  return _1;
> +  _13 = (short int) _1;
> +  return _13;
>
>  }

Overall this looks like what I'd have expected - also pointing out the
missing argument/return value promotion.

>
> --- c5.org.s    2015-08-05 08:51:44.619133892 +1000
> +++ c5.new.s    2015-08-05 08:51:29.643134124 +1000
> @@ -16,16 +16,14 @@
>         .syntax divided
>         .arm
>         .type   unPack, %function
>  unPack:
>         @ args = 0, pretend = 0, frame = 0
>         @ frame_needed = 0, uses_anonymous_args = 0
>         @ link register save eliminated.
>         and     r0, r0, #15
>         cmp     r0, #7
>         subhi   r0, r0, #5
> -       uxth    r0, r0
> -       sxth    r0, r0

Nice.

>         bx      lr
>         .size   unPack, .-unPack
>         .ident  "GCC: (GNU) 6.0.0 20150724 (experimental)"
>         .section        .note.GNU-stack,"",%progbits
> --- crc.c.142t.veclower21       2015-08-05 08:52:43.811132974 +1000
> +++ crc.c.143t.promotion        2015-08-05 08:52:43.811132974 +1000
> @@ -1,52 +1,78 @@
>
>  ;; Function crc2 (crc2, funcdef_no=0, decl_uid=4146, cgraph_uid=0,
> symbol_order=0)
>
>  crc2 (short unsigned int crc, unsigned char data)
>  {
>    unsigned char carry;
>    unsigned char x16;
>    unsigned char i;
> -  unsigned char ivtmp_5;
> -  unsigned char _9;
> -  unsigned char _10;
> -  unsigned char ivtmp_18;
> +  unsigned int _2;
> +  unsigned int _3;
> +  unsigned int _5;
> +  unsigned int _7;
> +  unsigned int _8;
> +  unsigned int _9;
> +  unsigned int _10;
> +  unsigned int _11;
> +  unsigned int _12;
> +  unsigned int _13;
> +  unsigned int _15;
> +  unsigned int _16;
> +  unsigned int _18;
> +  unsigned int _19;
> +  unsigned int _21;
> +  unsigned int _22;
> +  unsigned int _24;
> +  short unsigned int _25;
> +  unsigned int _26;
> +  unsigned int _27;
> +  unsigned int _28;
> +  unsigned int _29;
>
>    <bb 2>:
> +  _8 = (unsigned int) data_4(D);
> +  _7 = (unsigned int) crc_30(D);
>
>    <bb 3>:
> -  # crc_28 = PHI <crc_2(5), crc_7(D)(2)>
> -  # data_29 = PHI <data_12(5), data_8(D)(2)>
> -  # ivtmp_18 = PHI <ivtmp_5(5), 8(2)>
> -  _9 = (unsigned char) crc_28;
> -  _10 = _9 ^ data_29;
> -  x16_11 = _10 & 1;
> -  data_12 = data_29 >> 1;
> -  if (x16_11 == 1)
> +  # _28 = PHI <_2(5), _7(2)>
> +  # _29 = PHI <_12(5), _8(2)>
> +  # _18 = PHI <_5(5), 8(2)>
> +  _9 = _28 & 255;
> +  _10 = _9 ^ _29;
> +  _11 = _10 & 1;
> +  _3 = _29 & 255;
> +  _12 = _3 >> 1;
> +  _27 = _11 & 255;
> +  if (_27 == 1)
>      goto <bb 4>;
>    else
>      goto <bb 7>;
>
>    <bb 4>:
> -  crc_13 = crc_28 ^ 16386;
> -  crc_24 = crc_13 >> 1;
> -  crc_15 = crc_24 | 32768;
> +  _13 = _28 ^ 16386;
> +  _26 = _13 & 65535;
> +  _24 = _26 >> 1;
> +  _15 = _24 | 4294934528;
>
>    <bb 5>:
> -  # crc_2 = PHI <crc_15(4), crc_21(7)>
> -  ivtmp_5 = ivtmp_18 - 1;
> -  if (ivtmp_5 != 0)
> +  # _2 = PHI <_15(4), _21(7)>
> +  _5 = _18 - 1;
> +  _22 = _5 & 255;
> +  if (_22 != 0)
>      goto <bb 3>;
>    else
>      goto <bb 6>;
>
>    <bb 6>:
> -  # crc_19 = PHI <crc_2(5)>
> -  return crc_19;
> +  # _19 = PHI <_2(5)>
> +  _25 = (short unsigned int) _19;
> +  return _25;
>
>    <bb 7>:
> -  crc_21 = crc_28 >> 1;
> +  _16 = _28 & 65535;
> +  _21 = _16 >> 1;
>    goto <bb 5>;
>
>  }
>
>
> --- crc.org.s   2015-08-05 08:54:17.491131520 +1000
> +++ crc.new.s   2015-08-05 08:53:12.183132534 +1000
> @@ -15,27 +15,28 @@
>         .global crc2
>         .syntax divided
>         .arm
>         .type   crc2, %function
>  crc2:
>         @ args = 0, pretend = 0, frame = 0
>         @ frame_needed = 0, uses_anonymous_args = 0
>         mov     ip, #32768
>         movt    ip, 65535
>         str     lr, [sp, #-4]!
> -       mov     r3, #8
> +       mov     r2, #8
>         movw    lr, #16386
>  .L3:
> -       eor     r2, r1, r0
> -       sub     r3, r3, #1
> -       tst     r2, #1
> +       uxtb    r3, r0
> +       eor     r3, r3, r1
>         mov     r1, r1, lsr #1
> +       tst     r3, #1
>         eorne   r0, r0, lr
> -       moveq   r0, r0, lsr #1
> -       orrne   r0, ip, r0, lsr #1
> -       uxthne  r0, r0
> -       ands    r3, r3, #255
> +       ubfxeq  r0, r0, #1, #15
> +       ubfxne  r0, r0, #1, #15
> +       orrne   r0, r0, ip
> +       subs    r2, r2, #1
>         bne     .L3
> +       uxth    r0, r0
>         ldr     pc, [sp], #4
>         .size   crc2, .-crc2
>         .ident  "GCC: (GNU) 6.0.0 20150724 (experimental)"
>         .section        .note.GNU-stack,"",%progbits

Can't really dechipher this changes...

>
>
> Testsuite regression for x86_64-unknown-linux-gnu:
> Tests that now fail, but worked before:
> gfortran.dg/graphite/pr42393-1.f90   -O  (test for excess errors)

I see this on pristine trunk as well.

>
> Testsuite regression for  arm-linux-gnu:
> Tests that now fail, but worked before:
> arm-sim: gcc.dg/fixed-point/convert-sat.c execution test
> arm-sim: gcc.dg/tree-ssa/20030729-1.c scan-tree-dump-times dom2 "\\(unsigned
> int\\)" 0
> arm-sim: gcc.dg/tree-ssa/pr54245.c scan-tree-dump-times slsr "Inserting
> initializer" 0
> arm-sim: gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-not optimized
> "\\(int\\)"
> arm-sim: gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-times optimized
> "\\(unsigned char\\)" 8
> arm-sim: gcc.target/arm/mla-2.c scan-assembler smlalbb
> arm-sim: gcc.target/arm/unsigned-extend-2.c scan-assembler ands
> arm-sim: gcc.target/arm/wmul-1.c scan-assembler-times smlabb 2
> arm-sim: gcc.target/arm/wmul-2.c scan-assembler-times smulbb 1
> arm-sim: gcc.target/arm/wmul-3.c scan-assembler-times smulbb 2
> arm-sim: gcc.target/arm/wmul-9.c scan-assembler smlalbb
> arm-sim: gfortran.dg/graphite/pr42393-1.f90   -O  (test for excess errors)
>
> Tests that now work, but didn't before:
> arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile
> "Read tp_first_run: 0" 2
> arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile
> "Read tp_first_run: 2" 1
> arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile
> "Read tp_first_run: 3" 1
> arm-sim: gcc.target/arm/builtin-bswap-1.c scan-assembler-times rev16ne\\t 1
> arm-sim: gcc.target/arm/builtin-bswap-1.c scan-assembler-times revshne\\t 1
> arm-sim: gcc.target/arm/smlaltb-1.c scan-assembler smlaltb\\t
> arm-sim: gcc.target/arm/smlaltt-1.c scan-assembler smlaltt\\t
>
>
> Testsuite regression for  aarch64-linux-gnu:
> Tests that now fail, but worked before:
> c-c++-common/torture/vector-compare-1.c   -O3 -g  (test for excess errors)
> c-c++-common/torture/vector-compare-1.c   -O3 -g  (test for excess errors)
> gcc.dg/tree-ssa/20030729-1.c scan-tree-dump-times dom2 "\\(unsigned int\\)"
> 0
> gcc.dg/tree-ssa/pr54245.c scan-tree-dump-times slsr "Inserting initializer"
> 0
> gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-not optimized "\\(int\\)"
> gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-times optimized "\\(unsigned
> char\\)" 8

tree-dump scan differences are expected, of course.  Others need to be
investigated.

Thanks for continuing to work on this!  I hope to have a closer look
at the updated patch later.

Thanks,
Richard.


> Thanks,
> Kugan
diff mbox

Patch

--- c5.c.142t.veclower21	2015-08-05 08:50:11.367135339 +1000
+++ c5.c.143t.promotion	2015-08-05 08:50:11.367135339 +1000
@@ -1,34 +1,45 @@ 

  ;; Function unPack (unPack, funcdef_no=0, decl_uid=4145, cgraph_uid=0, 
symbol_order=0)

  unPack (unsigned char c)
  {
-  short int _1;
-  unsigned short _4;
-  unsigned short _5;
-  short int _6;
-  short int _7;
+  int _1;
+  unsigned int _2;
+  unsigned int _3;
+  unsigned int _4;
+  unsigned int _5;
+  int _6;
+  int _7;
+  unsigned int _9;
+  int _11;
+  int _12;
+  short int _13;

    <bb 2>:
-  c_3 = c_2(D) & 15;
-  if (c_3 > 7)
+  _2 = (unsigned int) c_10(D);
+  _3 = _2 & 15;
+  _9 = _3 & 255;
+  if (_9 > 7)
      goto <bb 3>;
    else
      goto <bb 4>;

    <bb 3>:
-  _4 = (unsigned short) c_3;
-  _5 = _4 + 65531;
-  _6 = (short int) _5;
+  _4 = _3 & 65535;
+  _5 = _4 + 4294967291;
+  _11 = (int) _5;
+  _6 = (_11) sext from bit (16);
    goto <bb 5>;

    <bb 4>:
-  _7 = (short int) c_3;
+  _12 = (int) _3;
+  _7 = (_12) sext from bit (16);

    <bb 5>:
    # _1 = PHI <_6(3), _7(4)>
-  return _1;
+  _13 = (short int) _1;
+  return _13;

  }


--- c5.org.s	2015-08-05 08:51:44.619133892 +1000
+++ c5.new.s	2015-08-05 08:51:29.643134124 +1000
@@ -16,16 +16,14 @@ 
  	.syntax divided
  	.arm
  	.type	unPack, %function
  unPack:
  	@ args = 0, pretend = 0, frame = 0
  	@ frame_needed = 0, uses_anonymous_args = 0
  	@ link register save eliminated.
  	and	r0, r0, #15
  	cmp	r0, #7
  	subhi	r0, r0, #5
-	uxth	r0, r0
-	sxth	r0, r0
  	bx	lr
  	.size	unPack, .-unPack
  	.ident	"GCC: (GNU) 6.0.0 20150724 (experimental)"
  	.section	.note.GNU-stack,"",%progbits
--- crc.c.142t.veclower21	2015-08-05 08:52:43.811132974 +1000
+++ crc.c.143t.promotion	2015-08-05 08:52:43.811132974 +1000
@@ -1,52 +1,78 @@ 

  ;; Function crc2 (crc2, funcdef_no=0, decl_uid=4146, cgraph_uid=0, 
symbol_order=0)

  crc2 (short unsigned int crc, unsigned char data)
  {
    unsigned char carry;
    unsigned char x16;
    unsigned char i;
-  unsigned char ivtmp_5;
-  unsigned char _9;
-  unsigned char _10;
-  unsigned char ivtmp_18;
+  unsigned int _2;
+  unsigned int _3;
+  unsigned int _5;
+  unsigned int _7;
+  unsigned int _8;
+  unsigned int _9;
+  unsigned int _10;
+  unsigned int _11;
+  unsigned int _12;
+  unsigned int _13;
+  unsigned int _15;
+  unsigned int _16;
+  unsigned int _18;
+  unsigned int _19;
+  unsigned int _21;
+  unsigned int _22;
+  unsigned int _24;
+  short unsigned int _25;
+  unsigned int _26;
+  unsigned int _27;
+  unsigned int _28;
+  unsigned int _29;

    <bb 2>:
+  _8 = (unsigned int) data_4(D);
+  _7 = (unsigned int) crc_30(D);

    <bb 3>:
-  # crc_28 = PHI <crc_2(5), crc_7(D)(2)>
-  # data_29 = PHI <data_12(5), data_8(D)(2)>
-  # ivtmp_18 = PHI <ivtmp_5(5), 8(2)>
-  _9 = (unsigned char) crc_28;
-  _10 = _9 ^ data_29;
-  x16_11 = _10 & 1;
-  data_12 = data_29 >> 1;
-  if (x16_11 == 1)
+  # _28 = PHI <_2(5), _7(2)>
+  # _29 = PHI <_12(5), _8(2)>
+  # _18 = PHI <_5(5), 8(2)>
+  _9 = _28 & 255;
+  _10 = _9 ^ _29;
+  _11 = _10 & 1;
+  _3 = _29 & 255;
+  _12 = _3 >> 1;
+  _27 = _11 & 255;
+  if (_27 == 1)
      goto <bb 4>;
    else
      goto <bb 7>;

    <bb 4>:
-  crc_13 = crc_28 ^ 16386;
-  crc_24 = crc_13 >> 1;
-  crc_15 = crc_24 | 32768;
+  _13 = _28 ^ 16386;
+  _26 = _13 & 65535;
+  _24 = _26 >> 1;
+  _15 = _24 | 4294934528;

    <bb 5>:
-  # crc_2 = PHI <crc_15(4), crc_21(7)>
-  ivtmp_5 = ivtmp_18 - 1;
-  if (ivtmp_5 != 0)
+  # _2 = PHI <_15(4), _21(7)>
+  _5 = _18 - 1;
+  _22 = _5 & 255;
+  if (_22 != 0)
      goto <bb 3>;
    else
      goto <bb 6>;

    <bb 6>:
-  # crc_19 = PHI <crc_2(5)>
-  return crc_19;
+  # _19 = PHI <_2(5)>
+  _25 = (short unsigned int) _19;
+  return _25;

    <bb 7>:
-  crc_21 = crc_28 >> 1;
+  _16 = _28 & 65535;
+  _21 = _16 >> 1;
    goto <bb 5>;

  }


--- crc.org.s	2015-08-05 08:54:17.491131520 +1000
+++ crc.new.s	2015-08-05 08:53:12.183132534 +1000
@@ -15,27 +15,28 @@ 
  	.global	crc2
  	.syntax divided
  	.arm
  	.type	crc2, %function
  crc2:
  	@ args = 0, pretend = 0, frame = 0
  	@ frame_needed = 0, uses_anonymous_args = 0
  	mov	ip, #32768
  	movt	ip, 65535
  	str	lr, [sp, #-4]!
-	mov	r3, #8
+	mov	r2, #8
  	movw	lr, #16386
  .L3:
-	eor	r2, r1, r0
-	sub	r3, r3, #1
-	tst	r2, #1
+	uxtb	r3, r0
+	eor	r3, r3, r1
  	mov	r1, r1, lsr #1
+	tst	r3, #1
  	eorne	r0, r0, lr
-	moveq	r0, r0, lsr #1
-	orrne	r0, ip, r0, lsr #1
-	uxthne	r0, r0
-	ands	r3, r3, #255
+	ubfxeq	r0, r0, #1, #15
+	ubfxne	r0, r0, #1, #15
+	orrne	r0, r0, ip
+	subs	r2, r2, #1
  	bne	.L3
+	uxth	r0, r0
  	ldr	pc, [sp], #4
  	.size	crc2, .-crc2
  	.ident	"GCC: (GNU) 6.0.0 20150724 (experimental)"
  	.section	.note.GNU-stack,"",%progbits