diff mbox

RFC [1/2] divmod transform

Message ID CAAgBjM=GrLmtmr7LpkzmTXFauoCuuAqJzYQK4ExxTV+xm4_qsg@mail.gmail.com
State New
Headers show

Commit Message

Prathamesh Kulkarni May 23, 2016, 8:58 a.m. UTC
Hi,
I have updated my patch for divmod (attached), which was originally
based on Kugan's patch.
The patch transforms stmts with code TRUNC_DIV_EXPR and TRUNC_MOD_EXPR
having same operands to divmod representation, so we can cse computation of mod.

t1 = a TRUNC_DIV_EXPR b;
t2 = a TRUNC_MOD_EXPR b
is transformed to:
complex_tmp = DIVMOD (a, b);
t1 = REALPART_EXPR (complex_tmp);
t2 = IMAGPART_EXPR (complex_tmp);

* New hook divmod_expand_libfunc
The rationale for introducing the hook is that different targets have
incompatible calling conventions for divmod libfunc.
Currently three ports define divmod libfunc: c6x, spu and arm.
c6x and spu follow the convention of libgcc2.c:__udivmoddi4:
return quotient and store remainder in argument passed as pointer,
while the arm version takes two arguments and returns both
quotient and remainder having mode double the size of the operand mode.
The port should hence override the hook expand_divmod_libfunc
to generate call to target-specific divmod.
Ports should define this hook if:
a) The port does not have divmod or div insn for the given mode.
b) The port defines divmod libfunc for the given mode.
The default hook default_expand_divmod_libfunc() generates call
to libgcc2.c:__udivmoddi4 provided the operands are unsigned and
are of DImode.

Patch passes bootstrap+test on x86_64-unknown-linux-gnu and
cross-tested on arm*-*-*.
Bootstrap+test in progress on arm-linux-gnueabihf.
Does this patch look OK ?

Thanks,
Prathamesh
diff mbox

Patch

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 8c7f2a1..111f19f 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6963,6 +6963,12 @@  This is firstly introduced on ARM/AArch64 targets, please refer to
 the hook implementation for how different fusion types are supported.
 @end deftypefn
 
+@deftypefn {Target Hook} void TARGET_EXPAND_DIVMOD_LIBFUNC (bool @var{unsignedp}, machine_mode @var{mode}, @var{rtx}, @var{rtx}, rtx *@var{quot}, rtx *@var{rem})
+Define this hook if the port does not have hardware div and divmod insn for
+the given mode but has divmod libfunc, which is incompatible
+with libgcc2.c:__udivmoddi4
+@end deftypefn
+
 @node Sections
 @section Dividing the Output into Sections (Texts, Data, @dots{})
 @c the above section title is WAY too long.  maybe cut the part between
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f963a58..2c9a800 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4848,6 +4848,8 @@  them: try the first ones in this list first.
 
 @hook TARGET_SCHED_FUSION_PRIORITY
 
+@hook TARGET_EXPAND_DIVMOD_LIBFUNC
+
 @node Sections
 @section Dividing the Output into Sections (Texts, Data, @dots{})
 @c the above section title is WAY too long.  maybe cut the part between
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index c867ddc..0cb59f7 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -2276,6 +2276,48 @@  multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 
+/* Expand DIVMOD() using:
+ a) optab handler for udivmod/sdivmod if it is available.
+ b) If optab_handler doesn't exist, Generate call to
+    optab_libfunc for udivmod/sdivmod.  */
+
+static void 
+expand_DIVMOD (internal_fn, gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  tree arg0 = gimple_call_arg (stmt, 0);
+  tree arg1 = gimple_call_arg (stmt, 1);
+
+  gcc_assert (TREE_CODE (TREE_TYPE (lhs)) == COMPLEX_TYPE);
+  tree type = TREE_TYPE (TREE_TYPE (lhs));
+  machine_mode mode = TYPE_MODE (type);
+  bool unsignedp = TYPE_UNSIGNED (type);
+  optab tab = (unsignedp) ? udivmod_optab : sdivmod_optab;
+
+  rtx op0 = expand_normal (arg0);
+  rtx op1 = expand_normal (arg1);
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+
+  rtx quotient, remainder;
+
+  /* Check if optab handler exists for [u]divmod.  */
+  if (optab_handler (tab, mode) != CODE_FOR_nothing)
+    {
+      quotient = gen_reg_rtx (mode);
+      remainder = gen_reg_rtx (mode);
+      expand_twoval_binop (tab, op0, op1, quotient, remainder, unsignedp);
+    }
+  else
+    targetm.expand_divmod_libfunc (unsignedp, mode, op0, op1,
+				   &quotient, &remainder);
+
+  /* Wrap the return value (quotient, remainder) within COMPLEX_EXPR.  */
+  expand_expr (build2 (COMPLEX_EXPR, TREE_TYPE (lhs),
+		       make_tree (TREE_TYPE (arg0), quotient),
+		       make_tree (TREE_TYPE (arg1), remainder)),
+	       target, VOIDmode, EXPAND_NORMAL);
+}
+
 /* Return true if FN is supported for the types in TYPES when the
    optimization type is OPT_TYPE.  The types are those associated with
    the "type0" and "type1" fields of FN's direct_internal_fn_info
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index e729d85..56a80f1 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -194,6 +194,9 @@  DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
 
+/* Divmod function.  */
+DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL)
+
 #undef DEF_INTERNAL_INT_FN
 #undef DEF_INTERNAL_FLT_FN
 #undef DEF_INTERNAL_OPTAB_FN
diff --git a/gcc/target.def b/gcc/target.def
index 6392e73..4496f9a 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4948,6 +4948,16 @@  Normally, this is not needed.",
  bool, (const_tree field, machine_mode mode),
  default_member_type_forces_blk)
 
+/* See tree-ssa-math-opts.c:divmod_candidate_p for conditions that gate
+   the divmod transform.  */
+DEFHOOK
+(expand_divmod_libfunc,
+ "Define this hook if the port does not have hardware div and divmod insn for\n\
+the given mode but has divmod libfunc, which is incompatible\n\
+with libgcc2.c:__udivmoddi4",
+ void, (bool unsignedp, machine_mode mode, rtx, rtx, rtx *quot, rtx *rem),
+ default_expand_divmod_libfunc)
+
 /* Return the class for a secondary reload, and fill in extra information.  */
 DEFHOOK
 (secondary_reload,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 6b4601b..e4a021a 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1965,4 +1965,31 @@  default_optab_supported_p (int, machine_mode, machine_mode, optimization_type)
   return true;
 }
 
+void
+default_expand_divmod_libfunc (bool unsignedp, machine_mode mode,
+			       rtx op0, rtx op1,
+			       rtx *quot_p, rtx *rem_p)
+{
+  gcc_assert (mode == DImode);
+  gcc_assert (unsignedp);
+
+  /* Generate call to
+     DImode __udivmoddi4 (DImode op0, DImode op1, DImode *rem).  */
+
+  rtx libfunc = optab_libfunc (udivmod_optab, DImode);
+  gcc_assert (libfunc);
+
+  rtx remainder = assign_stack_temp (DImode, GET_MODE_SIZE (DImode));
+  rtx address = XEXP (remainder, 0);
+
+  rtx quotient = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
+					  DImode, 3,
+					  op0, GET_MODE (op0),
+					  op1, GET_MODE (op1),
+					  address, GET_MODE (address));
+
+  *quot_p = quotient;
+  *rem_p = remainder;
+}
+
 #include "gt-targhooks.h"
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 7687c39..dc5e8e7 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -254,4 +254,7 @@  extern void default_setup_incoming_vararg_bounds (cumulative_args_t ca ATTRIBUTE
 extern bool default_optab_supported_p (int, machine_mode, machine_mode,
 				       optimization_type);
 
+extern void default_expand_divmod_libfunc (bool, machine_mode,
+					   rtx, rtx, rtx *, rtx *);
+
 #endif /* GCC_TARGHOOKS_H */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 81688cd..a04d6d3 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -112,6 +112,9 @@  along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
+#include "optabs-libfuncs.h"
+#include "tree-eh.h"
+#include "targhooks.h"
 
 /* This structure represents one basic block that either computes a
    division, or is a common dominator for basic block that compute a
@@ -184,6 +187,9 @@  static struct
 
   /* Number of fp fused multiply-add ops inserted.  */
   int fmas_inserted;
+
+  /* Number of divmod calls inserted.  */
+  int divmod_calls_inserted;
 } widen_mul_stats;
 
 /* The instance of "struct occurrence" representing the highest
@@ -3784,6 +3790,187 @@  match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
   return true;
 }
 
+/* Return true if target has support for divmod.  */
+
+static bool
+target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode, bool unsignedp)
+{
+  /* If target supports hardware divmod insn, use it for divmod.  */
+  if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
+    return true;
+
+  /* Check if libfunc for divmod is available.  */
+  if (optab_libfunc (divmod_optab, mode) != NULL_RTX) 
+    {
+      /* If target supports hardware insn, then we don't
+	 want to use divmod libfunc.  */
+      if (optab_handler (div_optab, mode) != CODE_FOR_nothing)
+	return false;
+
+      /* If target overrides expand_divmod_libfunc hook
+	 then perform divmod by generating call to the target-specifc divmod libfunc.  */
+      if (targetm.expand_divmod_libfunc != default_expand_divmod_libfunc)
+	return true;
+
+      /* Fall back to using libgcc2.c:__udivmoddi4.  */
+      return (mode == DImode && unsignedp);
+    }
+
+  return false;
+}
+
+/* Check if stmt is candidate for divmod transform.  */
+
+static bool
+divmod_candidate_p (gassign *stmt)
+{
+  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
+  enum machine_mode mode = TYPE_MODE (type);
+  optab divmod_optab, div_optab;
+
+  if (TYPE_UNSIGNED (type))
+    {
+      divmod_optab = udivmod_optab;
+      div_optab = udiv_optab;
+    }
+  else
+    {
+      divmod_optab = sdivmod_optab;
+      div_optab = sdiv_optab;
+    }
+
+  if (!target_supports_divmod_p (divmod_optab, div_optab,
+				 mode, TYPE_UNSIGNED (type)))
+    return false;
+
+  tree op1 = gimple_assign_rhs1 (stmt);
+  tree op2 = gimple_assign_rhs2 (stmt);
+
+  /* Disable the transform if either is a constant, since division-by-constant
+     may have specialized expansion.  */
+  if (TREE_CONSTANT (op1) || TREE_CONSTANT (op2))
+    return false;
+
+  if (TYPE_OVERFLOW_TRAPS (type))
+    return false;
+
+  return true;
+}
+
+/* This function looks for:
+   t1 = a TRUNC_DIV_EXPR b;
+   t2 = a TRUNC_MOD_EXPR b;
+   and transforms it to the following sequence:
+   complex_tmp = DIVMOD (a, b);
+   t1 = REALPART_EXPR(a);
+   t2 = IMAGPART_EXPR(b);
+   For conditions enabling the transform see divmod_candidate_p().
+
+   The pass works in two phases:
+   1) Walk through all immediate uses of stmt's operand and find a
+      TRUNC_DIV_EXPR with matching operands and if such a stmt is found add
+      it to stmts vector.
+   2) Insert DIVMOD call before first div/mod stmt in top_bb (basic block that
+      dominates other div/mod stmts with same operands) and update entries in
+      stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
+      IMAGPART_EXPR for mod).  */
+
+static bool
+convert_to_divmod (gassign *stmt)
+{
+  if (!divmod_candidate_p (stmt))
+    return false;
+
+  tree op1 = gimple_assign_rhs1 (stmt);
+  tree op2 = gimple_assign_rhs2 (stmt);
+
+  vec<gimple *> stmts = vNULL;
+  stmts.safe_push (stmt);
+
+  imm_use_iterator use_iter;
+  gimple *use_stmt;
+  gimple *top_stmt = stmt;
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
+    {
+      /* Check if use_stmt is TRUNC_DIV_EXPR with same operands as stmt.  */
+      if (is_gimple_assign (use_stmt)
+	  && gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
+	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
+	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
+	{
+	  basic_block bb = gimple_bb (use_stmt);
+	  basic_block top_bb = gimple_bb (top_stmt);
+
+	  if (top_bb == bb)
+	    {
+	      stmts.safe_push (use_stmt);
+	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
+		top_stmt = use_stmt;
+	    }
+	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
+	    {
+	      stmts.safe_push (use_stmt);
+	      top_stmt = use_stmt;
+	    }
+	  else if (dominated_by_p (CDI_DOMINATORS, bb, top_bb))
+	    stmts.safe_push (use_stmt);
+	}
+    }
+
+  /* No statements added to stmts vector.  */
+  if (stmts.length () == 1)
+    return false;
+
+  /* Create libcall to internal fn DIVMOD:
+     divmod_tmp = DIVMOD (op1, op2).  */
+
+  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
+  tree res = make_temp_ssa_name (
+		build_complex_type (TREE_TYPE (op1)),
+		call_stmt, "divmod_tmp");
+  gimple_call_set_lhs (call_stmt, res);
+
+  /* Insert the call before top_stmt.  */
+  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
+  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
+
+  widen_mul_stats.divmod_calls_inserted++;		
+
+  /* Update all statements in stmts.
+     if stmt is lhs = op1 TRUNC_DIV_EXPR op2, change to lhs = REALPART_EXPR<divmod_tmp>
+     if stmt is lhs = op1 TRUNC_MOD_EXPR op2, change to lhs = IMAGPART_EXPR<divmod_tmp>.  */
+
+  bool cfg_changed = false;
+  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
+    {
+      tree new_rhs;
+
+      switch (gimple_assign_rhs_code (use_stmt))
+	{
+	  case TRUNC_DIV_EXPR:
+	    new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
+	    break;
+
+	  case TRUNC_MOD_EXPR:
+	    new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op2), res);
+	    break;
+
+	  default:
+	    gcc_unreachable ();
+	}
+
+      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
+      update_stmt (use_stmt);
+
+      if (maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt))
+	cfg_changed = true;
+    }
+
+  stmts.release ();
+  return cfg_changed;
+}    
 
 /* Find integer multiplications where the operands are extended from
    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
@@ -3828,6 +4015,8 @@  pass_optimize_widening_mul::execute (function *fun)
   bool cfg_changed = false;
 
   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
+  calculate_dominance_info (CDI_DOMINATORS);
+  renumber_gimple_stmt_uids ();
 
   FOR_EACH_BB_FN (bb, fun)
     {
@@ -3861,6 +4050,10 @@  pass_optimize_widening_mul::execute (function *fun)
 		    match_uaddsub_overflow (&gsi, stmt, code);
 		  break;
 
+		case TRUNC_MOD_EXPR:
+		  cfg_changed = convert_to_divmod (as_a<gassign *> (stmt));
+		  break;
+
 		default:;
 		}
 	    }
@@ -3907,6 +4100,8 @@  pass_optimize_widening_mul::execute (function *fun)
 			    widen_mul_stats.maccs_inserted);
   statistics_counter_event (fun, "fused multiply-adds inserted",
 			    widen_mul_stats.fmas_inserted);
+  statistics_counter_event (fun, "divmod calls inserted",
+			    widen_mul_stats.divmod_calls_inserted);
 
   return cfg_changed ? TODO_cleanup_cfg : 0;
 }