@@ -207,6 +207,9 @@ static struct
/* Number of divmod calls inserted. */
int divmod_calls_inserted;
+
+ /* Number of highpart multiplication ops inserted. */
+ int highpart_mults_inserted;
} widen_mul_stats;
/* The instance of "struct occurrence" representing the highest
@@ -4548,9 +4551,129 @@ convert_to_divmod (gassign *stmt)
return true;
}
/* Process a single gimple statement STMT, which has a RSHIFT_EXPR as
   its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
   value is true iff we converted the statement.  */

static bool
convert_mult_to_highpart (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree stype = TREE_TYPE (lhs);
  tree sarg0 = gimple_assign_rhs1 (stmt);
  tree sarg1 = gimple_assign_rhs2 (stmt);

  /* The candidate is LHS = SARG0 >> SARG1: an integral shift of an
     SSA name by an INTEGER_CST, where the shifted value has no use
     other than this statement.  */
  if (TREE_CODE (stype) != INTEGER_TYPE
      || TREE_CODE (sarg1) != INTEGER_CST
      || TREE_CODE (sarg0) != SSA_NAME
      || !tree_fits_uhwi_p (sarg1)
      || !has_single_use (sarg0))
    return false;

  gimple *def = SSA_NAME_DEF_STMT (sarg0);
  if (!is_gimple_assign (def))
    return false;

  /* Look through at most one single-use conversion between the shift
     and the multiplication that feeds it.  */
  enum tree_code mcode = gimple_assign_rhs_code (def);
  if (mcode == NOP_EXPR)
    {
      tree tmp = gimple_assign_rhs1 (def);
      if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
	return false;
      def = SSA_NAME_DEF_STMT (tmp);
      if (!is_gimple_assign (def))
	return false;
      mcode = gimple_assign_rhs_code (def);
    }

  /* The shifted value must come from a WIDEN_MULT_EXPR in the same
     basic block, and the product must be an integer of the same
     precision as the shift result.  */
  if (mcode != WIDEN_MULT_EXPR
      || gimple_bb (def) != gimple_bb (stmt))
    return false;
  tree mtype = TREE_TYPE (gimple_assign_lhs (def));
  if (TREE_CODE (mtype) != INTEGER_TYPE
      || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
    return false;

  tree mop1 = gimple_assign_rhs1 (def);
  tree mop2 = gimple_assign_rhs2 (def);
  tree optype = TREE_TYPE (mop1);
  bool unsignedp = TYPE_UNSIGNED (optype);
  unsigned int prec = TYPE_PRECISION (optype);

  /* Both multiplication operands must have the identical type, whose
     precision is exactly half the product's and whose signedness
     matches the product's.  */
  if (optype != TREE_TYPE (mop2)
      || unsignedp != TYPE_UNSIGNED (mtype)
      || TYPE_PRECISION (mtype) != 2 * prec)
    return false;

  /* Only shift counts selecting (part of) the high half qualify,
     i.e. PREC <= BITS < 2*PREC.  */
  unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
  if (bits < prec || bits >= 2 * prec)
    return false;

  /* The target must implement a highpart multiply of the required
     signedness in the narrow operand mode.  */
  machine_mode mode = TYPE_MODE (optype);
  optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
  if (optab_handler (tab, mode) == CODE_FOR_nothing)
    return false;

  /* Insert HIGHPART1 = MOP1 h* MOP2 before STMT.  */
  location_t loc = gimple_location (stmt);
  tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
					   MULT_HIGHPART_EXPR, mop1, mop2);
  tree highpart2 = highpart1;
  tree ntype = optype;

  /* If the shift result's signedness differs from the operands',
     convert before any residual shift, so that shift is performed in
     a type with the signedness of the original statement's result.  */
  if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
    {
      ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
				    : signed_type_for (optype);
      highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
    }
  /* Shift counts beyond PREC require a residual shift of the
     highpart by the remaining BITS - PREC bits.  */
  if (bits > prec)
    highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
					RSHIFT_EXPR, highpart2,
					build_int_cst (ntype, bits - prec));

  /* Replace the original shift with LHS = (STYPE) HIGHPART2.  */
  gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
  gsi_replace (gsi, new_stmt, true);

  /* The above transformation often creates useless type conversions, i.e.
     when followed by a truncation, that aren't cleaned up elsewhere.  */
  gimple *use_stmt;
  imm_use_iterator use_iter;
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
    if (gimple_assign_cast_p (use_stmt))
      {
	tree utype = TREE_TYPE (gimple_assign_lhs (use_stmt));
	if (useless_type_conversion_p (ntype, utype))
	  {
	    /* Cast back to NTYPE: forward HIGHPART2 directly.  */
	    gimple_stmt_iterator use_gsi = gsi_for_stmt (use_stmt);
	    new_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
					    highpart2);
	    gsi_replace (&use_gsi, new_stmt, true);
	  }
	else if (bits == prec && useless_type_conversion_p (optype, utype))
	  {
	    /* Cast back to the unshifted operand type: forward
	       HIGHPART1 (no residual shift was needed).  */
	    gimple_stmt_iterator use_gsi = gsi_for_stmt (use_stmt);
	    new_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
					    highpart1);
	    gsi_replace (&use_gsi, new_stmt, true);
	  }
	else if (TREE_CODE (utype) == INTEGER_TYPE
		 && TYPE_PRECISION (utype) <= prec)
	  {
	    /* A truncation of LHS can convert from HIGHPART2 directly,
	       bypassing the intermediate LHS conversion.  */
	    gimple_stmt_iterator use_gsi = gsi_for_stmt (use_stmt);
	    new_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
					    NOP_EXPR, highpart2);
	    gsi_replace (&use_gsi, new_stmt, true);
	  }
      }

  widen_mul_stats.highpart_mults_inserted++;
  return true;
}
+
+
/* Find integer multiplications where the operands are extended from
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
- where appropriate. */
+ or MULT_HIGHPART_EXPR where appropriate. */
namespace {
@@ -4656,6 +4779,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
convert_to_divmod (as_a<gassign *> (stmt));
break;
+ case RSHIFT_EXPR:
+ convert_mult_to_highpart (stmt, &gsi);
+ break;
+
default:;
}
}
@@ -4738,6 +4865,8 @@ pass_optimize_widening_mul::execute (function *fun)
widen_mul_stats.fmas_inserted);
statistics_counter_event (fun, "divmod calls inserted",
widen_mul_stats.divmod_calls_inserted);
+ statistics_counter_event (fun, "highpart multiplications inserted",
+ widen_mul_stats.highpart_mults_inserted);
return cfg_changed ? TODO_cleanup_cfg : 0;
}
new file mode 100644
@@ -0,0 +1,167 @@
/* { dg-do compile { target int128 } } */
/* { dg-options "-O2 -Wno-long-long -fdump-tree-optimized" } */

/* Each function below performs a 64x64->128 bit widening multiply
   followed by a right shift of 64 or 72 bits, covering every
   combination of operand signedness, result signedness/width, constant
   vs. variable multiplier, and shift count.  All 32 functions should
   be rewritten to use a highpart multiply (" h* " in the dump).  */

typedef unsigned int __attribute ((mode(TI))) uti_t;
typedef int __attribute ((mode(TI))) ti_t;

long long stest1(long long x, long long y)
{
  return ((ti_t)x * (ti_t)y) >> 64;
}

long long stest2(long long x)
{
  return ((ti_t)x * 19065) >> 64;
}

long long stest3(long long x, long long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 64;
}

long long stest4(long long x)
{
  return (uti_t)((ti_t)x * 19065) >> 64;
}

ti_t stest5(long long x, long long y)
{
  return ((ti_t)x * (ti_t)y) >> 64;
}

ti_t stest6(long long x)
{
  return ((ti_t)x * 19065) >> 64;
}

uti_t stest7(long long x, long long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 64;
}

uti_t stest8(long long x)
{
  return (uti_t)((ti_t)x * 19065) >> 64;
}

long long stest9(long long x, long long y)
{
  return ((ti_t)x * (ti_t)y) >> 72;
}

long long stest10(long long x)
{
  return ((ti_t)x * 19065) >> 72;
}

long long stest11(long long x, long long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 72;
}

long long stest12(long long x)
{
  return (uti_t)((ti_t)x * 19065) >> 72;
}

ti_t stest13(long long x, long long y)
{
  return ((ti_t)x * (ti_t)y) >> 72;
}

ti_t stest14(long long x)
{
  return ((ti_t)x * 19065) >> 72;
}

uti_t stest15(long long x, long long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 72;
}

uti_t stest16(long long x)
{
  return (uti_t)((ti_t)x * 19065) >> 72;
}

unsigned long long utest1(unsigned long long x, unsigned long long y)
{
  return ((uti_t)x * (uti_t)y) >> 64;
}

unsigned long long utest2(unsigned long long x)
{
  return ((uti_t)x * 19065) >> 64;
}

unsigned long long utest3(unsigned long long x, unsigned long long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 64;
}

unsigned long long utest4(unsigned long long x)
{
  return (ti_t)((uti_t)x * 19065) >> 64;
}

uti_t utest5(unsigned long long x, unsigned long long y)
{
  return ((uti_t)x * (uti_t)y) >> 64;
}

uti_t utest6(unsigned long long x)
{
  return ((uti_t)x * 19065) >> 64;
}

ti_t utest7(unsigned long long x, unsigned long long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 64;
}

/* Constant-multiplier analogue of utest7 (unsigned multiply, signed
   result).  The original accidentally duplicated stest8's signed
   variant here, leaving this combination untested.  */
ti_t utest8(unsigned long long x)
{
  return (ti_t)((uti_t)x * 19065) >> 64;
}

unsigned long long utest9(unsigned long long x, unsigned long long y)
{
  return ((uti_t)x * (uti_t)y) >> 72;
}

unsigned long long utest10(unsigned long long x)
{
  return ((uti_t)x * 19065) >> 72;
}

unsigned long long utest11(unsigned long long x, unsigned long long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 72;
}

unsigned long long utest12(unsigned long long x)
{
  return (ti_t)((uti_t)x * 19065) >> 72;
}

uti_t utest13(unsigned long long x, unsigned long long y)
{
  return ((uti_t)x * (uti_t)y) >> 72;
}

uti_t utest14(unsigned long long x)
{
  return ((uti_t)x * 19065) >> 72;
}

ti_t utest15(unsigned long long x, unsigned long long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 72;
}

ti_t utest16(unsigned long long x)
{
  return (ti_t)((uti_t)x * 19065) >> 72;
}

/* { dg-final { scan-tree-dump-times " h\\* " 32 "optimized" } } */