diff mbox series

Recognize highpart multiplication during RTL expansion

Message ID 000101d78c4a$2cbdbe20$86393a60$@nextmovesoftware.com
State New
Headers show
Series Recognize highpart multiplication during RTL expansion | expand

Commit Message

Roger Sayle Aug. 8, 2021, 11:40 a.m. UTC
Doh! ENOPATCH.  This time with attachments...
https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576922.html

Roger
--

/* { dg-do compile } */
/* { dg-options "-O2 -Wno-long-long" } */

typedef unsigned int __attribute ((mode(TI))) uti_t;
typedef int __attribute ((mode(TI))) ti_t;

long test1(long x, long y)
{
  return ((ti_t)x * (ti_t)y) >> 64;
}

long test2(long x)
{
  return ((ti_t)x * 19065) >> 64;
}

long test3(long x, long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 64;
}

long test4(long x)
{
  return (uti_t)((ti_t)x * 19065) >> 64;
}

ti_t test5(long x, long y)
{
  return ((ti_t)x * (ti_t)y) >> 64;
}

ti_t test6(long x)
{
  return ((ti_t)x * 19065) >> 64;
}

uti_t test7(long x, long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 64;
}

uti_t test8(long x)
{
  return (uti_t)((ti_t)x * 19065) >> 64;
}

/* { dg-final { scan-assembler-times "mul.hi.s64" 8 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -Wno-long-long" } */

typedef unsigned int __attribute ((mode(TI))) uti_t;
typedef int __attribute ((mode(TI))) ti_t;

unsigned long test1(unsigned long x, unsigned long y)
{
  return ((uti_t)x * (uti_t)y) >> 64;
}

unsigned long test2(unsigned long x)
{
  return ((uti_t)x * 19065) >> 64;
}

unsigned long test3(unsigned long x, unsigned long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 64;
}

unsigned long test4(unsigned long x)
{
  return (ti_t)((uti_t)x * 19065) >> 64;
}

uti_t test5(unsigned long x, unsigned long y)
{
  return ((uti_t)x * (uti_t)y) >> 64;
}

uti_t test6(unsigned long x)
{
  return ((uti_t)x * 19065) >> 64;
}

ti_t test7(unsigned long x, unsigned long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 64;
}

ti_t test8(unsigned long x)
{
  return (ti_t)((uti_t)x * 19065) >> 64;
}

/* { dg-final { scan-assembler-times "mul.hi.u64" 8 } } */

Comments

Richard Biener Aug. 9, 2021, 10:13 a.m. UTC | #1
On Sun, Aug 8, 2021 at 1:40 PM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> Doh! ENOPATCH.  This time with attachments...
> https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576922.html

Note I was originally suggesting to perform
the matching on GIMPLE but only in one of the "instruction selection passes".
There's pass_optimize_widening_mul at -O1+ (but not -Og) and there's
pass_gimple_isel at all optimization levels.  The advantage of those
would be cases where TER leaves you with no get_def_for_expr
because of multiple uses or uses across calls, etc.

So while I think the patch is technically OK it works in a somewhat
backward direction from the intent to simplify the RTL expansion
process and make its "tricks" more general and visible on GIMPLE
prior to going out of SSA.

Richard.

> Roger
> --
>
diff mbox series

Patch

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 108de1c..2b18f6a 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -614,6 +614,18 @@ 
   ""
   "%.\\tmul.hi.s32\\t%0, %1, %2;")
 
+(define_insn "smuldi3_highpart"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
+	(truncate:DI
+	 (lshiftrt:TI
+	  (mult:TI (sign_extend:TI
+		    (match_operand:DI 1 "nvptx_register_operand" "R"))
+		   (sign_extend:TI
+		    (match_operand:DI 2 "nvptx_register_operand" "R")))
+	  (const_int 64))))]
+  ""
+  "%.\\tmul.hi.s64\\t%0, %1, %2;")
+
 (define_insn "umulhi3_highpart"
   [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
 	(truncate:HI
@@ -638,6 +650,18 @@ 
   ""
   "%.\\tmul.hi.u32\\t%0, %1, %2;")
 
+(define_insn "umuldi3_highpart"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
+	(truncate:DI
+	 (lshiftrt:TI
+	  (mult:TI (zero_extend:TI
+		    (match_operand:DI 1 "nvptx_register_operand" "R"))
+		   (zero_extend:TI
+		    (match_operand:DI 2 "nvptx_register_operand" "R")))
+	  (const_int 64))))]
+  ""
+  "%.\\tmul.hi.u64\\t%0, %1, %2;")
+
 ;; Shifts
 
 (define_insn "ashl<mode>3"
diff --git a/gcc/expr.c b/gcc/expr.c
index b65cfcf..c032d54 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8904,6 +8904,91 @@  expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0,
   return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
 }
 
+/* Helper function of expand_expr_real_2, to recognized and expand a
+   (rshift (mult (convert X) (convert Y)) INTEGER_CST) as a highpart
+   multiplication.  This also handles multiplication by a constant.  */
+
+static rtx
+try_expand_mult_highpart (sepops ops, rtx target, machine_mode tmode)
+{
+  tree stype = ops->type;
+  tree sarg0 = ops->op0;
+  tree sarg1 = ops->op1;
+
+  if (!INTEGRAL_TYPE_P (stype)
+      || TREE_CODE (sarg1) != INTEGER_CST
+      || TREE_CODE (sarg0) != SSA_NAME
+      || !tree_fits_uhwi_p (sarg1))
+    return NULL_RTX;
+
+  gimple *def = get_def_for_expr (sarg0, MULT_EXPR);
+  if (!def)
+    {
+      /* Allow NOP_EXPR between the multiplication and the rshift.  */
+      def = get_def_for_expr (sarg0, NOP_EXPR);
+      if (def)
+	def = get_def_for_expr (gimple_assign_rhs1 (def), MULT_EXPR);
+      if (!def)
+	return NULL_RTX;
+    }
+
+  tree marg0 = gimple_assign_rhs1 (def);
+  tree mtype = TREE_TYPE (marg0);
+  if (!INTEGRAL_TYPE_P (mtype)
+      || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
+    return NULL_RTX;
+
+  gimple *ldef = get_def_for_expr (marg0, NOP_EXPR);
+  if (!ldef)
+    return NULL_RTX;
+
+  tree lhs = gimple_assign_rhs1 (ldef);
+  tree ltype = TREE_TYPE (lhs);
+  bool unsignedp = TYPE_UNSIGNED (mtype);
+  
+  if (TYPE_UNSIGNED (ltype) != unsignedp
+      || TYPE_PRECISION (ltype) != tree_to_uhwi (sarg1)
+      || TYPE_PRECISION (mtype) < 2 * TYPE_PRECISION (ltype))
+    return NULL_RTX;
+
+  tree marg1 = gimple_assign_rhs2 (def);
+  tree rhs;
+  if (TREE_CODE (marg1) == INTEGER_CST)
+    {
+      if (!int_fits_type_p (marg1, ltype))
+	return NULL_RTX;
+      rhs = fold_convert (ltype, marg1);
+    }
+  else
+    {
+      gimple *rdef = get_def_for_expr (marg1, NOP_EXPR);
+      if (!rdef)
+	return NULL_RTX;
+
+      rhs = gimple_assign_rhs1 (rdef);
+      if (TYPE_MAIN_VARIANT (ltype) != TYPE_MAIN_VARIANT (TREE_TYPE (rhs)))
+	return NULL_RTX;
+    }
+
+  machine_mode mode = TYPE_MODE (ltype);
+  optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+  if (optab_handler (tab, mode) == CODE_FOR_nothing)
+    return NULL_RTX;
+  
+  /* Commit to RTL expansion.  */
+  rtx op0, op1, result;
+  expand_operands (lhs, rhs, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+  if (tmode != mode)
+    target = NULL_RTX;
+  result = expand_binop (mode, tab, op0, op1, target, unsignedp,
+			 OPTAB_LIB_WIDEN);
+  if (!result)
+    return NULL_RTX;
+  if (tmode != mode)
+    result = convert_to_mode (tmode, result, TYPE_UNSIGNED (stype));
+  return result;
+}
+
 rtx
 expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 		    enum expand_modifier modifier)
@@ -9771,6 +9856,14 @@  expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	if (ALL_FIXED_POINT_MODE_P (mode))
 	  goto binop;
 
+	/* Try to use mul_highpart optab if available.  */
+	if (code == RSHIFT_EXPR)
+	  {
+	    temp = try_expand_mult_highpart (ops, target, tmode);
+	    if (temp)
+	      return REDUCE_BIT_FIELD (temp);
+	  }
+
 	if (! safe_from_p (subtarget, treeop1, 1))
 	  subtarget = 0;
 	if (modifier == EXPAND_STACK_PARM)