diff mbox

[AArch64] Properly handle SHIFT ops and EXTEND in aarch64_rtx_mult_cost

Message ID 55352AD9.1030408@arm.com
State New
Headers show

Commit Message

Kyrylo Tkachov April 20, 2015, 4:35 p.m. UTC
Hi all,

The aarch64_rtx_mult_cost helper is supposed to handle multiplication costs as well as
PLUS/MINUS operations combined with multiplication or shift operations. The shift
operations may contain an extension. Currently we do not handle all these cases properly.
We also don't handle the supported shift types other than ASHIFT.

This patch addresses that by beefing up aarch64_rtx_mult_cost to handle
extensions inside the shifts and handling the other kinds of supported shifts.

Bootstrapped and tested on aarch64-linux.

Ok for trunk?

Thanks,
Kyrill

2015-04-20  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64.c (aarch64_shift_p): New function.
     (aarch64_rtx_mult_cost): Update comment to reflect that it also handles
     combined arithmetic-shift ops.  Properly handle all shift and extend
     operations that can occur in combination with PLUS/MINUS.
     Rename maybe_fma to compound_p.
     (aarch64_rtx_costs): Use aarch64_shift_p when costing compound
     arithmetic and shift operations.

Comments

Marcus Shawcroft April 30, 2015, 12:13 p.m. UTC | #1
On 20 April 2015 at 17:35, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
> Hi all,
>
> The aarch64_rtx_mult_cost helper is supposed to handle multiplication costs
> as well as
> PLUS/MINUS operations combined with multiplication or shift operations. The
> shift
> operations may contain an extension. Currently we do not handle all these
> cases properly.
> We also don't handle other supported shift types besides ASHIFT.
>
> This patch addresses that by beefing up aarch64_rtx_mult_cost to handle
> extensions inside the shifts and handling the other kinds of supported
> shifts.
>
> Bootstrapped and tested on aarch64-linux.
>
> Ok for trunk?
>
> Thanks,
> Kyrill
>
> 2015-04-20  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     * config/aarch64/aarch64.c (aarch64_shift_p): New function.
>     (aarch64_rtx_mult_cost): Update comment to reflect that it also handles
>     combined arithmetic-shift ops.  Properly handle all shift and extend
>     operations that can occur in combination with PLUS/MINUS.
>     Rename maybe_fma to compound_p.
>     (aarch64_rtx_costs): Use aarch64_shift_p when costing compound
>     arithmetic and shift operations.

+/* Return true iff CODE is a shift supported in combination
+   with arithmetic instructions.  */
+static bool
+aarch64_shift_p (enum rtx_code code)

OK, but add the blank line between the function comment and the
function please. /Marcus
diff mbox

Patch

commit 5c9d34ca7f6758ea0402cc0ef97d5db481ba7e40
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Mon Mar 2 12:04:27 2015 +0000

    [AArch64] Properly handle SHIFT ops and EXTEND in aarch64_rtx_mult_cost.

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2023f04..65be1b98 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5161,9 +5161,17 @@  aarch64_strip_extend (rtx x)
   return x;
 }
 
+/* Return true iff CODE is a shift supported in combination
+   with arithmetic instructions.  */
+static bool
+aarch64_shift_p (enum rtx_code code)
+{
+  return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
+}
+
 /* Helper function for rtx cost calculation.  Calculate the cost of
-   a MULT, which may be part of a multiply-accumulate rtx.  Return
-   the calculated cost of the expression, recursing manually in to
+   a MULT or a shift, which may be part of a compound PLUS/MINUS rtx.
+   Return the calculated cost of the expression, recursing manually into
    operands where needed.  */
 
 static int
@@ -5173,7 +5181,7 @@  aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
   const struct cpu_cost_table *extra_cost
     = aarch64_tune_params->insn_extra_cost;
   int cost = 0;
-  bool maybe_fma = (outer == PLUS || outer == MINUS);
+  bool compound_p = (outer == PLUS || outer == MINUS);
   machine_mode mode = GET_MODE (x);
 
   gcc_checking_assert (code == MULT);
@@ -5188,18 +5196,35 @@  aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
   if (GET_MODE_CLASS (mode) == MODE_INT)
     {
       /* The multiply will be canonicalized as a shift, cost it as such.  */
-      if (CONST_INT_P (op1)
-	  && exact_log2 (INTVAL (op1)) > 0)
+      if (aarch64_shift_p (GET_CODE (x))
+	  || (CONST_INT_P (op1)
+	      && exact_log2 (INTVAL (op1)) > 0))
 	{
+	  bool is_extend = GET_CODE (op0) == ZERO_EXTEND
+	                   || GET_CODE (op0) == SIGN_EXTEND;
 	  if (speed)
 	    {
-	      if (maybe_fma)
-		/* ADD (shifted register).  */
-		cost += extra_cost->alu.arith_shift;
+	      if (compound_p)
+	        {
+	          if (REG_P (op1))
+		    /* ARITH + shift-by-register.  */
+		    cost += extra_cost->alu.arith_shift_reg;
+		  else if (is_extend)
+		    /* ARITH + extended register.  We don't have a cost field
+		       for ARITH+EXTEND+SHIFT, so use extend_arith here.  */
+		    cost += extra_cost->alu.extend_arith;
+		  else
+		    /* ARITH + shift-by-immediate.  */
+		    cost += extra_cost->alu.arith_shift;
+		}
 	      else
 		/* LSL (immediate).  */
-		cost += extra_cost->alu.shift;
+	        cost += extra_cost->alu.shift;
+
 	    }
+	  /* Strip extends as we will have costed them in the case above.  */
+	  if (is_extend)
+	    op0 = aarch64_strip_extend (op0);
 
 	  cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
 
@@ -5217,7 +5242,7 @@  aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
 
 	  if (speed)
 	    {
-	      if (maybe_fma)
+	      if (compound_p)
 		/* MADD/SMADDL/UMADDL.  */
 		cost += extra_cost->mult[0].extend_add;
 	      else
@@ -5235,7 +5260,7 @@  aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
 
       if (speed)
 	{
-	  if (maybe_fma)
+	  if (compound_p)
 	    /* MADD.  */
 	    cost += extra_cost->mult[mode == DImode].add;
 	  else
@@ -5256,7 +5281,7 @@  aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
 	  if (GET_CODE (op1) == NEG)
 	    op1 = XEXP (op1, 0);
 
-	  if (maybe_fma)
+	  if (compound_p)
 	    /* FMADD/FNMADD/FNMSUB/FMSUB.  */
 	    cost += extra_cost->fp[mode == DFmode].fma;
 	  else
@@ -5833,7 +5858,7 @@  cost_minus:
 
 	/* Cost this as an FMA-alike operation.  */
 	if ((GET_CODE (new_op1) == MULT
-	     || GET_CODE (new_op1) == ASHIFT)
+	     || aarch64_shift_p (GET_CODE (new_op1)))
 	    && code != COMPARE)
 	  {
 	    *cost += aarch64_rtx_mult_cost (new_op1, MULT,
@@ -5904,7 +5929,7 @@  cost_plus:
 	new_op0 = aarch64_strip_extend (op0);
 
 	if (GET_CODE (new_op0) == MULT
-	    || GET_CODE (new_op0) == ASHIFT)
+	    || aarch64_shift_p (GET_CODE (new_op0)))
 	  {
 	    *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
 					    speed);