Patchwork [AArch64,costs,12/18] Improve costs for sign/zero extracts

login
register
mail settings
Submitter James Greenhalgh
Date March 27, 2014, 5:33 p.m.
Message ID <1395941622-22926-13-git-send-email-james.greenhalgh@arm.com>
Download mbox | patch
Permalink /patch/334426/
State New
Headers show

Comments

James Greenhalgh - March 27, 2014, 5:33 p.m.
Hi,

Next SIGN_EXTRACT/ZERO_EXTRACT.

Tested in series on aarch64-none-elf.

OK for stage 1?

Thanks,
James

---
2014-03-27  James Greenhalgh  <james.greenhalgh@arm.com>
	    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>

	* config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for
	SIGN/ZERO_EXTRACT.

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a8de1e3..338f6b3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4798,6 +4798,35 @@  aarch64_address_cost (rtx x,
   return cost;
 }
 
+/* Return true if the RTX X in mode MODE is a zero or sign extract
+   usable in an ADD or SUB (extended register) instruction.  */
+static bool
+aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
+{
+  /* Catch add with a sign extract.
+     This is add_<optab><mode>_multp2.  */
+  if (GET_CODE (x) == SIGN_EXTRACT
+      || GET_CODE (x) == ZERO_EXTRACT)
+    {
+      rtx op0 = XEXP (x, 0);
+      rtx op1 = XEXP (x, 1);
+      rtx op2 = XEXP (x, 2);
+
+      if (GET_CODE (op0) == MULT
+	  && CONST_INT_P (op1)
+	  && op2 == const0_rtx
+	  && CONST_INT_P (XEXP (op0, 1))
+	  && aarch64_is_extend_from_extract (mode,
+					     XEXP (op0, 1),
+					     op1))
+	{
+	  return true;
+	}
+    }
+
+  return false;
+}
+
 /* Calculate the cost of calculating X, storing it in *COST.  Result
    is true if the total cost of the operation has now been calculated.  */
 static bool
@@ -5062,6 +5091,18 @@  cost_minus:
 
 	  }
 
+	/* Look for SUB (extended register).  */
+        if (aarch64_rtx_arith_op_extract_p (op1, mode))
+	  {
+	    if (speed)
+	      *cost += extra_cost->alu.arith_shift;
+
+	    *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
+			       (enum rtx_code) GET_CODE (op1),
+			       0, speed);
+	    return true;
+	  }
+
 	rtx new_op1 = aarch64_strip_extend (op1);
 
 	/* Cost this as an FMA-alike operation.  */
@@ -5118,6 +5159,18 @@  cost_minus:
 	    return true;
 	  }
 
+	/* Look for ADD (extended register).  */
+        if (aarch64_rtx_arith_op_extract_p (op0, mode))
+	  {
+	    if (speed)
+	      *cost += extra_cost->alu.arith_shift;
+
+	    *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
+			       (enum rtx_code) GET_CODE (op0),
+			       0, speed);
+	    return true;
+	  }
+
 	/* Strip any extend, leave shifts behind as we will
 	   cost them through mult_cost.  */
 	new_op0 = aarch64_strip_extend (op0);
@@ -5353,7 +5406,13 @@  cost_minus:
 
     case ZERO_EXTRACT:
     case SIGN_EXTRACT:
-      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
+      /* UBFX/SBFX.  */
+      if (speed)
+	*cost += extra_cost->alu.bfx;
+
+      /* We can trust that the immediates used will be correct (there
+	 are no by-register forms), so we need only cost op0.  */
+      *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
       return true;
 
     case MULT:
@@ -9030,7 +9089,7 @@  aarch64_cannot_change_mode_class (enum machine_mode from,
 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
 
 #undef TARGET_RTX_COSTS
-#define TARGET_RTX_COSTS aarch64_rtx_costs
+#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
 
 #undef TARGET_SCHED_ISSUE_RATE
 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate