diff mbox

[AARCH64] Adjust costs so udiv is preferred over sdiv when both are valid. [Patch (1/2)]

Message ID VI1PR0801MB2031451A1018AA38163513C7FF170@VI1PR0801MB2031.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Tamar Christina May 2, 2017, 3:37 p.m. UTC
Hi All, 

This patch adjusts the cost model so that when both sdiv and udiv are possible
it prefers udiv over sdiv. This was done by making sdiv slightly more expensive
instead of making udiv cheaper to keep the baseline costs of a division the same
as before.

For aarch64 this patch along with my other two related mid-end changes
makes a big difference in division by constants.

Given:

int f2(int x)
{
  return ((x * x) % 300) + ((x * x) / 300);
}

we now generate

f2:
	mul	w0, w0, w0
	mov	w1, 33205
	movk	w1, 0x1b4e, lsl 16
	mov	w2, 300
	umull	x1, w0, w1
	lsr	x1, x1, 37
	msub	w0, w1, w2, w0
	add	w0, w0, w1
	ret

as opposed to

f2:
	mul	w0, w0, w0
	mov	w2, 33205
	movk	w2, 0x1b4e, lsl 16
	mov	w3, 300
	smull	x1, w0, w2
	umull	x2, w0, w2
	asr	x1, x1, 37
	sub	w1, w1, w0, asr 31
	lsr	x2, x2, 37
	msub	w0, w1, w3, w0
	add	w0, w0, w2
	ret

Bootstrapped and reg tested on aarch64-none-linux-gnu with no regressions.

OK for trunk?

Thanks,
Tamar


gcc/
2017-05-02  Tamar Christina  <tamar.christina@arm.com>

	* config/aarch64/aarch64.c (aarch64_rtx_costs): Make sdiv more expensive than udiv.
	Remove floating point cases from mod.

gcc/testsuite/
2017-05-02  Tamar Christina  <tamar.christina@arm.com>

	* gcc.target/aarch64/sdiv_costs_1.c: New.

Comments

Tamar Christina May 15, 2017, 8:33 a.m. UTC | #1
Ping
James Greenhalgh June 6, 2017, 1:11 p.m. UTC | #2
On Tue, May 02, 2017 at 04:37:16PM +0100, Tamar Christina wrote:
> Hi All, 
> 
> This patch adjusts the cost model so that when both sdiv and udiv are possible
> it prefers udiv over sdiv. This was done by making sdiv slightly more expensive
> instead of making udiv cheaper to keep the baseline costs of a division the same
> as before.
> 
> For aarch64 this patch along with my other two related mid-end changes
> makes a big difference in division by constants.

This patch seems to have an unrelated change to the MOD/UMOD costs to delete
the handling of floating-point values. That change makes sense, but would
have been better in a separate patch.

> Given:
> 
> int f2(int x)
> {
>   return ((x * x) % 300) + ((x * x) / 300);
> }
> 
> we now generate
> 
> f2:
> 	mul	w0, w0, w0
> 	mov	w1, 33205
> 	movk	w1, 0x1b4e, lsl 16
> 	mov	w2, 300
> 	umull	x1, w0, w1
> 	lsr	x1, x1, 37
> 	msub	w0, w1, w2, w0
> 	add	w0, w0, w1
> 	ret
> 
> as opposed to
> 
> f2:
> 	mul	w0, w0, w0
> 	mov	w2, 33205
> 	movk	w2, 0x1b4e, lsl 16
> 	mov	w3, 300
> 	smull	x1, w0, w2
> 	umull	x2, w0, w2
> 	asr	x1, x1, 37
> 	sub	w1, w1, w0, asr 31
> 	lsr	x2, x2, 37
> 	msub	w0, w1, w3, w0
> 	add	w0, w0, w2
> 	ret
> 
> Bootstrapped and reg tested on aarch64-none-linux-gnu with no regressions.
> 
> OK for trunk?

OK.

Thanks,
James

> gcc/
> 2017-05-02  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* config/aarch64/aarch64.c (aarch64_rtx_costs): Make sdiv more expensive than udiv.
> 	Remove floating point cases from mod.
> 
> gcc/testsuite/
> 2017-05-02  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* gcc.target/aarch64/sdiv_costs_1.c: New.
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4f769a40a4e9de83cb5aacfd3ff58301c2feeb78..1f4fe51eda9057f1ccaded8e0d5ccd4bc3bc11ab 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7484,17 +7484,13 @@  cost_plus:
     case UMOD:
       if (speed)
 	{
+	  /* Slighly prefer UMOD over SMOD.  */
 	  if (VECTOR_MODE_P (mode))
 	    *cost += extra_cost->vect.alu;
 	  else if (GET_MODE_CLASS (mode) == MODE_INT)
 	    *cost += (extra_cost->mult[mode == DImode].add
-		      + extra_cost->mult[mode == DImode].idiv);
-	  else if (mode == DFmode)
-	    *cost += (extra_cost->fp[1].mult
-		      + extra_cost->fp[1].div);
-	  else if (mode == SFmode)
-	    *cost += (extra_cost->fp[0].mult
-		      + extra_cost->fp[0].div);
+		      + extra_cost->mult[mode == DImode].idiv
+		      + (code == MOD ? 1 : 0));
 	}
       return false;  /* All arguments need to be in registers.  */
 
@@ -7508,7 +7504,9 @@  cost_plus:
 	  else if (GET_MODE_CLASS (mode) == MODE_INT)
 	    /* There is no integer SQRT, so only DIV and UDIV can get
 	       here.  */
-	    *cost += extra_cost->mult[mode == DImode].idiv;
+	    *cost += (extra_cost->mult[mode == DImode].idiv
+		     /* Slighly prefer UDIV over SDIV.  */
+		     + (code == DIV ? 1 : 0));
 	  else
 	    *cost += extra_cost->fp[mode == DFmode].div;
 	}
diff --git a/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c b/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..24d7f7df2089398288bdf67a489eb71d733a4450
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c
@@ -0,0 +1,38 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+/* Both sdiv and udiv can be used here, so prefer udiv.  */
+int f1 (unsigned char *p)
+{
+  return 100 / p[1];
+}
+
+int f2 (unsigned char *p, unsigned short x)
+{
+  return x / p[0];
+}
+
+int f3 (unsigned char *p, int x)
+{
+  x &= 0x7fffffff;
+  return x / p[0];
+}
+
+int f5 (unsigned char *p, unsigned short x)
+{
+  return x % p[0];
+}
+
+/* This should only generate signed divisions.  */
+int f4 (unsigned char *p)
+{
+  return -100 / p[1];
+}
+
+int f6 (unsigned char *p, short x)
+{
+  return x % p[0];
+}
+
+/* { dg-final { scan-assembler-times "udiv\tw\[0-9\]+, w\[0-9\]+" 4 } } */
+/* { dg-final { scan-assembler-times "sdiv\tw\[0-9\]+, w\[0-9\]+" 2 } } */