diff mbox

[ivopt] Try aligned offset when get_address_cost

Message ID 000001cfafad$5de00840$19a018c0$@arm.com
State New
Headers show

Commit Message

Zhenqiang Chen Aug. 4, 2014, 6:28 a.m. UTC
Hi,

For some TARGET, like ARM THUMB1, the offset in load/store should be nature
aligned. But in function get_address_cost, when computing max_offset, it
only tries byte-aligned offsets:

  ((unsigned HOST_WIDE_INT) 1 << i) - 1

which can not meet thumb_legitimate_offset_p check called from
thumb1_legitimate_address_p for HImode and SImode.

The patch adds additional try for aligned offset:

  ((unsigned HOST_WIDE_INT) 1 << i) - GET_MODE_SIZE (address_mode).

Bootstrap and no make check regression on X86-64.
No make check regression on qemu for Cortex-m0 and Cortex-m3.
For Cortex-m0, no performance changes with coremark and dhrystone. Coremark
code size is ~0.44 smaller. And eembcv2 code size is ~0.22 smaller. CSiBE
code size is ~0.05% smaller.

OK for trunk?

Thanks!
-Zhenqiang

ChangeLog
2014-08-04  Zhenqiang Chen  <zhenqiang.chen@arm.com>

	* tree-ssa-loop-ivopts.c (get_address_cost): Try aligned offset.

testsuite/ChangeLog:
2014-08-04  Zhenqiang Chen  <zhenqiang.chen@arm.com>

	* gcc.target/arm/get_address_cost_aligned_max_offset.c: New test.

Comments

Bin.Cheng Aug. 4, 2014, 8:40 a.m. UTC | #1
On Mon, Aug 4, 2014 at 2:28 PM, Zhenqiang Chen <zhenqiang.chen@arm.com> wrote:
> Hi,
>
> For some TARGET, like ARM THUMB1, the offset in load/store should be nature
> aligned. But in function get_address_cost, when computing max_offset, it
> only tries byte-aligned offsets:
>
>   ((unsigned HOST_WIDE_INT) 1 << i) - 1
>
> which can not meet thumb_legitimate_offset_p check called from
> thumb1_legitimate_address_p for HImode and SImode.
>
> The patch adds additional try for aligned offset:
>
>   ((unsigned HOST_WIDE_INT) 1 << i) - GET_MODE_SIZE (address_mode).
>
> Bootstrap and no make check regression on X86-64.
> No make check regression on qemu for Cortex-m0 and Cortex-m3.
> For Cortex-m0, no performance changes with coremark and dhrystone. Coremark
> code size is ~0.44 smaller. And eembcv2 code size is ~0.22 smaller. CSiBE
> code size is ~0.05% smaller.
>
> OK for trunk?
>
> Thanks!
> -Zhenqiang
>
> ChangeLog
> 2014-08-04  Zhenqiang Chen  <zhenqiang.chen@arm.com>
>
>         * tree-ssa-loop-ivopts.c (get_address_cost): Try aligned offset.
>
> testsuite/ChangeLog:
> 2014-08-04  Zhenqiang Chen  <zhenqiang.chen@arm.com>
>
>         * gcc.target/arm/get_address_cost_aligned_max_offset.c: New test.
>
> diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> index 3b4a6cd..562122a 100644
> --- a/gcc/tree-ssa-loop-ivopts.c
> +++ b/gcc/tree-ssa-loop-ivopts.c
> @@ -3308,6 +3308,18 @@ get_address_cost (bool symbol_present, bool
> var_present,
>           XEXP (addr, 1) = gen_int_mode (off, address_mode);
>           if (memory_address_addr_space_p (mem_mode, addr, as))
>             break;
> +         /* For some TARGET, like ARM THUMB1, the offset should be nature
> +            aligned.  Try an aligned offset if address_mode is not QImode.
> */
> +         off = (address_mode == QImode)
> +               ? 0
> +               : ((unsigned HOST_WIDE_INT) 1 << i)
> +                   - GET_MODE_SIZE (address_mode);
> +         if (off > 0)
> +           {
> +             XEXP (addr, 1) = gen_int_mode (off, address_mode);
> +             if (memory_address_addr_space_p (mem_mode, addr, as))
> +               break;
> +           }
Hi, Why not just check "address_mode != QImode"? Set off to 0 then
check it seems unnecessary.

Thanks,
bin
>         }
>        if (i == -1)
>          off = 0;
> diff --git
> a/gcc/testsuite/gcc.target/arm/get_address_cost_aligned_max_offset.c
> b/gcc/testsuite/gcc.target/arm/get_address_cost_aligned_max_offset.c
> new file mode 100644
> index 0000000..cc3e2f7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/get_address_cost_aligned_max_offset.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mthumb -O2" }  */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +
> +unsigned int
> +test (const short p16[6 * 64])
> +{
> +  unsigned int i = 6;
> +  unsigned int ret = 0;
> +
> +  do
> +    {
> +      unsigned long long *p64 = (unsigned long long*) p16;
> +      unsigned int *p32 = (unsigned int*) p16;
> +      ret += ret;
> +      if (p16[1] || p32[1])
> +       ret++;
> +      else if (p64[1] | p64[2] | p64[3])
> +       ret++;
> +      p16 += 64;
> +      i--;
> +    } while (i != 0);
> +
> +  return ret;
> +}
> +
> +/* { dg-final { scan-assembler-not "#22" } } */
> +/* { dg-final { scan-assembler-not "#14" } } */
>
>
>
diff mbox

Patch

diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 3b4a6cd..562122a 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -3308,6 +3308,18 @@  get_address_cost (bool symbol_present, bool
var_present,
 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
 	  if (memory_address_addr_space_p (mem_mode, addr, as))
 	    break;
+	  /* For some TARGET, like ARM THUMB1, the offset should be nature
+	     aligned.  Try an aligned offset if address_mode is not QImode.
*/
+	  off = (address_mode == QImode)
+		? 0
+		: ((unsigned HOST_WIDE_INT) 1 << i)
+		    - GET_MODE_SIZE (address_mode);
+	  if (off > 0)
+	    {
+	      XEXP (addr, 1) = gen_int_mode (off, address_mode);
+	      if (memory_address_addr_space_p (mem_mode, addr, as))
+		break;
+	    }
 	}
       if (i == -1)
         off = 0;
diff --git
a/gcc/testsuite/gcc.target/arm/get_address_cost_aligned_max_offset.c
b/gcc/testsuite/gcc.target/arm/get_address_cost_aligned_max_offset.c
new file mode 100644
index 0000000..cc3e2f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/get_address_cost_aligned_max_offset.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mthumb -O2" }  */
+/* { dg-require-effective-target arm_thumb1_ok } */
+
+unsigned int
+test (const short p16[6 * 64])
+{
+  unsigned int i = 6;
+  unsigned int ret = 0;
+
+  do
+    {
+      unsigned long long *p64 = (unsigned long long*) p16;
+      unsigned int *p32 = (unsigned int*) p16;
+      ret += ret;
+      if (p16[1] || p32[1])
+	ret++;
+      else if (p64[1] | p64[2] | p64[3])
+	ret++;
+      p16 += 64;
+      i--;
+    } while (i != 0);
+
+  return ret;
+}
+
+/* { dg-final { scan-assembler-not "#22" } } */
+/* { dg-final { scan-assembler-not "#14" } } */