diff mbox

[AArch64] Use MOVN to generate 64-bit negative immediates where sensible

Message ID 000001cf6ae4$059c2280$10d46780$@bolton@arm.com
State New
Headers show

Commit Message

Ian Bolton May 8, 2014, 5:36 p.m. UTC
Hi,

It currently takes 4 instructions to generate certain immediates on
AArch64 (unless we put them in the constant pool).

For example ...

  long long
  ffffbeefcafebabe ()
  {
    return 0xFFFFBEEFCAFEBABEll;
  }

leads to ...

  mov x0, 47806
  movk x0, 0xcafe, lsl 16
  movk x0, 0xbeef, lsl 32
  orr x0, x0, -281474976710656

The above case is tackled in this patch by employing MOVN
to generate the top 32-bits in a single instruction ...

  mov x0, -71536975282177
  movk x0, 0xcafe, lsl 16
  movk x0, 0xbabe, lsl 0

(Note that where at least two half-words are 0xffff, existing
code that does the immediate in two instructions is still used.)

Tested on standard gcc regressions and the attached test case.

OK for commit?

Cheers,
Ian


2014-05-08  Ian Bolton  <ian.bolton@arm.com>

gcc/
	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
	Use MOVN when top-most half-word (and only that half-word)
	is 0xffff.
gcc/testsuite/
	* gcc.target/aarch64/movn_1.c: New test.

Comments

Ian Bolton May 16, 2014, 9:17 a.m. UTC | #1
Ping.  This should be relatively simple to review.

Many thanks.

> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> owner@gcc.gnu.org] On Behalf Of Ian Bolton
> Sent: 08 May 2014 18:36
> To: gcc-patches
> Subject: [PATCH, AArch64] Use MOVN to generate 64-bit negative
> immediates where sensible
> 
> Hi,
> 
> It currently takes 4 instructions to generate certain immediates on
> AArch64 (unless we put them in the constant pool).
> 
> For example ...
> 
>   long long
>   ffffbeefcafebabe ()
>   {
>     return 0xFFFFBEEFCAFEBABEll;
>   }
> 
> leads to ...
> 
>   mov x0, 47806
>   movk x0, 0xcafe, lsl 16
>   movk x0, 0xbeef, lsl 32
>   orr x0, x0, -281474976710656
> 
> The above case is tackled in this patch by employing MOVN
> to generate the top 32-bits in a single instruction ...
> 
>   mov x0, -71536975282177
>   movk x0, 0xcafe, lsl 16
>   movk x0, 0xbabe, lsl 0
> 
> (Note that where at least two half-words are 0xffff, existing
> code that does the immediate in two instructions is still used.)
> 
> Tested on standard gcc regressions and the attached test case.
> 
> OK for commit?
> 
> Cheers,
> Ian
> 
> 
> 2014-05-08  Ian Bolton  <ian.bolton@arm.com>
> 
> gcc/
> 	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
> 	Use MOVN when top-most half-word (and only that half-word)
> 	is 0xffff.
> gcc/testsuite/
> 	* gcc.target/aarch64/movn_1.c: New test.
Richard Earnshaw May 16, 2014, 12:35 p.m. UTC | #2
On 08/05/14 18:36, Ian Bolton wrote:
> Hi,
> 
> It currently takes 4 instructions to generate certain immediates on
> AArch64 (unless we put them in the constant pool).
> 
> For example ...
> 
>   long long
>   ffffbeefcafebabe ()
>   {
>     return 0xFFFFBEEFCAFEBABEll;
>   }
> 
> leads to ...
> 
>   mov x0, 47806
>   movk x0, 0xcafe, lsl 16
>   movk x0, 0xbeef, lsl 32
>   orr x0, x0, -281474976710656
> 
> The above case is tackled in this patch by employing MOVN
> to generate the top 32-bits in a single instruction ...
> 
>   mov x0, -71536975282177
>   movk x0, 0xcafe, lsl 16
>   movk x0, 0xbabe, lsl 0
> 
> (Note that where at least two half-words are 0xffff, existing
> code that does the immediate in two instructions is still used.)
> 
> Tested on standard gcc regressions and the attached test case.
> 
> OK for commit?

What about:

long long a()
{
  return 0x1234ffff56789abcll;
}

long long b()
{
  return 0x12345678ffff9abcll;
}

long long c()
{
  return 0x123456789abcffffll;
}

?

Surely these can also benefit from this sort of optimization, but it
looks as though you only handle the top 16 bits being set.

R.

> 
> Cheers,
> Ian
> 
> 
> 2014-05-08  Ian Bolton  <ian.bolton@arm.com>
> 
> gcc/
> 	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
> 	Use MOVN when top-most half-word (and only that half-word)
> 	is 0xffff.
> gcc/testsuite/
> 	* gcc.target/aarch64/movn_1.c: New test.
> 
> 
> aarch64-movn-exploitation-patch-v5.txt
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 43a83566..a8e504e 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1177,6 +1177,18 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
>  	}
>      }
>  
> +  /* Look for case where upper 16 bits are set, so we can use MOVN.  */
> +  if ((val & 0xffff000000000000ll) == 0xffff000000000000ll)
> +    {
> +      emit_insn (gen_rtx_SET (VOIDmode, dest,
> +			      GEN_INT (~ (~val & (0xffffll << 32)))));
> +      emit_insn (gen_insv_immdi (dest, GEN_INT (16),
> +				 GEN_INT ((val >> 16) & 0xffff)));
> +      emit_insn (gen_insv_immdi (dest, GEN_INT (0),
> +				 GEN_INT (val & 0xffff)));
> +      return;
> +    }
> +
>   simple_sequence:
>    first = true;
>    mask = 0xffff;
> diff --git a/gcc/testsuite/gcc.target/aarch64/movn_1.c b/gcc/testsuite/gcc.target/aarch64/movn_1.c
> new file mode 100644
> index 0000000..cc11ade
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/movn_1.c
> @@ -0,0 +1,27 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fno-inline --save-temps" } */
> +
> +extern void abort (void);
> +
> +long long
> +foo ()
> +{
> +  /* { dg-final { scan-assembler "mov\tx\[0-9\]+, -71536975282177" } } */
> +  return 0xffffbeefcafebabell;
> +}
> +
> +long long
> +merge4 (int a, int b, int c, int d)
> +{
> +  return ((long long) a << 48 | (long long) b << 32
> +	  | (long long) c << 16 | (long long) d);
> +}
> +
> +int main ()
> +{
> +  if (foo () != merge4 (0xffff, 0xbeef, 0xcafe, 0xbabe))
> +    abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { cleanup-saved-temps } } */
>
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 43a83566..a8e504e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1177,6 +1177,18 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	}
     }
 
+  /* Look for case where upper 16 bits are set, so we can use MOVN.  */
+  if ((val & 0xffff000000000000ll) == 0xffff000000000000ll)
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, dest,
+			      GEN_INT (~ (~val & (0xffffll << 32)))));
+      emit_insn (gen_insv_immdi (dest, GEN_INT (16),
+				 GEN_INT ((val >> 16) & 0xffff)));
+      emit_insn (gen_insv_immdi (dest, GEN_INT (0),
+				 GEN_INT (val & 0xffff)));
+      return;
+    }
+
  simple_sequence:
   first = true;
   mask = 0xffff;
diff --git a/gcc/testsuite/gcc.target/aarch64/movn_1.c b/gcc/testsuite/gcc.target/aarch64/movn_1.c
new file mode 100644
index 0000000..cc11ade
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movn_1.c
@@ -0,0 +1,27 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline --save-temps" } */
+
+extern void abort (void);
+
+long long
+foo ()
+{
+  /* { dg-final { scan-assembler "mov\tx\[0-9\]+, -71536975282177" } } */
+  return 0xffffbeefcafebabell;
+}
+
+long long
+merge4 (int a, int b, int c, int d)
+{
+  return ((long long) a << 48 | (long long) b << 32
+	  | (long long) c << 16 | (long long) d);
+}
+
+int main ()
+{
+  if (foo () != merge4 (0xffff, 0xbeef, 0xcafe, 0xbabe))
+    abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */