Patchwork [AArch64_BE,1/4] Big-Endian lane numbering fix

login
register
mail settings
Submitter Alex Velenko
Date Jan. 16, 2014, 11:49 a.m.
Message ID <52D7C737.8070507@arm.com>
Download mbox | patch
Permalink /patch/311715/
State New
Headers show

Comments

Alex Velenko - Jan. 16, 2014, 11:49 a.m.
Hi,
This patch is the first patch in a series of patches fixing Big-Endian
lane numbering. The goal of this series of patches is to make proper
bridge between pure GCC big-endian view on lane numbering and internal
architected view.

Approach taken is to catch lane indexing when internal vector lane
indexes are passed to GCC lane indexing world view.

This will have a short-term impact on big-endian NEON intrinsics and
introduces a number of regressions. But this is the correct thing to do
to ensure that auto-vectorized and GCC vector extension code works
correctly.

This particular patch fixes vld1_<type> and vst1_<type> to generate st1 
and ld1 instructions, correcting their BE behaviour.

Regression tested on aarch64-none-elf and aarch64_be-none-elf with 
recent vec-perm with no unexpected issues.

Is it okay for trunk?

Regards,
Alex Velenko



gcc/
2014-01-16  Alex Velenko  <Alex.Velenko@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_be_ld1<mode>):
	New define_insn.
	(aarch64_be_st1<mode>): Likewise.
	(aarch_ld1<VALL:mode>): Define_expand modified.
	(aarch_st1<VALL:mode>): Likewise.
	* config/aarch64/aarch64.md (UNSPEC_LD1): New unspec definition.
	(UNSPEC_ST1): Likewise

gcc/testsuite
2014-01-16  Alex Velenko  <Alex.Velenko@arm.com>

	* /gcc.target/aarch64/vld1-vst1_1.c: New test_case.
Marcus Shawcroft - Jan. 21, 2014, 1:27 p.m.
On 16 January 2014 11:49, Alex Velenko <Alex.Velenko@arm.com> wrote:
> Hi,
> This patch is the first patch in a series of patches fixing Big-Endian
> lane numbering. The goal of this series of patches is to make proper
> bridge between pure GCC big-endian view on lane numbering and internal
> architected view.

OK /Marcus
Alex Velenko - Jan. 21, 2014, 7 p.m.
Hi,
Can someone, please, commit this patch as I do not have privileges to
do so.
Kind regards,
Alex Velenko

On 21/01/14 13:27, Marcus Shawcroft wrote:
> On 16 January 2014 11:49, Alex Velenko <Alex.Velenko@arm.com> wrote:
>> Hi,
>> This patch is the first patch in a series of patches fixing Big-Endian
>> lane numbering. The goal of this series of patches is to make proper
>> bridge between pure GCC big-endian view on lane numbering and internal
>> architected view.
>
> OK /Marcus
>

Patch

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 158b3dca6da12322de0af80d35f593039d716de6..2f2e74f6bccd54accd265a55cc8dbcfe2db2e76f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3544,6 +3544,24 @@ 
    (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
 )
 
+(define_insn "aarch64_be_ld1<mode>"
+  [(set (match_operand:VALLDI 0	"register_operand" "=w")
+	(unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
+	UNSPEC_LD1))]
+  "TARGET_SIMD"
+  "ld1\\t{%0<Vmtype>}, %1"
+  [(set_attr "type" "neon_load1_1reg<q>")]
+)
+
+(define_insn "aarch64_be_st1<mode>"
+  [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
+	UNSPEC_ST1))]
+  "TARGET_SIMD"
+  "st1\\t{%1<Vmtype>}, %0"
+  [(set_attr "type" "neon_store1_1reg<q>")]
+)
+
 (define_split
   [(set (match_operand:OI 0 "register_operand" "")
 	(match_operand:OI 1 "register_operand" ""))]
@@ -3762,7 +3780,11 @@ 
 {
   enum machine_mode mode = <VALL:MODE>mode;
   rtx mem = gen_rtx_MEM (mode, operands[1]);
-  emit_move_insn (operands[0], mem);
+
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem));
+  else
+    emit_move_insn (operands[0], mem);
   DONE;
 })
 
@@ -3988,7 +4010,11 @@ 
 {
   enum machine_mode mode = <VALL:MODE>mode;
   rtx mem = gen_rtx_MEM (mode, operands[0]);
-  emit_move_insn (mem, operands[1]);
+
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1]));
+  else
+    emit_move_insn (mem, operands[1]);
   DONE;
 })
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c83622d6cad59883923f6eb0454c735c24a1eb3f..d5186f6211ec795672fc2631d7bbb1247a2d2773 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -81,6 +81,7 @@ 
     UNSPEC_GOTSMALLPIC
     UNSPEC_GOTSMALLTLS
     UNSPEC_GOTTINYPIC
+    UNSPEC_LD1
     UNSPEC_LD2
     UNSPEC_LD3
     UNSPEC_LD4
@@ -92,6 +93,7 @@ 
     UNSPEC_SISD_SSHL
     UNSPEC_SISD_USHL
     UNSPEC_SSHL_2S
+    UNSPEC_ST1
     UNSPEC_ST2
     UNSPEC_ST3
     UNSPEC_ST4
diff --git a/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c b/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..d1834a264708fe6ab901ac1a27544ca8ebb815cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
@@ -0,0 +1,52 @@ 
+/* Test vld1 and vst1 maintain consistent indexing.  */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+#include <arm_neon.h>
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+test_vld1_vst1 ()
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i = 0;
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+  a = vld1_s8 (c);
+  asm volatile ("":::"memory");
+  vst1_s8 (d, a);
+  asm volatile ("":::"memory");
+  for (; i < 8; i++)
+    if (c[i] != d[i])
+      return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vld1q_vst1q ()
+{
+  int16x8_t a;
+  int16x8_t b;
+  int i = 0;
+  int16_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int16_t d[8];
+  a = vld1q_s16 (c);
+  asm volatile ("":::"memory");
+  vst1q_s16 (d, a);
+  asm volatile ("":::"memory");
+  for (; i < 8; i++)
+    if (c[i] != d[i])
+      return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vld1_vst1 ())
+    abort ();
+  if (test_vld1q_vst1q ())
+    abort ();
+  return 0;
+}