Patchwork ARM/NEON: vld1q_dup_s64 builtin

login
register
mail settings
Submitter Christophe LYON
Date May 25, 2012, 2:48 p.m.
Message ID <4FBF9BC6.4000809@st.com>
Download mbox | patch
Permalink /patch/161376/
State New
Headers show

Comments

Christophe LYON - May 25, 2012, 2:48 p.m.
On 21.05.2012 11:16, Christophe Lyon wrote:
>> I tried applying your patch but ran into trouble with patch not liking
>> this. My suspicion is that the mailer is munging whitespace in some form -
>> could you please send the patch as an attachment rather than inline in
>> your mail?
>>
>> regards,
>> Ramana
>>
> Here it is, as an attachment. Note however that this patch is against GCC-4.6.3.
>
> Thanks for testing.
>
> Christophe.
>
Hi,
I have attached the version for GCC trunk.

Christophe.
2012-05-25  Christophe Lyon  <christophe.lyon@st.com>

	* gcc/config/arm/neon.md (neon_vld1_dup): Restrict to VQ
	operands.
	(neon_vld1_dupv2di): New, fixes vld1q_dup_s64.
	* gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c: New test.
Ramana Radhakrishnan - June 6, 2012, 9 a.m.
Hi Christophe,

Sorry it's taken me a while to get back on this patch - I've been traveling.


> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 4568dea..0a4d00b 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -4397,20 +4397,32 @@
>  )

>  (define_insn "neon_vld1_dup<mode>"
> -  [(set (match_operand:VQX 0 "s_register_operand" "=w")
> -        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
> +  [(set (match_operand:VQ 0 "s_register_operand" "=w")
> +        (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
>                      UNSPEC_VLD1_DUP))]

Why do we still have UNSPEC:VQ here? I probably wasn't clear enough
in my earlier mail. There's no reason for this to remain an unspec;
we might as well replace it with a vec_duplicate form as below.

Please do the same with the neon_vld1_dup that iterates over VDX as well.


>    "TARGET_NEON"
>  {
> -  if (GET_MODE_NUNITS (<MODE>mode) > 2)
> -    return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
> -  else
> -    return "vld1.<V_sz_elem>\t%h0, %A1";
> +  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
>  }
>    [(set (attr "neon_type")

Use the shorter set_attr "neon_type" form. With that form you don't need
a const_string, e.g.
(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes").


> -      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
> -                    (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
> -                    (const_string "neon_vld1_1_2_regs")))]
> +      (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
> +)
> +
> +(define_insn_and_split "neon_vld1_dupv2di"
> +   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
> +    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
> +   "TARGET_NEON"
> +   "#"
> +   "&& reload_completed"
> +   [(const_int 0)]
> +   {
> +    rtx tmprtx = gen_lowpart (DImode, operands[0]);
> +    emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
> +    emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
> +    DONE;
> +    }
> +  [(set_attr "length" "8")
> +   (set (attr "neon_type") (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]

Same comment as above about using set_attr rather than (set (attr ...)).

Ok with those changes.

Ramana

Patch

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 4568dea..0a4d00b 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -4397,20 +4397,32 @@ 
 )
 
 (define_insn "neon_vld1_dup<mode>"
-  [(set (match_operand:VQX 0 "s_register_operand" "=w")
-        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
+  [(set (match_operand:VQ 0 "s_register_operand" "=w")
+        (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1_DUP))]
   "TARGET_NEON"
 {
-  if (GET_MODE_NUNITS (<MODE>mode) > 2)
-    return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
-  else
-    return "vld1.<V_sz_elem>\t%h0, %A1";
+  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
 }
   [(set (attr "neon_type")
-      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
-                    (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
-                    (const_string "neon_vld1_1_2_regs")))]
+      (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
+)
+
+(define_insn_and_split "neon_vld1_dupv2di"
+   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
+    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
+   "TARGET_NEON"
+   "#"
+   "&& reload_completed"
+   [(const_int 0)]
+   {
+    rtx tmprtx = gen_lowpart (DImode, operands[0]);
+    emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
+    emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
+    DONE;
+    }
+  [(set_attr "length" "8")
+   (set (attr "neon_type") (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
 )
 
 (define_expand "vec_store_lanes<mode><mode>"
diff --git a/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c b/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c
new file mode 100644
index 0000000..b5793bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c
@@ -0,0 +1,24 @@ 
+/* Test the `vld1q_dup_s64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O0" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main (void)
+{
+  int64x1_t input[2] = {(int64x1_t)0x0123456776543210LL,
+			(int64x1_t)0x89abcdeffedcba90LL};
+  int64x1_t output[2] = {0, 0};
+  int64x2_t var = vld1q_dup_s64(input);
+
+  vst1q_s64(output, var);
+  if (output[0] != (int64x1_t)0x0123456776543210LL)
+    abort();
+  if (output[1] != (int64x1_t)0x0123456776543210LL)
+    abort();
+  return 0;
+}