Patchwork ARM/NEON: vld1q_dup_s64 builtin

login
register
mail settings
Submitter Christophe LYON
Date May 16, 2012, 1:51 p.m.
Message ID <4FB3B0CA.1090605@st.com>
Download mbox | patch
Permalink /patch/159676/
State New
Headers show

Comments

Christophe LYON - May 16, 2012, 1:51 p.m.
On 11.05.2012 16:48, Ramana Radhakrishnan wrote:
> I would change the iterator from VQX to VQ in the pattern above (you
> can also simplify the setting of neon_type in that case as well as
> change that to be a vec_duplicate as below and get rid of any
> lingering definitions of UNSPEC_VLD1_DUP if they exist), define a
> separate pattern that expressed this as a define_insn_and_split as
> below.
>
>   (define_insn_and_split "neon_vld1_dupv2di"
>     [(set (match_operand:V2DI 0 "s_register_operand" "=w")
>       (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
>     "TARGET_NEON"
>     "#"
>     "&&  reload_completed"
>     [(const_int 0)]
>     {
>      rtx tmprtx = gen_lowpart (DImode, operands[0]);
>      emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
>      emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
>      DONE;
>      }
> (set_attr "length" "8")
> (set_attr "neon_type" "<fromearlierpattern">)
> )
>
> Do you want to try this and see what you get ?

Thanks for this example and suggestion, it does work.

> I'd rather have an extra regression test in gcc.target/arm that was a run time test. for e.g. take a look at gcc.target/arm/neon-vadds64.c . 

Here is an updated patch:

2012-05-16  Christophe Lyon <christophe.lyon@st.com>

     * gcc/config/arm/neon.md (neon_vld1_dup): Restrict to VQ
     operands.
     (neon_vld1_dupv2di): New, fixes vld1q_dup_s64.
     * gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c: New test.
Ramana Radhakrishnan - May 18, 2012, 10:45 p.m.
On 16 May 2012 14:51, Christophe Lyon <christophe.lyon@st.com> wrote:
> On 11.05.2012 16:48, Ramana Radhakrishnan wrote:
>>
>> I would change the iterator from VQX to VQ in the pattern above (you
>> can also simplify the setting of neon_type in that case as well as
>> change that to be a vec_duplicate as below and get rid of any
>> lingering definitions of UNSPEC_VLD1_DUP if they exist), define a
>> separate pattern that expressed this as a define_insn_and_split as
>> below.
>>
>>  (define_insn_and_split "neon_vld1_dupv2di"
>>    [(set (match_operand:V2DI 0 "s_register_operand" "=w")
>>      (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand"
>> "Um")))]
>>    "TARGET_NEON"
>>    "#"
>>    "&&  reload_completed"
>>    [(const_int 0)]
>>    {
>>     rtx tmprtx = gen_lowpart (DImode, operands[0]);
>>     emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
>>     emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
>>     DONE;
>>     }
>> (set_attr "length" "8")
>> (set_attr "neon_type" "<fromearlierpattern">)
>> )
>>
>> Do you want to try this and see what you get ?
>
>
> Thanks for this example and suggestion, it does work.
>
>
>> I'd rather have an extra regression test in gcc.target/arm that was a run
>> time test. for e.g. take a look at gcc.target/arm/neon-vadds64.c .
>
>
> Here is an updated patch:

I tried applying your patch but ran into trouble with patch not liking
this . My suspicion is mailer munging white spaces in some form -
Could you send the patch as an attachment please rather than inline in
your mail ?

regards,
Ramana
> 2012-05-16  Christophe Lyon <christophe.lyon@st.com>
>
>    * gcc/config/arm/neon.md (neon_vld1_dup): Restrict to VQ
>    operands.
>    (neon_vld1_dupv2di): New, fixes vld1q_dup_s64.
>    * gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c: New test.
>
> Index: gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c
> ===================================================================
> --- gcc.orig/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c    (revision 0)
> +++ gcc.new/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c    (revision 0)
> @@ -0,0 +1,24 @@
> +/* Test the `vld1q_s64' ARM Neon intrinsic.  */
> +
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O0" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "arm_neon.h"
> +#include <stdlib.h>
> +
> +int main (void)
> +{
> +  int64x1_t input[2] = {(int64x1_t)0x0123456776543210LL,
> +            (int64x1_t)0x89abcdeffedcba90LL};
> +  int64x1_t output[2] = {0, 0};
> +  int64x2_t var = vld1q_dup_s64(input);
> +
> +  vst1q_s64(output, var);
> +  if (output[0] != (int64x1_t)0x0123456776543210LL)
> +    abort();
> +  if (output[1] != (int64x1_t)0x0123456776543210LL)
> +    abort();
> +  return 0;
> +}
> Index: gcc/config/arm/neon.md
> ===================================================================
> --- gcc.orig/gcc/config/arm/neon.md    (revision 2659)
> +++ gcc.new/gcc/config/arm/neon.md    (working copy)
> @@ -4195,20 +4195,32 @@
>  )
>
>  (define_insn "neon_vld1_dup<mode>"
> -  [(set (match_operand:VQX 0 "s_register_operand" "=w")
> -        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
> +  [(set (match_operand:VQ 0 "s_register_operand" "=w")
> +        (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
>                     UNSPEC_VLD1_DUP))]
>   "TARGET_NEON"
>  {
> -  if (GET_MODE_NUNITS (<MODE>mode) > 2)
>
>     return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
> -  else
>
> -    return "vld1.<V_sz_elem>\t%h0, %A1";
>  }
>   [(set (attr "neon_type")
> -      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string
> "1"))
> -                    (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
> -                    (const_string "neon_vld1_1_2_regs")))]
> +      (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
> +)
> +
> +(define_insn_and_split "neon_vld1_dupv2di"
> +   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
> +    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
> +   "TARGET_NEON"
> +   "#"
> +   "&& reload_completed"
> +   [(const_int 0)]
> +   {
> +    rtx tmprtx = gen_lowpart (DImode, operands[0]);
> +    emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
> +    emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
> +    DONE;
> +    }
> +  [(set_attr "length" "8")
> +   (set (attr "neon_type") (const_string
> "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
>  )
>
>  (define_expand "vec_store_lanes<mode><mode>"
>
>
>

Patch

Index: gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c
===================================================================
--- gcc.orig/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c    (revision 0)
+++ gcc.new/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c    (revision 0)
@@ -0,0 +1,24 @@ 
+/* Test the `vld1q_s64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O0" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main (void)
+{
+  int64x1_t input[2] = {(int64x1_t)0x0123456776543210LL,
+            (int64x1_t)0x89abcdeffedcba90LL};
+  int64x1_t output[2] = {0, 0};
+  int64x2_t var = vld1q_dup_s64(input);
+
+  vst1q_s64(output, var);
+  if (output[0] != (int64x1_t)0x0123456776543210LL)
+    abort();
+  if (output[1] != (int64x1_t)0x0123456776543210LL)
+    abort();
+  return 0;
+}
Index: gcc/config/arm/neon.md
===================================================================
--- gcc.orig/gcc/config/arm/neon.md    (revision 2659)
+++ gcc.new/gcc/config/arm/neon.md    (working copy)
@@ -4195,20 +4195,32 @@ 
  )

  (define_insn "neon_vld1_dup<mode>"
-  [(set (match_operand:VQX 0 "s_register_operand" "=w")
-        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
+  [(set (match_operand:VQ 0 "s_register_operand" "=w")
+        (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
                      UNSPEC_VLD1_DUP))]
    "TARGET_NEON"
  {
-  if (GET_MODE_NUNITS (<MODE>mode) > 2)
      return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
-  else
-    return "vld1.<V_sz_elem>\t%h0, %A1";
  }
    [(set (attr "neon_type")
-      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
-                    (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
-                    (const_string "neon_vld1_1_2_regs")))]
+      (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
+)
+
+(define_insn_and_split "neon_vld1_dupv2di"
+   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
+    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
+   "TARGET_NEON"
+   "#"
+   "&& reload_completed"
+   [(const_int 0)]
+   {
+    rtx tmprtx = gen_lowpart (DImode, operands[0]);
+    emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
+    emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
+    DONE;
+    }
+  [(set_attr "length" "8")
+   (set (attr "neon_type") (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
  )

  (define_expand "vec_store_lanes<mode><mode>"