Message ID | 4FBF9BC6.4000809@st.com |
---|---|
State | New |
Headers | show |
Hi Christophe, Sorry it's taken me a while to get back on this patch - I've been traveling. > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index 4568dea..0a4d00b 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -4397,20 +4397,32 @@ > ) > (define_insn "neon_vld1_dup<mode>" > - [(set (match_operand:VQX 0 "s_register_operand" "=w") > - (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] > + [(set (match_operand:VQ 0 "s_register_operand" "=w") > + (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] > UNSPEC_VLD1_DUP))] Why do we still have UNSPEC:VQ here? I probably wasn't clear enough in my earlier mail. There's no reason for this to remain an unspec; we might as well replace this with a vec_duplicate form as below. Please do the same with the neon_vld1_dup that iterates over VDX as well. > "TARGET_NEON" > { > - if (GET_MODE_NUNITS (<MODE>mode) > 2) > - return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; > - else > - return "vld1.<V_sz_elem>\t%h0, %A1"; > + return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; > } > [(set (attr "neon_type") Use the shorter set_attr "neon_type" form. In that case you don't need a const_string.
> - (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) > - (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") > - (const_string "neon_vld1_1_2_regs")))] > + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))] > +) > + > +(define_insn_and_split "neon_vld1_dupv2di" > + [(set (match_operand:V2DI 0 "s_register_operand" "=w") > + (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))] > + "TARGET_NEON" > + "#" > + "&& reload_completed" > + [(const_int 0)] > + { > + rtx tmprtx = gen_lowpart (DImode, operands[0]); > + emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1])); > + emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx ); > + DONE; > + } > + [(set_attr "length" "8") > + (set (attr "neon_type") (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))] Same comment about set_attr vs. set (attr here. OK with those changes. Ramana
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 4568dea..0a4d00b 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4397,20 +4397,32 @@ ) (define_insn "neon_vld1_dup<mode>" - [(set (match_operand:VQX 0 "s_register_operand" "=w") - (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] + [(set (match_operand:VQ 0 "s_register_operand" "=w") + (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")] UNSPEC_VLD1_DUP))] "TARGET_NEON" { - if (GET_MODE_NUNITS (<MODE>mode) > 2) - return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; - else - return "vld1.<V_sz_elem>\t%h0, %A1"; + return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; } [(set (attr "neon_type") - (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) - (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") - (const_string "neon_vld1_1_2_regs")))] + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))] +) + +(define_insn_and_split "neon_vld1_dupv2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=w") + (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx tmprtx = gen_lowpart (DImode, operands[0]); + emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1])); + emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx ); + DONE; + } + [(set_attr "length" "8") + (set (attr "neon_type") (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))] ) (define_expand "vec_store_lanes<mode><mode>" diff --git a/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c b/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c new file mode 100644 index 0000000..b5793bf --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c @@ -0,0 +1,24 @@ +/* Test the `vld1q_s64' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O0" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include <stdlib.h> + +int main (void) +{ + int64x1_t input[2] = {(int64x1_t)0x0123456776543210LL, + (int64x1_t)0x89abcdeffedcba90LL}; + int64x1_t output[2] = {0, 0}; + int64x2_t var = vld1q_dup_s64(input); + + vst1q_s64(output, var); + if (output[0] != (int64x1_t)0x0123456776543210LL) + abort(); + if (output[1] != (int64x1_t)0x0123456776543210LL) + abort(); + return 0; +}