From patchwork Tue Aug 31 09:39:21 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [ARM] Implement support for NEON vmovn. Date: Mon, 30 Aug 2010 23:39:21 -0000 From: tejas belagod X-Patchwork-Id: 63217 Message-Id: <1283247561.30429.112.camel@e102484-lin.cambridge.arm.com> To: gcc-patches@gcc.gnu.org Hi, Attached is a patch that implements support for generating NEON VMOVN.i. This patch also refactors move_(lo,hi)_quad_(v4si,v4sf,v8hi) into move_(lo,hi)_quad_<mode> expansions. Tested with arm-linux-gnueabi. OK for trunk? --- Tejas Belagod ARM. gcc/ 2010-08-26 Tejas Belagod * config/arm/neon.md (vec_pack_trunc_<mode>): Instruction pattern for vmovn. Expansion in case of non -mvectorize-with-neon-quad. (neon_vec_pack_trunc_<mode>): Instruction pattern for vmovn for non- -mvectorize-with-neon-quad case. (move_lo_quad_<mode>): New expansion to vmov into low part. (move_hi_quad_<mode>): New expansion to vmov into high part. (move_lo_quad_v4si): Refactor to move_lo_quad_<mode> expansion. (move_lo_quad_v4sf): Likewise. (move_lo_quad_v8hi): Likewise. (neon_move_lo_quad_<mode>): Instruction pattern for vmov into low part. (neon_move_hi_quad_<mode>): Instruction pattern for vmov into high part. * config/arm/iterators.md (ANY128): New mode iterator. (V_narrow_pack): New mode attribute. (V_HALF): Add attribute. (V_DOUBLE): Add attribute. (V_mode_nunits): Add attribute. Index: gcc/config/arm/neon.md =================================================================== --- gcc/config/arm/neon.md (revision 163568) +++ gcc/config/arm/neon.md (working copy) @@ -1115,12 +1115,13 @@ ; vector registers. Make an attempt at removing unnecessary moves, though ; we're really at the mercy of the register allocator. 
-(define_insn "move_lo_quad_v4si" - [(set (match_operand:V4SI 0 "s_register_operand" "+w") - (vec_concat:V4SI - (match_operand:V2SI 1 "s_register_operand" "w") - (vec_select:V2SI (match_dup 0) - (parallel [(const_int 2) (const_int 3)]))))] +(define_insn "neon_move_lo_quad_" + [(set (match_operand:ANY128 0 "s_register_operand" "+w") + (vec_concat:ANY128 + (match_operand: 1 "s_register_operand" "w") + (vec_select: + (match_dup 0) + (match_operand:ANY128 2 "vect_par_constant_high" ""))))] "TARGET_NEON" { int dest = REGNO (operands[0]); @@ -1134,67 +1135,62 @@ [(set_attr "neon_type" "neon_bp_simple")] ) -(define_insn "move_lo_quad_v4sf" - [(set (match_operand:V4SF 0 "s_register_operand" "+w") - (vec_concat:V4SF - (match_operand:V2SF 1 "s_register_operand" "w") - (vec_select:V2SF (match_dup 0) - (parallel [(const_int 2) (const_int 3)]))))] +(define_insn "neon_move_hi_quad_" + [(set (match_operand:ANY128 0 "s_register_operand" "+w") + (vec_concat:ANY128 + (match_operand: 1 "s_register_operand" "w") + (vec_select: + (match_dup 0) + (match_operand:ANY128 2 "vect_par_constant_low" ""))))] "TARGET_NEON" { int dest = REGNO (operands[0]); int src = REGNO (operands[1]); if (dest != src) - return "vmov\t%e0, %P1"; + return "vmov\t%f0, %P1"; else return ""; } [(set_attr "neon_type" "neon_bp_simple")] ) -(define_insn "move_lo_quad_v8hi" - [(set (match_operand:V8HI 0 "s_register_operand" "+w") - (vec_concat:V8HI - (match_operand:V4HI 1 "s_register_operand" "w") - (vec_select:V4HI (match_dup 0) - (parallel [(const_int 4) (const_int 5) - (const_int 6) (const_int 7)]))))] - "TARGET_NEON" +(define_expand "move_hi_quad_" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "")] + "TARGET_NEON" { - int dest = REGNO (operands[0]); - int src = REGNO (operands[1]); + rtvec v = rtvec_alloc (/2); + rtx t1; + int i; - if (dest != src) - return "vmov\t%e0, %P1"; - else - return ""; -} - [(set_attr "neon_type" "neon_bp_simple")] -) + for (i=0; i < (/2); 
i++) + RTVEC_ELT (v, i) = GEN_INT (i); -(define_insn "move_lo_quad_v16qi" - [(set (match_operand:V16QI 0 "s_register_operand" "+w") - (vec_concat:V16QI - (match_operand:V8QI 1 "s_register_operand" "w") - (vec_select:V8QI (match_dup 0) - (parallel [(const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] - "TARGET_NEON" + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_move_hi_quad_ (operands[0], operands[1], t1)); + + DONE; +}) + +(define_expand "move_lo_quad_" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "")] + "TARGET_NEON" { - int dest = REGNO (operands[0]); - int src = REGNO (operands[1]); + rtvec v = rtvec_alloc (/2); + rtx t1; + int i; - if (dest != src) - return "vmov\t%e0, %P1"; - else - return ""; -} - [(set_attr "neon_type" "neon_bp_simple")] -) + for (i=0; i < (/2); i++) + RTVEC_ELT (v, i) = GEN_INT ((/2) + i); + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_move_lo_quad_ (operands[0], operands[1], t1)); + + DONE; +}) + ;; Reduction operations (define_expand "reduc_splus_" @@ -5179,3 +5175,38 @@ } ) + +(define_insn "vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: + (truncate: + (match_operand:VN 1 "register_operand" "w")) + (truncate: + (match_operand:VN 2 "register_operand" "w"))))] + "TARGET_NEON" + "vmovn.i\t%e0, %q1\n\tvmovn.i\t%f0, %q2" + [(set_attr "neon_type" "neon_shift_1")] +) + +;; For the non-quad case. 
+(define_insn "neon_vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=w") + (truncate: (match_operand:VN 1 "register_operand" "")))] + "TARGET_NEON" + "vmovn.i\t%0, %q1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_pack_trunc_" + [(match_operand: 0 "register_operand" "") + (match_operand:VSHFT 1 "register_operand" "") + (match_operand:VSHFT 2 "register_operand")] + "TARGET_NEON" +{ + rtx tempreg = gen_reg_rtx (mode); + + emit_insn (gen_move_lo_quad_ (tempreg, operands[1])); + emit_insn (gen_move_hi_quad_ (tempreg, operands[2])); + emit_insn (gen_neon_vec_pack_trunc_ (operands[0], tempreg)); + DONE; +}) Index: gcc/config/arm/iterators.md =================================================================== --- gcc/config/arm/iterators.md (revision 163568) +++ gcc/config/arm/iterators.md (working copy) @@ -28,6 +28,8 @@ ;; registers. (define_mode_iterator ANY64 [DI DF V8QI V4HI V2SI V2SF]) +(define_mode_iterator ANY128 [V2DI V2DF V16QI V8HI V4SI V4SF]) + ;; A list of integer modes that are up to one word long (define_mode_iterator QHSI [QI HI SI]) @@ -227,9 +229,13 @@ ;; Narrower modes with the same number of elements. (define_mode_attr V_narrow [(V8HI "V8QI") (V4SI "V4HI") (V2DI "V2SI")]) +;; Narrower modes with double the number of elements. +(define_mode_attr V_narrow_pack [(V4SI "V8HI") (V8HI "V16QI") (V2DI "V4SI") + (V4HI "V8QI") (V2SI "V4HI") (DI "V2SI")]) + ;; Modes with half the number of equal-sized elements. (define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI") - (V4SI "V2SI") (V4SF "V2SF") + (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF") (V2DI "DI")]) ;; Same, but lower-case. @@ -239,7 +245,7 @@ ;; Modes with twice the number of equal-sized elements. (define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI") - (V2SI "V4SI") (V2SF "V4SF") + (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF") (DI "V2DI")]) ;; Same, but lower-case. 
@@ -362,7 +368,8 @@ (V4HI "4") (V8HI "8") (V2SI "2") (V4SI "4") (V2SF "2") (V4SF "4") - (DI "1") (V2DI "2")]) + (DI "1") (V2DI "2") + (DF "1") (V2DF "2")]) ;; Same as V_widen, but lower-case. (define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") ( V2SI "v2di")])