diff mbox

[ARM] Optimise handling of neon_vget_high/low

Message ID g4r5315fh1.fsf@richards-thinkpad.stglab.manchester.uk.ibm.com
State New
Headers show

Commit Message

Richard Sandiford Sept. 28, 2011, 6:49 a.m. UTC
Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> writes:
> On 14 September 2011 13:30, Richard Sandiford
> <richard.sandiford@linaro.org> wrote:
>> neon_vget_high and neon_vget_low extract one half of a vector.
>> The patterns look like:
>>
>> (define_insn "neon_vget_highv16qi"
>>  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
>>        (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
>>                         (parallel [(const_int 8) (const_int 9)
>>                                    (const_int 10) (const_int 11)
>>                                    (const_int 12) (const_int 13)
>>                                    (const_int 14) (const_int 15)])))]
>>  "TARGET_NEON"
>> {
>>  int dest = REGNO (operands[0]);
>>  int src = REGNO (operands[1]);
>>
>>  if (dest != src + 2)
>>    return "vmov\t%P0, %f1";
>>  else
>>    return "";
>> }
>>  [(set_attr "neon_type" "neon_bp_simple")]
>> )
>>
>> But there's nothing here to tell the register allocator what's expected
>> of it, so we do often get the move.  The patch below makes the patterns
>> expand to normal subreg moves instead.
>>
>> Unfortunately, when I first tried this, I ran across some bugs in
>> simplify-rtx.c.  They should be fixed now.  Of course, I can't
>> guarantee that there are other similar bugs elsewhere, but I'll
>> try to fix any that crop up.
>>
>> The new patterns preserve the current treatment on big-endian targets.
>> Namely, the "low" half is always in the lower-numbered registers
>> (subreg byte offset 0).
>>
>> Tested on arm-linux-gnueabi.  OK to install?
>
>
> This is OK . Please watch out for any fallout that comes as a result
> of this . It would be useful to do some big endian testing at some
> point but I'm not going to let that hold up this patch.
>
> While you are here can you look at the quad_halves_<code>v4si etc.
> patterns neon_move_lo_quad_<mode> and neon_move_hi_quad_<mode>
> patterns which seem to have the same problem ?

Ah, yeah, thanks for the pointer.

Tested on arm-linux-gnueabi, and on benchmarks which should (and did)
benefit from it.  OK to install?

Richard


gcc/
	* config/arm/neon.md (neon_move_lo_quad_<mode>): Delete.
	(neon_move_hi_quad_<mode>): Likewise.
	(move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves.

Comments

Ramana Radhakrishnan Sept. 28, 2011, 11:34 a.m. UTC | #1
>
> Tested on arm-linux-gnueabi, and on benchmarks which should (and did)
> benefit from it.  OK to install?

OK.

Ramana
diff mbox

Patch

Index: gcc/config/arm/neon.md
===================================================================
--- gcc/config/arm/neon.md	2011-09-27 09:27:18.000000000 +0100
+++ gcc/config/arm/neon.md	2011-09-27 09:45:26.008275971 +0100
@@ -1251,66 +1251,14 @@  (define_insn "quad_halves_<code>v16qi"
                     (const_string "neon_int_1") (const_string "neon_int_5")))]
 )
 
-; FIXME: We wouldn't need the following insns if we could write subregs of
-; vector registers. Make an attempt at removing unnecessary moves, though
-; we're really at the mercy of the register allocator.
-
-(define_insn "neon_move_lo_quad_<mode>"
-  [(set (match_operand:ANY128 0 "s_register_operand" "+w")
-        (vec_concat:ANY128
-          (match_operand:<V_HALF> 1 "s_register_operand" "w")
-          (vec_select:<V_HALF> 
-		(match_dup 0)
-	        (match_operand:ANY128 2 "vect_par_constant_high" ""))))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%e0, %P1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_move_hi_quad_<mode>"
-  [(set (match_operand:ANY128 0 "s_register_operand" "+w")
-        (vec_concat:ANY128
-          (vec_select:<V_HALF>
-		(match_dup 0)
-	        (match_operand:ANY128 2 "vect_par_constant_low" ""))
-          (match_operand:<V_HALF> 1 "s_register_operand" "w")))]
-	   
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%f0, %P1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
 (define_expand "move_hi_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand" "")
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
 {
-  rtvec v = rtvec_alloc (<V_mode_nunits>/2);
-  rtx t1;
-  int i;
-
-  for (i=0; i < (<V_mode_nunits>/2); i++)
-     RTVEC_ELT (v, i) = GEN_INT (i);
-
-  t1 = gen_rtx_PARALLEL (<MODE>mode, v);
-  emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1));
-
+  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
+				       GET_MODE_SIZE (<V_HALF>mode)),
+		  operands[1]);
   DONE;
 })
 
@@ -1319,16 +1267,9 @@  (define_expand "move_lo_quad_<mode>"
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
 {
-  rtvec v = rtvec_alloc (<V_mode_nunits>/2);
-  rtx t1;
-  int i;
-
-  for (i=0; i < (<V_mode_nunits>/2); i++)
-     RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
-
-  t1 = gen_rtx_PARALLEL (<MODE>mode, v);
-  emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1));
-
+  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
+				       <MODE>mode, 0),
+		  operands[1]);
   DONE;
 })