Patchwork [ia64,committed] Fix vector instructions on IA64 HP-UX

login
register
mail settings
Submitter Steve Ellcey
Date Jan. 26, 2011, 5:48 p.m.
Message ID <201101261748.p0QHm5H21375@lucas.cup.hp.com>
Download mbox | patch
Permalink /patch/80533/
State New
Headers show

Comments

Steve Ellcey - Jan. 26, 2011, 5:48 p.m.
I am checking in this patch to fix some of the new IA64 vector
instructions for TARGET_BIG_ENDIAN and re-enable them for HP-UX.  It
was tested on IA64 HP-UX and Linux with no regressions.  There are still
three instructions that are enabled on Linux (little-endian) but not on
HP-UX: vec_extract_evenv2sf, vec_extract_oddv2sf, and vec_pack_trunc_v4hi.
I hope to look at those soon.

Steve Ellcey
sje@cup.hp.com


2011-01-26  Steve Ellcey  <sje@cup.hp.com>

	PR target/46997
	* vect.md (mulv2si3): Enable and fix for TARGET_BIG_ENDIAN.
	(*mux2): Ditto.
	(vec_extract_evenodd_help): Ditto.
	(vec_extract_evenv4hi): Ditto.
	(vec_extract_oddv4hi): Ditto.
	(vec_interleave_lowv2si): Ditto.
	(vec_interleave_highv2si): Ditto.
	(vec_extract_evenv2si): Ditto.
	(vec_extract_oddv2si): Ditto.
	(vec_pack_trunc_v2si): Ditto.
Richard Henderson - Jan. 26, 2011, 6:53 p.m.
On 01/26/2011 09:48 AM, Steve Ellcey wrote:
> @@ -390,8 +390,12 @@ (define_expand "mulv2si3"
>       of the full 32-bit product.  */
>  
>    /* T0 = CDBA.  */
> -  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
> -					     GEN_INT (3), const2_rtx));
> +  if (TARGET_BIG_ENDIAN)
> +    x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, GEN_INT (3), const2_rtx,
> +					       const1_rtx, const0_rtx));
> +  else
> +    x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
> +					       GEN_INT (3), const2_rtx));

This looks to me like it swaps the two SImode values around for big-endian.

		LE		BE
  before	3210		0123
  after		2301		3210
  correct	2301		1032

IMO there should be no change to this PARALLEL.

> +  if (TARGET_BIG_ENDIAN)
> +    {
> +      mask  = INTVAL (operands[2]) << 4;
> +      mask |= INTVAL (operands[3]) << 6;
> +      mask |= INTVAL (operands[4]);
> +      mask |= INTVAL (operands[5]) << 2;
> +    }
> +  else
> +    {
> +      mask  = INTVAL (operands[2]);
> +      mask |= INTVAL (operands[3]) << 2;
> +      mask |= INTVAL (operands[4]) << 4;
> +      mask |= INTVAL (operands[5]) << 6;
> +    }

Ah, here's the problem for the above.

For BE you need to remap the numbers in two directions.  Note that the
element numbering for the insn is LE, so we need to remap that first:

  (3 - INTVAL (operands[N]))

Second, the remapped elements need to be placed into the mask in the
right order.  Notice the nice increasing sequence for the shifts
for LE.  There should be a nice decreasing sequence for BE, not some
strange out-of-order sequence.

  mask  = (3 - INTVAL (operands[2])) << 6;
  mask |= (3 - INTVAL (operands[3])) << 4;
  mask |= (3 - INTVAL (operands[4])) << 2;
  mask |= (3 - INTVAL (operands[5]));

> @@ -1054,7 +1074,7 @@ (define_insn "vec_interleave_highv2si"
>  {
>    /* Recall that vector elements are numbered in memory order.  */
>    if (TARGET_BIG_ENDIAN)
> -    return "%,unpack4.l %0 = %r1, %r2";
> +    return "%,unpack4.h %0 = %r1, %r2";
>    else
>      return "%,unpack4.h %0 = %r2, %r1";
>  }

I can't believe this is true.  Especially if you're only changing
one instance.


r~

Patch

Index: config/ia64/vect.md
===================================================================
--- config/ia64/vect.md	(revision 169270)
+++ config/ia64/vect.md	(working copy)
@@ -370,7 +370,7 @@  (define_expand "mulv2si3"
   [(set (match_operand:V2SI 0 "gr_register_operand" "")
 	(mult:V2SI (match_operand:V2SI 1 "gr_register_operand" "r")
 		   (match_operand:V2SI 2 "gr_register_operand" "r")))]
-  "!TARGET_BIG_ENDIAN"
+  ""
 {
   rtx t0, t1, t2, t3, t4, t5, t6, t7, x;
   rtx op1h = gen_lowpart (V4HImode, operands[1]);
@@ -390,8 +390,12 @@  (define_expand "mulv2si3"
      of the full 32-bit product.  */
 
   /* T0 = CDBA.  */
-  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
-					     GEN_INT (3), const2_rtx));
+  if (TARGET_BIG_ENDIAN)
+    x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, GEN_INT (3), const2_rtx,
+					       const1_rtx, const0_rtx));
+  else
+    x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
+					       GEN_INT (3), const2_rtx));
   x = gen_rtx_VEC_SELECT (V4HImode, op1h, x);
   emit_insn (gen_rtx_SET (VOIDmode, t0, x));
 
@@ -971,10 +975,20 @@  (define_insn "*mux2"
   ""
 {
   int mask;
-  mask  = INTVAL (operands[2]);
-  mask |= INTVAL (operands[3]) << 2;
-  mask |= INTVAL (operands[4]) << 4;
-  mask |= INTVAL (operands[5]) << 6;
+  if (TARGET_BIG_ENDIAN)
+    {
+      mask  = INTVAL (operands[2]) << 4;
+      mask |= INTVAL (operands[3]) << 6;
+      mask |= INTVAL (operands[4]);
+      mask |= INTVAL (operands[5]) << 2;
+    }
+  else
+    {
+      mask  = INTVAL (operands[2]);
+      mask |= INTVAL (operands[3]) << 2;
+      mask |= INTVAL (operands[4]) << 4;
+      mask |= INTVAL (operands[5]) << 6;
+    }
   operands[2] = GEN_INT (mask);
   return "%,mux2 %0 = %1, %2";
 }
@@ -988,16 +1002,19 @@  (define_expand "vec_extract_evenodd_help
 		     (const_int 2)
 		     (const_int 1)
 		     (const_int 3)])))]
-  "!TARGET_BIG_ENDIAN")
+  "")
 
 (define_expand "vec_extract_evenv4hi"
   [(match_operand:V4HI 0 "gr_register_operand")
    (match_operand:V4HI 1 "gr_reg_or_0_operand")
    (match_operand:V4HI 2 "gr_reg_or_0_operand")]
-  "!TARGET_BIG_ENDIAN"
+  ""
 {
   rtx temp = gen_reg_rtx (V4HImode);
-  emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
+  else
+    emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
   emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
   DONE;
 })
@@ -1006,10 +1023,13 @@  (define_expand "vec_extract_oddv4hi"
   [(match_operand:V4HI 0 "gr_register_operand")
    (match_operand:V4HI 1 "gr_reg_or_0_operand")
    (match_operand:V4HI 2 "gr_reg_or_0_operand")]
-  "!TARGET_BIG_ENDIAN"
+  ""
 {
   rtx temp = gen_reg_rtx (V4HImode);
-  emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
+  else
+    emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
   emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
   DONE;
 })
@@ -1035,7 +1055,7 @@  (define_insn "vec_interleave_lowv2si"
 {
   /* Recall that vector elements are numbered in memory order.  */
   if (TARGET_BIG_ENDIAN)
-    return "%,unpack4.h %0 = %r1, %r2";
+    return "%,unpack4.l %0 = %r1, %r2";
   else
     return "%,unpack4.l %0 = %r2, %r1";
 }
@@ -1054,7 +1074,7 @@  (define_insn "vec_interleave_highv2si"
 {
   /* Recall that vector elements are numbered in memory order.  */
   if (TARGET_BIG_ENDIAN)
-    return "%,unpack4.l %0 = %r1, %r2";
+    return "%,unpack4.h %0 = %r1, %r2";
   else
     return "%,unpack4.h %0 = %r2, %r1";
 }
@@ -1064,10 +1084,14 @@  (define_expand "vec_extract_evenv2si"
   [(match_operand:V2SI 0 "gr_register_operand" "")
    (match_operand:V2SI 1 "gr_register_operand" "")
    (match_operand:V2SI 2 "gr_register_operand" "")]
-  "!TARGET_BIG_ENDIAN"
+  ""
 {
-  emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
-					 operands[2]));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
+					   operands[2]));
+  else
+    emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
+					   operands[2]));
   DONE;
 })
 
@@ -1075,10 +1099,14 @@  (define_expand "vec_extract_oddv2si"
   [(match_operand:V2SI 0 "gr_register_operand" "")
    (match_operand:V2SI 1 "gr_register_operand" "")
    (match_operand:V2SI 2 "gr_register_operand" "")]
-  "!TARGET_BIG_ENDIAN"
+  ""
 {
-  emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
-					  operands[2]));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
+					    operands[2]));
+  else
+    emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
+					    operands[2]));
   DONE;
 })
 
@@ -1594,11 +1622,14 @@  (define_expand "vec_pack_trunc_v2si"
   [(match_operand:V4HI 0 "gr_register_operand" "")
    (match_operand:V2SI 1 "gr_register_operand" "")
    (match_operand:V2SI 2 "gr_register_operand" "")]
-  "!TARGET_BIG_ENDIAN"
+  ""
 {
   rtx op1 = gen_lowpart(V4HImode, operands[1]);
   rtx op2 = gen_lowpart(V4HImode, operands[2]);
-  emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2));
+  else
+    emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2));
   DONE;
 })