diff mbox

Wire-up missing ARM iwmmxt intrinsics (bugs 35294, 36798, 36966)

Message ID CAEdQ38ECXJahf8sp8b7Yq5NgOQBh9MQ7s3LxfSmZJhJva-UfDg@mail.gmail.com
State New
Headers show

Commit Message

Matt Turner Aug. 19, 2011, 4:13 a.m. UTC
Hi,

Attached is a patch based on gcc-4.6.1 that wires-up missing ARM
iwmmxt intrinsics. Without it, gcc is completely useless when it comes
to using a large portion of the intrinsics documented on this page:
http://gcc.gnu.org/onlinedocs/gcc/ARM-iWMMXt-Built_002din-Functions.html

The patch is based on the work of <serowk@yandex.ru> in bug 35294.

I do not know why the check_opsmode hack is necessary. Perhaps serowk
can help with that. I also do not know if this wires up all the
missing intrinsics, but it is sufficient to build a working
iwmmxt-optimized pixman:
http://cgit.freedesktop.org/~mattst88/pixman/log/?h=iwmmxt-optimizations

I have seen much more extensive patches from Xinyu Qi, but I do not
suppose that they will be available in gcc 4.6.

Thanks,
Matt Turner

Comments

Matt Turner Aug. 19, 2011, 4:18 a.m. UTC | #1
On Fri, Aug 19, 2011 at 12:13 AM, Matt Turner <mattst88@gmail.com> wrote:
> Hi,
>
> Attached is a patch based on gcc-4.6.1 that wires-up missing ARM
> iwmmxt intrinsics. Without it, gcc is completely useless when it comes
> to using a large portion of the intrinsics documented on this page:
> http://gcc.gnu.org/onlinedocs/gcc/ARM-iWMMXt-Built_002din-Functions.html
>
> The patch is based on the work of <serowk@yandex.ru> in bug 35294.
>
> I do not know why the check_opsmode hack is necessary. Perhaps serowk
> can help with that. I also do not know if this wires up all the
> missing intrinsics, but it is sufficient to build a working
> iwmmxt-optimized pixman:
> http://cgit.freedesktop.org/~mattst88/pixman/log/?h=iwmmxt-optimizations
>
> I have seen much more extensive patches from Xinyu Qi, but I do not
> suppose that they will be available in gcc 4.6.
>
> Thanks,
> Matt Turner

<Sent to correct @marvell address>
Xinyu Qi Aug. 19, 2011, 6:09 a.m. UTC | #2
At 2011-08-19 12:18:10, "Matt Turner" <mattst88@gmail.com> wrote:
> Subject: Re:
> 
> On Fri, Aug 19, 2011 at 12:13 AM, Matt Turner <mattst88@gmail.com> wrote:
> > Hi,
> >
> > Attached is a patch based on gcc-4.6.1 that wires-up missing ARM
> > iwmmxt intrinsics. Without it, gcc is completely useless when it comes
> > to using a large portion of the intrinsics documented on this page:
> > http://gcc.gnu.org/onlinedocs/gcc/ARM-iWMMXt-Built_002din-Functions.html
> >
> > The patch is based on the work of <serowk@yandex.ru> in bug 35294.
> >
> > I do not know why the check_opsmode hack is necessary.

Hi,

I think check_opsmode in this patch is used to solve something that could be solved by
-  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
+  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
+	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
in my patch.
For example, in the shift intrinsics, the shift count could be either a variable, or a CONST_INT which has VOIDmode.

> > I also do not know if this wires up all the missing intrinsics.

I'm afraid not. Trunk is missing all of the iWMMXt2 intrinsics, and bugs can be found everywhere because the iWMMXt code has gone unmaintained for a long time.

> > I have seen much more extensive patches from Xinyu Qi, but I do not
> > suppose that they will be available in gcc 4.6.

The patches I submitted have some conflicts with the 4.6 code base.

Thanks,
Xinyu

> >
> > Thanks,
> > Matt Turner
> 
> <Sent to correct @marvell address>
diff mbox

Patch

--- arm.c.orig	2011-08-19 00:03:06.163195724 -0400
+++ arm.c	2011-08-19 00:03:10.872195933 -0400
@@ -157,7 +157,7 @@ 
 static void arm_init_builtins (void);
 static void arm_init_iwmmxt_builtins (void);
 static rtx safe_vector_operand (rtx, enum machine_mode);
-static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
+static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx, bool);
 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
 static void emit_constant_insn (rtx cond, rtx pattern);
@@ -19197,7 +19197,7 @@ 
 
 static rtx
 arm_expand_binop_builtin (enum insn_code icode,
-			  tree exp, rtx target)
+			  tree exp, rtx target, bool check_opsmode)
 {
   rtx pat;
   tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -19218,7 +19218,8 @@ 
       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
     target = gen_reg_rtx (tmode);
 
-  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
+  if (check_opsmode)
+    gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
 
   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
     op0 = copy_to_mode_reg (mode0, op0);
@@ -19760,13 +19761,13 @@ 
       return target;
 
     case ARM_BUILTIN_WSADB:
-      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
+      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target, true);
     case ARM_BUILTIN_WSADH:
-      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
+      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target, true);
     case ARM_BUILTIN_WSADBZ:
-      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
+      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target, true);
     case ARM_BUILTIN_WSADHZ:
-      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
+      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target, true);
 
       /* Several three-argument builtins.  */
     case ARM_BUILTIN_WMACS:
@@ -19814,6 +19815,65 @@ 
       emit_insn (pat);
       return target;
 
+    case ARM_BUILTIN_WSLLH:
+    case ARM_BUILTIN_WSLLHI:
+    case ARM_BUILTIN_WSLLW:
+    case ARM_BUILTIN_WSLLWI:
+    case ARM_BUILTIN_WSLLD:
+    case ARM_BUILTIN_WSLLDI:
+    case ARM_BUILTIN_WSRAH:
+    case ARM_BUILTIN_WSRAHI:
+    case ARM_BUILTIN_WSRAW:
+    case ARM_BUILTIN_WSRAWI:
+    case ARM_BUILTIN_WSRAD:
+    case ARM_BUILTIN_WSRADI:
+    case ARM_BUILTIN_WSRLH:
+    case ARM_BUILTIN_WSRLHI:
+    case ARM_BUILTIN_WSRLW:
+    case ARM_BUILTIN_WSRLWI:
+    case ARM_BUILTIN_WSRLD:
+    case ARM_BUILTIN_WSRLDI:
+    case ARM_BUILTIN_WRORH:
+    case ARM_BUILTIN_WRORHI:
+    case ARM_BUILTIN_WRORW:
+    case ARM_BUILTIN_WRORWI:
+    case ARM_BUILTIN_WRORD:
+    case ARM_BUILTIN_WRORDI:
+    case ARM_BUILTIN_WAND:
+    case ARM_BUILTIN_WANDN:
+    case ARM_BUILTIN_WOR:
+    case ARM_BUILTIN_WXOR:
+      icode = (fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
+	       : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
+	       : fcode == ARM_BUILTIN_WSLLW  ? CODE_FOR_ashlv2si3_di
+	       : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
+	       : fcode == ARM_BUILTIN_WSLLD  ? CODE_FOR_ashldi3_di
+	       : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
+	       : fcode == ARM_BUILTIN_WSRAH  ? CODE_FOR_ashrv4hi3_di
+	       : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
+	       : fcode == ARM_BUILTIN_WSRAW  ? CODE_FOR_ashrv2si3_di
+	       : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
+	       : fcode == ARM_BUILTIN_WSRAD  ? CODE_FOR_ashrdi3_di
+	       : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
+	       : fcode == ARM_BUILTIN_WSRLH  ? CODE_FOR_lshrv4hi3_di
+	       : fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
+	       : fcode == ARM_BUILTIN_WSRLW  ? CODE_FOR_lshrv2si3_di
+	       : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
+	       : fcode == ARM_BUILTIN_WSRLD  ? CODE_FOR_lshrdi3_di
+	       : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
+	       : fcode == ARM_BUILTIN_WRORH  ? CODE_FOR_rorv4hi3_di
+	       : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
+	       : fcode == ARM_BUILTIN_WRORW  ? CODE_FOR_rorv2si3_di
+	       : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
+	       : fcode == ARM_BUILTIN_WRORD  ? CODE_FOR_rordi3_di
+	       : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
+	       : fcode == ARM_BUILTIN_WAND   ? CODE_FOR_iwmmxt_anddi3
+	       : fcode == ARM_BUILTIN_WANDN  ? CODE_FOR_iwmmxt_nanddi3
+	       : fcode == ARM_BUILTIN_WOR    ? CODE_FOR_iwmmxt_iordi3
+	       : fcode == ARM_BUILTIN_WXOR   ? CODE_FOR_iwmmxt_xordi3
+	       : CODE_FOR_rordi3);
+      return arm_expand_binop_builtin (icode, exp, target, false);
+
     case ARM_BUILTIN_WZERO:
       target = gen_reg_rtx (DImode);
       emit_insn (gen_iwmmxt_clrdi (target));
@@ -19828,7 +19888,7 @@ 
 
   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
     if (d->code == (const enum arm_builtins) fcode)
-      return arm_expand_binop_builtin (d->icode, exp, target);
+      return arm_expand_binop_builtin (d->icode, exp, target, true);
 
   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
     if (d->code == (const enum arm_builtins) fcode)