diff mbox

PING [ARM] [1/3] Refactor NEON builtin code

Message ID 4D135366.3050904@codesourcery.com
State New
Headers show

Commit Message

Jie Zhang Dec. 23, 2010, 1:49 p.m. UTC
http://gcc.gnu.org/ml/gcc-patches/2010-10/msg00851.html

I just updated the patch to the latest trunk.


Regards,

Comments

Jie Zhang Jan. 10, 2011, 2:55 a.m. UTC | #1
PING 2

Jie

On 12/23/2010 09:49 PM, Jie Zhang wrote:
> http://gcc.gnu.org/ml/gcc-patches/2010-10/msg00851.html
>
> I just updated the patch to the latest trunk.
>
Jie Zhang Jan. 17, 2011, 1:57 a.m. UTC | #2
PING 3

Jie

On 01/10/2011 10:55 AM, Jie Zhang wrote:
> PING 2
>
> Jie
>
> On 12/23/2010 09:49 PM, Jie Zhang wrote:
>> http://gcc.gnu.org/ml/gcc-patches/2010-10/msg00851.html
>>
>> I just updated the patch to the latest trunk.
>>
Julian Brown Jan. 19, 2011, 9:38 p.m. UTC | #3
Hi,

On Mon, 17 Jan 2011 09:57:45 +0800
Jie Zhang <jie@codesourcery.com> wrote:

> > On 12/23/2010 09:49 PM, Jie Zhang wrote:
> >> http://gcc.gnu.org/ml/gcc-patches/2010-10/msg00851.html
> >>
> >> I just updated the patch to the latest trunk.

I can't approve/disapprove this patch, but this part makes me nervous:
IIUC it means that the builtins will depend on the order of patterns in
neon.md, which seems like a potential maintenance nightmare. I think it
also means that we depend on the order in which mode iterators get
expanded, which I strongly suspect isn't specified anywhere.

Can we avoid making that linkage?

Cheers,

Julian
Jie Zhang Jan. 20, 2011, 5:55 a.m. UTC | #4
On 01/20/2011 05:38 AM, Julian Brown wrote:
> Hi,
>
> On Mon, 17 Jan 2011 09:57:45 +0800
> Jie Zhang<jie@codesourcery.com>  wrote:
>
>>> On 12/23/2010 09:49 PM, Jie Zhang wrote:
>>>> http://gcc.gnu.org/ml/gcc-patches/2010-10/msg00851.html
>>>>
>>>> I just updated the patch to the latest trunk.
>
> I can't approve/disapprove this patch, but this part makes me nervous:
> IIUC it means that the builtins will depend on the order of patterns in
> neon.md, which seems like a potential maintenance nightmare. I think it
> also means that we depend on the order in which mode iterators get
> expanded, which I strongly suspect isn't specified anywhere.
>
> Can we avoid making that linkage?
>
Thanks for your review!

I don't think this patch will make builtins depends on the order of 
patterns in neon.md or the order of mode iterators. That patch just 
flattens neon_builtin_data[].

Before this patch, one entry in neon_builtin_data[], for example,
VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) will be expanded into

{ "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
     { CODE_FOR_neon_vaddlv8qi,
       CODE_FOR_neon_vaddlv4hi,
       CODE_FOR_neon_vaddlv2si },
   3, 0 },

After this patch, it will be expanded into

{"vaddl", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddlv8qi, 0},
{"vaddl", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddlv4hi, 0},
{"vaddl", NEON_BINOP, T_V2SI, CODE_FOR_neon_vaddlv2si, 0},


Maybe you can be more specific?


Regards,
Julian Brown Jan. 21, 2011, 1:04 a.m. UTC | #5
On Thu, 20 Jan 2011 13:55:44 +0800
Jie Zhang <jie@codesourcery.com> wrote:

> On 01/20/2011 05:38 AM, Julian Brown wrote:
> >>> On 12/23/2010 09:49 PM, Jie Zhang wrote:
> >>>> http://gcc.gnu.org/ml/gcc-patches/2010-10/msg00851.html
> >>>>
> >>>> I just updated the patch to the latest trunk.

> I don't think this patch will make builtins depends on the order of 
> patterns in neon.md or the order of mode iterators. That patch just 
> flattens neon_builtin_data[].
> 
> Before this patch, one entry in neon_builtin_data[], for example,
> VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) will be expanded into
> 
> { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
>      { CODE_FOR_neon_vaddlv8qi,
>        CODE_FOR_neon_vaddlv4hi,
>        CODE_FOR_neon_vaddlv2si },
>    3, 0 },
> 
> After this patch, it will be expanded into
> 
> {"vaddl", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddlv8qi, 0},
> {"vaddl", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddlv4hi, 0},
> {"vaddl", NEON_BINOP, T_V2SI, CODE_FOR_neon_vaddlv2si, 0},

OK, thanks for clarifying -- it looks like I totally misunderstood how
the patch worked. Sorry for the noise!

Cheers,

Julian
Jie Zhang Jan. 21, 2011, 2:14 a.m. UTC | #6
On 01/21/2011 09:04 AM, Julian Brown wrote:
> On Thu, 20 Jan 2011 13:55:44 +0800
> Jie Zhang<jie@codesourcery.com>  wrote:
>
>> On 01/20/2011 05:38 AM, Julian Brown wrote:
>>>>> On 12/23/2010 09:49 PM, Jie Zhang wrote:
>>>>>> http://gcc.gnu.org/ml/gcc-patches/2010-10/msg00851.html
>>>>>>
>>>>>> I just updated the patch to the latest trunk.
>
>> I don't think this patch will make builtins depends on the order of
>> patterns in neon.md or the order of mode iterators. That patch just
>> flattens neon_builtin_data[].
>>
>> Before this patch, one entry in neon_builtin_data[], for example,
>> VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) will be expanded into
>>
>> { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
>>       { CODE_FOR_neon_vaddlv8qi,
>>         CODE_FOR_neon_vaddlv4hi,
>>         CODE_FOR_neon_vaddlv2si },
>>     3, 0 },
>>
>> After this patch, it will be expanded into
>>
>> {"vaddl", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddlv8qi, 0},
>> {"vaddl", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddlv4hi, 0},
>> {"vaddl", NEON_BINOP, T_V2SI, CODE_FOR_neon_vaddlv2si, 0},
>
> OK, thanks for clarifying -- it looks like I totally misunderstood how
> the patch worked. Sorry for the noise!
>
No problem. It's good that there was someone starting to look at my 
patches. :-)
diff mbox

Patch


	* config/arm/arm.c (neon_builtin_type_bits): Remove.
	(typedef enum neon_builtin_mode): New.
	(T_MAX): Don't define.
	(typedef enum neon_builtin_datum): Remove bits, codes[],
	num_vars and base_fcode.  Add mode, code and fcode.
	(VAR1, VAR2, VAR3, VAR4, VAR5, VAR6, VAR7, VAR8, VAR9
	VAR10): Change accordingly.
	(neon_builtin_data[]): Change accordingly
	(arm_init_neon_builtins): Change accordingly.
	(neon_builtin_compare): Remove.
	(locate_neon_builtin_icode): Remove.
	(arm_expand_neon_builtin): Change accordingly.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 2aaec8c..3f83cf0 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -18180,21 +18180,22 @@  arm_init_tls_builtins (void)
   TREE_READONLY (decl) = 1;
 }
 
-enum neon_builtin_type_bits {
-  T_V8QI  = 0x0001,
-  T_V4HI  = 0x0002,
-  T_V2SI  = 0x0004,
-  T_V2SF  = 0x0008,
-  T_DI    = 0x0010,
-  T_V16QI = 0x0020,
-  T_V8HI  = 0x0040,
-  T_V4SI  = 0x0080,
-  T_V4SF  = 0x0100,
-  T_V2DI  = 0x0200,
-  T_TI	  = 0x0400,
-  T_EI	  = 0x0800,
-  T_OI	  = 0x1000
-};
+typedef enum {
+  T_V8QI,
+  T_V4HI,
+  T_V2SI,
+  T_V2SF,
+  T_DI,
+  T_V16QI,
+  T_V8HI,
+  T_V4SI,
+  T_V4SF,
+  T_V2DI,
+  T_TI,
+  T_EI,
+  T_OI,
+  T_MAX
+} neon_builtin_mode;
 
 #define v8qi_UP  T_V8QI
 #define v4hi_UP  T_V4HI
@@ -18212,8 +18213,6 @@  enum neon_builtin_type_bits {
 
 #define UP(X) X##_UP
 
-#define T_MAX 13
-
 typedef enum {
   NEON_BINOP,
   NEON_TERNOP,
@@ -18257,49 +18256,42 @@  typedef enum {
 typedef struct {
   const char *name;
   const neon_itype itype;
-  const int bits;
-  const enum insn_code codes[T_MAX];
-  const unsigned int num_vars;
-  unsigned int base_fcode;
+  const neon_builtin_mode mode;
+  const enum insn_code code;
+  unsigned int fcode;
 } neon_builtin_datum;
 
 #define CF(N,X) CODE_FOR_neon_##N##X
 
 #define VAR1(T, N, A) \
-  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
+  {#N, NEON_##T, UP (A), CF (N, A), 0}
 #define VAR2(T, N, A, B) \
-  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
+  VAR1 (T, N, A), \
+  {#N, NEON_##T, UP (B), CF (N, B), 0}
 #define VAR3(T, N, A, B, C) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
-  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
+  VAR2 (T, N, A, B), \
+  {#N, NEON_##T, UP (C), CF (N, C), 0}
 #define VAR4(T, N, A, B, C, D) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
+  VAR3 (T, N, A, B, C), \
+  {#N, NEON_##T, UP (D), CF (N, D), 0}
 #define VAR5(T, N, A, B, C, D, E) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
+  VAR4 (T, N, A, B, C, D), \
+  {#N, NEON_##T, UP (E), CF (N, E), 0}
 #define VAR6(T, N, A, B, C, D, E, F) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
+  VAR5 (T, N, A, B, C, D, E), \
+  {#N, NEON_##T, UP (F), CF (N, F), 0}
 #define VAR7(T, N, A, B, C, D, E, F, G) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-    CF (N, G) }, 7, 0
+  VAR6 (T, N, A, B, C, D, E, F), \
+  {#N, NEON_##T, UP (G), CF (N, G), 0}
 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
-                | UP (H), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-    CF (N, G), CF (N, H) }, 8, 0
+  VAR7 (T, N, A, B, C, D, E, F, G), \
+  {#N, NEON_##T, UP (H), CF (N, H), 0}
 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
-                | UP (H) | UP (I), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
+  VAR8 (T, N, A, B, C, D, E, F, G, H), \
+  {#N, NEON_##T, UP (I), CF (N, I), 0}
 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
-                | UP (H) | UP (I) | UP (J), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
+  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
+  {#N, NEON_##T, UP (J), CF (N, J), 0}
 
 /* The mode entries in the following table correspond to the "key" type of the
    instruction variant, i.e. equivalent to that which would be specified after
@@ -18307,192 +18299,190 @@  typedef struct {
    (Signed/unsigned/polynomial types are not differentiated between though, and
    are all mapped onto the same mode for a given element size.) The modes
    listed per instruction should be the same as those defined for that
-   instruction's pattern in neon.md.
-   WARNING: Variants should be listed in the same increasing order as
-   neon_builtin_type_bits.  */
+   instruction's pattern in neon.md.  */
 
 static neon_builtin_datum neon_builtin_data[] =
 {
-  { VAR10 (BINOP, vadd,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
-  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
-  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
-  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
-  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
-  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
-  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
-  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
-  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
-  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
-  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
-  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
-  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
-  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
-  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
-  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
-  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
-  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
-  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
-  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR10 (BINOP, vsub,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
-  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
-  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
-  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR2 (BINOP, vcage, v2sf, v4sf) },
-  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
-  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
-  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
-  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
-  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
-  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
-  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
-  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
-  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
-  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
-  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
-  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+  VAR10 (BINOP, vadd,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
+  VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
+  VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
+  VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
+  VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
+  VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
+  VAR2 (TERNOP, vqdmlal, v4hi, v2si),
+  VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
+  VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
+  VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
+  VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
+  VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
+  VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
+  VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
+  VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
+  VAR2 (BINOP, vqdmull, v4hi, v2si),
+  VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
+  VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
+  VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
+  VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
+  VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR10 (BINOP, vsub,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
+  VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
+  VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
+  VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR2 (BINOP, vcage, v2sf, v4sf),
+  VAR2 (BINOP, vcagt, v2sf, v4sf),
+  VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
+  VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
+  VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
+  VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
+  VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
+  VAR2 (BINOP, vrecps, v2sf, v4sf),
+  VAR2 (BINOP, vrsqrts, v2sf, v4sf),
+  VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR2 (UNOP, vcnt, v8qi, v16qi),
+  VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
+  VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
+  VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
   /* FIXME: vget_lane supports more variants than this!  */
-  { VAR10 (GETLANE, vget_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (SETLANE, vset_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR10 (DUP, vdup_n,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (DUPLANE, vdup_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
-  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
-  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
-  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
-  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
-  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
-  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
-  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
-  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
-  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
-  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
-  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
-  { VAR10 (BINOP, vext,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
-  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
-  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
-  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
-  { VAR10 (SELECT, vbsl,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR1 (VTBL, vtbl1, v8qi) },
-  { VAR1 (VTBL, vtbl2, v8qi) },
-  { VAR1 (VTBL, vtbl3, v8qi) },
-  { VAR1 (VTBL, vtbl4, v8qi) },
-  { VAR1 (VTBX, vtbx1, v8qi) },
-  { VAR1 (VTBX, vtbx2, v8qi) },
-  { VAR1 (VTBX, vtbx3, v8qi) },
-  { VAR1 (VTBX, vtbx4, v8qi) },
-  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOAD1, vld1,
-           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOAD1LANE, vld1_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOAD1, vld1_dup,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (STORE1, vst1,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (STORE1LANE, vst1_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR9 (LOADSTRUCT,
-	  vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (LOADSTRUCTLANE, vld2_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR9 (STORESTRUCT, vst2,
-	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (STORESTRUCTLANE, vst2_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR9 (LOADSTRUCT,
-	  vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (LOADSTRUCTLANE, vld3_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR9 (STORESTRUCT, vst3,
-	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (STORESTRUCTLANE, vst3_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR9 (LOADSTRUCT, vld4,
-	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (LOADSTRUCTLANE, vld4_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR9 (STORESTRUCT, vst4,
-	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (STORESTRUCTLANE, vst4_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR10 (LOGICBINOP, vand,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOGICBINOP, vorr,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (BINOP, veor,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOGICBINOP, vbic,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOGICBINOP, vorn,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
+  VAR10 (GETLANE, vget_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (SETLANE, vset_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
+  VAR10 (DUP, vdup_n,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (DUPLANE, vdup_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
+  VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
+  VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
+  VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
+  VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
+  VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
+  VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
+  VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
+  VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
+  VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
+  VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
+  VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
+  VAR10 (BINOP, vext,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
+  VAR2 (UNOP, vrev16, v8qi, v16qi),
+  VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
+  VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
+  VAR10 (SELECT, vbsl,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR1 (VTBL, vtbl1, v8qi),
+  VAR1 (VTBL, vtbl2, v8qi),
+  VAR1 (VTBL, vtbl3, v8qi),
+  VAR1 (VTBL, vtbl4, v8qi),
+  VAR1 (VTBX, vtbx1, v8qi),
+  VAR1 (VTBX, vtbx2, v8qi),
+  VAR1 (VTBX, vtbx3, v8qi),
+  VAR1 (VTBX, vtbx4, v8qi),
+  VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOAD1, vld1,
+         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOAD1LANE, vld1_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOAD1, vld1_dup,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (STORE1, vst1,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (STORE1LANE, vst1_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR9 (LOADSTRUCT,
+	vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (LOADSTRUCTLANE, vld2_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
+  VAR9 (STORESTRUCT, vst2,
+	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (STORESTRUCTLANE, vst2_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR9 (LOADSTRUCT,
+	vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (LOADSTRUCTLANE, vld3_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
+  VAR9 (STORESTRUCT, vst3,
+	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (STORESTRUCTLANE, vst3_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR9 (LOADSTRUCT, vld4,
+	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (LOADSTRUCTLANE, vld4_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
+  VAR9 (STORESTRUCT, vst4,
+	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (STORESTRUCTLANE, vst4_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR10 (LOGICBINOP, vand,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOGICBINOP, vorr,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (BINOP, veor,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOGICBINOP, vbic,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOGICBINOP, vorn,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
 };
 
 #undef CF
@@ -18761,25 +18751,16 @@  arm_init_neon_builtins (void)
   for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
     {
       neon_builtin_datum *d = &neon_builtin_data[i];
-      unsigned int j, codeidx = 0;
 
-      d->base_fcode = fcode;
-
-      for (j = 0; j < T_MAX; j++)
-	{
 	  const char* const modenames[] = {
 	    "v8qi", "v4hi", "v2si", "v2sf", "di",
 	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
 	  };
 	  char namebuf[60];
 	  tree ftype = NULL;
-	  enum insn_code icode;
 	  int is_load = 0, is_store = 0;
 
-          if ((d->bits & (1 << j)) == 0)
-            continue;
-
-          icode = d->codes[codeidx++];
+	  d->fcode = fcode;
 
           switch (d->itype)
             {
@@ -18830,7 +18811,7 @@  arm_init_neon_builtins (void)
 		/* Build a function type directly from the insn_data for this
 		   builtin.  The build_function_type() function takes care of
 		   removing duplicates for us.  */
-		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
+		for (k = insn_data[d->code].n_operands - 1; k >= 0; k--)
 		  {
 		    tree eltype;
 
@@ -18840,9 +18821,9 @@  arm_init_neon_builtins (void)
 			   (a SImode pointer) in the operand 1 position.  We
 			   want a const pointer to the element type in that
 			   position.  */
-		        gcc_assert (insn_data[icode].operand[k].mode == SImode);
+		        gcc_assert (insn_data[d->code].operand[k].mode == SImode);
 
-			switch (1 << j)
+			switch (d->mode)
 			  {
 			  case T_V8QI:
 			  case T_V16QI:
@@ -18878,9 +18859,9 @@  arm_init_neon_builtins (void)
 			   the memory location to store to (a SImode pointer).
 			   Use a pointer to the element type of the store in
 			   that position.  */
-			gcc_assert (insn_data[icode].operand[k].mode == SImode);
+			gcc_assert (insn_data[d->code].operand[k].mode == SImode);
 
-			switch (1 << j)
+			switch (d->mode)
 			  {
 			  case T_V8QI:
 			  case T_V16QI:
@@ -18912,7 +18893,7 @@  arm_init_neon_builtins (void)
 		      }
 		    else
 		      {
-			switch (insn_data[icode].operand[k].mode)
+			switch (insn_data[d->code].operand[k].mode)
 	        	  {
 			  case VOIDmode: eltype = void_type_node; break;
 			  /* Scalars.  */
@@ -18953,7 +18934,7 @@  arm_init_neon_builtins (void)
 
 	    case NEON_RESULTPAIR:
               {
-                switch (insn_data[icode].operand[1].mode)
+                switch (insn_data[d->code].operand[1].mode)
                   {
 		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
                   case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
@@ -18974,8 +18955,8 @@  arm_init_neon_builtins (void)
               {
                 /* We iterate over 5 doubleword types, then 5 quadword
                    types.  */
-                int rhs = j % 5;
-                switch (insn_data[icode].operand[0].mode)
+                int rhs = d->mode % 5;
+                switch (insn_data[d->code].operand[0].mode)
                   {
                   case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
                   case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
@@ -18998,11 +18979,10 @@  arm_init_neon_builtins (void)
 
           gcc_assert (ftype != NULL);
 
-          sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
+          sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
 
           add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
 				NULL_TREE);
-        }
     }
 }
 
@@ -19181,44 +19161,6 @@  arm_expand_unop_builtin (enum insn_code icode,
   return target;
 }
 
-static int
-neon_builtin_compare (const void *a, const void *b)
-{
-  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
-  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
-  unsigned int soughtcode = key->base_fcode;
-
-  if (soughtcode >= memb->base_fcode
-      && soughtcode < memb->base_fcode + memb->num_vars)
-    return 0;
-  else if (soughtcode < memb->base_fcode)
-    return -1;
-  else
-    return 1;
-}
-
-static enum insn_code
-locate_neon_builtin_icode (int fcode, neon_itype *itype)
-{
-  neon_builtin_datum key
-    = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
-  neon_builtin_datum *found;
-  int idx;
-
-  key.base_fcode = fcode;
-  found = (neon_builtin_datum *)
-    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
-		   sizeof (neon_builtin_data[0]), neon_builtin_compare);
-  gcc_assert (found);
-  idx = fcode - (int) found->base_fcode;
-  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
-
-  if (itype)
-    *itype = found->itype;
-
-  return found->codes[idx];
-}
-
 typedef enum {
   NEON_ARG_COPY_TO_REG,
   NEON_ARG_CONSTANT,
@@ -19353,8 +19295,9 @@  arm_expand_neon_args (rtx target, int icode, int have_retval,
 static rtx
 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
 {
-  neon_itype itype;
-  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
+  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
+  neon_itype itype = d->itype;
+  enum insn_code icode = d->code;
 
   switch (itype)
     {