Patchwork [ARM,4/6] Improve Neon intrinsics testsuite.

login
register
mail settings
Submitter Ramana Radhakrishnan
Date July 30, 2012, 11:51 a.m.
Message ID <CACUk7=XnRV55tPE89jaDg+9fKQdM9hpQ0BTSUCccUoTVOxoDtw@mail.gmail.com>
Download mbox | patch
Permalink /patch/174004/
State New
Headers show

Comments

Ramana Radhakrishnan - July 30, 2012, 11:51 a.m.
On 30 July 2012 12:41, Ramana Radhakrishnan
<ramana.radhakrishnan@linaro.org> wrote:
> Hi,
>      I've been working on a small project to improve neon intrinsic
> and  I kept getting bothered by random failures in gcc.target/arm/neon
> and I got sufficiently irritated that I decided to clean that bit up
> and then found myself in a maze of rabbit holes. I've always been
> somewhat bothered by the Neon intrinsics tests and took the
> opportunity to actually do some proper cleanup work in that space.
> It's not as good as having proper execute tests but this is certainly
> better than the tests that are in place today.
>
> Patch 1 fixes up the vaba and vabal patterns to use a canonical RTL
> form with the first operand to the plus being the more complex one.
> Patch 2 is a bug fix that fixes up the splitters so that they take
> into account the right register for the right mode . For instance a
> register not fit for a TImode value shouldn't be put in one even if
> the larger mode allows a different register . This is possible for
> OImode values or indeed HFA style values being passed around as
> parameters and is potentially an issue for folks building hard-float
> systems with neon and using some of the large structures.
> Patch 3 fixes up the costs so that lower-subreg doesn't go bonkers
> with splitting large values before it is visible . More in the actual
> patch description. It is possibly the most contentious of the lot and
> could do with some review. I think there is still quite a lot more to
> be done around costs for some of the vector operations.
> Patch 4 - Improves the testsuite for the Neon intrinsics.  There are
> still testisms for a number of these but it boils down to the regexps
> needing to be corrected for a number of these tests. I thought before
> I spend more time on ML wrangling , I should get this out for some
> review. Again a contentious one and probably could do with some
> discussion.

   This patch converts the testsuite generator to actually produce
something more sensible than the current set of tests. It changes
these to generate the following form for a test instead of the previous
set of tests.

It's careful to use the hard-fp variant so that we actually
produce an instruction (atleast a move of the appropriate form) and
uses a dummy floating point parameter to ensure this. This ensures that
most tests are alright. This does increase test times quite a bit
and I'm considering a follow-up to the build system that tries to do
some of these tests in parallel.

It's been useful and instructive so far and has found a few issues
in the compiler and probably been the twistiest passage in this maze
of twisty little passages.

There are still failures in these tests and some of them are down to testisms
and some them down to real issues which I'm still looking at. I'm only attaching
the non-autogenerated parts with the patch.


Thoughts ?

Ramana

2012-07-27  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>

    	* config/arm/neon-testgen.ml: Update copyright years.
    	(emit_prologue): Do not restrict runs to O0. Remove function start.
    	(emit_automatics): Rename to emit_test_prologue.
    	(emit_test_prologue test_name const_valuator): Additional parameters.
    	(emit_test_prologue print_arg print_args): New routines.
    	(emit_test_prologue): Update comment. Print test_name. Convert
    	automatics to function parameters and use above.
    	(emit_call): Handle printing of return value and close off
    	test function.
    	(emit_epilogue): Delete printing of end of function.
    	(test_intrinsic): Adjust calls to changed functions.

    	testsuite/
    	* gcc.target/arm/neon/neon.exp: Update copyright year and
    	change into a torture test.
    	* gcc.target/arm/neon/*.c: Regenerate.

    /* Test the `vaddf32' ARM Neon intrinsic.  */
    /* This file was autogenerated by neon-testgen.  */

    /* { dg-do assemble } */
    /* { dg-require-effective-target arm_neon_ok } */
    /* { dg-options "-save-temps" } */
    /* { dg-add-options arm_neon } */

    float32x2_t __attribute__ ((pcs ("aapcs-vfp")))
    test_vaddf32 (float dummy_param, float32x2_t  arg0_float32x2_t,
    	float32x2_t  arg1_float32x2_t)
    {
      float32x2_t out_float32x2_t;

      out_float32x2_t = vadd_f32 (arg0_float32x2_t, arg1_float32x2_t);
      return out_float32x2_t;
    }

    /* { dg-final { scan-assembler "vadd\.f32\[ 	\]+\[dD\]\[0-9\]+,
\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
    /* { dg-final { cleanup-saved-temps } } */

Patch

diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml
index a69a539..7da3489 100644
--- a/gcc/config/arm/neon-testgen.ml
+++ b/gcc/config/arm/neon-testgen.ml
@@ -1,5 +1,6 @@ 
 (* Auto-generate ARM Neon intrinsics tests.
-   Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
+   Foundation, Inc.
    Contributed by CodeSourcery.

    This file is part of GCC.
@@ -51,30 +52,38 @@  let emit_prologue chan test_name =
   Printf.fprintf chan "/* This file was autogenerated by
neon-testgen.  */\n\n";
   Printf.fprintf chan "/* { dg-do assemble } */\n";
   Printf.fprintf chan "/* { dg-require-effective-target arm_neon_ok } */\n";
-  Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n";
+  Printf.fprintf chan "/* { dg-options \"-save-temps\" } */\n";
   Printf.fprintf chan "/* { dg-add-options arm_neon } */\n";
-  Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n";
-  Printf.fprintf chan "void test_%s (void)\n{\n" test_name
+  Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n"

-(* Emit declarations of local variables that are going to be passed
+
+(* Emit function with parameters and local variables that are going
to be passed
    to an intrinsic, together with one to take a returned value if needed.  *)
-let emit_automatics chan c_types features =
-  let emit () =
-    ignore (
-      List.fold_left (fun arg_number -> fun (flags, ty) ->
-                        let pointer_bit =
-                          if List.mem Pointer flags then "*" else ""
-                        in
-                          (* Const arguments to builtins are directly
-                             written in as constants.  *)
-                          if not (List.mem Const flags) then
-                            Printf.fprintf chan "  %s %sarg%d_%s;\n"
-                                           ty pointer_bit arg_number ty;
-                        arg_number + 1)
-                     0 (List.tl c_types))
+let emit_test_prologue chan c_types features test_name const_valuator =
+  let print_arg chan arg_number (flags, ty) =
+    (* If the argument is of const type, then directly write in the
+       constant now.  *)
+    begin
+      if arg_number <> 0 then
+	Printf.fprintf chan ",\n\t";
+      let pointer_bit = if List.mem Pointer flags then "*" else ""
+      in
+      Printf.fprintf chan "%s %s arg%d_%s" ty pointer_bit arg_number ty
+    end	
+  in
+  let rec print_args arg_number tys =
+    match tys with
+      [] -> ()
+    | [ty] -> print_arg chan arg_number ty
+    | ty::tys ->
+      print_arg chan arg_number ty;
+      print_args (arg_number + 1) tys
   in
     match c_types with
       (_, return_ty) :: tys ->
+	Printf.fprintf chan "%s __attribute__ ((pcs
(\"aapcs-vfp\")))\ntest_%s (float dummy_param, " return_ty test_name;
+	print_args 0 (List.tl c_types);
+	Printf.fprintf chan ")\n{\n";
         if return_ty <> "void" then begin
           (* The intrinsic returns a value.  We need to do explict register
              allocation for vget_low tests or they fail because of copy
@@ -86,11 +95,8 @@  let emit_automatics chan c_types features =
               Printf.fprintf chan "  register %s out_%s asm (\"r0\");\n"
                              return_ty return_ty
             else
-              Printf.fprintf chan "  %s out_%s;\n" return_ty return_ty);
-	   emit ())
-        end else
-          (* The intrinsic does not return a value.  *)
-          emit ()
+              Printf.fprintf chan "  %s out_%s;\n" return_ty return_ty);)
+        end
     | _ -> assert false

 (* Emit code to call an intrinsic.  *)
@@ -107,10 +113,10 @@  let emit_call chan const_valuator c_types name elt_ty =
       match const_valuator with
         None ->
           if List.mem Pointer flags then
-            Printf.fprintf chan "0"
+            Printf.fprintf chan "arg%d_%s " arg_number ty
           else
-            Printf.fprintf chan "1"
-      | Some f -> Printf.fprintf chan "%s" (string_of_int (f arg_number))
+            Printf.fprintf chan "1 "
+      | Some f -> Printf.fprintf chan "%s " (string_of_int (f arg_number))
     else
       Printf.fprintf chan "arg%d_%s" arg_number ty
   in
@@ -124,7 +130,9 @@  let emit_call chan const_valuator c_types name elt_ty =
       print_args (arg_number + 1) tys
   in
     print_args 0 (List.tl c_types);
-    Printf.fprintf chan ");\n"
+    Printf.fprintf chan ");\n" ;
+    (if snd (List.hd c_types) <> "void" then
+      Printf.fprintf chan "  return out_%s;\n" (snd (List.hd c_types)))

 (* Emit epilogue code to a test source file.  *)
 let emit_epilogue chan features regexps =
@@ -265,12 +273,12 @@  let test_intrinsic dir opcode features shape
name munge elt_ty =
   in
     (* Emit file and function prologues.  *)
     emit_prologue chan test_name;
-    (* Emit local variable declarations.  *)
-    emit_automatics chan c_types features;
+    (* Emit prologue for the test function.  *)
+    emit_test_prologue chan c_types features test_name const_valuator;
     Printf.fprintf chan "\n";
     (* Emit the call to the intrinsic.  *)
     emit_call chan const_valuator c_types name elt_ty;
-    (* Emit the function epilogue and the DejaGNU scan-assembler
directives.  *)
+    (* Emit the DejaGNU scan-assembler directives.  *)
     emit_epilogue chan features regexps;
     (* Close the test file.  *)
     close_out chan
diff --git a/gcc/testsuite/gcc.target/arm/neon/neon.exp
b/gcc/testsuite/gcc.target/arm/neon/neon.exp
index fcc4333..3455f35 100644
--- a/gcc/testsuite/gcc.target/arm/neon/neon.exp
+++ b/gcc/testsuite/gcc.target/arm/neon/neon.exp
@@ -1,4 +1,4 @@ 
-# Copyright (C) 1997, 2004, 2006, 2007 Free Software Foundation, Inc.
+# Copyright (C) 1997, 2004, 2006, 2007, 2012 Free Software Foundation, Inc.

 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -21,15 +21,8 @@  if ![istarget arm*-*-*] then {
   return
 }

-# Load support procs.
 load_lib gcc-dg.exp
-
-# Initialize `dg'.
 dg-init
-
 # Main loop.
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
-	"" ""
-
-# All done.
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] ""
 dg-finish