Patchwork: SLP vectorize calls (take 3)

login
register
mail settings
Submitter Jakub Jelinek
Date Nov. 8, 2011, 9:32 a.m.
Message ID <20111108093258.GD17997@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/124303/
State New
Headers show

Comments

Jakub Jelinek - Nov. 8, 2011, 9:32 a.m.
On Tue, Nov 08, 2011 at 10:03:23AM +0200, Ira Rosen wrote:
> The second option would be nicer.
...

Thanks.  Here is an updated patch, will bootstrap/regtest it now.
Ok for trunk if it passes?

2011-11-08  Jakub Jelinek  <jakub@redhat.com>

	* tree-vect-stmts.c (vectorizable_call): Add SLP_NODE argument.
	Handle vectorization of SLP calls.
	(vect_analyze_stmt): Adjust caller, add call to it for SLP too.
	(vect_transform_stmt): Adjust vectorizable_call caller, remove
	assertion.
	* tree-vect-slp.c (vect_get_and_check_slp_defs): For calls start
	with op_idx 3.
	(vect_build_slp_tree): Allow CALL_EXPR.

	* lib/target-supports.exp (check_effective_target_vect_call_sqrtf,
	check_effective_target_vect_call_copysignf,
	check_effective_target_vect_call_lrint): New procedures.
	* gcc.dg/vect/vect.exp: Run fast-math-bb-slp* tests using
	$VECT_SLP_CFLAGS with -ffast-math.
	* gcc.dg/vect/fast-math-vect-call-1.c: New test.
	* gcc.dg/vect/fast-math-vect-call-2.c: New test.
	* gcc.dg/vect/fast-math-bb-slp-call-1.c: New test.
	* gcc.dg/vect/fast-math-bb-slp-call-2.c: New test.



	Jakub
Ira Rosen - Nov. 8, 2011, 9:47 a.m.
On 8 November 2011 11:32, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Nov 08, 2011 at 10:03:23AM +0200, Ira Rosen wrote:
>> The second option would be nicer.
> ...
>
> Thanks.  Here is an updated patch, will bootstrap/regtest it now.
> Ok for trunk if it passes?

Yes.

Thanks,
Ira

>
> 2011-11-08  Jakub Jelinek  <jakub@redhat.com>
>
>        * tree-vect-stmts.c (vectorizable_call): Add SLP_NODE argument.
>        Handle vectorization of SLP calls.
>        (vect_analyze_stmt): Adjust caller, add call to it for SLP too.
>        (vect_transform_stmt): Adjust vectorizable_call caller, remove
>        assertion.
>        * tree-vect-slp.c (vect_get_and_check_slp_defs): For calls start
>        with op_idx 3.
>        (vect_build_slp_tree): Allow CALL_EXPR.
>
>        * lib/target-supports.exp (check_effective_target_vect_call_sqrtf,
>        check_effective_target_vect_call_copysignf,
>        check_effective_target_vect_call_lrint): New procedures.
>        * gcc.dg/vect/vect.exp: Run fast-math-bb-slp* tests using
>        $VECT_SLP_CFLAGS with -ffast-math.
>        * gcc.dg/vect/fast-math-vect-call-1.c: New test.
>        * gcc.dg/vect/fast-math-vect-call-2.c: New test.
>        * gcc.dg/vect/fast-math-bb-slp-call-1.c: New test.
>        * gcc.dg/vect/fast-math-bb-slp-call-2.c: New test.
>
> --- gcc/tree-vect-slp.c.jj      2011-11-07 20:32:03.000000000 +0100
> +++ gcc/tree-vect-slp.c 2011-11-08 09:28:12.000000000 +0100
> @@ -202,7 +202,10 @@ vect_get_and_check_slp_defs (loop_vec_in
>     loop = LOOP_VINFO_LOOP (loop_vinfo);
>
>   if (is_gimple_call (stmt))
> -    number_of_oprnds = gimple_call_num_args (stmt);
> +    {
> +      number_of_oprnds = gimple_call_num_args (stmt);
> +      op_idx = 3;
> +    }
>   else if (is_gimple_assign (stmt))
>     {
>       number_of_oprnds = gimple_num_ops (stmt) - 1;
> @@ -558,7 +561,25 @@ vect_build_slp_tree (loop_vec_info loop_
>       ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
>
>       if (is_gimple_call (stmt))
> -       rhs_code = CALL_EXPR;
> +       {
> +         rhs_code = CALL_EXPR;
> +         if (gimple_call_internal_p (stmt)
> +             || gimple_call_tail_p (stmt)
> +             || gimple_call_noreturn_p (stmt)
> +             || !gimple_call_nothrow_p (stmt)
> +             || gimple_call_chain (stmt))
> +           {
> +             if (vect_print_dump_info (REPORT_SLP))
> +               {
> +                 fprintf (vect_dump,
> +                          "Build SLP failed: unsupported call type ");
> +                 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> +               }
> +
> +             vect_free_oprnd_info (&oprnds_info, true);
> +             return false;
> +           }
> +       }
>       else
>        rhs_code = gimple_assign_rhs_code (stmt);
>
> @@ -653,6 +674,27 @@ vect_build_slp_tree (loop_vec_info loop_
>              vect_free_oprnd_info (&oprnds_info, true);
>              return false;
>            }
> +
> +         if (rhs_code == CALL_EXPR)
> +           {
> +             gimple first_stmt = VEC_index (gimple, stmts, 0);
> +             if (gimple_call_num_args (stmt) != nops
> +                 || !operand_equal_p (gimple_call_fn (first_stmt),
> +                                      gimple_call_fn (stmt), 0)
> +                 || gimple_call_fntype (first_stmt)
> +                    != gimple_call_fntype (stmt))
> +               {
> +                 if (vect_print_dump_info (REPORT_SLP))
> +                   {
> +                     fprintf (vect_dump,
> +                              "Build SLP failed: different calls in ");
> +                     print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> +                   }
> +
> +                 vect_free_oprnd_info (&oprnds_info, true);
> +                 return false;
> +               }
> +           }
>        }
>
>       /* Strided store or load.  */
> @@ -786,7 +828,8 @@ vect_build_slp_tree (loop_vec_info loop_
>          /* Not memory operation.  */
>          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
>              && TREE_CODE_CLASS (rhs_code) != tcc_unary
> -              && rhs_code != COND_EXPR)
> +             && rhs_code != COND_EXPR
> +             && rhs_code != CALL_EXPR)
>            {
>              if (vect_print_dump_info (REPORT_SLP))
>                {
> --- gcc/tree-vect-stmts.c.jj    2011-11-07 20:32:09.000000000 +0100
> +++ gcc/tree-vect-stmts.c       2011-11-08 09:28:55.000000000 +0100
> @@ -1521,7 +1521,8 @@ vectorizable_function (gimple call, tree
>    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
>
>  static bool
> -vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
> +vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> +                  slp_tree slp_node)
>  {
>   tree vec_dest;
>   tree scalar_dest;
> @@ -1532,6 +1533,7 @@ vectorizable_call (gimple stmt, gimple_s
>   int nunits_in;
>   int nunits_out;
>   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> +  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
>   tree fndecl, new_temp, def, rhs_type;
>   gimple def_stmt;
>   enum vect_def_type dt[3]
> @@ -1543,19 +1545,12 @@ vectorizable_call (gimple stmt, gimple_s
>   size_t i, nargs;
>   tree lhs;
>
> -  /* FORNOW: unsupported in basic block SLP.  */
> -  gcc_assert (loop_vinfo);
> -
> -  if (!STMT_VINFO_RELEVANT_P (stmt_info))
> +  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>     return false;
>
>   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
>     return false;
>
> -  /* FORNOW: SLP not supported.  */
> -  if (STMT_SLP_TYPE (stmt_info))
> -    return false;
> -
>   /* Is STMT a vectorizable call?   */
>   if (!is_gimple_call (stmt))
>     return false;
> @@ -1596,7 +1591,7 @@ vectorizable_call (gimple stmt, gimple_s
>       if (!rhs_type)
>        rhs_type = TREE_TYPE (op);
>
> -      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
> +      if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
>                                 &def_stmt, &def, &dt[i], &opvectype))
>        {
>          if (vect_print_dump_info (REPORT_DETAILS))
> @@ -1658,7 +1653,9 @@ vectorizable_call (gimple stmt, gimple_s
>
>   gcc_assert (!gimple_vuse (stmt));
>
> -  if (modifier == NARROW)
> +  if (slp_node || PURE_SLP_STMT (stmt_info))
> +    ncopies = 1;
> +  else if (modifier == NARROW)
>     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
>   else
>     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
> @@ -1697,6 +1694,50 @@ vectorizable_call (gimple stmt, gimple_s
>          else
>            VEC_truncate (tree, vargs, 0);
>
> +         if (slp_node)
> +           {
> +             VEC (slp_void_p, heap) *vec_defs
> +               = VEC_alloc (slp_void_p, heap, nargs);
> +             VEC (tree, heap) *vec_oprnds0;
> +
> +             for (i = 0; i < nargs; i++)
> +               VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
> +             vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
> +             vec_oprnds0
> +               = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
> +
> +             /* Arguments are ready.  Create the new vector stmt.  */
> +             FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
> +               {
> +                 size_t k;
> +                 for (k = 0; k < nargs; k++)
> +                   {
> +                     VEC (tree, heap) *vec_oprndsk
> +                       = (VEC (tree, heap) *)
> +                         VEC_index (slp_void_p, vec_defs, k);
> +                     VEC_replace (tree, vargs, k,
> +                                  VEC_index (tree, vec_oprndsk, i));
> +                   }
> +                 new_stmt = gimple_build_call_vec (fndecl, vargs);
> +                 new_temp = make_ssa_name (vec_dest, new_stmt);
> +                 gimple_call_set_lhs (new_stmt, new_temp);
> +                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +                 mark_symbols_for_renaming (new_stmt);
> +                 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
> +                                 new_stmt);
> +               }
> +
> +             for (i = 0; i < nargs; i++)
> +               {
> +                 VEC (tree, heap) *vec_oprndsi
> +                   = (VEC (tree, heap) *)
> +                     VEC_index (slp_void_p, vec_defs, i);
> +                 VEC_free (tree, heap, vec_oprndsi);
> +               }
> +             VEC_free (slp_void_p, heap, vec_defs);
> +             continue;
> +           }
> +
>          for (i = 0; i < nargs; i++)
>            {
>              op = gimple_call_arg (stmt, i);
> @@ -1739,6 +1780,54 @@ vectorizable_call (gimple stmt, gimple_s
>          else
>            VEC_truncate (tree, vargs, 0);
>
> +         if (slp_node)
> +           {
> +             VEC (slp_void_p, heap) *vec_defs
> +               = VEC_alloc (slp_void_p, heap, nargs);
> +             VEC (tree, heap) *vec_oprnds0;
> +
> +             for (i = 0; i < nargs; i++)
> +               VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
> +             vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
> +             vec_oprnds0
> +               = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
> +
> +             /* Arguments are ready.  Create the new vector stmt.  */
> +             for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
> +                  i += 2)
> +               {
> +                 size_t k;
> +                 VEC_truncate (tree, vargs, 0);
> +                 for (k = 0; k < nargs; k++)
> +                   {
> +                     VEC (tree, heap) *vec_oprndsk
> +                       = (VEC (tree, heap) *)
> +                         VEC_index (slp_void_p, vec_defs, k);
> +                     VEC_quick_push (tree, vargs,
> +                                     VEC_index (tree, vec_oprndsk, i));
> +                     VEC_quick_push (tree, vargs,
> +                                     VEC_index (tree, vec_oprndsk, i + 1));
> +                   }
> +                 new_stmt = gimple_build_call_vec (fndecl, vargs);
> +                 new_temp = make_ssa_name (vec_dest, new_stmt);
> +                 gimple_call_set_lhs (new_stmt, new_temp);
> +                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +                 mark_symbols_for_renaming (new_stmt);
> +                 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
> +                                 new_stmt);
> +               }
> +
> +             for (i = 0; i < nargs; i++)
> +               {
> +                 VEC (tree, heap) *vec_oprndsi
> +                   = (VEC (tree, heap) *)
> +                     VEC_index (slp_void_p, vec_defs, i);
> +                 VEC_free (tree, heap, vec_oprndsi);
> +               }
> +             VEC_free (slp_void_p, heap, vec_defs);
> +             continue;
> +           }
> +
>          for (i = 0; i < nargs; i++)
>            {
>              op = gimple_call_arg (stmt, i);
> @@ -1804,7 +1893,8 @@ vectorizable_call (gimple stmt, gimple_s
>     lhs = gimple_call_lhs (stmt);
>   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
>   set_vinfo_for_stmt (new_stmt, stmt_info);
> -  set_vinfo_for_stmt (stmt, NULL);
> +  if (!slp_node)
> +    set_vinfo_for_stmt (stmt, NULL);
>   STMT_VINFO_STMT (stmt_info) = new_stmt;
>   gsi_replace (gsi, new_stmt, false);
>   SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
> @@ -5265,7 +5355,7 @@ vect_analyze_stmt (gimple stmt, bool *ne
>             || vectorizable_operation (stmt, NULL, NULL, NULL)
>             || vectorizable_assignment (stmt, NULL, NULL, NULL)
>             || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
> -            || vectorizable_call (stmt, NULL, NULL)
> +           || vectorizable_call (stmt, NULL, NULL, NULL)
>             || vectorizable_store (stmt, NULL, NULL, NULL)
>             || vectorizable_reduction (stmt, NULL, NULL, NULL)
>             || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
> @@ -5277,6 +5367,7 @@ vect_analyze_stmt (gimple stmt, bool *ne
>                 || vectorizable_operation (stmt, NULL, NULL, node)
>                 || vectorizable_assignment (stmt, NULL, NULL, node)
>                 || vectorizable_load (stmt, NULL, NULL, node, NULL)
> +               || vectorizable_call (stmt, NULL, NULL, node)
>                 || vectorizable_store (stmt, NULL, NULL, node)
>                 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
>       }
> @@ -5391,8 +5482,7 @@ vect_transform_stmt (gimple stmt, gimple
>       break;
>
>     case call_vec_info_type:
> -      gcc_assert (!slp_node);
> -      done = vectorizable_call (stmt, gsi, &vec_stmt);
> +      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
>       stmt = gsi_stmt (*gsi);
>       break;
>
> --- gcc/testsuite/lib/target-supports.exp.jj    2011-11-08 09:26:58.000000000 +0100
> +++ gcc/testsuite/lib/target-supports.exp       2011-11-08 10:15:38.000000000 +0100
> @@ -3520,6 +3520,58 @@ proc check_effective_target_vect64 { } {
>     return $et_vect64_saved
>  }
>
> +# Return 1 if the target supports vector copysignf calls.
> +
> +proc check_effective_target_vect_call_copysignf { } {
> +    global et_vect_call_copysignf_saved
> +
> +    if [info exists et_vect_call_copysignf_saved] {
> +       verbose "check_effective_target_vect_call_copysignf: using cached result" 2
> +    } else {
> +       set et_vect_call_copysignf_saved 0
> +       if { [istarget i?86-*-*]
> +            || [istarget x86_64-*-*]
> +            || [istarget powerpc*-*-*] } {
> +          set et_vect_call_copysignf_saved 1
> +       }
> +    }
> +
> +    verbose "check_effective_target_vect_call_copysignf: returning $et_vect_call_copysignf_saved" 2
> +    return $et_vect_call_copysignf_saved
> +}
> +
> +# Return 1 if the target supports vector sqrtf calls.
> +
> +proc check_effective_target_vect_call_sqrtf { } {
> +    global et_vect_call_sqrtf_saved
> +
> +    if [info exists et_vect_call_sqrtf_saved] {
> +       verbose "check_effective_target_vect_call_sqrtf: using cached result" 2
> +    } else {
> +       set et_vect_call_sqrtf_saved 0
> +       if { [istarget i?86-*-*]
> +            || [istarget x86_64-*-*]
> +            || ([istarget powerpc*-*-*] && [check_vsx_hw_available]) } {
> +           set et_vect_call_sqrtf_saved 1
> +       }
> +    }
> +
> +    verbose "check_effective_target_vect_call_sqrtf: returning $et_vect_call_sqrtf_saved" 2
> +    return $et_vect_call_sqrtf_saved
> +}
> +
> +# Return 1 if the target supports vector lrint calls.
> +
> +proc check_effective_target_vect_call_lrint { } {
> +    set et_vect_call_lrint 0
> +    if { ([istarget i?86-*-*] || [istarget x86_64-*-*]) && [check_effective_target_ilp32] } {
> +       set et_vect_call_lrint 1
> +    }
> +
> +    verbose "check_effective_target_vect_call_lrint: returning $et_vect_call_lrint" 2
> +    return $et_vect_call_lrint
> +}
> +
>  # Return 1 if the target supports section-anchors
>
>  proc check_effective_target_section_anchors { } {
> --- gcc/testsuite/gcc.dg/vect/vect.exp.jj       2011-10-24 12:21:08.000000000 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect.exp  2011-11-08 10:09:27.000000000 +0100
> @@ -104,9 +104,15 @@ dg-runtest [lsort [glob -nocomplain $src
>  # -ffast-math tests
>  set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
>  lappend DEFAULT_VECTCFLAGS "-ffast-math"
> -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-*.\[cS\]]]  \
> +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-\[ipsv\]*.\[cS\]]]  \
>        "" $DEFAULT_VECTCFLAGS
>
> +# -ffast-math SLP tests
> +set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
> +lappend VECT_SLP_CFLAGS "-ffast-math"
> +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-bb-slp-*.\[cS\]]]  \
> +        "" $VECT_SLP_CFLAGS
> +
>  # -fno-fast-math tests
>  set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
>  lappend DEFAULT_VECTCFLAGS "-fno-fast-math"
> --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj        2011-11-08 09:28:12.000000000 +0100
> +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c   2011-11-08 09:57:19.000000000 +0100
> @@ -0,0 +1,81 @@
> +#include "tree-vect.h"
> +
> +extern float copysignf (float, float);
> +extern float sqrtf (float);
> +extern float fabsf (float);
> +extern void abort (void);
> +float a[64], b[64], c[64], d[64];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (int n)
> +{
> +  int i;
> +  for (i = 0; i < n; i++)
> +    {
> +      a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf (d[4 * i + 0]);
> +      a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf (d[4 * i + 1]);
> +      a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf (d[4 * i + 2]);
> +      a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf (d[4 * i + 3]);
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (int n)
> +{
> +  int i;
> +  for (i = 0; i < 2 * n; i++)
> +    {
> +      a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf (d[2 * i + 0]);
> +      a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf (d[2 * i + 1]);
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (void)
> +{
> +  int i;
> +  for (i = 0; i < 64; i++)
> +    a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) int
> +main1 ()
> +{
> +  int i;
> +
> +  for (i = 0; i < 64; i++)
> +    {
> +      asm ("");
> +      b[i] = (i & 1) ? -4 * i : 4 * i;
> +      c[i] = (i & 2) ? -8 * i : 8 * i;
> +      d[i] = i * i;
> +    }
> +  f1 (16);
> +  for (i = 0; i < 64; i++)
> +    if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 3) + i - a[i]) >= 0.0001f)
> +      abort ();
> +    else
> +      a[i] = 131.25;
> +  f2 (16);
> +  for (i = 0; i < 64; i++)
> +    if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 1) + i - a[i]) >= 0.0001f)
> +      abort ();
> +    else
> +      a[i] = 131.25;
> +  f3 ();
> +  for (i = 0; i < 64; i++)
> +    if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f)
> +      abort ();
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  check_vect ();
> +  return main1 ();
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target { vect_call_copysignf && vect_call_sqrtf } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_call_copysignf && vect_call_sqrtf } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c.jj        2011-11-08 09:28:12.000000000 +0100
> +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c   2011-11-08 10:03:37.000000000 +0100
> @@ -0,0 +1,128 @@
> +#include "tree-vect.h"
> +
> +extern long int lrint (double);
> +extern void abort (void);
> +long int a[64];
> +double b[64];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (int n)
> +{
> +  int i;
> +  for (i = 0; i < n; i++)
> +    {
> +      a[4 * i + 0] = lrint (b[4 * i + 0]) + 1;
> +      a[4 * i + 1] = lrint (b[4 * i + 1]) + 2;
> +      a[4 * i + 2] = lrint (b[4 * i + 2]) + 3;
> +      a[4 * i + 3] = lrint (b[4 * i + 3]) + 4;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (int n)
> +{
> +  int i;
> +  for (i = 0; i < 2 * n; i++)
> +    {
> +      a[2 * i + 0] = lrint (b[2 * i + 0]) + 1;
> +      a[2 * i + 1] = lrint (b[2 * i + 1]) + 2;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (void)
> +{
> +  int i;
> +  for (i = 0; i < 64; i++)
> +    a[i] = lrint (b[i]) + 1;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (int n)
> +{
> +  int i;
> +  for (i = 0; i < n; i++)
> +    {
> +      a[4 * i + 0] = lrint (b[4 * i + 0]);
> +      a[4 * i + 1] = lrint (b[4 * i + 1]);
> +      a[4 * i + 2] = lrint (b[4 * i + 2]);
> +      a[4 * i + 3] = lrint (b[4 * i + 3]);
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f5 (int n)
> +{
> +  int i;
> +  for (i = 0; i < 2 * n; i++)
> +    {
> +      a[2 * i + 0] = lrint (b[2 * i + 0]);
> +      a[2 * i + 1] = lrint (b[2 * i + 1]);
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f6 (void)
> +{
> +  int i;
> +  for (i = 0; i < 64; i++)
> +    a[i] = lrint (b[i]);
> +}
> +
> +__attribute__((noinline, noclone)) int
> +main1 ()
> +{
> +  int i;
> +
> +  for (i = 0; i < 64; i++)
> +    {
> +      asm ("");
> +      b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25;
> +    }
> +  f1 (16);
> +  for (i = 0; i < 64; i++)
> +    if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 3))
> +      abort ();
> +    else
> +      a[i] = 131.25;
> +  f2 (16);
> +  for (i = 0; i < 64; i++)
> +    if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 1))
> +      abort ();
> +    else
> +      a[i] = 131.25;
> +  f3 ();
> +  for (i = 0; i < 64; i++)
> +    if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1)
> +      abort ();
> +    else
> +      a[i] = 131.25;
> +  f4 (16);
> +  for (i = 0; i < 64; i++)
> +    if (a[i] != ((i & 1) ? -4 * i : 4 * i))
> +      abort ();
> +    else
> +      a[i] = 131.25;
> +  f5 (16);
> +  for (i = 0; i < 64; i++)
> +    if (a[i] != ((i & 1) ? -4 * i : 4 * i))
> +      abort ();
> +    else
> +      a[i] = 131.25;
> +  f6 ();
> +  for (i = 0; i < 64; i++)
> +    if (a[i] != ((i & 1) ? -4 * i : 4 * i))
> +      abort ();
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  check_vect ();
> +  return main1 ();
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" { target vect_call_lrint } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target vect_call_lrint } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-1.c.jj      2011-11-08 09:46:00.000000000 +0100
> +++ gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-1.c 2011-11-08 09:49:49.000000000 +0100
> @@ -0,0 +1,49 @@
> +#include "tree-vect.h"
> +
> +extern float copysignf (float, float);
> +extern float sqrtf (float);
> +extern float fabsf (float);
> +extern void abort (void);
> +float a[64], b[64], c[64], d[64];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> +  a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]);
> +  a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]);
> +  a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]);
> +  a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]);
> +  a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]);
> +  a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]);
> +  a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]);
> +  a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]);
> +}
> +
> +__attribute__((noinline, noclone)) int
> +main1 ()
> +{
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      asm ("");
> +      b[i] = (i & 1) ? -4 * i : 4 * i;
> +      c[i] = (i & 2) ? -8 * i : 8 * i;
> +      d[i] = i * i;
> +    }
> +  f1 ();
> +  for (i = 0; i < 8; i++)
> +    if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i + i - a[i]) >= 0.0001f)
> +      abort ();
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  check_vect ();
> +  return main1 ();
> +}
> +
> +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_call_copysignf && vect_call_sqrtf } } } } */
> +/* { dg-final { cleanup-tree-dump "slp" } } */
> --- gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-2.c.jj      2011-11-08 09:46:04.000000000 +0100
> +++ gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-2.c 2011-11-08 10:11:20.000000000 +0100
> @@ -0,0 +1,65 @@
> +#include "tree-vect.h"
> +
> +extern long int lrint (double);
> +extern void abort (void);
> +long int a[64];
> +double b[64];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> +  a[0] = lrint (b[0]) + 1;
> +  a[1] = lrint (b[1]) + 2;
> +  a[2] = lrint (b[2]) + 3;
> +  a[3] = lrint (b[3]) + 4;
> +  a[4] = lrint (b[4]) + 5;
> +  a[5] = lrint (b[5]) + 6;
> +  a[6] = lrint (b[6]) + 7;
> +  a[7] = lrint (b[7]) + 8;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (void)
> +{
> +  a[0] = lrint (b[0]);
> +  a[1] = lrint (b[1]);
> +  a[2] = lrint (b[2]);
> +  a[3] = lrint (b[3]);
> +  a[4] = lrint (b[4]);
> +  a[5] = lrint (b[5]);
> +  a[6] = lrint (b[6]);
> +  a[7] = lrint (b[7]);
> +}
> +
> +__attribute__((noinline, noclone)) int
> +main1 ()
> +{
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      asm ("");
> +      b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25;
> +    }
> +  f1 ();
> +  for (i = 0; i < 8; i++)
> +    if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + i)
> +      abort ();
> +    else
> +      a[i] = 131.25;
> +  f2 ();
> +  for (i = 0; i < 8; i++)
> +    if (a[i] != ((i & 1) ? -4 * i : 4 * i))
> +      abort ();
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  check_vect ();
> +  return main1 ();
> +}
> +
> +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 2 "slp" { target vect_call_lrint } } } */
> +/* { dg-final { cleanup-tree-dump "slp" } } */
>
>
>        Jakub
>

Patch

--- gcc/tree-vect-slp.c.jj	2011-11-07 20:32:03.000000000 +0100
+++ gcc/tree-vect-slp.c	2011-11-08 09:28:12.000000000 +0100
@@ -202,7 +202,10 @@  vect_get_and_check_slp_defs (loop_vec_in
     loop = LOOP_VINFO_LOOP (loop_vinfo);
 
   if (is_gimple_call (stmt))
-    number_of_oprnds = gimple_call_num_args (stmt);
+    {
+      number_of_oprnds = gimple_call_num_args (stmt);
+      op_idx = 3;
+    }
   else if (is_gimple_assign (stmt))
     {
       number_of_oprnds = gimple_num_ops (stmt) - 1;
@@ -558,7 +561,25 @@  vect_build_slp_tree (loop_vec_info loop_
       ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
 
       if (is_gimple_call (stmt))
-	rhs_code = CALL_EXPR;
+	{
+	  rhs_code = CALL_EXPR;
+	  if (gimple_call_internal_p (stmt)
+	      || gimple_call_tail_p (stmt)
+	      || gimple_call_noreturn_p (stmt)
+	      || !gimple_call_nothrow_p (stmt)
+	      || gimple_call_chain (stmt))
+	    {
+	      if (vect_print_dump_info (REPORT_SLP))
+		{
+		  fprintf (vect_dump,
+			   "Build SLP failed: unsupported call type ");
+		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+		}
+
+	      vect_free_oprnd_info (&oprnds_info, true);
+	      return false;
+	    }
+	}
       else
 	rhs_code = gimple_assign_rhs_code (stmt);
 
@@ -653,6 +674,27 @@  vect_build_slp_tree (loop_vec_info loop_
 	      vect_free_oprnd_info (&oprnds_info, true);
 	      return false;
 	    }
+
+	  if (rhs_code == CALL_EXPR)
+	    {
+	      gimple first_stmt = VEC_index (gimple, stmts, 0);
+	      if (gimple_call_num_args (stmt) != nops
+		  || !operand_equal_p (gimple_call_fn (first_stmt),
+				       gimple_call_fn (stmt), 0)
+		  || gimple_call_fntype (first_stmt)
+		     != gimple_call_fntype (stmt))
+		{
+		  if (vect_print_dump_info (REPORT_SLP))
+		    {
+		      fprintf (vect_dump,
+			       "Build SLP failed: different calls in ");
+		      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+		    }
+
+		  vect_free_oprnd_info (&oprnds_info, true);
+		  return false;
+		}
+	    }
 	}
 
       /* Strided store or load.  */
@@ -786,7 +828,8 @@  vect_build_slp_tree (loop_vec_info loop_
 	  /* Not memory operation.  */
 	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
 	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
-              && rhs_code != COND_EXPR)
+	      && rhs_code != COND_EXPR
+	      && rhs_code != CALL_EXPR)
 	    {
 	      if (vect_print_dump_info (REPORT_SLP))
 		{
--- gcc/tree-vect-stmts.c.jj	2011-11-07 20:32:09.000000000 +0100
+++ gcc/tree-vect-stmts.c	2011-11-08 09:28:55.000000000 +0100
@@ -1521,7 +1521,8 @@  vectorizable_function (gimple call, tree
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
+vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+		   slp_tree slp_node)
 {
   tree vec_dest;
   tree scalar_dest;
@@ -1532,6 +1533,7 @@  vectorizable_call (gimple stmt, gimple_s
   int nunits_in;
   int nunits_out;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   tree fndecl, new_temp, def, rhs_type;
   gimple def_stmt;
   enum vect_def_type dt[3]
@@ -1543,19 +1545,12 @@  vectorizable_call (gimple stmt, gimple_s
   size_t i, nargs;
   tree lhs;
 
-  /* FORNOW: unsupported in basic block SLP.  */
-  gcc_assert (loop_vinfo);
-
-  if (!STMT_VINFO_RELEVANT_P (stmt_info))
+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
 
   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
-  /* FORNOW: SLP not supported.  */
-  if (STMT_SLP_TYPE (stmt_info))
-    return false;
-
   /* Is STMT a vectorizable call?   */
   if (!is_gimple_call (stmt))
     return false;
@@ -1596,7 +1591,7 @@  vectorizable_call (gimple stmt, gimple_s
       if (!rhs_type)
 	rhs_type = TREE_TYPE (op);
 
-      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
+      if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
 				 &def_stmt, &def, &dt[i], &opvectype))
 	{
 	  if (vect_print_dump_info (REPORT_DETAILS))
@@ -1658,7 +1653,9 @@  vectorizable_call (gimple stmt, gimple_s
 
   gcc_assert (!gimple_vuse (stmt));
 
-  if (modifier == NARROW)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
+    ncopies = 1;
+  else if (modifier == NARROW)
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -1697,6 +1694,50 @@  vectorizable_call (gimple stmt, gimple_s
 	  else
 	    VEC_truncate (tree, vargs, 0);
 
+	  if (slp_node)
+	    {
+	      VEC (slp_void_p, heap) *vec_defs
+		= VEC_alloc (slp_void_p, heap, nargs);
+	      VEC (tree, heap) *vec_oprnds0;
+
+	      for (i = 0; i < nargs; i++)
+		VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
+	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
+	      vec_oprnds0
+		= (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
+
+	      /* Arguments are ready.  Create the new vector stmt.  */
+	      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
+		{
+		  size_t k;
+		  for (k = 0; k < nargs; k++)
+		    {
+		      VEC (tree, heap) *vec_oprndsk
+			= (VEC (tree, heap) *)
+			  VEC_index (slp_void_p, vec_defs, k);
+		      VEC_replace (tree, vargs, k,
+				   VEC_index (tree, vec_oprndsk, i));
+		    }
+		  new_stmt = gimple_build_call_vec (fndecl, vargs);
+		  new_temp = make_ssa_name (vec_dest, new_stmt);
+		  gimple_call_set_lhs (new_stmt, new_temp);
+		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		  mark_symbols_for_renaming (new_stmt);
+		  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
+				  new_stmt);
+		}
+
+	      for (i = 0; i < nargs; i++)
+		{
+		  VEC (tree, heap) *vec_oprndsi
+		    = (VEC (tree, heap) *)
+		      VEC_index (slp_void_p, vec_defs, i);
+		  VEC_free (tree, heap, vec_oprndsi);
+		}
+	      VEC_free (slp_void_p, heap, vec_defs);
+	      continue;
+	    }
+
 	  for (i = 0; i < nargs; i++)
 	    {
 	      op = gimple_call_arg (stmt, i);
@@ -1739,6 +1780,54 @@  vectorizable_call (gimple stmt, gimple_s
 	  else
 	    VEC_truncate (tree, vargs, 0);
 
+	  if (slp_node)
+	    {
+	      VEC (slp_void_p, heap) *vec_defs
+		= VEC_alloc (slp_void_p, heap, nargs);
+	      VEC (tree, heap) *vec_oprnds0;
+
+	      for (i = 0; i < nargs; i++)
+		VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
+	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
+	      vec_oprnds0
+		= (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
+
+	      /* Arguments are ready.  Create the new vector stmt.  */
+	      for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
+		   i += 2)
+		{
+		  size_t k;
+		  VEC_truncate (tree, vargs, 0);
+		  for (k = 0; k < nargs; k++)
+		    {
+		      VEC (tree, heap) *vec_oprndsk
+			= (VEC (tree, heap) *)
+			  VEC_index (slp_void_p, vec_defs, k);
+		      VEC_quick_push (tree, vargs,
+				      VEC_index (tree, vec_oprndsk, i));
+		      VEC_quick_push (tree, vargs,
+				      VEC_index (tree, vec_oprndsk, i + 1));
+		    }
+		  new_stmt = gimple_build_call_vec (fndecl, vargs);
+		  new_temp = make_ssa_name (vec_dest, new_stmt);
+		  gimple_call_set_lhs (new_stmt, new_temp);
+		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		  mark_symbols_for_renaming (new_stmt);
+		  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
+				  new_stmt);
+		}
+
+	      for (i = 0; i < nargs; i++)
+		{
+		  VEC (tree, heap) *vec_oprndsi
+		    = (VEC (tree, heap) *)
+		      VEC_index (slp_void_p, vec_defs, i);
+		  VEC_free (tree, heap, vec_oprndsi);
+		}
+	      VEC_free (slp_void_p, heap, vec_defs);
+	      continue;
+	    }
+
 	  for (i = 0; i < nargs; i++)
 	    {
 	      op = gimple_call_arg (stmt, i);
@@ -1804,7 +1893,8 @@  vectorizable_call (gimple stmt, gimple_s
     lhs = gimple_call_lhs (stmt);
   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
   set_vinfo_for_stmt (new_stmt, stmt_info);
-  set_vinfo_for_stmt (stmt, NULL);
+  if (!slp_node)
+    set_vinfo_for_stmt (stmt, NULL);
   STMT_VINFO_STMT (stmt_info) = new_stmt;
   gsi_replace (gsi, new_stmt, false);
   SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
@@ -5265,7 +5355,7 @@  vect_analyze_stmt (gimple stmt, bool *ne
             || vectorizable_operation (stmt, NULL, NULL, NULL)
             || vectorizable_assignment (stmt, NULL, NULL, NULL)
             || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
-            || vectorizable_call (stmt, NULL, NULL)
+	    || vectorizable_call (stmt, NULL, NULL, NULL)
             || vectorizable_store (stmt, NULL, NULL, NULL)
             || vectorizable_reduction (stmt, NULL, NULL, NULL)
             || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
@@ -5277,6 +5367,7 @@  vect_analyze_stmt (gimple stmt, bool *ne
                 || vectorizable_operation (stmt, NULL, NULL, node)
                 || vectorizable_assignment (stmt, NULL, NULL, node)
                 || vectorizable_load (stmt, NULL, NULL, node, NULL)
+		|| vectorizable_call (stmt, NULL, NULL, node)
                 || vectorizable_store (stmt, NULL, NULL, node)
                 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
       }
@@ -5391,8 +5482,7 @@  vect_transform_stmt (gimple stmt, gimple
       break;
 
     case call_vec_info_type:
-      gcc_assert (!slp_node);
-      done = vectorizable_call (stmt, gsi, &vec_stmt);
+      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
       stmt = gsi_stmt (*gsi);
       break;
 
--- gcc/testsuite/lib/target-supports.exp.jj	2011-11-08 09:26:58.000000000 +0100
+++ gcc/testsuite/lib/target-supports.exp	2011-11-08 10:15:38.000000000 +0100
@@ -3520,6 +3520,58 @@  proc check_effective_target_vect64 { } {
     return $et_vect64_saved
 }
 
+# Return 1 if the target supports vector copysignf calls (i?86, x86_64 and
+# powerpc targets), else 0.  The result is cached in et_vect_call_copysignf_saved.
+proc check_effective_target_vect_call_copysignf { } {
+    global et_vect_call_copysignf_saved
+
+    if [info exists et_vect_call_copysignf_saved] {
+	verbose "check_effective_target_vect_call_copysignf: using cached result" 2
+    } else {
+	set et_vect_call_copysignf_saved 0
+	if { [istarget i?86-*-*]
+	     || [istarget x86_64-*-*]
+	     || [istarget powerpc*-*-*] } {
+	   set et_vect_call_copysignf_saved 1
+	}
+    }
+
+    verbose "check_effective_target_vect_call_copysignf: returning $et_vect_call_copysignf_saved" 2
+    return $et_vect_call_copysignf_saved
+}
+
+# Return 1 if the target supports vector sqrtf calls (i?86, x86_64, or powerpc
+# when check_vsx_hw_available is true), else 0.  Cached in et_vect_call_sqrtf_saved.
+proc check_effective_target_vect_call_sqrtf { } {
+    global et_vect_call_sqrtf_saved
+
+    if [info exists et_vect_call_sqrtf_saved] {
+	verbose "check_effective_target_vect_call_sqrtf: using cached result" 2
+    } else {
+	set et_vect_call_sqrtf_saved 0
+	if { [istarget i?86-*-*]
+	     || [istarget x86_64-*-*]
+	     || ([istarget powerpc*-*-*] && [check_vsx_hw_available]) } {
+	    set et_vect_call_sqrtf_saved 1
+	}
+    }
+
+    verbose "check_effective_target_vect_call_sqrtf: returning $et_vect_call_sqrtf_saved" 2
+    return $et_vect_call_sqrtf_saved
+}
+
+# Return 1 if the target supports vector lrint calls (i?86 or x86_64 with
+# check_effective_target_ilp32 true), else 0.  Computed on every call, not cached.
+proc check_effective_target_vect_call_lrint { } {
+    set et_vect_call_lrint 0
+    if { ([istarget i?86-*-*] || [istarget x86_64-*-*]) && [check_effective_target_ilp32] } {
+	set et_vect_call_lrint 1
+    }
+
+    verbose "check_effective_target_vect_call_lrint: returning $et_vect_call_lrint" 2
+    return $et_vect_call_lrint
+}
+
 # Return 1 if the target supports section-anchors
 
 proc check_effective_target_section_anchors { } {
--- gcc/testsuite/gcc.dg/vect/vect.exp.jj	2011-10-24 12:21:08.000000000 +0200
+++ gcc/testsuite/gcc.dg/vect/vect.exp	2011-11-08 10:09:27.000000000 +0100
@@ -104,9 +104,15 @@  dg-runtest [lsort [glob -nocomplain $src
 # -ffast-math tests
 set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
 lappend DEFAULT_VECTCFLAGS "-ffast-math"
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-*.\[cS\]]]  \
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-\[ipsv\]*.\[cS\]]]  \
 	"" $DEFAULT_VECTCFLAGS
 
+# -ffast-math SLP tests: fast-math-bb-slp-* files, run with the saved SLP flags plus -ffast-math
+set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
+lappend VECT_SLP_CFLAGS "-ffast-math"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-bb-slp-*.\[cS\]]]  \
+        "" $VECT_SLP_CFLAGS
+
 # -fno-fast-math tests
 set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
 lappend DEFAULT_VECTCFLAGS "-fno-fast-math"
--- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj	2011-11-08 09:28:12.000000000 +0100
+++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c	2011-11-08 09:57:19.000000000 +0100
@@ -0,0 +1,81 @@ 
+#include "tree-vect.h"
+
+extern float copysignf (float, float);
+extern float sqrtf (float);
+extern float fabsf (float);
+extern void abort (void);
+float a[64], b[64], c[64], d[64];
+
+__attribute__((noinline, noclone)) void
+f1 (int n)
+{
+  int i;
+  for (i = 0; i < n; i++)  /* Each iteration writes four consecutive elements of a[].  */
+    {  /* The four statements differ only in the element index and the constant addend.  */
+      a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf (d[4 * i + 0]);
+      a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf (d[4 * i + 1]);
+      a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf (d[4 * i + 2]);
+      a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf (d[4 * i + 3]);
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f2 (int n)
+{
+  int i;
+  for (i = 0; i < 2 * n; i++)  /* Two elements per iteration, 2 * n iterations: 4 * n elements, as in f1.  */
+    {  /* Same expression as f1, with constant addends 1.0f and 2.0f only.  */
+      a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf (d[2 * i + 0]);
+      a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf (d[2 * i + 1]);
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < 64; i++)  /* One element per iteration over all 64 elements; addend is always 1.0f.  */
+    a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]);
+}
+
+__attribute__((noinline, noclone)) int
+main1 ()
+{
+  int i;
+
+  for (i = 0; i < 64; i++)
+    {
+      asm ("");  /* Empty asm statement; presumably keeps this init loop from being vectorized -- confirm.  */
+      b[i] = (i & 1) ? -4 * i : 4 * i;
+      c[i] = (i & 2) ? -8 * i : 8 * i;  /* Sign source for copysignf; the checks below use the same (i & 2) test.  */
+      d[i] = i * i;  /* Perfect square: the checks below expect sqrtf (d[i]) to contribute i.  */
+    }
+  f1 (16);  /* 4 elements per iteration * 16 iterations = all 64 elements.  */
+  for (i = 0; i < 64; i++)
+    if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 3) + i - a[i]) >= 0.0001f)
+      abort ();
+    else
+      a[i] = 131.25;  /* Non-integer filler, so a stale value cannot pass the next check.  */
+  f2 (16);  /* 2 elements per iteration * 32 iterations = all 64 elements.  */
+  for (i = 0; i < 64; i++)
+    if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 1) + i - a[i]) >= 0.0001f)
+      abort ();
+    else
+      a[i] = 131.25;
+  f3 ();
+  for (i = 0; i < 64; i++)
+    if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f)
+      abort ();
+  return 0;  /* Reached only if none of the three checks aborted.  */
+}
+
+int
+main ()
+{
+  check_vect ();  /* Not defined in this file; presumably provided by tree-vect.h.  */
+  return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target { vect_call_copysignf && vect_call_sqrtf } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_call_copysignf && vect_call_sqrtf } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c.jj	2011-11-08 09:28:12.000000000 +0100
+++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c	2011-11-08 10:03:37.000000000 +0100
@@ -0,0 +1,128 @@ 
+#include "tree-vect.h"
+
+extern long int lrint (double);
+extern void abort (void);
+long int a[64];
+double b[64];
+
+__attribute__((noinline, noclone)) void
+f1 (int n)
+{
+  int i;
+  for (i = 0; i < n; i++)  /* Each iteration writes four consecutive elements of a[].  */
+    {  /* lrint of a double stored into long int a[], plus a per-statement constant 1..4.  */
+      a[4 * i + 0] = lrint (b[4 * i + 0]) + 1;
+      a[4 * i + 1] = lrint (b[4 * i + 1]) + 2;
+      a[4 * i + 2] = lrint (b[4 * i + 2]) + 3;
+      a[4 * i + 3] = lrint (b[4 * i + 3]) + 4;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f2 (int n)
+{
+  int i;
+  for (i = 0; i < 2 * n; i++)  /* Two elements per iteration, 2 * n iterations: 4 * n elements, as in f1.  */
+    {
+      a[2 * i + 0] = lrint (b[2 * i + 0]) + 1;
+      a[2 * i + 1] = lrint (b[2 * i + 1]) + 2;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < 64; i++)  /* One element per iteration over all 64 elements; addend is always 1.  */
+    a[i] = lrint (b[i]) + 1;
+}
+
+__attribute__((noinline, noclone)) void
+f4 (int n)
+{
+  int i;
+  for (i = 0; i < n; i++)  /* Same shape as f1, but the lrint result is stored with no addend.  */
+    {
+      a[4 * i + 0] = lrint (b[4 * i + 0]);
+      a[4 * i + 1] = lrint (b[4 * i + 1]);
+      a[4 * i + 2] = lrint (b[4 * i + 2]);
+      a[4 * i + 3] = lrint (b[4 * i + 3]);
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f5 (int n)
+{
+  int i;
+  for (i = 0; i < 2 * n; i++)  /* Same shape as f2, but the lrint result is stored with no addend.  */
+    {
+      a[2 * i + 0] = lrint (b[2 * i + 0]);
+      a[2 * i + 1] = lrint (b[2 * i + 1]);
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < 64; i++)  /* Same shape as f3, but the lrint result is stored with no addend.  */
+    a[i] = lrint (b[i]);
+}
+
+__attribute__((noinline, noclone)) int
+main1 ()
+{
+  int i;
+
+  for (i = 0; i < 64; i++)
+    {
+      asm ("");  /* Empty asm statement; presumably keeps this init loop from being vectorized -- confirm.  */
+      b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25;  /* The checks below expect lrint to drop the 0.25.  */
+    }
+  f1 (16);  /* 4 elements per iteration * 16 iterations = all 64 elements.  */
+  for (i = 0; i < 64; i++)
+    if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 3))
+      abort ();
+    else
+      a[i] = 131.25;  /* a[] is long int, so this stores 131, which none of the later checks expect.  */
+  f2 (16);  /* 2 elements per iteration * 32 iterations = all 64 elements.  */
+  for (i = 0; i < 64; i++)
+    if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 1))
+      abort ();
+    else
+      a[i] = 131.25;
+  f3 ();
+  for (i = 0; i < 64; i++)
+    if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1)
+      abort ();
+    else
+      a[i] = 131.25;
+  f4 (16);  /* f4, f5 and f6 add no constant, so the expected value is just the rounded b[i].  */
+  for (i = 0; i < 64; i++)
+    if (a[i] != ((i & 1) ? -4 * i : 4 * i))
+      abort ();
+    else
+      a[i] = 131.25;
+  f5 (16);
+  for (i = 0; i < 64; i++)
+    if (a[i] != ((i & 1) ? -4 * i : 4 * i))
+      abort ();
+    else
+      a[i] = 131.25;
+  f6 ();
+  for (i = 0; i < 64; i++)
+    if (a[i] != ((i & 1) ? -4 * i : 4 * i))
+      abort ();
+  return 0;  /* Reached only if none of the six checks aborted.  */
+}
+
+int
+main ()
+{
+  check_vect ();  /* Not defined in this file; presumably provided by tree-vect.h.  */
+  return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" { target vect_call_lrint } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target vect_call_lrint } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-1.c.jj	2011-11-08 09:46:00.000000000 +0100
+++ gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-1.c	2011-11-08 09:49:49.000000000 +0100
@@ -0,0 +1,49 @@ 
+#include "tree-vect.h"
+
+extern float copysignf (float, float);
+extern float sqrtf (float);
+extern float fabsf (float);
+extern void abort (void);
+float a[64], b[64], c[64], d[64];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{  /* Eight straight-line statements, no loop; the constant addend for a[i] is i + 1.  */
+  a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]);
+  a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]);
+  a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]);
+  a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]);
+  a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]);
+  a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]);
+  a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]);
+  a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]);
+}
+
+__attribute__((noinline, noclone)) int
+main1 ()
+{
+  int i;
+
+  for (i = 0; i < 8; i++)  /* Only the first 8 elements are initialized; f1 reads only those.  */
+    {
+      asm ("");  /* Empty asm statement; presumably keeps this init loop from being vectorized -- confirm.  */
+      b[i] = (i & 1) ? -4 * i : 4 * i;
+      c[i] = (i & 2) ? -8 * i : 8 * i;  /* Sign source for copysignf; the check below uses the same (i & 2) test.  */
+      d[i] = i * i;  /* Perfect square: the check below expects sqrtf (d[i]) to contribute i.  */
+    }
+  f1 ();
+  for (i = 0; i < 8; i++)
+    if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i + i - a[i]) >= 0.0001f)
+      abort ();
+  return 0;  /* Reached only if the check did not abort.  */
+}
+
+int
+main ()
+{
+  check_vect ();  /* Not defined in this file; presumably provided by tree-vect.h.  */
+  return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_call_copysignf && vect_call_sqrtf } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
--- gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-2.c.jj	2011-11-08 09:46:04.000000000 +0100
+++ gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-2.c	2011-11-08 10:11:20.000000000 +0100
@@ -0,0 +1,65 @@ 
+#include "tree-vect.h"
+
+extern long int lrint (double);
+extern void abort (void);
+long int a[64];
+double b[64];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{  /* Eight straight-line statements, no loop; the constant addend for a[i] is i + 1.  */
+  a[0] = lrint (b[0]) + 1;
+  a[1] = lrint (b[1]) + 2;
+  a[2] = lrint (b[2]) + 3;
+  a[3] = lrint (b[3]) + 4;
+  a[4] = lrint (b[4]) + 5;
+  a[5] = lrint (b[5]) + 6;
+  a[6] = lrint (b[6]) + 7;
+  a[7] = lrint (b[7]) + 8;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{  /* Same eight statements as f1, but the lrint result is stored with no addend.  */
+  a[0] = lrint (b[0]);
+  a[1] = lrint (b[1]);
+  a[2] = lrint (b[2]);
+  a[3] = lrint (b[3]);
+  a[4] = lrint (b[4]);
+  a[5] = lrint (b[5]);
+  a[6] = lrint (b[6]);
+  a[7] = lrint (b[7]);
+}
+
+__attribute__((noinline, noclone)) int
+main1 ()
+{
+  int i;
+
+  for (i = 0; i < 8; i++)  /* Only the first 8 elements are initialized; f1 and f2 read only those.  */
+    {
+      asm ("");  /* Empty asm statement; presumably keeps this init loop from being vectorized -- confirm.  */
+      b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25;  /* The checks below expect lrint to drop the 0.25.  */
+    }
+  f1 ();
+  for (i = 0; i < 8; i++)
+    if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + i)
+      abort ();
+    else
+      a[i] = 131.25;  /* a[] is long int, so this stores 131, which the next check never expects.  */
+  f2 ();
+  for (i = 0; i < 8; i++)
+    if (a[i] != ((i & 1) ? -4 * i : 4 * i))
+      abort ();
+  return 0;  /* Reached only if neither check aborted.  */
+}
+
+int
+main ()
+{
+  check_vect ();  /* Not defined in this file; presumably provided by tree-vect.h.  */
+  return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 2 "slp" { target vect_call_lrint } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */