diff mbox series

[2/2] vect: Make partial trapping ops use predication [PR96373]

Message ID mptk018kzex.fsf@arm.com
State New
Headers show
Series [1/2] Add support for conditional xorsign [PR96373] | expand

Commit Message

Richard Sandiford Jan. 27, 2023, 11:08 a.m. UTC
PR96373 points out that a predicated SVE loop currently converts
trapping unconditional ops into unpredicated vector ops.  Doing
the operation on inactive lanes can then raise an exception.

As discussed in the PR trail, we aren't 100% consistent about
whether we preserve traps or not.  But the direction of travel
is clearly to improve that rather than live with it.  This patch
tries to do that for the SVE case.

Doing this regresses gcc.target/aarch64/sve/fabd_1.c.  I've added
-fno-trapping-math for now and filed PR108571 to track it.
A similar problem applies to fsubr_1.d.

I think this is likely to regress Power 10, since conditional
operations are only available for masked loops.  I think we'll
need to add -fno-trapping-math to any affected testcases,
but I don't have a Power 10 system to test on.  Kewen, would you
mind giving this a spin and seeing how bad the fallout is?

Tested on aarch64-linux-gnu.  OK to install assuming no blockers
on the Power 10 side?

Richard


gcc/
	PR tree-optimization/96373
	* tree-vect-stmts.cc (vectorizable_operation): Predicate trapping
	operations on the loop mask.  Reject partial vectors if this isn't
	possible.

gcc/testsuite/
	PR tree-optimization/96373
	PR tree-optimization/108571
	* gcc.target/aarch64/sve/fabd_1.c: Add -fno-trapping-math.
	* gcc.target/aarch64/sve/fsubr_1.c: Likewise.
	* gcc.target/aarch64/sve/fmul_1.c: Expect predicate ops.
	* gcc.target/aarch64/sve/fp_arith_1.c: Likewise.
---
 gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c |  2 +-
 gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c | 12 +++----
 .../gcc.target/aarch64/sve/fp_arith_1.c       | 12 +++----
 .../gcc.target/aarch64/sve/fsubr_1.c          |  2 +-
 gcc/tree-vect-stmts.cc                        | 32 ++++++++++++++-----
 5 files changed, 38 insertions(+), 22 deletions(-)

Comments

Richard Biener Jan. 27, 2023, 11:37 a.m. UTC | #1
On Fri, 27 Jan 2023, Richard Sandiford wrote:

> PR96373 points out that a predicated SVE loop currently converts
> trapping unconditional ops into unpredicated vector ops.  Doing
> the operation on inactive lanes can then raise an exception.
> 
> As discussed in the PR trail, we aren't 100% consistent about
> whether we preserve traps or not.  But the direction of travel
> is clearly to improve that rather than live with it.  This patch
> tries to do that for the SVE case.
> 
> Doing this regresses gcc.target/aarch64/sve/fabd_1.c.  I've added
> -fno-trapping-math for now and filed PR108571 to track it.
> A similar problem applies to fsubr_1.d.
> 
> I think this is likely to regress Power 10, since conditional
> operations are only available for masked loops.  I think we'll
> need to add -fno-trapping-math to any affected testcases,
> but I don't have a Power 10 system to test on.  Kewen, would you
> mind giving this a spin and seeing how bad the fallout is?
> 
> Tested on aarch64-linux-gnu.  OK to install assuming no blockers
> on the Power 10 side?

OK.

Thanks,
Richard.

> Richard
> 
> 
> gcc/
> 	PR tree-optimization/96373
> 	* tree-vect-stmts.cc (vectorizable_operation): Predicate trapping
> 	operations on the loop mask.  Reject partial vectors if this isn't
> 	possible.
> 
> gcc/testsuite/
> 	PR tree-optimization/96373
> 	PR tree-optimization/108571
> 	* gcc.target/aarch64/sve/fabd_1.c: Add -fno-trapping-math.
> 	* gcc.target/aarch64/sve/fsubr_1.c: Likewise.
> 	* gcc.target/aarch64/sve/fmul_1.c: Expect predicate ops.
> 	* gcc.target/aarch64/sve/fp_arith_1.c: Likewise.
> ---
>  gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c |  2 +-
>  gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c | 12 +++----
>  .../gcc.target/aarch64/sve/fp_arith_1.c       | 12 +++----
>  .../gcc.target/aarch64/sve/fsubr_1.c          |  2 +-
>  gcc/tree-vect-stmts.cc                        | 32 ++++++++++++++-----
>  5 files changed, 38 insertions(+), 22 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c
> index 13ad83be24c..30bde6f0df7 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do assemble { target aarch64_asm_sve_ok } } */
> -/* { dg-options "-O3 --save-temps" } */
> +/* { dg-options "-O3 --save-temps -fno-trapping-math" } */
>  
>  #define N 16
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c
> index 4a3e7c06745..0245a8c1422 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c
> @@ -27,20 +27,20 @@ DO_ARITH_OPS (_Float16, *, mul)
>  DO_ARITH_OPS (float, *, mul)
>  DO_ARITH_OPS (double, *, mul)
>  
> -/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
>  /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 1 } } */
> -/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2.0\n} 1 } } */
>  /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #5} } } */
>  /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */
>  
> -/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
>  /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
> -/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2.0\n} 1 } } */
>  /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #5} } } */
>  /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */
>  
> -/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
>  /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 1 } } */
> -/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2.0\n} 1 } } */
>  /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #5} } } */
>  /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c
> index 5aed0dcb490..419d6e1b5ec 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c
> @@ -34,37 +34,37 @@ DO_ARITH_OPS (double, -, minus)
>  
>  /* No specific count because it's valid to use fadd or fsub for the
>     out-of-range constants.  */
> -/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */
> +/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} } } */
>  /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
>  /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
>  /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */
>  
> -/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */
> +/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} } } */
>  /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
>  /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
>  /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */
>  
> -/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
> +/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
>  /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
>  /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
>  /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */
>  
> -/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
> +/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
>  /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
>  /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
>  /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */
>  
> -/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
> +/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
>  /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 2 } } */
>  /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
>  /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */
>  
> -/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
> +/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
>  /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 2 } } */
>  /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c
> index f47a360dee9..012cf6e9e5d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do assemble { target aarch64_asm_sve_ok } } */
> -/* { dg-options "-O3 --save-temps" } */
> +/* { dg-options "-O3 --save-temps -fno-trapping-math" } */
>  
>  #define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME)			\
>  void vsubrarithimm_##NAME##_##TYPE (TYPE *dst, int count)	\
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index eb4ca1f184e..56e3c30658e 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -6301,6 +6301,7 @@ vectorizable_operation (vec_info *vinfo,
>    int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
>    vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
>    internal_fn cond_fn = get_conditional_internal_fn (code);
> +  bool could_trap = gimple_could_trap_p (stmt);
>  
>    if (!vec_stmt) /* transformation not required.  */
>      {
> @@ -6309,7 +6310,7 @@ vectorizable_operation (vec_info *vinfo,
>  	 keeping the inactive lanes as-is.  */
>        if (loop_vinfo
>  	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> -	  && reduc_idx >= 0)
> +	  && (could_trap || reduc_idx >= 0))
>  	{
>  	  if (cond_fn == IFN_LAST
>  	      || !direct_internal_fn_supported_p (cond_fn, vectype,
> @@ -6452,16 +6453,31 @@ vectorizable_operation (vec_info *vinfo,
>        vop1 = ((op_type == binary_op || op_type == ternary_op)
>  	      ? vec_oprnds1[i] : NULL_TREE);
>        vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
> -      if (masked_loop_p && reduc_idx >= 0)
> +      if (masked_loop_p && (reduc_idx >= 0 || could_trap))
>  	{
> -	  /* Perform the operation on active elements only and take
> -	     inactive elements from the reduction chain input.  */
> -	  gcc_assert (!vop2);
> -	  vop2 = reduc_idx == 1 ? vop1 : vop0;
>  	  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
>  					  vectype, i);
> -	  gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
> -						    vop0, vop1, vop2);
> +	  auto_vec<tree> vops (5);
> +	  vops.quick_push (mask);
> +	  vops.quick_push (vop0);
> +	  if (vop1)
> +	    vops.quick_push (vop1);
> +	  if (vop2)
> +	    vops.quick_push (vop2);
> +	  if (reduc_idx >= 0)
> +	    {
> +	      /* Perform the operation on active elements only and take
> +		 inactive elements from the reduction chain input.  */
> +	      gcc_assert (!vop2);
> +	      vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
> +	    }
> +	  else
> +	    {
> +	      auto else_value = targetm.preferred_else_value
> +		(cond_fn, vectype, vops.length () - 1, &vops[1]);
> +	      vops.quick_push (else_value);
> +	    }
> +	  gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
>  	  new_temp = make_ssa_name (vec_dest, call);
>  	  gimple_call_set_lhs (call, new_temp);
>  	  gimple_call_set_nothrow (call, true);
>
Kewen.Lin Feb. 13, 2023, 12:42 p.m. UTC | #2
Hi Richard,

on 2023/1/27 19:08, Richard Sandiford via Gcc-patches wrote:
> PR96373 points out that a predicated SVE loop currently converts
> trapping unconditional ops into unpredicated vector ops.  Doing
> the operation on inactive lanes can then raise an exception.
> 
> As discussed in the PR trail, we aren't 100% consistent about
> whether we preserve traps or not.  But the direction of travel
> is clearly to improve that rather than live with it.  This patch
> tries to do that for the SVE case.
> 
> Doing this regresses gcc.target/aarch64/sve/fabd_1.c.  I've added
> -fno-trapping-math for now and filed PR108571 to track it.
> A similar problem applies to fsubr_1.d.
> 
> I think this is likely to regress Power 10, since conditional
> operations are only available for masked loops.  I think we'll
> need to add -fno-trapping-math to any affected testcases,
> but I don't have a Power 10 system to test on.  Kewen, would you
> mind giving this a spin and seeing how bad the fallout is?
> 

Sorry for the late reply, I'm just back from vacation.

Thank you for fixing this and caring about Power10!

I tested your proposed patch on one Power10 machine (ppc64le),
it's bootstrapped but some test failures got exposed as below.

< FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times \\\\mlxvl\\\\M 14
< FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times \\\\mstxvl\\\\M 7
< FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times \\\\mlxvl\\\\M 20
< FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times \\\\mstxvl\\\\M 10
< FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times \\\\mlxvl\\\\M 14
< FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times \\\\mstxvl\\\\M 7
< FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mlxvl\\\\M 70
< FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mlxvx?\\\\M 120
< FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mstxvl\\\\M 70
< FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mstxvx?\\\\M 70
< FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mlxvl\\\\M 21
< FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mstxvl\\\\M 21
< FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mstxvx?\\\\M 21
< FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mlxvl\\\\M 10
< FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mlxvx?\\\\M 42
< FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mstxvl\\\\M 10
< FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times \\\\mlxvl\\\\M 16
< FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times \\\\mstxvl\\\\M 7
< FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not \\\\mlxvx\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not \\\\mstxvx\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times \\\\mlxvl\\\\M 20
< FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times \\\\mstxvl\\\\M 10
< FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not \\\\mlxvx\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not \\\\mstxvx\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times \\\\mlxvl\\\\M 20
< FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times \\\\mstxvl\\\\M 10
< FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times \\\\mlxvl\\\\M 14
< FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times \\\\mstxvl\\\\M 7
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mlxvx\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mstxv\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mstxvx\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times \\\\mlxvl\\\\M 70
< FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times \\\\mstxvl\\\\M 70
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mlxvx\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mstxv\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mstxvx\\\\M
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times \\\\mlxvl\\\\M 21
< FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times \\\\mstxvl\\\\M 21
< FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mlxvl\\\\M 10
< FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mstxvl\\\\M 10
< FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mstxvx?\\\\M 6
< FAIL: gcc.target/powerpc/p9-vec-length-full-8.c scan-assembler-times \\\\mlxvl\\\\M 30
< FAIL: gcc.target/powerpc/p9-vec-length-full-8.c scan-assembler-times \\\\mstxvl\\\\M 10

By checking several of them, it's due to that we don't vectorize
some loop having float type involved with partial vector any more.

As you suggested above, I fixed them with an extra option
"-fno-trapping-math" and verified all of them can pass again.
I also noticed that the original test case in PR96373 fails
on Power10 too, so I added one constructed case pr96373.c
into sub bucket gcc.target/powerpc for testing coverage
on Power.

One re-spin with the attached adjustment shows there is no
regression failure any more, and the new test case works well
on both ppc64 (P8) and ppc64le (P10) Linux.

BR,
Kewen
-----
From 5267731d8949c242b6188c3e9f7b0d561e6e092d Mon Sep 17 00:00:00 2001
From: Kewen Lin <linkw@linux.ibm.com>
Date: Mon, 13 Feb 2023 17:07:50 +0800
Subject: [PATCH] rs6000 test cases adjustment

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/p9-vec-length-epil-1.c: Add -fno-trapping-math.
	* gcc.target/powerpc/p9-vec-length-epil-2.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-epil-3.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-epil-4.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-epil-5.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-epil-6.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-epil-8.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-full-1.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-full-2.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-full-3.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-full-4.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-full-5.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-full-6.c: Likewise.
	* gcc.target/powerpc/p9-vec-length-full-8.c: Likewise.
	* gcc.target/powerpc/pr96373.c: New test.
---
 .../gcc.target/powerpc/p9-vec-length-epil-1.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-epil-2.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-epil-3.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-epil-4.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-epil-5.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-epil-6.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-epil-8.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-full-1.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-full-2.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-full-3.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-full-4.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-full-5.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-full-6.c |  2 +-
 .../gcc.target/powerpc/p9-vec-length-full-8.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr96373.c    | 31 +++++++++++++++++++
 15 files changed, 45 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr96373.c

diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c
index d248f091b52..dfcc0e95320 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-2.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-2.c
index 9f78a447ec7..e63f1bf2372 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-3.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-3.c
index a08797fcbca..4a99e3a3265 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-4.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-4.c
index ad051fb1cef..9fbee6a4324 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-5.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-5.c
index a24c30feeba..d023a998c3b 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-6.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-6.c
index 4eaeb3b005f..dbce90757e3 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-6.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-6.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
index 8b9c9107814..e56fd55f623 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-1.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-1.c
index 3336752edbb..f01f1c54fa5 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-2.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-2.c
index 98abf8b33b1..f546e97fa7d 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-3.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-3.c
index 0881d1a960a..65142b3fecd 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-4.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-4.c
index 8ce3dc19a60..a4cc7aafaeb 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-5.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-5.c
index f9f58ba11f5..4b0b9070c84 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c
index 5d2357aabfa..65ddf2b098a 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-8.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-8.c
index 1fc2af1e753..7fe0dd00431 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-8.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-8.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
 
 /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96373.c b/gcc/testsuite/gcc.target/powerpc/pr96373.c
new file mode 100644
index 00000000000..f0471b6f68b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96373.c
@@ -0,0 +1,31 @@
+/* { dg-do run { target { powerpc*-*-linux* } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+/* Verify it can run successfully, especially on Power10 and later.   */
+
+#define _GNU_SOURCE
+#include <fenv.h>
+
+__attribute__ ((noipa)) void
+div (double *d, double *s, int n)
+{
+  for (; n; n--, d++, s++)
+    *d = *d / *s;
+}
+
+int main()
+{
+  double d[] = {1,2,3,4,5,6,7,8,9,10,11};
+  double s[] = {11,10,9,8,7,6,5,4,3,2,1};
+
+  feenableexcept(FE_DIVBYZERO|FE_INVALID);
+  div(d, s, 11);
+
+  int i;
+  for (i = 0; i < 11; i++)
+    __builtin_printf(" %f", d[i]);
+
+  __builtin_printf("\n");
+
+  return 0;
+}
Richard Sandiford Feb. 13, 2023, 1:57 p.m. UTC | #3
"Kewen.Lin" <linkw@linux.ibm.com> writes:
> Hi Richard,
>
> on 2023/1/27 19:08, Richard Sandiford via Gcc-patches wrote:
>> PR96373 points out that a predicated SVE loop currently converts
>> trapping unconditional ops into unpredicated vector ops.  Doing
>> the operation on inactive lanes can then raise an exception.
>> 
>> As discussed in the PR trail, we aren't 100% consistent about
>> whether we preserve traps or not.  But the direction of travel
>> is clearly to improve that rather than live with it.  This patch
>> tries to do that for the SVE case.
>> 
>> Doing this regresses gcc.target/aarch64/sve/fabd_1.c.  I've added
>> -fno-trapping-math for now and filed PR108571 to track it.
>> A similar problem applies to fsubr_1.d.
>> 
>> I think this is likely to regress Power 10, since conditional
>> operations are only available for masked loops.  I think we'll
>> need to add -fno-trapping-math to any affected testcases,
>> but I don't have a Power 10 system to test on.  Kewen, would you
>> mind giving this a spin and seeing how bad the fallout is?
>> 
>
> Sorry for the late reply, I'm just back from vacation.
>
> Thank you for fixing this and caring about Power10!
>
> I tested your proposed patch on one Power10 machine (ppc64le),
> it's bootstrapped but some test failures got exposed as below.
>
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times \\\\mlxvl\\\\M 14
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times \\\\mstxvl\\\\M 7
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times \\\\mlxvl\\\\M 20
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times \\\\mstxvl\\\\M 10
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times \\\\mlxvl\\\\M 14
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times \\\\mstxvl\\\\M 7
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mlxvl\\\\M 70
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mlxvx?\\\\M 120
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mstxvl\\\\M 70
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mstxvx?\\\\M 70
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mlxvl\\\\M 21
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mstxvl\\\\M 21
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mstxvx?\\\\M 21
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mlxvl\\\\M 10
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mlxvx?\\\\M 42
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mstxvl\\\\M 10
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times \\\\mlxvl\\\\M 16
> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times \\\\mstxvl\\\\M 7
> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not \\\\mlxvx\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not \\\\mstxvx\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times \\\\mlxvl\\\\M 20
> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times \\\\mstxvl\\\\M 10
> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not \\\\mlxvx\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not \\\\mstxvx\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times \\\\mlxvl\\\\M 20
> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times \\\\mstxvl\\\\M 10
> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times \\\\mlxvl\\\\M 14
> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times \\\\mstxvl\\\\M 7
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mlxvx\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mstxv\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mstxvx\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times \\\\mlxvl\\\\M 70
> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times \\\\mstxvl\\\\M 70
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mlxvx\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mstxv\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mstxvx\\\\M
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times \\\\mlxvl\\\\M 21
> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times \\\\mstxvl\\\\M 21
> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mlxvl\\\\M 10
> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mstxvl\\\\M 10
> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mstxvx?\\\\M 6
> < FAIL: gcc.target/powerpc/p9-vec-length-full-8.c scan-assembler-times \\\\mlxvl\\\\M 30
> < FAIL: gcc.target/powerpc/p9-vec-length-full-8.c scan-assembler-times \\\\mstxvl\\\\M 10
>
> By checking several of them, it's due to that we don't vectorize
> some loop having float type involved with partial vector any more.
>
> As you suggested above, I fixed them with an extra option
> "-fno-trapping-math" and verified all of them can pass again.
> I also noticed that the original test case in PR96373 fails
> on Power10 too, so I added one constructed case pr96373.c
> into sub bucket gcc.target/powerpc for testing coverage
> on Power.
>
> One re-spin with the attached adjustment shows there is no
> regression failure any more, and the new test case works well
> on both ppc64 (P8) and ppc64le (P10) Linux.

Thanks for doing this.  The patch is OK, if you need approval.
I'll push mine once it's in.

Richard

>
> BR,
> Kewen
> -----
>
> From 5267731d8949c242b6188c3e9f7b0d561e6e092d Mon Sep 17 00:00:00 2001
> From: Kewen Lin <linkw@linux.ibm.com>
> Date: Mon, 13 Feb 2023 17:07:50 +0800
> Subject: [PATCH] rs6000 test cases adjustment
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/powerpc/p9-vec-length-epil-1.c: Add -fno-trapping-math.
> 	* gcc.target/powerpc/p9-vec-length-epil-2.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-epil-3.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-epil-4.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-epil-5.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-epil-6.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-epil-8.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-full-1.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-full-2.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-full-3.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-full-4.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-full-5.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-full-6.c: Likewise.
> 	* gcc.target/powerpc/p9-vec-length-full-8.c: Likewise.
> 	* gcc.target/powerpc/pr96373.c: New test.
> ---
>  .../gcc.target/powerpc/p9-vec-length-epil-1.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-epil-2.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-epil-3.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-epil-4.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-epil-5.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-epil-6.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-epil-8.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-full-1.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-full-2.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-full-3.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-full-4.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-full-5.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-full-6.c |  2 +-
>  .../gcc.target/powerpc/p9-vec-length-full-8.c |  2 +-
>  gcc/testsuite/gcc.target/powerpc/pr96373.c    | 31 +++++++++++++++++++
>  15 files changed, 45 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr96373.c
>
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c
> index d248f091b52..dfcc0e95320 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-2.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-2.c
> index 9f78a447ec7..e63f1bf2372 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-3.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-3.c
> index a08797fcbca..4a99e3a3265 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-3.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-4.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-4.c
> index ad051fb1cef..9fbee6a4324 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-4.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-4.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-5.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-5.c
> index a24c30feeba..d023a998c3b 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-5.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-5.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-6.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-6.c
> index 4eaeb3b005f..dbce90757e3 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-6.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-6.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> index 8b9c9107814..e56fd55f623 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-1.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-1.c
> index 3336752edbb..f01f1c54fa5 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-2.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-2.c
> index 98abf8b33b1..f546e97fa7d 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-3.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-3.c
> index 0881d1a960a..65142b3fecd 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-3.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-4.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-4.c
> index 8ce3dc19a60..a4cc7aafaeb 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-4.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-4.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-5.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-5.c
> index f9f58ba11f5..4b0b9070c84 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-5.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-5.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c
> index 5d2357aabfa..65ddf2b098a 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-8.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-8.c
> index 1fc2af1e753..7fe0dd00431 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-8.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { lp64 && powerpc_p9vector_ok } } } */
> -/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
>  
>  /* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr96373.c b/gcc/testsuite/gcc.target/powerpc/pr96373.c
> new file mode 100644
> index 00000000000..f0471b6f68b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr96373.c
> @@ -0,0 +1,31 @@
> +/* { dg-do run { target { powerpc*-*-linux* } } } */
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +/* Verify it can run successfully, especially on Power10 and later.   */
> +
> +#define _GNU_SOURCE
> +#include <fenv.h>
> +
> +__attribute__ ((noipa)) void
> +div (double *d, double *s, int n)
> +{
> +  for (; n; n--, d++, s++)
> +    *d = *d / *s;
> +}
> +
> +int main()
> +{
> +  double d[] = {1,2,3,4,5,6,7,8,9,10,11};
> +  double s[] = {11,10,9,8,7,6,5,4,3,2,1};
> +
> +  feenableexcept(FE_DIVBYZERO|FE_INVALID);
> +  div(d, s, 11);
> +
> +  int i;
> +  for (i = 0; i < 11; i++)
> +    __builtin_printf(" %f", d[i]);
> +
> +  __builtin_printf("\n");
> +
> +  return 0;
> +}
Kewen.Lin Feb. 14, 2023, 2:17 a.m. UTC | #4
on 2023/2/13 21:57, Richard Sandiford wrote:
> "Kewen.Lin" <linkw@linux.ibm.com> writes:
>> Hi Richard,
>>
>> on 2023/1/27 19:08, Richard Sandiford via Gcc-patches wrote:
>>> PR96373 points out that a predicated SVE loop currently converts
>>> trapping unconditional ops into unpredicated vector ops.  Doing
>>> the operation on inactive lanes can then raise an exception.
>>>
>>> As discussed in the PR trail, we aren't 100% consistent about
>>> whether we preserve traps or not.  But the direction of travel
>>> is clearly to improve that rather than live with it.  This patch
>>> tries to do that for the SVE case.
>>>
>>> Doing this regresses gcc.target/aarch64/sve/fabd_1.c.  I've added
>>> -fno-trapping-math for now and filed PR108571 to track it.
>>> A similar problem applies to fsubr_1.d.
>>>
>>> I think this is likely to regress Power 10, since conditional
>>> operations are only available for masked loops.  I think we'll
>>> need to add -fno-trapping-math to any affected testcases,
>>> but I don't have a Power 10 system to test on.  Kewen, would you
>>> mind giving this a spin and seeing how bad the fallout is?
>>>
>>
>> Sorry for the late reply, I'm just back from vacation.
>>
>> Thank you for fixing this and caring about Power10!
>>
>> I tested your proposed patch on one Power10 machine (ppc64le),
>> it's bootstrapped but some test failures got exposed as below.
>>
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times \\\\mlxvl\\\\M 14
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times \\\\mstxvl\\\\M 7
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times \\\\mlxvl\\\\M 20
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times \\\\mstxvl\\\\M 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times \\\\mlxvl\\\\M 14
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times \\\\mstxvl\\\\M 7
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mlxvl\\\\M 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mlxvx?\\\\M 120
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mstxvl\\\\M 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mstxvx?\\\\M 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mlxvl\\\\M 21
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mstxvl\\\\M 21
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mstxvx?\\\\M 21
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mlxvl\\\\M 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mlxvx?\\\\M 42
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mstxvl\\\\M 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times \\\\mlxvl\\\\M 16
>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times \\\\mstxvl\\\\M 7
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not \\\\mlxvx\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not \\\\mstxvx\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times \\\\mlxvl\\\\M 20
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times \\\\mstxvl\\\\M 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not \\\\mlxvx\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not \\\\mstxvx\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times \\\\mlxvl\\\\M 20
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times \\\\mstxvl\\\\M 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times \\\\mlxvl\\\\M 14
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times \\\\mstxvl\\\\M 7
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mlxvx\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mstxv\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mstxvx\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times \\\\mlxvl\\\\M 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times \\\\mstxvl\\\\M 70
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mlxvx\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mstxv\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mstxvx\\\\M
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times \\\\mlxvl\\\\M 21
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times \\\\mstxvl\\\\M 21
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mlxvl\\\\M 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mstxvl\\\\M 10
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mstxvx?\\\\M 6
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-8.c scan-assembler-times \\\\mlxvl\\\\M 30
>> < FAIL: gcc.target/powerpc/p9-vec-length-full-8.c scan-assembler-times \\\\mstxvl\\\\M 10
>>
>> By checking several of them, it's due to that we don't vectorize
>> some loop having float type involved with partial vector any more.
>>
>> As you suggested above, I fixed them with an extra option
>> "-fno-trapping-math" and verified all of them can pass again.
>> I also noticed that the original test case in PR96373 fails
>> on Power10 too, so I added one constructed case pr96373.c
>> into sub bucket gcc.target/powerpc for testing coverage
>> on Power.
>>
>> One re-spin with the attached adjustment shows there is no
>> regression failure any more, and the new test case works well
>> on both ppc64 (P8) and ppc64le (P10) Linux.
> 
> Thanks for doing this.  The patch is OK, if you need approval.
> I'll push mine once it's in.

Thanks for the review!  Pushed in r13-5978-g4f5a1198065dc0.

btw, do we want this to be backported?  If yes, I'm going to
backport it to gcc-12 and gcc-11 branches soon (for gcc-10 we
don't have partial vector support on Power btw).

BR,
Kewen
Richard Sandiford Feb. 14, 2023, 9:20 a.m. UTC | #5
"Kewen.Lin" <linkw@linux.ibm.com> writes:
> on 2023/2/13 21:57, Richard Sandiford wrote:
>> "Kewen.Lin" <linkw@linux.ibm.com> writes:
>>> Hi Richard,
>>>
>>> on 2023/1/27 19:08, Richard Sandiford via Gcc-patches wrote:
>>>> PR96373 points out that a predicated SVE loop currently converts
>>>> trapping unconditional ops into unpredicated vector ops.  Doing
>>>> the operation on inactive lanes can then raise an exception.
>>>>
>>>> As discussed in the PR trail, we aren't 100% consistent about
>>>> whether we preserve traps or not.  But the direction of travel
>>>> is clearly to improve that rather than live with it.  This patch
>>>> tries to do that for the SVE case.
>>>>
>>>> Doing this regresses gcc.target/aarch64/sve/fabd_1.c.  I've added
>>>> -fno-trapping-math for now and filed PR108571 to track it.
>>>> A similar problem applies to fsubr_1.d.
>>>>
>>>> I think this is likely to regress Power 10, since conditional
>>>> operations are only available for masked loops.  I think we'll
>>>> need to add -fno-trapping-math to any affected testcases,
>>>> but I don't have a Power 10 system to test on.  Kewen, would you
>>>> mind giving this a spin and seeing how bad the fallout is?
>>>>
>>>
>>> Sorry for the late reply, I'm just back from vacation.
>>>
>>> Thank you for fixing this and caring about Power10!
>>>
>>> I tested your proposed patch on one Power10 machine (ppc64le),
>>> it's bootstrapped but some test failures got exposed as below.
>>>
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times \\\\mlxvl\\\\M 14
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-1.c scan-assembler-times \\\\mstxvl\\\\M 7
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times \\\\mlxvl\\\\M 20
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-2.c scan-assembler-times \\\\mstxvl\\\\M 10
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times \\\\mlxvl\\\\M 14
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-3.c scan-assembler-times \\\\mstxvl\\\\M 7
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mlxvl\\\\M 70
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mlxvx?\\\\M 120
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mstxvl\\\\M 70
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-4.c scan-assembler-times \\\\mstxvx?\\\\M 70
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mlxvl\\\\M 21
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mstxvl\\\\M 21
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-5.c scan-assembler-times \\\\mstxvx?\\\\M 21
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mlxvl\\\\M 10
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mlxvx?\\\\M 42
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-6.c scan-assembler-times \\\\mstxvl\\\\M 10
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times \\\\mlxvl\\\\M 16
>>> < FAIL: gcc.target/powerpc/p9-vec-length-epil-8.c scan-assembler-times \\\\mstxvl\\\\M 7
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not \\\\mlxvx\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-not \\\\mstxvx\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times \\\\mlxvl\\\\M 20
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-1.c scan-assembler-times \\\\mstxvl\\\\M 10
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not \\\\mlxvx\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-not \\\\mstxvx\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times \\\\mlxvl\\\\M 20
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-2.c scan-assembler-times \\\\mstxvl\\\\M 10
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times \\\\mlxvl\\\\M 14
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-3.c scan-assembler-times \\\\mstxvl\\\\M 7
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mlxvx\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mstxv\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-not \\\\mstxvx\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times \\\\mlxvl\\\\M 70
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-4.c scan-assembler-times \\\\mstxvl\\\\M 70
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mlxvx\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mstxv\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-not \\\\mstxvx\\\\M
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times \\\\mlxvl\\\\M 21
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-5.c scan-assembler-times \\\\mstxvl\\\\M 21
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mlxvl\\\\M 10
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mstxvl\\\\M 10
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-6.c scan-assembler-times \\\\mstxvx?\\\\M 6
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-8.c scan-assembler-times \\\\mlxvl\\\\M 30
>>> < FAIL: gcc.target/powerpc/p9-vec-length-full-8.c scan-assembler-times \\\\mstxvl\\\\M 10
>>>
>>> By checking several of them, it's due to that we don't vectorize
>>> some loop having float type involved with partial vector any more.
>>>
>>> As you suggested above, I fixed them with an extra option
>>> "-fno-trapping-math" and verified all of them can pass again.
>>> I also noticed that the original test case in PR96373 fails
>>> on Power10 too, so I added one constructed case pr96373.c
>>> into sub bucket gcc.target/powerpc for testing coverage
>>> on Power.
>>>
>>> One re-spin with the attached adjustment shows there is no
>>> regression failure any more, and the new test case works well
>>> on both ppc64 (P8) and ppc64le (P10) Linux.
>> 
>> Thanks for doing this.  The patch is OK, if you need approval.
>> I'll push mine once it's in.
>
> Thanks for the review!  Pushed in r13-5978-g4f5a1198065dc0.

Thanks, I've now pushed the vectoriser patch.

> btw, do we want this to be backported?  If yes, I'm going to
> backport it to gcc-12 and gcc-11 branches soon (for gcc-10 we
> don't have partial vector support on Power btw).

Yeah, for SVE it'll need to go on all active branches.  I'm going
to be off until 27th Feb so I'll start backporting after that.

Richard
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c
index 13ad83be24c..30bde6f0df7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O3 --save-temps" } */
+/* { dg-options "-O3 --save-temps -fno-trapping-math" } */
 
 #define N 16
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c
index 4a3e7c06745..0245a8c1422 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c
@@ -27,20 +27,20 @@  DO_ARITH_OPS (_Float16, *, mul)
 DO_ARITH_OPS (float, *, mul)
 DO_ARITH_OPS (double, *, mul)
 
-/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 1 } } */
-/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2.0\n} 1 } } */
 /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #5} } } */
 /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */
 
-/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
-/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2.0\n} 1 } } */
 /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #5} } } */
 /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */
 
-/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 1 } } */
-/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2.0\n} 1 } } */
 /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #5} } } */
 /* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c
index 5aed0dcb490..419d6e1b5ec 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c
@@ -34,37 +34,37 @@  DO_ARITH_OPS (double, -, minus)
 
 /* No specific count because it's valid to use fadd or fsub for the
    out-of-range constants.  */
-/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} } } */
 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
 /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
 /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */
 
-/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} } } */
 /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
 /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
 /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */
 
-/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
 /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
 /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */
 
-/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
 /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
 /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
 /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */
 
-/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 2 } } */
 /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
 /* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */
 
-/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
 /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 2 } } */
 /* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c
index f47a360dee9..012cf6e9e5d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O3 --save-temps" } */
+/* { dg-options "-O3 --save-temps -fno-trapping-math" } */
 
 #define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME)			\
 void vsubrarithimm_##NAME##_##TYPE (TYPE *dst, int count)	\
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index eb4ca1f184e..56e3c30658e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6301,6 +6301,7 @@  vectorizable_operation (vec_info *vinfo,
   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   internal_fn cond_fn = get_conditional_internal_fn (code);
+  bool could_trap = gimple_could_trap_p (stmt);
 
   if (!vec_stmt) /* transformation not required.  */
     {
@@ -6309,7 +6310,7 @@  vectorizable_operation (vec_info *vinfo,
 	 keeping the inactive lanes as-is.  */
       if (loop_vinfo
 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
-	  && reduc_idx >= 0)
+	  && (could_trap || reduc_idx >= 0))
 	{
 	  if (cond_fn == IFN_LAST
 	      || !direct_internal_fn_supported_p (cond_fn, vectype,
@@ -6452,16 +6453,31 @@  vectorizable_operation (vec_info *vinfo,
       vop1 = ((op_type == binary_op || op_type == ternary_op)
 	      ? vec_oprnds1[i] : NULL_TREE);
       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
-      if (masked_loop_p && reduc_idx >= 0)
+      if (masked_loop_p && (reduc_idx >= 0 || could_trap))
 	{
-	  /* Perform the operation on active elements only and take
-	     inactive elements from the reduction chain input.  */
-	  gcc_assert (!vop2);
-	  vop2 = reduc_idx == 1 ? vop1 : vop0;
 	  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
 					  vectype, i);
-	  gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
-						    vop0, vop1, vop2);
+	  auto_vec<tree> vops (5);
+	  vops.quick_push (mask);
+	  vops.quick_push (vop0);
+	  if (vop1)
+	    vops.quick_push (vop1);
+	  if (vop2)
+	    vops.quick_push (vop2);
+	  if (reduc_idx >= 0)
+	    {
+	      /* Perform the operation on active elements only and take
+		 inactive elements from the reduction chain input.  */
+	      gcc_assert (!vop2);
+	      vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
+	    }
+	  else
+	    {
+	      auto else_value = targetm.preferred_else_value
+		(cond_fn, vectype, vops.length () - 1, &vops[1]);
+	      vops.quick_push (else_value);
+	    }
+	  gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
 	  new_temp = make_ssa_name (vec_dest, call);
 	  gimple_call_set_lhs (call, new_temp);
 	  gimple_call_set_nothrow (call, true);