diff mbox series

S/390: Fix conditional returns

Message ID 20180905083448.1689-1-iii@linux.ibm.com
State New
Headers show
Series S/390: Fix conditional returns | expand

Commit Message

Ilya Leoshkevich Sept. 5, 2018, 8:34 a.m. UTC
S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of
the more usual (return) or (simple_return).  This sequence is not
recognized by the conditional return logic in try_optimize_cfg ().

gcc/ChangeLog:

2018-08-28  Ilya Leoshkevich  <iii@linux.ibm.com>

	PR target/80080
	* cfgcleanup.c (bb_is_just_return): Accept PARALLELs containing
	RETURNs.
	* cfgrtl.c (rtl_verify_bb_layout): Handle PARALLELs containing
	conditional jumps.
	* config/s390/s390.md: Recognize PARALLELs containing RETURNs.
	* jump.c (copy_update_parallel): Create a copy of a PARALLEL
	in which one of the side effects is replaced.
	(redirect_exp_1): Handle jump targets that are PARALLELs
	containing RETURNs.
	(redirect_jump_2): Likewise.
	(return_in_parallel): Recognize PARALLELs containing RETURNs.
	* rtl.h (return_in_parallel): Add declaration.

gcc/testsuite/ChangeLog:

2018-08-28  Ilya Leoshkevich  <iii@linux.ibm.com>

	PR target/80080
	* gcc.target/s390/risbg-ll-3.c: Expect conditional returns.
	* gcc.target/s390/zvector/vec-cmp-2.c: Likewise.
---
 gcc/cfgcleanup.c                              |  2 +-
 gcc/cfgrtl.c                                  |  3 +-
 gcc/config/s390/s390.md                       | 13 +++-
 gcc/jump.c                                    | 69 ++++++++++++++++++-
 gcc/rtl.h                                     |  1 +
 gcc/testsuite/gcc.target/s390/risbg-ll-3.c    |  4 +-
 .../gcc.target/s390/zvector/vec-cmp-2.c       | 48 ++++++-------
 7 files changed, 108 insertions(+), 32 deletions(-)

Comments

Andreas Krebbel Sept. 6, 2018, 7:42 a.m. UTC | #1
Hi Ilya,

the S/390 parts look good to me. For the rest we will need approval from a middle-end maintainer.

Andreas

On 05.09.2018 10:34, Ilya Leoshkevich wrote:
> S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of
> the more usual (return) or (simple_return).  This sequence is not
> recognized by the conditional return logic in try_optimize_cfg ().
> 
> gcc/ChangeLog:
> 
> 2018-08-28  Ilya Leoshkevich  <iii@linux.ibm.com>
> 
> 	PR target/80080
> 	* cfgcleanup.c (bb_is_just_return): Accept PARALLELs containing
> 	RETURNs.
> 	* cfgrtl.c (rtl_verify_bb_layout): Handle PARALLELs containing
> 	conditional jumps.
> 	* config/s390/s390.md: Recognize PARALLELs containing RETURNs.
> 	* jump.c (copy_update_parallel): Create a copy of a PARALLEL
> 	in which one of side effects is replaced.
> 	(redirect_exp_1): Handle jump targets that are PARALLELs
> 	containing RETURNs.
> 	(redirect_jump_2): Likewise.
> 	(return_in_parallel): Recognize PARALLELs containing RETURNs.
> 	* rtl.h (return_in_parallel): Add declaration.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-08-28  Ilya Leoshkevich  <iii@linux.ibm.com>
> 
> 	PR target/80080
> 	* gcc.target/s390/risbg-ll-3.c: Expect conditional returns.
> 	* gcc.target/s390/zvector/vec-cmp-2.c: Likewise.
> ---
>  gcc/cfgcleanup.c                              |  2 +-
>  gcc/cfgrtl.c                                  |  3 +-
>  gcc/config/s390/s390.md                       | 13 +++-
>  gcc/jump.c                                    | 69 ++++++++++++++++++-
>  gcc/rtl.h                                     |  1 +
>  gcc/testsuite/gcc.target/s390/risbg-ll-3.c    |  4 +-
>  .../gcc.target/s390/zvector/vec-cmp-2.c       | 48 ++++++-------
>  7 files changed, 108 insertions(+), 32 deletions(-)
> 
> diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
> index 4a5dc29d14f..7f2545f453f 100644
> --- a/gcc/cfgcleanup.c
> +++ b/gcc/cfgcleanup.c
> @@ -2624,7 +2624,7 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use)
>        {
>  	rtx pat = PATTERN (insn);
>  
> -	if (!*ret && ANY_RETURN_P (pat))
> +	if (!*ret && (ANY_RETURN_P (pat) || return_in_parallel (pat)))
>  	  *ret = insn;
>  	else if (!*ret && !*use && GET_CODE (pat) == USE
>  	    && REG_P (XEXP (pat, 0))
> diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
> index 3b1931daeba..701c6a985b8 100644
> --- a/gcc/cfgrtl.c
> +++ b/gcc/cfgrtl.c
> @@ -2987,7 +2987,8 @@ rtl_verify_bb_layout (void)
>  	}
>  
>        if (JUMP_P (x)
> -	  && returnjump_p (x) && ! condjump_p (x)
> +	  && returnjump_p (x)
> +	  && ! (condjump_p (x) || condjump_in_parallel_p (x))
>  	  && ! ((y = next_nonnote_nondebug_insn (x))
>  		&& BARRIER_P (y)))
>  	    fatal_insn ("return not followed by barrier", x);
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index db260e41bfd..3c413638038 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -8842,8 +8842,19 @@
>     (set_attr "type"  "branch")
>     (set_attr "atype" "agen")])
>  
> +(define_subst "add_use_return_reg_subst"
> +  [(set (match_operand 0 "" "")
> +	(match_operand 1 "" ""))]
> +  ""
> +  [(set (match_dup 0)
> +	(match_dup 1))
> +   (use (reg RETURN_REGNUM))])
> +
> +(define_subst_attr "add_use_return_reg_name" "add_use_return_reg_subst"
> +		   "" "_use_return_reg")
> +
>  ;; A conditional return instruction.
> -(define_insn "*c<code>"
> +(define_insn "*c<code><add_use_return_reg_name>"
>    [(set (pc)
>          (if_then_else
>            (match_operator 0 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
> diff --git a/gcc/jump.c b/gcc/jump.c
> index 06f7255d24d..8057ace74d2 100644
> --- a/gcc/jump.c
> +++ b/gcc/jump.c
> @@ -1423,6 +1423,26 @@ redirect_target (rtx x)
>    return x;
>  }
>  
> +/* Create a copy of PARALLEL with side-effect OSIDE replaced by NSIDE.  */
> +static rtx
> +copy_update_parallel (rtx par, rtx *oside, rtx nside)
> +{
> +  rtx npar;
> +  int i;
> +
> +  npar = gen_rtx_PARALLEL (GET_MODE (par), rtvec_alloc (XVECLEN (par, 0)));
> +  for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
> +    {
> +      rtx *side_effect = &XVECEXP (par, 0, i);
> +
> +      if (side_effect == oside)
> +	XVECEXP (npar, 0, i) = nside;
> +      else
> +	XVECEXP (npar, 0, i) = copy_rtx (*side_effect);
> +    }
> +  return npar;
> +}
> +
>  /* Throughout LOC, redirect OLABEL to NLABEL.  Treat null OLABEL or
>     NLABEL as a return.  Accrue modifications into the change group.  */
>  
> @@ -1437,9 +1457,22 @@ redirect_exp_1 (rtx *loc, rtx olabel, rtx nlabel, rtx_insn *insn)
>    if ((code == LABEL_REF && label_ref_label (x) == olabel)
>        || x == olabel)
>      {
> -      x = redirect_target (nlabel);
> -      if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
> - 	x = gen_rtx_SET (pc_rtx, x);
> +      rtx *nret = return_in_parallel (nlabel);
> +
> +      if (nret)
> +	{
> +	  rtx npat;
> +
> +	  x = *nret;
> +	  npat = copy_update_parallel (nlabel, nret, PATTERN (insn));
> +	  validate_change (insn, &PATTERN (insn), npat, 1);
> +	}
> +      else
> +	{
> +	  x = redirect_target (nlabel);
> +	  if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
> +	    x = gen_rtx_SET (pc_rtx, x);
> +	}
>        validate_change (insn, loc, x, 1);
>        return;
>      }
> @@ -1551,10 +1584,15 @@ void
>  redirect_jump_2 (rtx_jump_insn *jump, rtx olabel, rtx nlabel, int delete_unused,
>  		 int invert)
>  {
> +  rtx *ret;
>    rtx note;
>  
>    gcc_assert (JUMP_LABEL (jump) == olabel);
>  
> +  ret = return_in_parallel (nlabel);
> +  if (ret)
> +    nlabel = *ret;
> +
>    /* Negative DELETE_UNUSED used to be used to signalize behavior on
>       moving FUNCTION_END note.  Just sanity check that no user still worry
>       about this.  */
> @@ -1929,3 +1967,28 @@ reg_or_subregno (const_rtx reg)
>    gcc_assert (REG_P (reg));
>    return REGNO (reg);
>  }
> +
> +/* If PAT is a PARALLEL, that contains a single RETURN or a SIMPLE_RETURN, and
> +   zero or more USEs and CLOBBERs, returns an address of that RETURN or
> +   SIMPLE_RETURN.  Otherwise returns NULL.  */
> +
> +rtx *
> +return_in_parallel (rtx pat)
> +{
> +  int i;
> +  rtx *ret = NULL;
> +
> +  if (GET_CODE (pat) != PARALLEL)
> +    return NULL;
> +  for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
> +    {
> +      rtx *side_effect = &XVECEXP (pat, 0, i);
> +
> +      if (!ret && ANY_RETURN_P (*side_effect))
> +	ret = side_effect;
> +      else if (GET_CODE (*side_effect) != USE
> +	  && GET_CODE (*side_effect) != CLOBBER)
> +	return NULL;
> +    }
> +  return ret;
> +}
> diff --git a/gcc/rtl.h b/gcc/rtl.h
> index 68d3ceab29f..bf87ddc0bc7 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -3970,6 +3970,7 @@ extern enum rtx_code reversed_comparison_code_parts (enum rtx_code, const_rtx,
>  						     const_rtx, const rtx_insn *);
>  extern void delete_for_peephole (rtx_insn *, rtx_insn *);
>  extern int condjump_in_parallel_p (const rtx_insn *);
> +extern rtx *return_in_parallel (rtx);
>  
>  /* In emit-rtl.c.  */
>  extern int max_reg_num (void);
> diff --git a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c
> index 838f1ffbd91..25f7ddb9c61 100644
> --- a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c
> +++ b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c
> @@ -23,7 +23,7 @@ i64 f1 (i64 v_a, i64 v_b)
>  extern i64 f2_foo();
>  i64 f2 (i64 v_a, i64 v_b)
>  {
> -/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tje\t" { target { lp64 } } } } */
> +/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tbner\t%r14\n\tjg\tf2_foo\n" { target { lp64 } } } } */
>  /* { dg-final { scan-assembler "f2:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0" { target { ! lp64 } } } } */
>    i64 v_anda = v_a & -15;
>    i64 v_andb = v_b & 14;
> @@ -37,7 +37,7 @@ i64 f2 (i64 v_a, i64 v_b)
>  void f2_bar ();
>  void f2_cconly (i64 v_a, i64 v_b)
>  {
> -/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r3,%r2,63,59,0\n\tjne\t"  { target { lp64 } } } } */
> +/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r3,%r2,63,59,0\n\tber\t%r14\n\tjg\tf2_bar\n" { target { lp64 } } } } */
>  /* { dg-final { scan-assembler "f2_cconly:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0\n\tjne\t" { target { ! lp64 } } } } */
>    if ((v_a & -15) | (v_b & 14))
>      f2_bar();
> diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
> index 1e63defa063..09a15eb25f0 100644
> --- a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
> +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
> @@ -15,7 +15,7 @@ all_eq_double (vector double a, vector double b)
>    if (__builtin_expect (vec_all_eq (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
> +/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_ne_double (vector double a, vector double b)
> @@ -23,7 +23,7 @@ all_ne_double (vector double a, vector double b)
>    if (__builtin_expect (vec_all_ne (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
> +/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_gt_double (vector double a, vector double b)
> @@ -31,7 +31,7 @@ all_gt_double (vector double a, vector double b)
>    if (__builtin_expect (vec_all_gt (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
> +/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_lt_double (vector double a, vector double b)
> @@ -39,7 +39,7 @@ all_lt_double (vector double a, vector double b)
>    if (__builtin_expect (vec_all_lt (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
> +/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_ge_double (vector double a, vector double b)
> @@ -47,7 +47,7 @@ all_ge_double (vector double a, vector double b)
>    if (__builtin_expect (vec_all_ge (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
> +/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_le_double (vector double a, vector double b)
> @@ -55,7 +55,7 @@ all_le_double (vector double a, vector double b)
>    if (__builtin_expect (vec_all_le (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
> +/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_eq_double (vector double a, vector double b)
> @@ -63,7 +63,7 @@ any_eq_double (vector double a, vector double b)
>    if (__builtin_expect (vec_any_eq (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
> +/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_ne_double (vector double a, vector double b)
> @@ -71,7 +71,7 @@ any_ne_double (vector double a, vector double b)
>    if (__builtin_expect (vec_any_ne (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
> +/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_gt_double (vector double a, vector double b)
> @@ -79,7 +79,7 @@ any_gt_double (vector double a, vector double b)
>    if (__builtin_expect (vec_any_gt (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
> +/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_lt_double (vector double a, vector double b)
> @@ -87,7 +87,7 @@ any_lt_double (vector double a, vector double b)
>    if (__builtin_expect (vec_any_lt (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
> +/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_ge_double (vector double a, vector double b)
> @@ -95,7 +95,7 @@ any_ge_double (vector double a, vector double b)
>    if (__builtin_expect (vec_any_ge (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
> +/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_le_double (vector double a, vector double b)
> @@ -103,7 +103,7 @@ any_le_double (vector double a, vector double b)
>    if (__builtin_expect (vec_any_le (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
> +/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_eq_int (vector int a, vector int b)
> @@ -111,7 +111,7 @@ all_eq_int (vector int a, vector int b)
>    if (__builtin_expect (vec_all_eq (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
> +/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_ne_int (vector int a, vector int b)
> @@ -119,7 +119,7 @@ all_ne_int (vector int a, vector int b)
>    if (__builtin_expect (vec_all_ne (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
> +/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_gt_int (vector int a, vector int b)
> @@ -127,7 +127,7 @@ all_gt_int (vector int a, vector int b)
>    if (__builtin_expect (vec_all_gt (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
> +/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_lt_int (vector int a, vector int b)
> @@ -135,7 +135,7 @@ all_lt_int (vector int a, vector int b)
>    if (__builtin_expect (vec_all_lt (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
> +/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_ge_int (vector int a, vector int b)
> @@ -143,7 +143,7 @@ all_ge_int (vector int a, vector int b)
>    if (__builtin_expect (vec_all_ge (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjle 1 } } */
> +/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  all_le_int (vector int a, vector int b)
> @@ -151,7 +151,7 @@ all_le_int (vector int a, vector int b)
>    if (__builtin_expect (vec_all_le (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
> +/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_eq_int (vector int a, vector int b)
> @@ -159,7 +159,7 @@ any_eq_int (vector int a, vector int b)
>    if (__builtin_expect (vec_any_eq (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
> +/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_ne_int (vector int a, vector int b)
> @@ -167,7 +167,7 @@ any_ne_int (vector int a, vector int b)
>    if (__builtin_expect (vec_any_ne (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
> +/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_gt_int (vector int a, vector int b)
> @@ -175,7 +175,7 @@ any_gt_int (vector int a, vector int b)
>    if (__builtin_expect (vec_any_gt (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
> +/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_lt_int (vector int a, vector int b)
> @@ -183,7 +183,7 @@ any_lt_int (vector int a, vector int b)
>    if (__builtin_expect (vec_any_lt (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
> +/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_ge_int (vector int a, vector int b)
> @@ -191,7 +191,7 @@ any_ge_int (vector int a, vector int b)
>    if (__builtin_expect (vec_any_ge (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tje 1 } } */
> +/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tber\t%r14\n 1 } } */
>  
>  void __attribute__((noinline,noclone))
>  any_le_int (vector int a, vector int b)
> @@ -199,5 +199,5 @@ any_le_int (vector int a, vector int b)
>    if (__builtin_expect (vec_any_le (a, b), 1))
>      g = 2;
>  }
> -/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
> +/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */
>  
>
Jeff Law Sept. 18, 2018, 12:57 a.m. UTC | #2
On 9/5/18 2:34 AM, Ilya Leoshkevich wrote:
> S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of
> the more usual (return) or (simple_return).  This sequence is not
> recognized by the conditional return logic in try_optimize_cfg ().
> 
> gcc/ChangeLog:
> 
> 2018-08-28  Ilya Leoshkevich  <iii@linux.ibm.com>
> 
> 	PR target/80080
> 	* cfgcleanup.c (bb_is_just_return): Accept PARALLELs containing
> 	RETURNs.
> 	* cfgrtl.c (rtl_verify_bb_layout): Handle PARALLELs containing
> 	conditional jumps.
> 	* config/s390/s390.md: Recognize PARALLELs containing RETURNs.
> 	* jump.c (copy_update_parallel): Create a copy of a PARALLEL
> 	in which one of side effects is replaced.
> 	(redirect_exp_1): Handle jump targets that are PARALLELs
> 	containing RETURNs.
> 	(redirect_jump_2): Likewise.
> 	(return_in_parallel): Recognize PARALLELs containing RETURNs.
> 	* rtl.h (return_in_parallel): Add declaration.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-08-28  Ilya Leoshkevich  <iii@linux.ibm.com>
> 
> 	PR target/80080
> 	* gcc.target/s390/risbg-ll-3.c: Expect conditional returns.
> 	* gcc.target/s390/zvector/vec-cmp-2.c: Likewise.
> ---
>  gcc/cfgcleanup.c                              |  2 +-
>  gcc/cfgrtl.c                                  |  3 +-
>  gcc/config/s390/s390.md                       | 13 +++-
>  gcc/jump.c                                    | 69 ++++++++++++++++++-
>  gcc/rtl.h                                     |  1 +
>  gcc/testsuite/gcc.target/s390/risbg-ll-3.c    |  4 +-
>  .../gcc.target/s390/zvector/vec-cmp-2.c       | 48 ++++++-------
>  7 files changed, 108 insertions(+), 32 deletions(-)
> 
> diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
> index 4a5dc29d14f..7f2545f453f 100644
> --- a/gcc/cfgcleanup.c
> +++ b/gcc/cfgcleanup.c
> @@ -2624,7 +2624,7 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use)
>        {
>  	rtx pat = PATTERN (insn);
>  
> -	if (!*ret && ANY_RETURN_P (pat))
> +	if (!*ret && (ANY_RETURN_P (pat) || return_in_parallel (pat)))
>  	  *ret = insn;
>  	else if (!*ret && !*use && GET_CODE (pat) == USE
>  	    && REG_P (XEXP (pat, 0))
So what else is in the return insn that requires you to test for
return_in_parallel?  If we're going to allow a return in a parallel,
here I think we need to tighten down its form given the intended ways
bb_is_just_return is supposed to be used.  Essentially other side
effects would seem to be forbidden in the parallel.  I.e., you could have
a PARALLEL with a return and use inside, but not a return with anything
else inside (such as a clobber).

Why do you need to make a copy of the parallel RTX in redirect_exp_1?
We don't do it for other cases -- why can't we just use validate_change
like all the other RTXs?



>  /* Throughout LOC, redirect OLABEL to NLABEL.  Treat null OLABEL or
>     NLABEL as a return.  Accrue modifications into the change group.  */
>  
> @@ -1437,9 +1457,22 @@ redirect_exp_1 (rtx *loc, rtx olabel, rtx nlabel, rtx_insn *insn)
>    if ((code == LABEL_REF && label_ref_label (x) == olabel)
>        || x == olabel)
>      {
> -      x = redirect_target (nlabel);
> -      if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
> - 	x = gen_rtx_SET (pc_rtx, x);
> +      rtx *nret = return_in_parallel (nlabel);
> +
> +      if (nret)
> +	{
> +	  rtx npat;
> +
> +	  x = *nret;
> +	  npat = copy_update_parallel (nlabel, nret, PATTERN (insn));
> +	  validate_change (insn, &PATTERN (insn), npat, 1);
> +	}
> +      else
> +	{
> +	  x = redirect_target (nlabel);
> +	  if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
> +	    x = gen_rtx_SET (pc_rtx, x);
> +	}
>        validate_change (insn, loc, x, 1);
Why the need to use copy_update_parallel here?  Is there a reason why
validate_change is insufficient?


>        return;
>      }
> @@ -1551,10 +1584,15 @@ void
>  redirect_jump_2 (rtx_jump_insn *jump, rtx olabel, rtx nlabel, int delete_unused,
>  		 int invert)
>  {
> +  rtx *ret;
>    rtx note;
>  
>    gcc_assert (JUMP_LABEL (jump) == olabel);
>  
> +  ret = return_in_parallel (nlabel);
> +  if (ret)
> +    nlabel = *ret;
Why does return_in_parallel return an rtx *?  Can't you just return the
rtx and avoid the unnecessary dereferencing?  I guess this ultimately
comes back to why can't you use validate_change like everyone else in
redirect_exp_1?

jeff


Jeff
Ilya Leoshkevich Sept. 18, 2018, 12:06 p.m. UTC | #3
> Am 18.09.2018 um 02:57 schrieb Jeff Law <law@redhat.com>:
> 
> On 9/5/18 2:34 AM, Ilya Leoshkevich wrote:
>> --- a/gcc/cfgcleanup.c
>> +++ b/gcc/cfgcleanup.c
>> @@ -2624,7 +2624,7 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use)
>>       {
>> 	rtx pat = PATTERN (insn);
>> 
>> -	if (!*ret && ANY_RETURN_P (pat))
>> +	if (!*ret && (ANY_RETURN_P (pat) || return_in_parallel (pat)))
>> 	  *ret = insn;
>> 	else if (!*ret && !*use && GET_CODE (pat) == USE
>> 	    && REG_P (XEXP (pat, 0))
> So what else is in the return insn that requires you to test for
> return_in_parallel?  If we're going to allow a return in a parallel,
> here I think we need to tighten down its form given the intended ways
> bb_is_just_return is supposed to be used.  Essentially other side
> effects would seem to be forbidden in the parallel.  ie, you could have
> a PARALLEL with a return and use inside, but not a return with anything
> else inside (such as a clobber).

Yes, it’s RETURN+USE.  I allowed CLOBBERs, because bb_is_just_return
already allows them, but I don’t think it's necessary for the S/390 use
case, so I can make it more restrictive if needed.

> 
> Why do you need to make a copy of the parallel RTX in redirect_exp_1?
> We don't do it for other cases -- why can't we just use validate_change
> like all the other RTXs?
> 
> 
>> /* Throughout LOC, redirect OLABEL to NLABEL.  Treat null OLABEL or
>>    NLABEL as a return.  Accrue modifications into the change group.  */
>> 
>> @@ -1437,9 +1457,22 @@ redirect_exp_1 (rtx *loc, rtx olabel, rtx nlabel, rtx_insn *insn)
>>   if ((code == LABEL_REF && label_ref_label (x) == olabel)
>>       || x == olabel)
>>     {
>> -      x = redirect_target (nlabel);
>> -      if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
>> - 	x = gen_rtx_SET (pc_rtx, x);
>> +      rtx *nret = return_in_parallel (nlabel);
>> +
>> +      if (nret)
>> +	{
>> +	  rtx npat;
>> +
>> +	  x = *nret;
>> +	  npat = copy_update_parallel (nlabel, nret, PATTERN (insn));
>> +	  validate_change (insn, &PATTERN (insn), npat, 1);
>> +	}
>> +      else
>> +	{
>> +	  x = redirect_target (nlabel);
>> +	  if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
>> +	    x = gen_rtx_SET (pc_rtx, x);
>> +	}
>>       validate_change (insn, loc, x, 1);
> Why the need to use copy_update_parallel here?  Is there a reason why
> validate_change is insufficient?

The original RTL has the following form:

(jump_insn 1 (set %pc (if_then_else (ne %cc 0) (label_ref 2) %pc)))
...
(code_label 2)
(jump_insn 3 (parallel [(return) (use %r14)]))

and the goal is to transform (jump_insn 1) to:

(jump_insn 1
 (parallel
  [(set %pc (if_then_else (ne %cc 0) (return) %pc))
   (use %r14)]))

while keeping (code_label 2) and (jump_insn 3) intact.  So I have to
create a new PARALLEL based on the original one.

> 
> 
>>       return;
>>     }
>> @@ -1551,10 +1584,15 @@ void
>> redirect_jump_2 (rtx_jump_insn *jump, rtx olabel, rtx nlabel, int delete_unused,
>> 		 int invert)
>> {
>> +  rtx *ret;
>>   rtx note;
>> 
>>   gcc_assert (JUMP_LABEL (jump) == olabel);
>> 
>> +  ret = return_in_parallel (nlabel);
>> +  if (ret)
>> +    nlabel = *ret;
> Why does return_in_parallel return an rtx *?  Can't you just return the
> rtx and avoid the unnecessary dereferencing?  I guess this ultimately
> comes back to why can't you use validate_change like everyone else in
> redirect_exp_1?

Right, this is related.  The pointer tells copy_update_parallel which
of the side-effects needs to be updated.
Segher Boessenkool Sept. 18, 2018, 2:04 p.m. UTC | #4
On Wed, Sep 05, 2018 at 10:34:48AM +0200, Ilya Leoshkevich wrote:
> S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of
> the more usual (return) or (simple_return).  This sequence is not
> recognized by the conditional return logic in try_optimize_cfg ().

Why does it need this?  Other targets with a link register make
EPILOGUE_USES handle this.

If you really need a parallel, can you make ANY_RETURN_P recognise it?

> +/* Create a copy of PARALLEL with side-effect OSIDE replaced by NSIDE.  */
> +static rtx
> +copy_update_parallel (rtx par, rtx *oside, rtx nside)
> +{
> +  rtx npar;
> +  int i;
> +
> +  npar = gen_rtx_PARALLEL (GET_MODE (par), rtvec_alloc (XVECLEN (par, 0)));
> +  for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
> +    {
> +      rtx *side_effect = &XVECEXP (par, 0, i);
> +
> +      if (side_effect == oside)
> +	XVECEXP (npar, 0, i) = nside;
> +      else
> +	XVECEXP (npar, 0, i) = copy_rtx (*side_effect);
> +    }
> +  return npar;
> +}

This doesn't work if nside is used anywhere else.  But the only caller
uses the previous instruction pattern; maybe make a function specialised
to that only?  You could give it a better name, too ;-)  (It is especially
surprising because the function is called copy_* but it does _not_ copy
its argument!)


Segher
Jeff Law Sept. 19, 2018, 4:15 a.m. UTC | #5
On 9/18/18 8:04 AM, Segher Boessenkool wrote:
> On Wed, Sep 05, 2018 at 10:34:48AM +0200, Ilya Leoshkevich wrote:
>> S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of
>> the more usual (return) or (simple_return).  This sequence is not
>> recognized by the conditional return logic in try_optimize_cfg ().
> 
> Why does it need this?  Other targets with a link register make
> EPILOGUE_USES handle this.
I think because he's trying to optimize a conditional jump to a return
insn into a conditional return insn.  I don't think we do that on other
targets, though I have pondered it from time to time.

Jeff
Ilya Leoshkevich Sept. 19, 2018, 8:48 a.m. UTC | #6
> Am 19.09.2018 um 06:15 schrieb Jeff Law <law@redhat.com>:
> 
> On 9/18/18 8:04 AM, Segher Boessenkool wrote:
>> On Wed, Sep 05, 2018 at 10:34:48AM +0200, Ilya Leoshkevich wrote:
>>> S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of
>>> the more usual (return) or (simple_return).  This sequence is not
>>> recognized by the conditional return logic in try_optimize_cfg ().
>> 
>> Why does it need this?  Other targets with a link register make
>> EPILOGUE_USES handle this.
> I think because he's trying to optimize a conditional jump to a return
> insn into a conditional return insn.  I don't think we do that on other
> targets, though I have pondered it from time to time.
The S/390 back-end already uses EPILOGUE_USES.  The reason RETURN in
PARALLEL still exists is that on older processors, sequences using a
non-standard, dynamically chosen link register are sometimes more
efficient, and for such use cases EPILOGUE_USES doesn't work.

I talked to Ulrich Weigand about this yesterday, and he suggested
dropping RETURN in PARALLEL for newer processors.  I will give this a try,
and if it works, the middle-end changes in this patch will no longer be
needed.
Segher Boessenkool Sept. 19, 2018, 12:12 p.m. UTC | #7
On Tue, Sep 18, 2018 at 10:15:00PM -0600, Jeff Law wrote:
> On 9/18/18 8:04 AM, Segher Boessenkool wrote:
> > On Wed, Sep 05, 2018 at 10:34:48AM +0200, Ilya Leoshkevich wrote:
> >> S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of
> >> the more usual (return) or (simple_return).  This sequence is not
> >> recognized by the conditional return logic in try_optimize_cfg ().
> > 
> > Why does it need this?  Other targets with a link register make
> > EPILOGUE_USES handle this.
> I think because he's trying to optimize a conditional jump to a return
> insn into a conditional return insn.  I don't think we do that on other
> targets, though I have pondered it from time to time.

See r248351, r236106, and importantly r235904.  I wrote that because it
is quite useful on Power.

s390 already _has_ a proper EPILOGUE_USES btw.  So just the return
patterns themselves need fixing and all will be good, it seems.


Segher
diff mbox series

Patch

diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
index 4a5dc29d14f..7f2545f453f 100644
--- a/gcc/cfgcleanup.c
+++ b/gcc/cfgcleanup.c
@@ -2624,7 +2624,7 @@  bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use)
       {
 	rtx pat = PATTERN (insn);
 
-	if (!*ret && ANY_RETURN_P (pat))
+	if (!*ret && (ANY_RETURN_P (pat) || return_in_parallel (pat)))
 	  *ret = insn;
 	else if (!*ret && !*use && GET_CODE (pat) == USE
 	    && REG_P (XEXP (pat, 0))
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index 3b1931daeba..701c6a985b8 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -2987,7 +2987,8 @@  rtl_verify_bb_layout (void)
 	}
 
       if (JUMP_P (x)
-	  && returnjump_p (x) && ! condjump_p (x)
+	  && returnjump_p (x)
+	  && ! (condjump_p (x) || condjump_in_parallel_p (x))
 	  && ! ((y = next_nonnote_nondebug_insn (x))
 		&& BARRIER_P (y)))
 	    fatal_insn ("return not followed by barrier", x);
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index db260e41bfd..3c413638038 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -8842,8 +8842,19 @@ 
    (set_attr "type"  "branch")
    (set_attr "atype" "agen")])
 
+(define_subst "add_use_return_reg_subst"
+  [(set (match_operand 0 "" "")
+	(match_operand 1 "" ""))]
+  ""
+  [(set (match_dup 0)
+	(match_dup 1))
+   (use (reg RETURN_REGNUM))])
+
+(define_subst_attr "add_use_return_reg_name" "add_use_return_reg_subst"
+		   "" "_use_return_reg")
+
 ;; A conditional return instruction.
-(define_insn "*c<code>"
+(define_insn "*c<code><add_use_return_reg_name>"
   [(set (pc)
         (if_then_else
           (match_operator 0 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
diff --git a/gcc/jump.c b/gcc/jump.c
index 06f7255d24d..8057ace74d2 100644
--- a/gcc/jump.c
+++ b/gcc/jump.c
@@ -1423,6 +1423,26 @@  redirect_target (rtx x)
   return x;
 }
 
+/* Return a copy of PARALLEL PAR with side effect *OSIDE replaced by NSIDE.  */
+static rtx
+copy_update_parallel (rtx par, rtx *oside, rtx nside)
+{
+  rtx npar;
+  int i;
+
+  npar = gen_rtx_PARALLEL (GET_MODE (par), rtvec_alloc (XVECLEN (par, 0)));
+  for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
+    {
+      rtx *side_effect = &XVECEXP (par, 0, i);
+
+      if (side_effect == oside)
+	XVECEXP (npar, 0, i) = nside;
+      else
+	XVECEXP (npar, 0, i) = copy_rtx (*side_effect);
+    }
+  return npar;
+}
+
 /* Throughout LOC, redirect OLABEL to NLABEL.  Treat null OLABEL or
    NLABEL as a return.  Accrue modifications into the change group.  */
 
@@ -1437,9 +1457,22 @@  redirect_exp_1 (rtx *loc, rtx olabel, rtx nlabel, rtx_insn *insn)
   if ((code == LABEL_REF && label_ref_label (x) == olabel)
       || x == olabel)
     {
-      x = redirect_target (nlabel);
-      if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
- 	x = gen_rtx_SET (pc_rtx, x);
+      rtx *nret = return_in_parallel (nlabel);
+
+      if (nret)
+	{
+	  rtx npat;
+
+	  x = *nret;
+	  npat = copy_update_parallel (nlabel, nret, PATTERN (insn));
+	  validate_change (insn, &PATTERN (insn), npat, 1);
+	}
+      else
+	{
+	  x = redirect_target (nlabel);
+	  if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn))
+	    x = gen_rtx_SET (pc_rtx, x);
+	}
       validate_change (insn, loc, x, 1);
       return;
     }
@@ -1551,10 +1584,15 @@  void
 redirect_jump_2 (rtx_jump_insn *jump, rtx olabel, rtx nlabel, int delete_unused,
 		 int invert)
 {
+  rtx *ret;
   rtx note;
 
   gcc_assert (JUMP_LABEL (jump) == olabel);
 
+  ret = return_in_parallel (nlabel);
+  if (ret)
+    nlabel = *ret;
+
   /* Negative DELETE_UNUSED used to be used to signalize behavior on
      moving FUNCTION_END note.  Just sanity check that no user still worry
      about this.  */
@@ -1929,3 +1967,28 @@  reg_or_subregno (const_rtx reg)
   gcc_assert (REG_P (reg));
   return REGNO (reg);
 }
+
+/* If PAT is a PARALLEL that contains exactly one RETURN or SIMPLE_RETURN
+   and otherwise only USEs and CLOBBERs, return the address of that RETURN
+   or SIMPLE_RETURN within PAT.  Otherwise return NULL.  */
+
+rtx *
+return_in_parallel (rtx pat)
+{
+  int i;
+  rtx *ret = NULL;
+
+  if (GET_CODE (pat) != PARALLEL)
+    return NULL;
+  for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
+    {
+      rtx *side_effect = &XVECEXP (pat, 0, i);
+
+      if (!ret && ANY_RETURN_P (*side_effect))
+	ret = side_effect;
+      else if (GET_CODE (*side_effect) != USE
+	  && GET_CODE (*side_effect) != CLOBBER)
+	return NULL;
+    }
+  return ret;
+}
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 68d3ceab29f..bf87ddc0bc7 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3970,6 +3970,7 @@  extern enum rtx_code reversed_comparison_code_parts (enum rtx_code, const_rtx,
 						     const_rtx, const rtx_insn *);
 extern void delete_for_peephole (rtx_insn *, rtx_insn *);
 extern int condjump_in_parallel_p (const rtx_insn *);
+extern rtx *return_in_parallel (rtx);
 
 /* In emit-rtl.c.  */
 extern int max_reg_num (void);
diff --git a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c
index 838f1ffbd91..25f7ddb9c61 100644
--- a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c
+++ b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c
@@ -23,7 +23,7 @@  i64 f1 (i64 v_a, i64 v_b)
 extern i64 f2_foo();
 i64 f2 (i64 v_a, i64 v_b)
 {
-/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tje\t" { target { lp64 } } } } */
+/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tbner\t%r14\n\tjg\tf2_foo\n" { target { lp64 } } } } */
 /* { dg-final { scan-assembler "f2:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0" { target { ! lp64 } } } } */
   i64 v_anda = v_a & -15;
   i64 v_andb = v_b & 14;
@@ -37,7 +37,7 @@  i64 f2 (i64 v_a, i64 v_b)
 void f2_bar ();
 void f2_cconly (i64 v_a, i64 v_b)
 {
-/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r3,%r2,63,59,0\n\tjne\t"  { target { lp64 } } } } */
+/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r3,%r2,63,59,0\n\tber\t%r14\n\tjg\tf2_bar\n" { target { lp64 } } } } */
 /* { dg-final { scan-assembler "f2_cconly:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0\n\tjne\t" { target { ! lp64 } } } } */
   if ((v_a & -15) | (v_b & 14))
     f2_bar();
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
index 1e63defa063..09a15eb25f0 100644
--- a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
@@ -15,7 +15,7 @@  all_eq_double (vector double a, vector double b)
   if (__builtin_expect (vec_all_eq (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_ne_double (vector double a, vector double b)
@@ -23,7 +23,7 @@  all_ne_double (vector double a, vector double b)
   if (__builtin_expect (vec_all_ne (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
+/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_gt_double (vector double a, vector double b)
@@ -31,7 +31,7 @@  all_gt_double (vector double a, vector double b)
   if (__builtin_expect (vec_all_gt (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_lt_double (vector double a, vector double b)
@@ -39,7 +39,7 @@  all_lt_double (vector double a, vector double b)
   if (__builtin_expect (vec_all_lt (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
+/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_ge_double (vector double a, vector double b)
@@ -47,7 +47,7 @@  all_ge_double (vector double a, vector double b)
   if (__builtin_expect (vec_all_ge (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_le_double (vector double a, vector double b)
@@ -55,7 +55,7 @@  all_le_double (vector double a, vector double b)
   if (__builtin_expect (vec_all_le (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
+/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_eq_double (vector double a, vector double b)
@@ -63,7 +63,7 @@  any_eq_double (vector double a, vector double b)
   if (__builtin_expect (vec_any_eq (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_ne_double (vector double a, vector double b)
@@ -71,7 +71,7 @@  any_ne_double (vector double a, vector double b)
   if (__builtin_expect (vec_any_ne (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
+/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_gt_double (vector double a, vector double b)
@@ -79,7 +79,7 @@  any_gt_double (vector double a, vector double b)
   if (__builtin_expect (vec_any_gt (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_lt_double (vector double a, vector double b)
@@ -87,7 +87,7 @@  any_lt_double (vector double a, vector double b)
   if (__builtin_expect (vec_any_lt (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
+/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_ge_double (vector double a, vector double b)
@@ -95,7 +95,7 @@  any_ge_double (vector double a, vector double b)
   if (__builtin_expect (vec_any_ge (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_le_double (vector double a, vector double b)
@@ -103,7 +103,7 @@  any_le_double (vector double a, vector double b)
   if (__builtin_expect (vec_any_le (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
+/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_eq_int (vector int a, vector int b)
@@ -111,7 +111,7 @@  all_eq_int (vector int a, vector int b)
   if (__builtin_expect (vec_all_eq (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_ne_int (vector int a, vector int b)
@@ -119,7 +119,7 @@  all_ne_int (vector int a, vector int b)
   if (__builtin_expect (vec_all_ne (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
+/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_gt_int (vector int a, vector int b)
@@ -127,7 +127,7 @@  all_gt_int (vector int a, vector int b)
   if (__builtin_expect (vec_all_gt (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_lt_int (vector int a, vector int b)
@@ -135,7 +135,7 @@  all_lt_int (vector int a, vector int b)
   if (__builtin_expect (vec_all_lt (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
+/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_ge_int (vector int a, vector int b)
@@ -143,7 +143,7 @@  all_ge_int (vector int a, vector int b)
   if (__builtin_expect (vec_all_ge (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjle 1 } } */
+/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 all_le_int (vector int a, vector int b)
@@ -151,7 +151,7 @@  all_le_int (vector int a, vector int b)
   if (__builtin_expect (vec_all_le (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
+/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_eq_int (vector int a, vector int b)
@@ -159,7 +159,7 @@  any_eq_int (vector int a, vector int b)
   if (__builtin_expect (vec_any_eq (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_ne_int (vector int a, vector int b)
@@ -167,7 +167,7 @@  any_ne_int (vector int a, vector int b)
   if (__builtin_expect (vec_any_ne (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
+/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_gt_int (vector int a, vector int b)
@@ -175,7 +175,7 @@  any_gt_int (vector int a, vector int b)
   if (__builtin_expect (vec_any_gt (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_lt_int (vector int a, vector int b)
@@ -183,7 +183,7 @@  any_lt_int (vector int a, vector int b)
   if (__builtin_expect (vec_any_lt (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
+/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_ge_int (vector int a, vector int b)
@@ -191,7 +191,7 @@  any_ge_int (vector int a, vector int b)
   if (__builtin_expect (vec_any_ge (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tje 1 } } */
+/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tber\t%r14\n 1 } } */
 
 void __attribute__((noinline,noclone))
 any_le_int (vector int a, vector int b)
@@ -199,5 +199,5 @@  any_le_int (vector int a, vector int b)
   if (__builtin_expect (vec_any_le (a, b), 1))
     g = 2;
 }
-/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
+/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */