Message ID | 20180905083448.1689-1-iii@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | S/390: Fix conditional returns | expand |
Hi Ilya, the S/390 parts look good to me. For the rest we will need approval from a middle-end maintainer. Andreas On 05.09.2018 10:34, Ilya Leoshkevich wrote: > S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of > the more usual (return) or (simple_return). This sequence is not > recognized by the conditional return logic in try_optimize_cfg (). > > gcc/ChangeLog: > > 2018-08-28 Ilya Leoshkevich <iii@linux.ibm.com> > > PR target/80080 > * cfgcleanup.c (bb_is_just_return): Accept PARALLELs containing > RETURNs. > * cfgrtl.c (rtl_verify_bb_layout): Handle PARALLELs containing > conditional jumps. > * config/s390/s390.md: Recognize PARALLELs containing RETURNs. > * jump.c (copy_update_parallel): Create a copy of a PARALLEL > in which one of side effects is replaced. > (redirect_exp_1): Handle jump targets that are PARALLELs > containing RETURNs. > (redirect_jump_2): Likewise. > (return_in_parallel): Recognize PARALLELs containing RETURNs. > * rtl.h (return_in_parallel): Add declaration. > > gcc/testsuite/ChangeLog: > > 2018-08-28 Ilya Leoshkevich <iii@linux.ibm.com> > > PR target/80080 > * gcc.target/s390/risbg-ll-3.c: Expect conditional returns. > * gcc.target/s390/zvector/vec-cmp-2.c: Likewise. > --- > gcc/cfgcleanup.c | 2 +- > gcc/cfgrtl.c | 3 +- > gcc/config/s390/s390.md | 13 +++- > gcc/jump.c | 69 ++++++++++++++++++- > gcc/rtl.h | 1 + > gcc/testsuite/gcc.target/s390/risbg-ll-3.c | 4 +- > .../gcc.target/s390/zvector/vec-cmp-2.c | 48 ++++++------- > 7 files changed, 108 insertions(+), 32 deletions(-) > > diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c > index 4a5dc29d14f..7f2545f453f 100644 > --- a/gcc/cfgcleanup.c > +++ b/gcc/cfgcleanup.c > @@ -2624,7 +2624,7 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use) > { > rtx pat = PATTERN (insn); > > - if (!*ret && ANY_RETURN_P (pat)) > + if (!*ret && (ANY_RETURN_P (pat) || return_in_parallel (pat))) > *ret = insn; > else if (!*ret && !*use && GET_CODE (pat) == USE > && REG_P (XEXP (pat, 0)) > diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c > index 3b1931daeba..701c6a985b8 100644 > --- a/gcc/cfgrtl.c > +++ b/gcc/cfgrtl.c > @@ -2987,7 +2987,8 @@ rtl_verify_bb_layout (void) > } > > if (JUMP_P (x) > - && returnjump_p (x) && ! condjump_p (x) > + && returnjump_p (x) > + && ! (condjump_p (x) || condjump_in_parallel_p (x)) > && ! ((y = next_nonnote_nondebug_insn (x)) > && BARRIER_P (y))) > fatal_insn ("return not followed by barrier", x); > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > index db260e41bfd..3c413638038 100644 > --- a/gcc/config/s390/s390.md > +++ b/gcc/config/s390/s390.md > @@ -8842,8 +8842,19 @@ > (set_attr "type" "branch") > (set_attr "atype" "agen")]) > > +(define_subst "add_use_return_reg_subst" > + [(set (match_operand 0 "" "") > + (match_operand 1 "" ""))] > + "" > + [(set (match_dup 0) > + (match_dup 1)) > + (use (reg RETURN_REGNUM))]) > + > +(define_subst_attr "add_use_return_reg_name" "add_use_return_reg_subst" > + "" "_use_return_reg") > + > ;; A conditional return instruction. > -(define_insn "*c<code>" > +(define_insn "*c<code><add_use_return_reg_name>" > [(set (pc) > (if_then_else > (match_operator 0 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) > diff --git a/gcc/jump.c b/gcc/jump.c > index 06f7255d24d..8057ace74d2 100644 > --- a/gcc/jump.c > +++ b/gcc/jump.c > @@ -1423,6 +1423,26 @@ redirect_target (rtx x) > return x; > } > > +/* Create a copy of PARALLEL with side-effect OSIDE replaced by NSIDE. */ > +static rtx > +copy_update_parallel (rtx par, rtx *oside, rtx nside) > +{ > + rtx npar; > + int i; > + > + npar = gen_rtx_PARALLEL (GET_MODE (par), rtvec_alloc (XVECLEN (par, 0))); > + for (i = XVECLEN (par, 0) - 1; i >= 0; i--) > + { > + rtx *side_effect = &XVECEXP (par, 0, i); > + > + if (side_effect == oside) > + XVECEXP (npar, 0, i) = nside; > + else > + XVECEXP (npar, 0, i) = copy_rtx (*side_effect); > + } > + return npar; > +} > + > /* Throughout LOC, redirect OLABEL to NLABEL. Treat null OLABEL or > NLABEL as a return. Accrue modifications into the change group. */ > > @@ -1437,9 +1457,22 @@ redirect_exp_1 (rtx *loc, rtx olabel, rtx nlabel, rtx_insn *insn) > if ((code == LABEL_REF && label_ref_label (x) == olabel) > || x == olabel) > { > - x = redirect_target (nlabel); > - if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn)) > - x = gen_rtx_SET (pc_rtx, x); > + rtx *nret = return_in_parallel (nlabel); > + > + if (nret) > + { > + rtx npat; > + > + x = *nret; > + npat = copy_update_parallel (nlabel, nret, PATTERN (insn)); > + validate_change (insn, &PATTERN (insn), npat, 1); > + } > + else > + { > + x = redirect_target (nlabel); > + if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn)) > + x = gen_rtx_SET (pc_rtx, x); > + } > validate_change (insn, loc, x, 1); > return; > } > @@ -1551,10 +1584,15 @@ void > redirect_jump_2 (rtx_jump_insn *jump, rtx olabel, rtx nlabel, int delete_unused, > int invert) > { > + rtx *ret; > rtx note; > > gcc_assert (JUMP_LABEL (jump) == olabel); > > + ret = return_in_parallel (nlabel); > + if (ret) > + nlabel = *ret; > + > /* Negative DELETE_UNUSED used to be used to signalize behavior on > moving FUNCTION_END note. Just sanity check that no user still worry > about this. */ > @@ -1929,3 +1967,28 @@ reg_or_subregno (const_rtx reg) > gcc_assert (REG_P (reg)); > return REGNO (reg); > } > + > +/* If PAT is a PARALLEL, that contains a single RETURN or a SIMPLE_RETURN, and > + zero or more USEs and CLOBBERs, returns an address of that RETURN or > + SIMPLE_RETURN. Otherwise returns NULL. */ > + > +rtx * > +return_in_parallel (rtx pat) > +{ > + int i; > + rtx *ret = NULL; > + > + if (GET_CODE (pat) != PARALLEL) > + return NULL; > + for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) > + { > + rtx *side_effect = &XVECEXP (pat, 0, i); > + > + if (!ret && ANY_RETURN_P (*side_effect)) > + ret = side_effect; > + else if (GET_CODE (*side_effect) != USE > + && GET_CODE (*side_effect) != CLOBBER) > + return NULL; > + } > + return ret; > +} > diff --git a/gcc/rtl.h b/gcc/rtl.h > index 68d3ceab29f..bf87ddc0bc7 100644 > --- a/gcc/rtl.h > +++ b/gcc/rtl.h > @@ -3970,6 +3970,7 @@ extern enum rtx_code reversed_comparison_code_parts (enum rtx_code, const_rtx, > const_rtx, const rtx_insn *); > extern void delete_for_peephole (rtx_insn *, rtx_insn *); > extern int condjump_in_parallel_p (const rtx_insn *); > +extern rtx *return_in_parallel (rtx); > > /* In emit-rtl.c. */ > extern int max_reg_num (void); > diff --git a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c > index 838f1ffbd91..25f7ddb9c61 100644 > --- a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c > +++ b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c > @@ -23,7 +23,7 @@ i64 f1 (i64 v_a, i64 v_b) > extern i64 f2_foo(); > i64 f2 (i64 v_a, i64 v_b) > { > -/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tje\t" { target { lp64 } } } } */ > +/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tbner\t%r14\n\tjg\tf2_foo\n" { target { lp64 } } } } */ > /* { dg-final { scan-assembler "f2:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0" { target { ! lp64 } } } } */ > i64 v_anda = v_a & -15; > i64 v_andb = v_b & 14; > @@ -37,7 +37,7 @@ i64 f2 (i64 v_a, i64 v_b) > void f2_bar (); > void f2_cconly (i64 v_a, i64 v_b) > { > -/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r3,%r2,63,59,0\n\tjne\t" { target { lp64 } } } } */ > +/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r3,%r2,63,59,0\n\tber\t%r14\n\tjg\tf2_bar\n" { target { lp64 } } } } */ > /* { dg-final { scan-assembler "f2_cconly:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0\n\tjne\t" { target { ! lp64 } } } } */ > if ((v_a & -15) | (v_b & 14)) > f2_bar(); > diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c > index 1e63defa063..09a15eb25f0 100644 > --- a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c > +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c > @@ -15,7 +15,7 @@ all_eq_double (vector double a, vector double b) > if (__builtin_expect (vec_all_eq (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ > +/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_ne_double (vector double a, vector double b) > @@ -23,7 +23,7 @@ all_ne_double (vector double a, vector double b) > if (__builtin_expect (vec_all_ne (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */ > +/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_gt_double (vector double a, vector double b) > @@ -31,7 +31,7 @@ all_gt_double (vector double a, vector double b) > if (__builtin_expect (vec_all_gt (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ > +/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_lt_double (vector double a, vector double b) > @@ -39,7 +39,7 @@ all_lt_double (vector double a, vector double b) > if (__builtin_expect (vec_all_lt (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */ > +/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_ge_double (vector double a, vector double b) > @@ -47,7 +47,7 @@ all_ge_double (vector double a, vector double b) > if (__builtin_expect (vec_all_ge (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ > +/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_le_double (vector double a, vector double b) > @@ -55,7 +55,7 @@ all_le_double (vector double a, vector double b) > if (__builtin_expect (vec_all_le (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */ > +/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_eq_double (vector double a, vector double b) > @@ -63,7 +63,7 @@ any_eq_double (vector double a, vector double b) > if (__builtin_expect (vec_any_eq (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ > +/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_ne_double (vector double a, vector double b) > @@ -71,7 +71,7 @@ any_ne_double (vector double a, vector double b) > if (__builtin_expect (vec_any_ne (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */ > +/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_gt_double (vector double a, vector double b) > @@ -79,7 +79,7 @@ any_gt_double (vector double a, vector double b) > if (__builtin_expect (vec_any_gt (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ > +/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_lt_double (vector double a, vector double b) > @@ -87,7 +87,7 @@ any_lt_double (vector double a, vector double b) > if (__builtin_expect (vec_any_lt (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */ > +/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_ge_double (vector double a, vector double b) > @@ -95,7 +95,7 @@ any_ge_double (vector double a, vector double b) > if (__builtin_expect (vec_any_ge (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ > +/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_le_double (vector double a, vector double b) > @@ -103,7 +103,7 @@ any_le_double (vector double a, vector double b) > if (__builtin_expect (vec_any_le (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */ > +/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_eq_int (vector int a, vector int b) > @@ -111,7 +111,7 @@ all_eq_int (vector int a, vector int b) > if (__builtin_expect (vec_all_eq (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ > +/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_ne_int (vector int a, vector int b) > @@ -119,7 +119,7 @@ all_ne_int (vector int a, vector int b) > if (__builtin_expect (vec_all_ne (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */ > +/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_gt_int (vector int a, vector int b) > @@ -127,7 +127,7 @@ all_gt_int (vector int a, vector int b) > if (__builtin_expect (vec_all_gt (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ > +/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_lt_int (vector int a, vector int b) > @@ -135,7 +135,7 @@ all_lt_int (vector int a, vector int b) > if (__builtin_expect (vec_all_lt (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */ > +/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_ge_int (vector int a, vector int b) > @@ -143,7 +143,7 @@ all_ge_int (vector int a, vector int b) > if (__builtin_expect (vec_all_ge (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjle 1 } } */ > +/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > all_le_int (vector int a, vector int b) > @@ -151,7 +151,7 @@ all_le_int (vector int a, vector int b) > if (__builtin_expect (vec_all_le (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */ > +/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_eq_int (vector int a, vector int b) > @@ -159,7 +159,7 @@ any_eq_int (vector int a, vector int b) > if (__builtin_expect (vec_any_eq (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ > +/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_ne_int (vector int a, vector int b) > @@ -167,7 +167,7 @@ any_ne_int (vector int a, vector int b) > if (__builtin_expect (vec_any_ne (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */ > +/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_gt_int (vector int a, vector int b) > @@ -175,7 +175,7 @@ any_gt_int (vector int a, vector int b) > if (__builtin_expect (vec_any_gt (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ > +/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_lt_int (vector int a, vector int b) > @@ -183,7 +183,7 @@ any_lt_int (vector int a, vector int b) > if (__builtin_expect (vec_any_lt (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */ > +/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_ge_int (vector int a, vector int b) > @@ -191,7 +191,7 @@ any_ge_int (vector int a, vector int b) > if (__builtin_expect (vec_any_ge (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tje 1 } } */ > +/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tber\t%r14\n 1 } } */ > > void __attribute__((noinline,noclone)) > any_le_int (vector int a, vector int b) > @@ -199,5 +199,5 @@ any_le_int (vector int a, vector int b) > if (__builtin_expect (vec_any_le (a, b), 1)) > g = 2; > } > -/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */ > +/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */ > >
On 9/5/18 2:34 AM, Ilya Leoshkevich wrote: > S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of > the more usual (return) or (simple_return). This sequence is not > recognized by the conditional return logic in try_optimize_cfg (). > > gcc/ChangeLog: > > 2018-08-28 Ilya Leoshkevich <iii@linux.ibm.com> > > PR target/80080 > * cfgcleanup.c (bb_is_just_return): Accept PARALLELs containing > RETURNs. > * cfgrtl.c (rtl_verify_bb_layout): Handle PARALLELs containing > conditional jumps. > * config/s390/s390.md: Recognize PARALLELs containing RETURNs. > * jump.c (copy_update_parallel): Create a copy of a PARALLEL > in which one of side effects is replaced. > (redirect_exp_1): Handle jump targets that are PARALLELs > containing RETURNs. > (redirect_jump_2): Likewise. > (return_in_parallel): Recognize PARALLELs containing RETURNs. > * rtl.h (return_in_parallel): Add declaration. > > gcc/testsuite/ChangeLog: > > 2018-08-28 Ilya Leoshkevich <iii@linux.ibm.com> > > PR target/80080 > * gcc.target/s390/risbg-ll-3.c: Expect conditional returns. > * gcc.target/s390/zvector/vec-cmp-2.c: Likewise. > --- > gcc/cfgcleanup.c | 2 +- > gcc/cfgrtl.c | 3 +- > gcc/config/s390/s390.md | 13 +++- > gcc/jump.c | 69 ++++++++++++++++++- > gcc/rtl.h | 1 + > gcc/testsuite/gcc.target/s390/risbg-ll-3.c | 4 +- > .../gcc.target/s390/zvector/vec-cmp-2.c | 48 ++++++------- > 7 files changed, 108 insertions(+), 32 deletions(-) > > diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c > index 4a5dc29d14f..7f2545f453f 100644 > --- a/gcc/cfgcleanup.c > +++ b/gcc/cfgcleanup.c > @@ -2624,7 +2624,7 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use) > { > rtx pat = PATTERN (insn); > > - if (!*ret && ANY_RETURN_P (pat)) > + if (!*ret && (ANY_RETURN_P (pat) || return_in_parallel (pat))) > *ret = insn; > else if (!*ret && !*use && GET_CODE (pat) == USE > && REG_P (XEXP (pat, 0)) So what else is in the return insn that requires you to test for return_in_parallel? If we're going to allow a return in a parallel, here I think we need to tighten down its form given the intended ways bb_is_just_return is supposed to be used. Essentially other side effects would seem to be forbidden in the parallel. ie, you could have a PARALLEL with a return and use inside, but not a return with anything else inside (such as a clobber). Why do you need to make a copy of the parallel RTX in redirect_exp_1? We don't do it for other cases -- why can't we just use validate_change like all the other RTXs? > /* Throughout LOC, redirect OLABEL to NLABEL. Treat null OLABEL or > NLABEL as a return. Accrue modifications into the change group. */ > > @@ -1437,9 +1457,22 @@ redirect_exp_1 (rtx *loc, rtx olabel, rtx nlabel, rtx_insn *insn) > if ((code == LABEL_REF && label_ref_label (x) == olabel) > || x == olabel) > { > - x = redirect_target (nlabel); > - if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn)) > - x = gen_rtx_SET (pc_rtx, x); > + rtx *nret = return_in_parallel (nlabel); > + > + if (nret) > + { > + rtx npat; > + > + x = *nret; > + npat = copy_update_parallel (nlabel, nret, PATTERN (insn)); > + validate_change (insn, &PATTERN (insn), npat, 1); > + } > + else > + { > + x = redirect_target (nlabel); > + if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn)) > + x = gen_rtx_SET (pc_rtx, x); > + } > validate_change (insn, loc, x, 1); Why the need to use copy_update_parallel here? Is there a reason why validate_change is insufficient? > return; > } > @@ -1551,10 +1584,15 @@ void > redirect_jump_2 (rtx_jump_insn *jump, rtx olabel, rtx nlabel, int delete_unused, > int invert) > { > + rtx *ret; > rtx note; > > gcc_assert (JUMP_LABEL (jump) == olabel); > > + ret = return_in_parallel (nlabel); > + if (ret) > + nlabel = *ret; Why does return_in_parallel return an rtx *? Can't you just return the rtx and avoid the unnecessary dereferencing? I guess this ultimately comes back to why can't you use validate_change like everyone else in redirect_exp_1? jeff Jeff
> Am 18.09.2018 um 02:57 schrieb Jeff Law <law@redhat.com>: > > On 9/5/18 2:34 AM, Ilya Leoshkevich wrote: >> --- a/gcc/cfgcleanup.c >> +++ b/gcc/cfgcleanup.c >> @@ -2624,7 +2624,7 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use) >> { >> rtx pat = PATTERN (insn); >> >> - if (!*ret && ANY_RETURN_P (pat)) >> + if (!*ret && (ANY_RETURN_P (pat) || return_in_parallel (pat))) >> *ret = insn; >> else if (!*ret && !*use && GET_CODE (pat) == USE >> && REG_P (XEXP (pat, 0)) > So what else is in the return insn that requires you to test for > return_in_parallel? If we're going to allow a return in a parallel, > here I think we need to tighten down its form given the intended ways > bb_is_just_return is supposed to be used. Essentially other side > effects would seem to be forbidden in the parallel. ie, you could have > a PARALLEL with a return and use inside, but not a return with anything > else inside (such as a clobber). Yes, it’s RETURN+USE. I allowed CLOBBERs, because bb_is_just_return already allows them, but I don’t think it's necessary for the S/390 use case, so I can make it more restrictive if needed. > > Why do you need to make a copy of the parallel RTX in redirect_exp_1? > We don't do it for other cases -- why can't we just use validate_change > like all the other RTXs? > > >> /* Throughout LOC, redirect OLABEL to NLABEL. Treat null OLABEL or >> NLABEL as a return. Accrue modifications into the change group. */ >> >> @@ -1437,9 +1457,22 @@ redirect_exp_1 (rtx *loc, rtx olabel, rtx nlabel, rtx_insn *insn) >> if ((code == LABEL_REF && label_ref_label (x) == olabel) >> || x == olabel) >> { >> - x = redirect_target (nlabel); >> - if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn)) >> - x = gen_rtx_SET (pc_rtx, x); >> + rtx *nret = return_in_parallel (nlabel); >> + >> + if (nret) >> + { >> + rtx npat; >> + >> + x = *nret; >> + npat = copy_update_parallel (nlabel, nret, PATTERN (insn)); >> + validate_change (insn, &PATTERN (insn), npat, 1); >> + } >> + else >> + { >> + x = redirect_target (nlabel); >> + if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn)) >> + x = gen_rtx_SET (pc_rtx, x); >> + } >> validate_change (insn, loc, x, 1); > Why the need to use copy_update_parallel here? Is there a reason why > validate_change is insufficient? The original RTL has the following form: (jump_insn 1 (set %pc (if_then_else (ne %cc 0) (label_ref 2) %pc))) ... (code_label 2) (jump_insn 3 (parallel [(return) (use %r14)])) and the goal is to transform (jump_insn 1) to: (jump_insn 1 (parallel [(set %pc (if_then_else (ne %cc 0) (return) %pc)) (use %r14)])) while keeping (code_label 2) and (jump_insn 3) intact. So I have to create a new PARALLEL based on the original one. > > >> return; >> } >> @@ -1551,10 +1584,15 @@ void >> redirect_jump_2 (rtx_jump_insn *jump, rtx olabel, rtx nlabel, int delete_unused, >> int invert) >> { >> + rtx *ret; >> rtx note; >> >> gcc_assert (JUMP_LABEL (jump) == olabel); >> >> + ret = return_in_parallel (nlabel); >> + if (ret) >> + nlabel = *ret; > Why does return_in_parallel return an rtx *? Can't you just return the > rtx and avoid the unnecessary dereferencing? I guess this ultimately > comes back to why can't you use validate_change like everyone else in > redirect_exp_1? Right, this is related. This is to indicate to copy_update_parallel, which of the side-effects need to be updated.
On Wed, Sep 05, 2018 at 10:34:48AM +0200, Ilya Leoshkevich wrote: > S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of > the more usual (return) or (simple_return). This sequence is not > recognized by the conditional return logic in try_optimize_cfg (). Why does it need this? Other targets with a link register make EPILOGUE_USES handle this. If you really need a parallel, can you make ANY_RETURN_P recognise it? > +/* Create a copy of PARALLEL with side-effect OSIDE replaced by NSIDE. */ > +static rtx > +copy_update_parallel (rtx par, rtx *oside, rtx nside) > +{ > + rtx npar; > + int i; > + > + npar = gen_rtx_PARALLEL (GET_MODE (par), rtvec_alloc (XVECLEN (par, 0))); > + for (i = XVECLEN (par, 0) - 1; i >= 0; i--) > + { > + rtx *side_effect = &XVECEXP (par, 0, i); > + > + if (side_effect == oside) > + XVECEXP (npar, 0, i) = nside; > + else > + XVECEXP (npar, 0, i) = copy_rtx (*side_effect); > + } > + return npar; > +} This doesn't work if nside is used anywhere else. But the only caller uses the previous instruction pattern; maybe make a function specialised to that only? You could give it a better name, too ;-) (It is especially surprising because the function is called copy_* but it does _not_ copy its argument!) Segher
On 9/18/18 8:04 AM, Segher Boessenkool wrote: > On Wed, Sep 05, 2018 at 10:34:48AM +0200, Ilya Leoshkevich wrote: >> S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of >> the more usual (return) or (simple_return). This sequence is not >> recognized by the conditional return logic in try_optimize_cfg (). > > Why does it need this? Other targets with a link register make > EPILOGUE_USES handle this. I think because he's trying to optimize a conditional jump to a return insn into a conditional return insn. I don't think we do that on other targets, though I have pondered it from time to time. Jeff
> Am 19.09.2018 um 06:15 schrieb Jeff Law <law@redhat.com>: > > On 9/18/18 8:04 AM, Segher Boessenkool wrote: >> On Wed, Sep 05, 2018 at 10:34:48AM +0200, Ilya Leoshkevich wrote: >>> S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of >>> the more usual (return) or (simple_return). This sequence is not >>> recognized by the conditional return logic in try_optimize_cfg (). >> >> Why does it need this? Other targets with a link register make >> EPILOGUE_USES handle this. > I think because he's trying to optimize a conditional jump to a return > insn into a conditional return insn. I don't think we do that on other > targets, though I have pondered it from time to time. S/390 back-end already uses EPILOGUE_USES. The reason RETURN in PARALLEL still exists is that on older processors sequences using non-standard dynamically chosen link register are sometimes more efficient, and for such use cases EPILOGUE_USES doesn't work. I talked to Ulrich Weigand about this yesterday, and he suggested to drop RETURN in PARALLEL for newer processors. I will give this a try, and if it works, middle-end changes in this patch will no longer be needed.
On Tue, Sep 18, 2018 at 10:15:00PM -0600, Jeff Law wrote: > On 9/18/18 8:04 AM, Segher Boessenkool wrote: > > On Wed, Sep 05, 2018 at 10:34:48AM +0200, Ilya Leoshkevich wrote: > >> S/390 epilogue ends with (parallel [(return) (use %r14)]) instead of > >> the more usual (return) or (simple_return). This sequence is not > >> recognized by the conditional return logic in try_optimize_cfg (). > > > > Why does it need this? Other targets with a link register make > > EPILOGUE_USES handle this. > I think because he's trying to optimize a conditional jump to a return > insn into a conditional return insn. I don't think we do that on other > targets, though I have pondered it from time to time. See r248351, r236106, and importantly r235904. I wrote that because it is quite useful on Power. s390 already _has_ a proper EPILOGUE_USES btw. So just the return patterns themselves need fixing and all will be good, it seems. Segher
diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c index 4a5dc29d14f..7f2545f453f 100644 --- a/gcc/cfgcleanup.c +++ b/gcc/cfgcleanup.c @@ -2624,7 +2624,7 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use) { rtx pat = PATTERN (insn); - if (!*ret && ANY_RETURN_P (pat)) + if (!*ret && (ANY_RETURN_P (pat) || return_in_parallel (pat))) *ret = insn; else if (!*ret && !*use && GET_CODE (pat) == USE && REG_P (XEXP (pat, 0)) diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c index 3b1931daeba..701c6a985b8 100644 --- a/gcc/cfgrtl.c +++ b/gcc/cfgrtl.c @@ -2987,7 +2987,8 @@ rtl_verify_bb_layout (void) } if (JUMP_P (x) - && returnjump_p (x) && ! condjump_p (x) + && returnjump_p (x) + && ! (condjump_p (x) || condjump_in_parallel_p (x)) && ! ((y = next_nonnote_nondebug_insn (x)) && BARRIER_P (y))) fatal_insn ("return not followed by barrier", x); diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index db260e41bfd..3c413638038 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -8842,8 +8842,19 @@ (set_attr "type" "branch") (set_attr "atype" "agen")]) +(define_subst "add_use_return_reg_subst" + [(set (match_operand 0 "" "") + (match_operand 1 "" ""))] + "" + [(set (match_dup 0) + (match_dup 1)) + (use (reg RETURN_REGNUM))]) + +(define_subst_attr "add_use_return_reg_name" "add_use_return_reg_subst" + "" "_use_return_reg") + ;; A conditional return instruction. -(define_insn "*c<code>" +(define_insn "*c<code><add_use_return_reg_name>" [(set (pc) (if_then_else (match_operator 0 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) diff --git a/gcc/jump.c b/gcc/jump.c index 06f7255d24d..8057ace74d2 100644 --- a/gcc/jump.c +++ b/gcc/jump.c @@ -1423,6 +1423,26 @@ redirect_target (rtx x) return x; } +/* Create a copy of PARALLEL with side-effect OSIDE replaced by NSIDE. */ +static rtx +copy_update_parallel (rtx par, rtx *oside, rtx nside) +{ + rtx npar; + int i; + + npar = gen_rtx_PARALLEL (GET_MODE (par), rtvec_alloc (XVECLEN (par, 0))); + for (i = XVECLEN (par, 0) - 1; i >= 0; i--) + { + rtx *side_effect = &XVECEXP (par, 0, i); + + if (side_effect == oside) + XVECEXP (npar, 0, i) = nside; + else + XVECEXP (npar, 0, i) = copy_rtx (*side_effect); + } + return npar; +} + /* Throughout LOC, redirect OLABEL to NLABEL. Treat null OLABEL or NLABEL as a return. Accrue modifications into the change group. */ @@ -1437,9 +1457,22 @@ redirect_exp_1 (rtx *loc, rtx olabel, rtx nlabel, rtx_insn *insn) if ((code == LABEL_REF && label_ref_label (x) == olabel) || x == olabel) { - x = redirect_target (nlabel); - if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn)) - x = gen_rtx_SET (pc_rtx, x); + rtx *nret = return_in_parallel (nlabel); + + if (nret) + { + rtx npat; + + x = *nret; + npat = copy_update_parallel (nlabel, nret, PATTERN (insn)); + validate_change (insn, &PATTERN (insn), npat, 1); + } + else + { + x = redirect_target (nlabel); + if (GET_CODE (x) == LABEL_REF && loc == &PATTERN (insn)) + x = gen_rtx_SET (pc_rtx, x); + } validate_change (insn, loc, x, 1); return; } @@ -1551,10 +1584,15 @@ void redirect_jump_2 (rtx_jump_insn *jump, rtx olabel, rtx nlabel, int delete_unused, int invert) { + rtx *ret; rtx note; gcc_assert (JUMP_LABEL (jump) == olabel); + ret = return_in_parallel (nlabel); + if (ret) + nlabel = *ret; + /* Negative DELETE_UNUSED used to be used to signalize behavior on moving FUNCTION_END note. Just sanity check that no user still worry about this. */ @@ -1929,3 +1967,28 @@ reg_or_subregno (const_rtx reg) gcc_assert (REG_P (reg)); return REGNO (reg); } + +/* If PAT is a PARALLEL, that contains a single RETURN or a SIMPLE_RETURN, and + zero or more USEs and CLOBBERs, returns an address of that RETURN or + SIMPLE_RETURN. Otherwise returns NULL. */ + +rtx * +return_in_parallel (rtx pat) +{ + int i; + rtx *ret = NULL; + + if (GET_CODE (pat) != PARALLEL) + return NULL; + for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) + { + rtx *side_effect = &XVECEXP (pat, 0, i); + + if (!ret && ANY_RETURN_P (*side_effect)) + ret = side_effect; + else if (GET_CODE (*side_effect) != USE + && GET_CODE (*side_effect) != CLOBBER) + return NULL; + } + return ret; +} diff --git a/gcc/rtl.h b/gcc/rtl.h index 68d3ceab29f..bf87ddc0bc7 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -3970,6 +3970,7 @@ extern enum rtx_code reversed_comparison_code_parts (enum rtx_code, const_rtx, const_rtx, const rtx_insn *); extern void delete_for_peephole (rtx_insn *, rtx_insn *); extern int condjump_in_parallel_p (const rtx_insn *); +extern rtx *return_in_parallel (rtx); /* In emit-rtl.c. */ extern int max_reg_num (void); diff --git a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c index 838f1ffbd91..25f7ddb9c61 100644 --- a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c +++ b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c @@ -23,7 +23,7 @@ i64 f1 (i64 v_a, i64 v_b) extern i64 f2_foo(); i64 f2 (i64 v_a, i64 v_b) { -/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tje\t" { target { lp64 } } } } */ +/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tbner\t%r14\n\tjg\tf2_foo\n" { target { lp64 } } } } */ /* { dg-final { scan-assembler "f2:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0" { target { ! lp64 } } } } */ i64 v_anda = v_a & -15; i64 v_andb = v_b & 14; @@ -37,7 +37,7 @@ i64 f2 (i64 v_a, i64 v_b) void f2_bar (); void f2_cconly (i64 v_a, i64 v_b) { -/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r3,%r2,63,59,0\n\tjne\t" { target { lp64 } } } } */ +/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r3,%r2,63,59,0\n\tber\t%r14\n\tjg\tf2_bar\n" { target { lp64 } } } } */ /* { dg-final { scan-assembler "f2_cconly:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0\n\tjne\t" { target { ! lp64 } } } } */ if ((v_a & -15) | (v_b & 14)) f2_bar(); diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c index 1e63defa063..09a15eb25f0 100644 --- a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c @@ -15,7 +15,7 @@ all_eq_double (vector double a, vector double b) if (__builtin_expect (vec_all_eq (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ +/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_ne_double (vector double a, vector double b) @@ -23,7 +23,7 @@ all_ne_double (vector double a, vector double b) if (__builtin_expect (vec_all_ne (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */ +/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_gt_double (vector double a, vector double b) @@ -31,7 +31,7 @@ all_gt_double (vector double a, vector double b) if (__builtin_expect (vec_all_gt (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ +/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_lt_double (vector double a, vector double b) @@ -39,7 +39,7 @@ all_lt_double (vector double a, vector double b) if (__builtin_expect (vec_all_lt (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */ +/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_ge_double (vector double a, vector double b) @@ -47,7 +47,7 @@ all_ge_double (vector double a, vector double b) if (__builtin_expect (vec_all_ge (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ +/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_le_double (vector double a, vector double b) @@ -55,7 +55,7 @@ all_le_double (vector double a, vector double b) if (__builtin_expect (vec_all_le (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */ +/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_eq_double (vector double a, vector double b) @@ -63,7 +63,7 @@ any_eq_double (vector double a, vector double b) if (__builtin_expect (vec_any_eq (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ +/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_ne_double (vector double a, vector double b) @@ -71,7 +71,7 @@ any_ne_double (vector double a, vector double b) if (__builtin_expect (vec_any_ne (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */ +/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_gt_double (vector double a, vector double b) @@ -79,7 +79,7 @@ any_gt_double (vector double a, vector double b) if (__builtin_expect (vec_any_gt (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ +/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_lt_double (vector double a, vector double b) @@ -87,7 +87,7 @@ any_lt_double (vector double a, vector double b) if (__builtin_expect (vec_any_lt (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */ +/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_ge_double (vector double a, vector double b) @@ -95,7 +95,7 @@ any_ge_double (vector double a, vector double b) if (__builtin_expect (vec_any_ge (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ +/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_le_double (vector double a, vector double b) @@ -103,7 +103,7 @@ any_le_double (vector double a, vector double b) if (__builtin_expect (vec_any_le (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */ +/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_eq_int (vector int a, vector int b) @@ -111,7 +111,7 @@ all_eq_int (vector int a, vector int b) if (__builtin_expect (vec_all_eq (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ +/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_ne_int (vector int a, vector int b) @@ -119,7 +119,7 @@ all_ne_int (vector int a, vector int b) if (__builtin_expect (vec_all_ne (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */ +/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_gt_int (vector int a, vector int b) @@ -127,7 +127,7 @@ all_gt_int (vector int a, vector int b) if (__builtin_expect (vec_all_gt (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */ +/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbner\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_lt_int (vector int a, vector int b) @@ -135,7 +135,7 @@ all_lt_int (vector int a, vector int b) if (__builtin_expect (vec_all_lt (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */ +/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbner\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_ge_int (vector int a, vector int b) @@ -143,7 +143,7 @@ all_ge_int (vector int a, vector int b) if (__builtin_expect (vec_all_ge (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjle 1 } } */ +/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) all_le_int (vector int a, vector int b) @@ -151,7 +151,7 @@ all_le_int (vector int a, vector int b) if (__builtin_expect (vec_all_le (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */ +/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_eq_int (vector int a, vector int b) @@ -159,7 +159,7 @@ any_eq_int (vector int a, vector int b) if (__builtin_expect (vec_any_eq (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ +/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_ne_int (vector int a, vector int b) @@ -167,7 +167,7 @@ any_ne_int (vector int a, vector int b) if (__builtin_expect (vec_any_ne (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */ +/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_gt_int (vector int a, vector int b) @@ -175,7 +175,7 @@ any_gt_int (vector int a, vector int b) if (__builtin_expect (vec_any_gt (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */ +/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tbnler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_lt_int (vector int a, vector int b) @@ -183,7 +183,7 @@ any_lt_int (vector int a, vector int b) if (__builtin_expect (vec_any_lt (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */ +/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tbnler\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_ge_int (vector int a, vector int b) @@ -191,7 +191,7 @@ any_ge_int (vector int a, vector int b) if (__builtin_expect (vec_any_ge (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tje 1 } } */ +/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tber\t%r14\n 1 } } */ void __attribute__((noinline,noclone)) any_le_int (vector int a, vector int b) @@ -199,5 +199,5 @@ any_le_int (vector int a, vector int b) if (__builtin_expect (vec_any_le (a, b), 1)) g = 2; } -/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */ +/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tber\t%r14\n 1 } } */