diff mbox series

[RFC] Come up with VEC_COND_OP_EXPRs.

Message ID 366dfa22-58e7-e1df-62c3-cc98082a552c@suse.cz
State New
Headers show
Series [RFC] Come up with VEC_COND_OP_EXPRs. | expand

Commit Message

Martin Liška Sept. 24, 2019, 10:25 a.m. UTC
Hi.

The patch introduces a couple of new TREE_CODEs that will help us to have
a proper GIMPLE representation of the current VECT_COND_EXPR. Right now,
the first argument is typically a GENERIC tcc_expression tree with 2 operands
that are visited at various places in GIMPLE code. That said, based on the discussion
with Richi, I'm suggesting to come up with e.g.
VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
and is only valid in GENERIC and gimplifier will take care of the corresponding transition.

The patch is a prototype and missing bits are:
- folding support addition for GIMPLE_QUATERNARY_RHS is missing
- fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
  UNLT_EXPR and others are not supported right now
- comments are missing for various functions added

Apart from that I was able to bootstrap and run tests with a quite small fallout.
Thoughts?
Martin

---
 gcc/cfgexpand.c             | 33 ++++++++-----
 gcc/expr.c                  | 36 +++++++++-----
 gcc/expr.h                  |  2 +-
 gcc/gimple-expr.c           | 14 +++++-
 gcc/gimple-expr.h           |  6 +--
 gcc/gimple-fold.c           | 15 +++++-
 gcc/gimple-match-head.c     |  3 ++
 gcc/gimple-pretty-print.c   | 76 ++++++++++++++++++++--------
 gcc/gimple.c                | 95 ++++++++++++++++++++++++++++++-----
 gcc/gimple.h                | 82 +++++++++++++++++++++++++-----
 gcc/gimplify.c              | 42 +++++++++++-----
 gcc/optabs.c                | 58 +++++++++-------------
 gcc/optabs.h                |  2 +-
 gcc/tree-cfg.c              | 99 ++++++++++++++++++++++++++++++++-----
 gcc/tree-inline.c           |  2 +-
 gcc/tree-ssa-forwprop.c     | 11 +++--
 gcc/tree-ssa-loop-niter.c   |  4 +-
 gcc/tree-ssa-operands.c     |  1 -
 gcc/tree-ssa-pre.c          |  5 +-
 gcc/tree-ssa-reassoc.c      |  4 +-
 gcc/tree-ssa-scopedtables.c | 46 ++++++++++++++++-
 gcc/tree-ssa-scopedtables.h |  2 +
 gcc/tree-vect-generic.c     | 53 +++++++++++---------
 gcc/tree-vect-loop.c        | 50 ++++++++-----------
 gcc/tree-vect-patterns.c    |  4 +-
 gcc/tree-vect-stmts.c       | 17 ++++---
 gcc/tree.def                |  7 +++
 gcc/tree.h                  | 64 ++++++++++++++++++++++++
 28 files changed, 620 insertions(+), 213 deletions(-)
=== FAILURES ===
FAIL: g++.dg/ext/pr56790-1.C  -std=gnu++14  scan-tree-dump ccp1 "{ 5, 13 }"
FAIL: g++.dg/ext/pr56790-1.C  -std=gnu++17  scan-tree-dump ccp1 "{ 5, 13 }"
FAIL: g++.dg/ext/pr56790-1.C  -std=gnu++98  scan-tree-dump ccp1 "{ 5, 13 }"
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++14  1 blank line(s) in output
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++14 (internal compiler error)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++14 (test for excess errors)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++17  1 blank line(s) in output
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++17 (internal compiler error)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++17 (test for excess errors)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++98  1 blank line(s) in output
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++98 (internal compiler error)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++98 (test for excess errors)
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++14  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { -214748364[78]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++14  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { 214748364[67]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++14  scan-tree-dump-times forwprop1 "(?:return| =) { -1, -1, -1, -1 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++14  scan-tree-dump-times forwprop1 "(?:return| =) { 0, 0, 0, 0 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++17  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { -214748364[78]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++17  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { 214748364[67]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++17  scan-tree-dump-times forwprop1 "(?:return| =) { -1, -1, -1, -1 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++17  scan-tree-dump-times forwprop1 "(?:return| =) { 0, 0, 0, 0 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++98  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { -214748364[78]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++98  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { 214748364[67]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++98  scan-tree-dump-times forwprop1 "(?:return| =) { -1, -1, -1, -1 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++98  scan-tree-dump-times forwprop1 "(?:return| =) { 0, 0, 0, 0 }" 2
FAIL: gcc.c-torture/execute/ieee/pr50310.c compilation,  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
FAIL: gcc.c-torture/execute/ieee/pr50310.c compilation,  -O3 -g  (internal compiler error)
FAIL: gcc.dg/pr50310-1.c (internal compiler error)
FAIL: gcc.dg/pr50310-1.c (test for excess errors)
FAIL: gcc.dg/pr50310-2.c (internal compiler error)
FAIL: gcc.dg/pr50310-2.c (test for excess errors)
FAIL: gcc.dg/tree-ssa/foldconst-6.c scan-tree-dump-not ccp1 "2, 666"
FAIL: gcc.dg/tree-ssa/operand-equal-2.c scan-tree-dump fre1 "v = . 0, 0, 0, 0 ."
FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1

Comments

Richard Sandiford Sept. 24, 2019, 11:11 a.m. UTC | #1
Martin Liška <mliska@suse.cz> writes:
> Hi.
>
> The patch introduces couple of new TREE_CODEs that will help us to have
> a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
> the first argument is typically a GENERIC tcc_expression tree with 2 operands
> that are visited at various places in GIMPLE code. That said, based on the discussion
> with Richi, I'm suggesting to come up with e.g.
> VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
> introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
> and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
>
> The patch is a prototype and missing bits are:
> - folding support addition for GIMPLE_QUATERNARY_RHS is missing
> - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
>   UNLT_EXPR and others are not supported right now
> - comments are missing for various functions added
>
> Apart from that I was able to bootstrap and run tests with a quite small fallout.
> Thoughts?
> Martin

I think this is going in the wrong direction.  There are some targets
that can only handle VEC_COND_EXPRs well if we know the associated
condition, and others where a compare-and-VEC_COND_EXPR will always be
two operations.  In that situation, it seems like the native gimple
representation should be the simpler representation rather than the
more complex one.  That way the comparisons can be optimised
independently of any VEC_COND_EXPRs on targets that benefit from that.

So IMO it would be better to use three-operand VEC_COND_EXPRs with
no embedded conditions as the preferred gimple representation and
have internal functions for the fused operations that some targets
prefer.  This means that using fused operations is "just" an instruction
selection decision rather than hard-coded throughout gimple.  (And that
fits in well with the idea of doing more instruction selection in gimple.)

Thanks,
Richard
Richard Biener Sept. 24, 2019, 11:29 a.m. UTC | #2
On Tue, Sep 24, 2019 at 1:11 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Martin Liška <mliska@suse.cz> writes:
> > Hi.
> >
> > The patch introduces couple of new TREE_CODEs that will help us to have
> > a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
> > the first argument is typically a GENERIC tcc_expression tree with 2 operands
> > that are visited at various places in GIMPLE code. That said, based on the discussion
> > with Richi, I'm suggesting to come up with e.g.
> > VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
> > introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
> > and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
> >
> > The patch is a prototype and missing bits are:
> > - folding support addition for GIMPLE_QUATERNARY_RHS is missing
> > - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
> >   UNLT_EXPR and others are not supported right now
> > - comments are missing for various functions added
> >
> > Apart from that I was able to bootstrap and run tests with a quite small fallout.
> > Thoughts?
> > Martin
>
> I think this is going in the wrong direction.  There are some targets
> that can only handle VEC_COND_EXPRs well if we know the associated
> condition, and others where a compare-and-VEC_COND_EXPR will always be
> two operations.  In that situation, it seems like the native gimple
> representation should be the simpler representation rather than the
> more complex one.  That way the comparisons can be optimised
> independently of any VEC_COND_EXPRs on targets that benefit from that.
>
> So IMO it would be better to use three-operand VEC_COND_EXPRs with
> no embedded conditions as the preferred gimple representation and
> have internal functions for the fused operations that some targets
> prefer.  This means that using fused operations is "just" an instruction
> selection decision rather than hard-coded throughout gimple.  (And that
> fits in well with the idea of doing more instruction selection in gimple.)

So I've been doing that before, but more generally also for COND_EXPR.
We cannot rely on TER and the existing RTL expansion "magic" for the
instruction selection issue you mention because TER isn't reliable.  With
IFNs for optabs we could do actual [vector] condition instruction selection
before RTL expansion, ignoring "single-use" issues - is that what you are
hinting at?  How should the vectorizer deal with this?  Should it directly
use the optab IFNs then when facing "split" COND_EXPRs?  IIRC the
most fallout of a simple patch (adjusting is_gimple_condexpr) is in the
vectorizer.

Note I'm specifically looking for a solution that applies to both COND_EXPR
and VEC_COND_EXPR since both suffer from the same issues.

There was also recent work in putting back possibly trapping comparisons
into [VEC_]COND_EXPR because it doesn't interfere with EH and allows
better code.  Also you SVE people had VN issues with cond-exprs and
VN runs into the exact same issue (but would handle separate comparisons
better - with the caveat of breaking TER).

Richard.

>
> Thanks,
> Richard
Richard Sandiford Sept. 24, 2019, 11:56 a.m. UTC | #3
Richard Biener <richard.guenther@gmail.com> writes:
> On Tue, Sep 24, 2019 at 1:11 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Martin Liška <mliska@suse.cz> writes:
>> > Hi.
>> >
>> > The patch introduces couple of new TREE_CODEs that will help us to have
>> > a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
>> > the first argument is typically a GENERIC tcc_expression tree with 2 operands
>> > that are visited at various places in GIMPLE code. That said, based on the discussion
>> > with Richi, I'm suggesting to come up with e.g.
>> > VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
>> > introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
>> > and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
>> >
>> > The patch is a prototype and missing bits are:
>> > - folding support addition for GIMPLE_QUATERNARY_RHS is missing
>> > - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
>> >   UNLT_EXPR and others are not supported right now
>> > - comments are missing for various functions added
>> >
>> > Apart from that I was able to bootstrap and run tests with a quite small fallout.
>> > Thoughts?
>> > Martin
>>
>> I think this is going in the wrong direction.  There are some targets
>> that can only handle VEC_COND_EXPRs well if we know the associated
>> condition, and others where a compare-and-VEC_COND_EXPR will always be
>> two operations.  In that situation, it seems like the native gimple
>> representation should be the simpler representation rather than the
>> more complex one.  That way the comparisons can be optimised
>> independently of any VEC_COND_EXPRs on targets that benefit from that.
>>
>> So IMO it would be better to use three-operand VEC_COND_EXPRs with
>> no embedded conditions as the preferred gimple representation and
>> have internal functions for the fused operations that some targets
>> prefer.  This means that using fused operations is "just" an instruction
>> selection decision rather than hard-coded throughout gimple.  (And that
>> fits in well with the idea of doing more instruction selection in gimple.)
>
> So I've been doing that before, but more generally also for COND_EXPR.
> We cannot rely on TER and the existing RTL expansion "magic" for the
> instruction selection issue you mention because TER isn't reliable.  With
> IFNs for optabs we could do actual [vector] condition instruction selection
> before RTL expansion, ignoring "single-use" issues - is that what you are
> hinting at?

Yeah.  It'd be similar to how most FMA selection happens after
vectorisation but before expand.

> How should the vectorizer deal with this?  Should it directly
> use the optab IFNs then when facing "split" COND_EXPRs?  IIRC the
> most fallout of a simple patch (adjusting is_gimple_condexpr) is in the
> vectorizer.

I guess that would be down to how well the vector costings work if we
just stick to VEC_COND_EXPR and cost the comparison separately.  Using
optabs directly in the vectoriser definitely sounds OK if that ends up
being necessary for good code.  But if (like you say) the COND_EXPR is
also split apart, we'd be costing the scalar comparison and selection
separately as well.

> Note I'm specifically looking for a solution that applies to both COND_EXPR
> and VEC_COND_EXPR since both suffer from the same issues.

Yeah, I think the same approach would work for COND_EXPR if it's needed.
(And I think the same trade-off applies there too.  Some targets will
always need a separate comparison to implement a four-operand COND_EXPR.)

> There was also recent work in putting back possibly trapping comparisons
> into [VEC_]COND_EXPR because it doesn't interfere with EH and allows
> better code.

OK, that's a good counter-reason :-)  But it seems quite special-purpose.
I assume this works even for targets that do split the VEC_COND_EXPR
because the result is undefined on entry to the EH receiver if the
operation didn't complete.  But that should be true of any non-trapping
work done after the comparison, with the same proviso.

So this still seems like an instruction-selection issue.  We're just
saying that it's OK to combine a trapping comparison and a VEC_COND_EXPR
from the non-trapping path.  The same would be true for any other
instruction selection that combines trapping and non-trapping
operations, provided that the speculated parts can never trap.

> Also you SVE people had VN issues with cond-exprs and
> VN runs into the exact same issue (but would handle separate comparisons
> better - with the caveat of breaking TER).

The VN thing turned out to be a red herring there, sorry.  I think
I was remembering the state before ifcvt did its own value numbering.
The remaining issue for the vectoriser is that we don't avoid duplicate
cast conversions in vect_recog_mask_conversion_pattern, but that's
mostly a cost thing.  The redundancies do get removed by later passes.

Thanks,
Richard
Richard Biener Sept. 24, 2019, 12:17 p.m. UTC | #4
On Tue, Sep 24, 2019 at 1:57 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Richard Biener <richard.guenther@gmail.com> writes:
> > On Tue, Sep 24, 2019 at 1:11 PM Richard Sandiford
> > <richard.sandiford@arm.com> wrote:
> >>
> >> Martin Liška <mliska@suse.cz> writes:
> >> > Hi.
> >> >
> >> > The patch introduces couple of new TREE_CODEs that will help us to have
> >> > a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
> >> > the first argument is typically a GENERIC tcc_expression tree with 2 operands
> >> > that are visited at various places in GIMPLE code. That said, based on the discussion
> >> > with Richi, I'm suggesting to come up with e.g.
> >> > VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
> >> > introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
> >> > and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
> >> >
> >> > The patch is a prototype and missing bits are:
> >> > - folding support addition for GIMPLE_QUATERNARY_RHS is missing
> >> > - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
> >> >   UNLT_EXPR and others are not supported right now
> >> > - comments are missing for various functions added
> >> >
> >> > Apart from that I was able to bootstrap and run tests with a quite small fallout.
> >> > Thoughts?
> >> > Martin
> >>
> >> I think this is going in the wrong direction.  There are some targets
> >> that can only handle VEC_COND_EXPRs well if we know the associated
> >> condition, and others where a compare-and-VEC_COND_EXPR will always be
> >> two operations.  In that situation, it seems like the native gimple
> >> representation should be the simpler representation rather than the
> >> more complex one.  That way the comparisons can be optimised
> >> independently of any VEC_COND_EXPRs on targets that benefit from that.
> >>
> >> So IMO it would be better to use three-operand VEC_COND_EXPRs with
> >> no embedded conditions as the preferred gimple representation and
> >> have internal functions for the fused operations that some targets
> >> prefer.  This means that using fused operations is "just" an instruction
> >> selection decision rather than hard-coded throughout gimple.  (And that
> >> fits in well with the idea of doing more instruction selection in gimple.)
> >
> > So I've been doing that before, but more generally also for COND_EXPR.
> > We cannot rely on TER and the existing RTL expansion "magic" for the
> > instruction selection issue you mention because TER isn't reliable.  With
> > IFNs for optabs we could do actual [vector] condition instruction selection
> > before RTL expansion, ignoring "single-use" issues - is that what you are
> > hinting at?
>
> Yeah.  It'd be similar to how most FMA selection happens after
> vectorisation but before expand.
>
> > How should the vectorizer deal with this?  Should it directly
> > use the optab IFNs then when facing "split" COND_EXPRs?  IIRC the
> > most fallout of a simple patch (adjusting is_gimple_condexpr) is in the
> > vectorizer.
>
> I guess that would be down to how well the vector costings work if we
> just stick to VEC_COND_EXPR and cost the comparison separately.  Using
> optabs directly in the vectoriser definitely sounds OK if that ends up
> being necessary for good code.  But if (like you say) the COND_EXPR is
> also split apart, we'd be costing the scalar comparison and selection
> separately as well.
>
> > Note I'm specifically looking for a solution that applies to both COND_EXPR
> > and VEC_COND_EXPR since both suffer from the same issues.
>
> Yeah, think the same approach would work for COND_EXPR if it's needed.
> (And I think the same trade-off applies there too.  Some targets will
> always need a separate comparison to implement a four-operand COND_EXPR.)
>
> > There was also recent work in putting back possibly trapping comparisons
> > into [VEC_]COND_EXPR because it doesn't interfere with EH and allows
> > better code.
>
> OK, that's a good counter-reason :-)  But it seems quite special-purpose.
> I assume this works even for targets that do split the VEC_COND_EXPR
> because the result is undefined on entry to the EH receiver if the
> operation didn't complete.  But that should be true of any non-trapping
> work done after the comparison, with the same proviso.
>
> So this still seems like an instruction-selection issue.  We're just
> saying that it's OK to combine a trapping comparison and a VEC_COND_EXPR
> from the non-trapping path.  The same would be true for any other
> instruction selection that combines trapping and non-trapping
> operations, provided that the speculated parts can never trap.

Sure, but that case would necessarily be combining the compare and the
select to the compare place which is "backwards" (and would speculate
the select).  Certainly something we don't do anywhere.  This case btw
made me consider going the four-operand way (I've pondered with all available
ops multiple times...).

> > Also you SVE people had VN issues with cond-exprs and
> > VN runs into the exact same issue (but would handle separate comparisons
> > better - with the caveat of breaking TER).
>
> The VN thing turned out to be a red herring there, sorry.  I think
> I was remembering the state before ifcvt did its own value numbering.
> The remaining issue for the vectoriser is that we don't avoid duplicate
> cast conversions in vect_recog_mask_conversion_pattern, but that's
> mostly a cost thing.  The redundancies do get removed by later passes.

Well, I checked and value-numbering doesn't really handle non-trivial
"equalities" of the condition operand (if one of the operands of the
condition need to be valueized to be detected equal).

So to go forward and to make sure we don't regress, the appropriate
way would probably be to tackle the expansion part first.  I guess we'll
not notice for scalar COND_EXPRs (because those don't happen
very often) so we could "lower" VEC_COND_EXPRs to the desired
form (and key IL verification on PROP_gimple_lvec), which then
means late FRE/DOM have the chance to break things by doing
CSE.  At the same time we'd remove the forwprop pieces that put
the condition back in.  Then we can see to implement the
instruction selection somehow somewhere... (does it need to happen
at -O0?  I think that might be desirable - looking at vectorizer
intrinsic code might help to decide).

Does that sound sensible?  I've searched my patch archives and
could share several incomplete attempts at tackling this, dating
back to as far as 2010...

Richard.

> Thanks,
> Richard
Richard Sandiford Sept. 24, 2019, 2:51 p.m. UTC | #5
Richard Biener <richard.guenther@gmail.com> writes:
> On Tue, Sep 24, 2019 at 1:57 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Richard Biener <richard.guenther@gmail.com> writes:
>> > On Tue, Sep 24, 2019 at 1:11 PM Richard Sandiford
>> > <richard.sandiford@arm.com> wrote:
>> >>
>> >> Martin Liška <mliska@suse.cz> writes:
>> >> > Hi.
>> >> >
>> >> > The patch introduces couple of new TREE_CODEs that will help us to have
>> >> > a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
>> >> > the first argument is typically a GENERIC tcc_expression tree with 2 operands
>> >> > that are visited at various places in GIMPLE code. That said, based on the discussion
>> >> > with Richi, I'm suggesting to come up with e.g.
>> >> > VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
>> >> > introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
>> >> > and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
>> >> >
>> >> > The patch is a prototype and missing bits are:
>> >> > - folding support addition for GIMPLE_QUATERNARY_RHS is missing
>> >> > - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
>> >> >   UNLT_EXPR and others are not supported right now
>> >> > - comments are missing for various functions added
>> >> >
>> >> > Apart from that I was able to bootstrap and run tests with a quite small fallout.
>> >> > Thoughts?
>> >> > Martin
>> >>
>> >> I think this is going in the wrong direction.  There are some targets
>> >> that can only handle VEC_COND_EXPRs well if we know the associated
>> >> condition, and others where a compare-and-VEC_COND_EXPR will always be
>> >> two operations.  In that situation, it seems like the native gimple
>> >> representation should be the simpler representation rather than the
>> >> more complex one.  That way the comparisons can be optimised
>> >> independently of any VEC_COND_EXPRs on targets that benefit from that.
>> >>
>> >> So IMO it would be better to use three-operand VEC_COND_EXPRs with
>> >> no embedded conditions as the preferred gimple representation and
>> >> have internal functions for the fused operations that some targets
>> >> prefer.  This means that using fused operations is "just" an instruction
>> >> selection decision rather than hard-coded throughout gimple.  (And that
>> >> fits in well with the idea of doing more instruction selection in gimple.)
>> >
>> > So I've been doing that before, but more generally also for COND_EXPR.
>> > We cannot rely on TER and the existing RTL expansion "magic" for the
>> > instruction selection issue you mention because TER isn't reliable.  With
>> > IFNs for optabs we could do actual [vector] condition instruction selection
>> > before RTL expansion, ignoring "single-use" issues - is that what you are
>> > hinting at?
>>
>> Yeah.  It'd be similar to how most FMA selection happens after
>> vectorisation but before expand.
>>
>> > How should the vectorizer deal with this?  Should it directly
>> > use the optab IFNs then when facing "split" COND_EXPRs?  IIRC the
>> > most fallout of a simple patch (adjusting is_gimple_condexpr) is in the
>> > vectorizer.
>>
>> I guess that would be down to how well the vector costings work if we
>> just stick to VEC_COND_EXPR and cost the comparison separately.  Using
>> optabs directly in the vectoriser definitely sounds OK if that ends up
>> being necessary for good code.  But if (like you say) the COND_EXPR is
>> also split apart, we'd be costing the scalar comparison and selection
>> separately as well.
>>
>> > Note I'm specifically looking for a solution that applies to both COND_EXPR
>> > and VEC_COND_EXPR since both suffer from the same issues.
>>
>> Yeah, think the same approach would work for COND_EXPR if it's needed.
>> (And I think the same trade-off applies there too.  Some targets will
>> always need a separate comparison to implement a four-operand COND_EXPR.)
>>
>> > There was also recent work in putting back possibly trapping comparisons
>> > into [VEC_]COND_EXPR because it doesn't interfere with EH and allows
>> > better code.
>>
>> OK, that's a good counter-reason :-)  But it seems quite special-purpose.
>> I assume this works even for targets that do split the VEC_COND_EXPR
>> because the result is undefined on entry to the EH receiver if the
>> operation didn't complete.  But that should be true of any non-trapping
>> work done after the comparison, with the same proviso.
>>
>> So this still seems like an instruction-selection issue.  We're just
>> saying that it's OK to combine a trapping comparison and a VEC_COND_EXPR
>> from the non-trapping path.  The same would be true for any other
>> instruction selection that combines trapping and non-trapping
>> operations, provided that the speculated parts can never trap.
>
> Sure, but that case would necessarily be combining the compare and the
> select to the compare place which is "backwards" (and would speculate
> the select).  Certainly something we don't do anywhere.  This case btw
> made me consider going the four-operand way (I've pondered with all available
> ops multiple times...).

Yeah, but that was my point: speculating/moving back operations that
are dependent on the result of the comparison is valid for any non-trapping
operation, not just selects.  E.g. maybe some future target will want to
have a version of IFN_COND_ADD with an embedded condition, and the same
thing would then be useful for integer additions based on FP comparison
results.

So I don't think VEC_COND_EXPR is such a special case that we need
the four-operand form all the way through gimple.

>> > Also you SVE people had VN issues with cond-exprs and
>> > VN runs into the exact same issue (but would handle separate comparisons
>> > better - with the caveat of breaking TER).
>>
>> The VN thing turned out to be a red herring there, sorry.  I think
>> I was remembering the state before ifcvt did its own value numbering.
>> The remaining issue for the vectoriser is that we don't avoid duplicate
>> cast conversions in vect_recog_mask_conversion_pattern, but that's
>> mostly a cost thing.  The redundancies do get removed by later passes.
>
> Well, I checked and value-numbering doesn't really handle non-trivial
> "equalities" of the condition operand (if one of the operands of the
> condition need to be valueized to be detected equal).
>
> So to go forward and to make sure we don't regress the appropriate
> way would probably to tackle the expansion part first.  I guess we'll
> not notice for scalar COND_EXPRs (because those don't happen
> very often) so we could "lower" VEC_COND_EXPRs to the desired
> form (and key IL verificataion on PROP_gimple_lvec), which then
> means late FRE/DOM have the chance to break things by doing
> CSE.  At the same time we'd remove the forwprop pieces that put
> the condition back in.  Then we can see to implement the
> instruction selection somehow somewhere... (does it need to happen
> at -O0?  I think that might be desirable - looking at vectorizer
> intrinsic code might help to decide).

Not sure why we'd need it for correctness at -O0.  Can't VEC_COND_EXPR
always be emulated (albeit inefficiently)?  Even if you only have FP
compare-and-select, you can emulate VEC_COND_EXPRs on a 0/-1 mask.

If the code produced really is too poor even for -O0, then keeping
intrinsics as intrinsics during gimple would probably be better.

> Does that sound sensible?  I've searched my patch archieves and
> could share several incomplete attempts on tackling this, dating
> back to as far as 2010...)

Sounds good to me FWIW.

Thanks,
Richard
diff mbox series

Patch

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index a2f96239e2f..eb5ada52a3b 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -106,6 +106,12 @@  gimple_assign_rhs_to_tree (gimple *stmt)
   tree t;
   switch (get_gimple_rhs_class (gimple_expr_code (stmt)))
     {
+    case GIMPLE_QUATERNARY_RHS:
+      t = build4 (gimple_assign_rhs_code (stmt),
+		  TREE_TYPE (gimple_assign_lhs (stmt)),
+		  gimple_assign_rhs1 (stmt), gimple_assign_rhs2 (stmt),
+		  gimple_assign_rhs3 (stmt), gimple_assign_rhs4 (stmt));
+      break;
     case GIMPLE_TERNARY_RHS:
       t = build3 (gimple_assign_rhs_code (stmt),
 		  TREE_TYPE (gimple_assign_lhs (stmt)),
@@ -3793,17 +3799,20 @@  expand_gimple_stmt_1 (gimple *stmt)
 	    ops.type = TREE_TYPE (lhs);
 	    switch (get_gimple_rhs_class (ops.code))
 	      {
-		case GIMPLE_TERNARY_RHS:
-		  ops.op2 = gimple_assign_rhs3 (assign_stmt);
-		  /* Fallthru */
-		case GIMPLE_BINARY_RHS:
-		  ops.op1 = gimple_assign_rhs2 (assign_stmt);
-		  /* Fallthru */
-		case GIMPLE_UNARY_RHS:
-		  ops.op0 = gimple_assign_rhs1 (assign_stmt);
-		  break;
-		default:
-		  gcc_unreachable ();
+	      case GIMPLE_QUATERNARY_RHS:
+		ops.op3 = gimple_assign_rhs4 (assign_stmt);
+		/* Fallthru */
+	      case GIMPLE_TERNARY_RHS:
+		ops.op2 = gimple_assign_rhs3 (assign_stmt);
+		/* Fallthru */
+	      case GIMPLE_BINARY_RHS:
+		ops.op1 = gimple_assign_rhs2 (assign_stmt);
+		/* Fallthru */
+	      case GIMPLE_UNARY_RHS:
+		ops.op0 = gimple_assign_rhs1 (assign_stmt);
+		break;
+	      default:
+		gcc_unreachable ();
 	      }
 	    ops.location = gimple_location (stmt);
 
@@ -5172,7 +5181,7 @@  expand_debug_expr (tree exp)
 
     /* Vector stuff.  For most of the codes we don't have rtl codes.  */
     case REALIGN_LOAD_EXPR:
-    case VEC_COND_EXPR:
+    CASE_VEC_COND_EXPR:
     case VEC_PACK_FIX_TRUNC_EXPR:
     case VEC_PACK_FLOAT_EXPR:
     case VEC_PACK_SAT_EXPR:
diff --git a/gcc/expr.c b/gcc/expr.c
index 2f2b53f8b69..de18229f162 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8450,7 +8450,7 @@  expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
   int ignore;
   bool reduce_bit_field;
   location_t loc = ops->location;
-  tree treeop0, treeop1, treeop2;
+  tree treeop0, treeop1, treeop2, treeop3;
 #define REDUCE_BIT_FIELD(expr)	(reduce_bit_field			  \
 				 ? reduce_to_bit_field_precision ((expr), \
 								  target, \
@@ -8464,13 +8464,15 @@  expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
   treeop0 = ops->op0;
   treeop1 = ops->op1;
   treeop2 = ops->op2;
+  treeop3 = ops->op3;
 
   /* We should be called only on simple (binary or unary) expressions,
      exactly those that are valid in gimple expressions that aren't
      GIMPLE_SINGLE_RHS (or invalid).  */
   gcc_assert (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS
 	      || get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS
-	      || get_gimple_rhs_class (code) == GIMPLE_TERNARY_RHS);
+	      || get_gimple_rhs_class (code) == GIMPLE_TERNARY_RHS
+	      || get_gimple_rhs_class (code) == GIMPLE_QUATERNARY_RHS);
 
   ignore = (target == const0_rtx
 	    || ((CONVERT_EXPR_CODE_P (code)
@@ -9141,16 +9143,15 @@  expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
+      /* For vector MIN <x, y>, expand it a VEC_COND_*_EXPR <x <= y, x, y>
 	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
 	{
 	  tree t0 = make_tree (type, op0);
 	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
+	  return expand_vec_cond_expr (type,
+				       code == MIN_EXPR ? LE_EXPR : GE_EXPR, t0,
+				       t1, t0, t1, original_target);
 	}
 
       /* At this point, a MEM target is no longer useful; we will get better
@@ -9743,8 +9744,9 @@  expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
+    CASE_VEC_COND_EXPR:
+      target = expand_vec_cond_expr (type, code, treeop0, treeop1, treeop2,
+				     treeop3, target);
       return target;
 
     case VEC_DUPLICATE_EXPR:
@@ -9971,6 +9973,9 @@  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 	  ops.code = gimple_assign_rhs_code (g);
           switch (get_gimple_rhs_class (ops.code))
 	    {
+	    case GIMPLE_QUATERNARY_RHS:
+	      ops.op3 = gimple_assign_rhs4 (g);
+	      /* Fallthru */
 	    case GIMPLE_TERNARY_RHS:
 	      ops.op2 = gimple_assign_rhs3 (g);
 	      /* Fallthru */
@@ -11774,6 +11779,7 @@  maybe_optimize_pow2p_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
   ops.op0 = treeop0;
   ops.op1 = treeop1;
   ops.op2 = NULL_TREE;
+  ops.op3 = NULL_TREE;
   start_sequence ();
   rtx mor = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
 				EXPAND_NORMAL);
@@ -11790,6 +11796,7 @@  maybe_optimize_pow2p_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
   ops.op0 = treeop0;
   ops.op1 = c3;
   ops.op2 = NULL_TREE;
+  ops.op3 = NULL_TREE;
   start_sequence ();
   rtx mur = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
 				EXPAND_NORMAL);
@@ -11977,6 +11984,7 @@  maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
   ops.op0 = treeop0;
   ops.op1 = treeop1;
   ops.op2 = NULL_TREE;
+  ops.op3 = NULL_TREE;
   start_sequence ();
   rtx mor = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
 				EXPAND_NORMAL);
@@ -12082,16 +12090,18 @@  do_store_flag (sepops ops, rtx target, machine_mode mode)
      expander for this.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
-      tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
       if (VECTOR_BOOLEAN_TYPE_P (ops->type)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
-	return expand_vec_cmp_expr (ops->type, ifexp, target);
+	{
+	  tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
+	  return expand_vec_cmp_expr (ops->type, ifexp, target);
+	}
       else
 	{
 	  tree if_true = constant_boolean_node (true, ops->type);
 	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
+	  return expand_vec_cond_expr (ops->type, ops->code, arg0, arg1,
+				       if_true, if_false, target);
 	}
     }
 
diff --git a/gcc/expr.h b/gcc/expr.h
index 6eb70bf12f1..312d25c7be4 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -51,7 +51,7 @@  typedef struct separate_ops
   enum tree_code code;
   location_t location;
   tree type;
-  tree op0, op1, op2;
+  tree op0, op1, op2, op3;
 } *sepops;
 
 /* This is run during target initialization to set up which modes can be
diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c
index 4082828e198..800ece6ec09 100644
--- a/gcc/gimple-expr.c
+++ b/gcc/gimple-expr.c
@@ -526,16 +526,25 @@  create_tmp_reg_fn (struct function *fn, tree type, const char *prefix)
 
 void
 extract_ops_from_tree (tree expr, enum tree_code *subcode_p, tree *op1_p,
-		       tree *op2_p, tree *op3_p)
+		       tree *op2_p, tree *op3_p, tree *op4_p)
 {
   *subcode_p = TREE_CODE (expr);
   switch (get_gimple_rhs_class (*subcode_p))
     {
+    case GIMPLE_QUATERNARY_RHS:
+      {
+	*op1_p = TREE_OPERAND (expr, 0);
+	*op2_p = TREE_OPERAND (expr, 1);
+	*op3_p = TREE_OPERAND (expr, 2);
+	*op4_p = TREE_OPERAND (expr, 3);
+	break;
+      }
     case GIMPLE_TERNARY_RHS:
       {
 	*op1_p = TREE_OPERAND (expr, 0);
 	*op2_p = TREE_OPERAND (expr, 1);
 	*op3_p = TREE_OPERAND (expr, 2);
+	*op4_p = NULL_TREE;
 	break;
       }
     case GIMPLE_BINARY_RHS:
@@ -543,6 +552,7 @@  extract_ops_from_tree (tree expr, enum tree_code *subcode_p, tree *op1_p,
 	*op1_p = TREE_OPERAND (expr, 0);
 	*op2_p = TREE_OPERAND (expr, 1);
 	*op3_p = NULL_TREE;
+	*op4_p = NULL_TREE;
 	break;
       }
     case GIMPLE_UNARY_RHS:
@@ -550,6 +560,7 @@  extract_ops_from_tree (tree expr, enum tree_code *subcode_p, tree *op1_p,
 	*op1_p = TREE_OPERAND (expr, 0);
 	*op2_p = NULL_TREE;
 	*op3_p = NULL_TREE;
+	*op4_p = NULL_TREE;
 	break;
       }
     case GIMPLE_SINGLE_RHS:
@@ -557,6 +568,7 @@  extract_ops_from_tree (tree expr, enum tree_code *subcode_p, tree *op1_p,
 	*op1_p = expr;
 	*op2_p = NULL_TREE;
 	*op3_p = NULL_TREE;
+	*op4_p = NULL_TREE;
 	break;
       }
     default:
diff --git a/gcc/gimple-expr.h b/gcc/gimple-expr.h
index 1ad1432bd17..c37bff201cc 100644
--- a/gcc/gimple-expr.h
+++ b/gcc/gimple-expr.h
@@ -36,7 +36,7 @@  extern tree create_tmp_reg_fn (struct function *, tree, const char *);
 
 
 extern void extract_ops_from_tree (tree, enum tree_code *, tree *, tree *,
-				   tree *);
+				   tree *, tree *);
 extern void gimple_cond_get_ops_from_tree (tree, enum tree_code *, tree *,
 					   tree *);
 extern bool is_gimple_lvalue (tree);
@@ -151,8 +151,8 @@  static inline void
 extract_ops_from_tree (tree expr, enum tree_code *code, tree *op0,
 		       tree *op1)
 {
-  tree op2;
-  extract_ops_from_tree (expr, code, op0, op1, &op2);
+  tree op2, op3;
+  extract_ops_from_tree (expr, code, op0, op1, &op2, &op3);
   gcc_assert (op2 == NULL_TREE);
 }
 
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index 8d642de2f67..4c2d5e73c0c 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -470,6 +470,10 @@  fold_gimple_assign (gimple_stmt_iterator *si)
         }
       break;
 
+    case GIMPLE_QUATERNARY_RHS:
+      // TODO
+      break;
+
     case GIMPLE_INVALID_RHS:
       gcc_unreachable ();
     }
@@ -4676,7 +4680,8 @@  replace_stmt_with_simplification (gimple_stmt_iterator *gsi,
 	  gimple_assign_set_rhs_with_ops (gsi, res_op->code,
 					  res_op->op_or_null (0),
 					  res_op->op_or_null (1),
-					  res_op->op_or_null (2));
+					  res_op->op_or_null (2),
+					  res_op->op_or_null (3));
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    {
 	      fprintf (dump_file, "gimple_simplified to ");
@@ -6361,7 +6366,11 @@  gimple_fold_stmt_to_constant_1 (gimple *stmt, tree (*valueize) (tree),
 				       gimple_expr_type (stmt), op0, op1, op2);
             }
 
-          default:
+	  case GIMPLE_QUATERNARY_RHS:
+	    // TODO
+	    return NULL_TREE;
+
+	  default:
             gcc_unreachable ();
           }
       }
@@ -7628,6 +7637,7 @@  gimple_assign_nonnegative_warnv_p (gimple *stmt, bool *strict_overflow_p,
 					      gimple_assign_rhs2 (stmt),
 					      strict_overflow_p, depth);
     case GIMPLE_TERNARY_RHS:
+    case GIMPLE_QUATERNARY_RHS:
       return false;
     case GIMPLE_SINGLE_RHS:
       return tree_single_nonnegative_warnv_p (gimple_assign_rhs1 (stmt),
@@ -7722,6 +7732,7 @@  gimple_assign_integer_valued_real_p (gimple *stmt, int depth)
 					   gimple_assign_rhs1 (stmt),
 					   gimple_assign_rhs2 (stmt), depth);
     case GIMPLE_TERNARY_RHS:
+    case GIMPLE_QUATERNARY_RHS:
       return false;
     case GIMPLE_SINGLE_RHS:
       return integer_valued_real_single_p (gimple_assign_rhs1 (stmt), depth);
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 53278168a59..ebe60ba774d 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -974,6 +974,9 @@  gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
 	      return (gimple_resimplify3 (seq, res_op, valueize)
 		      || valueized);
 	    }
+	  case GIMPLE_QUATERNARY_RHS:
+	    // TODO: add support
+	    break;
 	  default:
 	    gcc_unreachable ();
 	  }
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index 2d5ece06805..1d4d489429e 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -620,6 +620,59 @@  dump_ternary_rhs (pretty_printer *buffer, gassign *gs, int spc,
     }
 }
 
+static void
+dump_comparison (pretty_printer *buffer, tree_code code)
+{
+  switch (code)
+    {
+    case LT_EXPR:
+      pp_less (buffer);
+      break;
+    case GT_EXPR:
+      pp_greater (buffer);
+      break;
+    case LE_EXPR:
+      pp_less_equal (buffer);
+      break;
+    case GE_EXPR:
+      pp_greater_equal (buffer);
+      break;
+    case EQ_EXPR:
+      pp_string (buffer, "==");
+      break;
+    case NE_EXPR:
+      pp_string (buffer, "!=");
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Helper for dump_gimple_assign.  Print the quaternary RHS of the
+   assignment GS.  BUFFER, SPC and FLAGS are as in pp_gimple_stmt_1.  */
+
+static void
+dump_quaternary_rhs (pretty_printer *buffer, gassign *gs, int spc,
+		     dump_flags_t flags)
+{
+  enum tree_code code = gimple_assign_rhs_code (gs);
+  switch (code)
+    {
+    CASE_VEC_COND_EXPR:
+      dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+      pp_space (buffer);
+      dump_comparison (buffer, vec_cmp_to_cmp_code (code));
+      pp_space (buffer);
+      dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+      pp_string (buffer, " ? ");
+      dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+      pp_string (buffer, " : ");
+      dump_generic_node (buffer, gimple_assign_rhs4 (gs), spc, flags, false);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
 
 /* Dump the gimple assignment GS.  BUFFER, SPC and FLAGS are as in
    pp_gimple_stmt_1.  */
@@ -675,6 +728,8 @@  dump_gimple_assign (pretty_printer *buffer, gassign *gs, int spc,
         dump_binary_rhs (buffer, gs, spc, flags);
       else if (gimple_num_ops (gs) == 4)
         dump_ternary_rhs (buffer, gs, spc, flags);
+      else if (gimple_num_ops (gs) == 5)
+	dump_quaternary_rhs (buffer, gs, spc, flags);
       else
         gcc_unreachable ();
       if (!(flags & TDF_RHS_ONLY))
@@ -1518,26 +1573,7 @@  dump_gimple_omp_for (pretty_printer *buffer, gomp_for *gs, int spc,
 	  dump_generic_node (buffer, gimple_omp_for_index (gs, i), spc,
 			     flags, false);
 	  pp_space (buffer);
-	  switch (gimple_omp_for_cond (gs, i))
-	    {
-	    case LT_EXPR:
-	      pp_less (buffer);
-	      break;
-	    case GT_EXPR:
-	      pp_greater (buffer);
-	      break;
-	    case LE_EXPR:
-	      pp_less_equal (buffer);
-	      break;
-	    case GE_EXPR:
-	      pp_greater_equal (buffer);
-	      break;
-	    case NE_EXPR:
-	      pp_string (buffer, "!=");
-	      break;
-	    default:
-	      gcc_unreachable ();
-	    }
+	  dump_comparison (buffer, gimple_omp_for_cond (gs, i));
 	  pp_space (buffer);
 	  dump_generic_node (buffer, gimple_omp_for_final (gs, i), spc,
 			     flags, false);
diff --git a/gcc/gimple.c b/gcc/gimple.c
index af62c8bf477..84150f3bebf 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -424,10 +424,10 @@  gassign *
 gimple_build_assign (tree lhs, tree rhs MEM_STAT_DECL)
 {
   enum tree_code subcode;
-  tree op1, op2, op3;
+  tree op1, op2, op3, op4;
 
-  extract_ops_from_tree (rhs, &subcode, &op1, &op2, &op3);
-  return gimple_build_assign (lhs, subcode, op1, op2, op3 PASS_MEM_STAT);
+  extract_ops_from_tree (rhs, &subcode, &op1, &op2, &op3, &op4);
+  return gimple_build_assign (lhs, subcode, op1, op2, op3, op4 PASS_MEM_STAT);
 }
 
 
@@ -436,7 +436,7 @@  gimple_build_assign (tree lhs, tree rhs MEM_STAT_DECL)
 
 static inline gassign *
 gimple_build_assign_1 (tree lhs, enum tree_code subcode, tree op1,
-		       tree op2, tree op3 MEM_STAT_DECL)
+		       tree op2, tree op3, tree op4 MEM_STAT_DECL)
 {
   unsigned num_ops;
   gassign *p;
@@ -462,9 +462,25 @@  gimple_build_assign_1 (tree lhs, enum tree_code subcode, tree op1,
       gimple_assign_set_rhs3 (p, op3);
     }
 
+  if (op4)
+    {
+      gcc_assert (num_ops > 4);
+      gimple_assign_set_rhs4 (p, op4);
+    }
+
   return p;
 }
 
+/* Build a GIMPLE_ASSIGN statement with subcode SUBCODE and operands
+   OP1, OP2, OP3 and OP4.  */
+
+gassign *
+gimple_build_assign (tree lhs, enum tree_code subcode, tree op1, tree op2,
+		     tree op3, tree op4 MEM_STAT_DECL)
+{
+  return gimple_build_assign_1 (lhs, subcode, op1, op2, op3, op4 PASS_MEM_STAT);
+}
+
 /* Build a GIMPLE_ASSIGN statement with subcode SUBCODE and operands
    OP1, OP2 and OP3.  */
 
@@ -472,7 +488,8 @@  gassign *
 gimple_build_assign (tree lhs, enum tree_code subcode, tree op1,
 		     tree op2, tree op3 MEM_STAT_DECL)
 {
-  return gimple_build_assign_1 (lhs, subcode, op1, op2, op3 PASS_MEM_STAT);
+  return gimple_build_assign_1 (lhs, subcode, op1, op2, op3,
+				NULL_TREE PASS_MEM_STAT);
 }
 
 /* Build a GIMPLE_ASSIGN statement with subcode SUBCODE and operands
@@ -482,8 +499,8 @@  gassign *
 gimple_build_assign (tree lhs, enum tree_code subcode, tree op1,
 		     tree op2 MEM_STAT_DECL)
 {
-  return gimple_build_assign_1 (lhs, subcode, op1, op2, NULL_TREE
-				PASS_MEM_STAT);
+  return gimple_build_assign_1 (lhs, subcode, op1, op2, NULL_TREE,
+				NULL_TREE PASS_MEM_STAT);
 }
 
 /* Build a GIMPLE_ASSIGN statement with subcode SUBCODE and operand OP1.  */
@@ -491,8 +508,8 @@  gimple_build_assign (tree lhs, enum tree_code subcode, tree op1,
 gassign *
 gimple_build_assign (tree lhs, enum tree_code subcode, tree op1 MEM_STAT_DECL)
 {
-  return gimple_build_assign_1 (lhs, subcode, op1, NULL_TREE, NULL_TREE
-				PASS_MEM_STAT);
+  return gimple_build_assign_1 (lhs, subcode, op1, NULL_TREE, NULL_TREE,
+				NULL_TREE PASS_MEM_STAT);
 }
 
 
@@ -1737,10 +1754,10 @@  void
 gimple_assign_set_rhs_from_tree (gimple_stmt_iterator *gsi, tree expr)
 {
   enum tree_code subcode;
-  tree op1, op2, op3;
+  tree op1, op2, op3, op4;
 
-  extract_ops_from_tree (expr, &subcode, &op1, &op2, &op3);
-  gimple_assign_set_rhs_with_ops (gsi, subcode, op1, op2, op3);
+  extract_ops_from_tree (expr, &subcode, &op1, &op2, &op3, &op4);
+  gimple_assign_set_rhs_with_ops (gsi, subcode, op1, op2, op3, op4);
 }
 
 
@@ -1752,7 +1769,7 @@  gimple_assign_set_rhs_from_tree (gimple_stmt_iterator *gsi, tree expr)
 
 void
 gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
-				tree op1, tree op2, tree op3)
+				tree op1, tree op2, tree op3, tree op4)
 {
   unsigned new_rhs_ops = get_gimple_rhs_num_ops (code);
   gimple *stmt = gsi_stmt (*gsi);
@@ -1778,6 +1795,8 @@  gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
     gimple_assign_set_rhs2 (stmt, op2);
   if (new_rhs_ops > 2)
     gimple_assign_set_rhs3 (stmt, op3);
+  if (new_rhs_ops > 3)
+    gimple_assign_set_rhs4 (stmt, op4);
   if (stmt != old_stmt)
     gsi_replace (gsi, stmt, false);
 }
@@ -2234,6 +2253,8 @@  get_gimple_rhs_num_ops (enum tree_code code)
       return 2;
     case GIMPLE_TERNARY_RHS:
       return 3;
+    case GIMPLE_QUATERNARY_RHS:
+      return 4;
     default:
       gcc_unreachable ();
     }
@@ -2266,6 +2287,12 @@  get_gimple_rhs_num_ops (enum tree_code code)
      || (SYM) == ADDR_EXPR						    \
      || (SYM) == WITH_SIZE_EXPR					    \
      || (SYM) == SSA_NAME) ? GIMPLE_SINGLE_RHS				    \
+   : ((SYM) == VEC_COND_LT_EXPR						    \
+      || (SYM) == VEC_COND_LE_EXPR					    \
+      || (SYM) == VEC_COND_GT_EXPR					    \
+      || (SYM) == VEC_COND_GE_EXPR					    \
+      || (SYM) == VEC_COND_EQ_EXPR					    \
+      || (SYM) == VEC_COND_NE_EXPR) ? GIMPLE_QUATERNARY_RHS		    \
    : GIMPLE_INVALID_RHS),
 #define END_OF_BASE_TREE_CODES (unsigned char) GIMPLE_INVALID_RHS,
 
@@ -3271,6 +3299,47 @@  gimple_inexpensive_call_p (gcall *stmt)
   return false;
 }
 
+gassign *
+gimple_build_vec_cond_expr (tree lhs, tree condition, tree then_clause,
+			    tree else_clause)
+{
+  tree cond_lhs, cond_rhs;
+  tree_code code;
+
+  if (TREE_CODE (condition) == SSA_NAME)
+    {
+      gimple *stmt = SSA_NAME_DEF_STMT (condition);
+      code = gimple_assign_rhs_code (stmt);
+      if (TREE_CODE_CLASS (code) == tcc_comparison)
+	{
+	  code = cmp_to_vec_cmp_code (code);
+	  cond_lhs = gimple_assign_rhs1 (stmt);
+	  cond_rhs = gimple_assign_rhs2 (stmt);
+	}
+      else
+	{
+	  code = VEC_COND_EQ_EXPR;
+	  cond_lhs = condition;
+	  cond_rhs = constant_boolean_node (true, TREE_TYPE (condition));
+	}
+    }
+  else if (TREE_CODE (condition) == VECTOR_CST)
+    {
+      // TODO: this should be probably folded right away
+      code = VEC_COND_EQ_EXPR;
+      cond_lhs = condition;
+      cond_rhs = constant_boolean_node (true, TREE_TYPE (condition));
+    }
+  else
+    {
+      code = cmp_to_vec_cmp_code (TREE_CODE (condition));
+      cond_lhs = TREE_OPERAND (condition, 0);
+      cond_rhs = TREE_OPERAND (condition, 1);
+    }
+  return gimple_build_assign (lhs, code, cond_lhs, cond_rhs, then_clause,
+			      else_clause);
+}
+
 #if CHECKING_P
 
 namespace selftest {
diff --git a/gcc/gimple.h b/gcc/gimple.h
index cf1f8da5ae2..5363b0910a8 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -115,12 +115,13 @@  GIMPLE_CHECK2(const gimple *gs)
    get_gimple_rhs_class.  */
 enum gimple_rhs_class
 {
-  GIMPLE_INVALID_RHS,	/* The expression cannot be used on the RHS.  */
-  GIMPLE_TERNARY_RHS,	/* The expression is a ternary operation.  */
-  GIMPLE_BINARY_RHS,	/* The expression is a binary operation.  */
-  GIMPLE_UNARY_RHS,	/* The expression is a unary operation.  */
-  GIMPLE_SINGLE_RHS	/* The expression is a single object (an SSA
-			   name, a _DECL, a _REF, etc.  */
+  GIMPLE_INVALID_RHS,    /* The expression cannot be used on the RHS.  */
+  GIMPLE_QUATERNARY_RHS, /* The expression is a quaternary operation.  */
+  GIMPLE_TERNARY_RHS,    /* The expression is a ternary operation.  */
+  GIMPLE_BINARY_RHS,     /* The expression is a binary operation.  */
+  GIMPLE_UNARY_RHS,      /* The expression is a unary operation.  */
+  GIMPLE_SINGLE_RHS      /* The expression is a single object (an SSA
+			    name, a _DECL, a _REF, etc.  */
 };
 
 /* Specific flags for individual GIMPLE statements.  These flags are
@@ -1457,11 +1458,15 @@  gcall *gimple_build_call_internal (enum internal_fn, unsigned, ...);
 gcall *gimple_build_call_internal_vec (enum internal_fn, vec<tree> );
 gcall *gimple_build_call_from_tree (tree, tree);
 gassign *gimple_build_assign (tree, tree CXX_MEM_STAT_INFO);
+gassign *gimple_build_assign (tree, enum tree_code,
+			      tree, tree, tree, tree CXX_MEM_STAT_INFO);
 gassign *gimple_build_assign (tree, enum tree_code,
 			      tree, tree, tree CXX_MEM_STAT_INFO);
 gassign *gimple_build_assign (tree, enum tree_code,
 			      tree, tree CXX_MEM_STAT_INFO);
 gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
+gassign *gimple_build_vec_cond_expr (tree, tree, tree, tree);
+
 gcond *gimple_build_cond (enum tree_code, tree, tree, tree, tree);
 gcond *gimple_build_cond_from_tree (tree, tree, tree);
 void gimple_cond_set_condition_from_tree (gcond *, tree);
@@ -1530,7 +1535,7 @@  bool gimple_assign_unary_nop_p (gimple *);
 void gimple_set_bb (gimple *, basic_block);
 void gimple_assign_set_rhs_from_tree (gimple_stmt_iterator *, tree);
 void gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *, enum tree_code,
-				     tree, tree, tree);
+				     tree, tree, tree, tree);
 tree gimple_get_lhs (const gimple *);
 void gimple_set_lhs (gimple *, tree);
 gimple *gimple_copy (gimple *);
@@ -2685,25 +2690,80 @@  gimple_assign_set_rhs3 (gimple *gs, tree rhs)
   gimple_assign_set_rhs3 (ass, rhs);
 }
 
+/* Return the fourth operand on the RHS of assignment statement GS.
+   If GS does not have four operands, NULL is returned instead.  */
+
+static inline tree
+gimple_assign_rhs4 (const gassign *gs)
+{
+  if (gimple_num_ops (gs) >= 5)
+    return gs->op[4];
+  else
+    return NULL_TREE;
+}
+
+static inline tree
+gimple_assign_rhs4 (const gimple *gs)
+{
+  const gassign *ass = GIMPLE_CHECK2<const gassign *> (gs);
+  return gimple_assign_rhs4 (ass);
+}
+
+/* Return a pointer to the fourth operand on the RHS of assignment
+   statement GS.  */
+
+static inline tree *
+gimple_assign_rhs4_ptr (gimple *gs)
+{
+  gassign *ass = GIMPLE_CHECK2<gassign *> (gs);
+  gcc_gimple_checking_assert (gimple_num_ops (gs) >= 5);
+  return &ass->op[4];
+}
+
+/* Set RHS to be the fourth operand on the RHS of assignment statement GS.  */
+
+static inline void
+gimple_assign_set_rhs4 (gassign *gs, tree rhs)
+{
+  gcc_gimple_checking_assert (gimple_num_ops (gs) >= 5);
+  gs->op[4] = rhs;
+}
+
+static inline void
+gimple_assign_set_rhs4 (gimple *gs, tree rhs)
+{
+  gassign *ass = GIMPLE_CHECK2<gassign *> (gs);
+  gimple_assign_set_rhs4 (ass, rhs);
+}
+
+/* A wrapper around 4 operand gimple_assign_set_rhs_with_ops, for callers
+   which expect to see only three operands.  */
+
+static inline void
+gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
+				tree op1, tree op2, tree op3)
+{
+  gimple_assign_set_rhs_with_ops (gsi, code, op1, op2, op3, NULL);
+}
 
-/* A wrapper around 3 operand gimple_assign_set_rhs_with_ops, for callers
+/* A wrapper around 4 operand gimple_assign_set_rhs_with_ops, for callers
    which expect to see only two operands.  */
 
 static inline void
 gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
 				tree op1, tree op2)
 {
-  gimple_assign_set_rhs_with_ops (gsi, code, op1, op2, NULL);
+  gimple_assign_set_rhs_with_ops (gsi, code, op1, op2, NULL, NULL);
 }
 
-/* A wrapper around 3 operand gimple_assign_set_rhs_with_ops, for callers
+/* A wrapper around 4 operand gimple_assign_set_rhs_with_ops, for callers
    which expect to see only one operands.  */
 
 static inline void
 gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
 				tree op1)
 {
-  gimple_assign_set_rhs_with_ops (gsi, code, op1, NULL, NULL);
+  gimple_assign_set_rhs_with_ops (gsi, code, op1, NULL, NULL, NULL);
 }
 
 /* Returns true if GS is a nontemporal move.  */
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 623cdbfed7c..dcde288789f 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -13812,19 +13812,20 @@  gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 
 	case VEC_COND_EXPR:
 	  {
-	    enum gimplify_status r0, r1, r2;
-
-	    r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
-				post_p, is_gimple_condexpr, fb_rvalue);
-	    r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
-				post_p, is_gimple_val, fb_rvalue);
-	    r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
-				post_p, is_gimple_val, fb_rvalue);
-
-	    ret = MIN (MIN (r0, r1), r2);
-	    recalculate_side_effects (*expr_p);
+	    tree type = TREE_TYPE (TREE_OPERAND (*expr_p, 1));
+	    tree cond_expr = TREE_OPERAND (*expr_p, 0);
+	    tree_code vec_code = cmp_to_vec_cmp_code (TREE_CODE (cond_expr));
+	    *expr_p = build4_loc (input_location, vec_code, type,
+				  TREE_OPERAND (cond_expr, 0),
+				  TREE_OPERAND (cond_expr, 1),
+				  TREE_OPERAND (*expr_p, 1),
+				  TREE_OPERAND (*expr_p, 2));
+
+	    goto expr_4;
 	  }
-	  break;
+
+	CASE_VEC_COND_EXPR:
+	  goto expr_4;
 
 	case VEC_PERM_EXPR:
 	  /* Classified as tcc_expression.  */
@@ -13923,6 +13924,23 @@  gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 		break;
 	      }
 
+	    expr_4:
+	      {
+		enum gimplify_status r0, r1, r2, r3;
+
+		r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p, post_p,
+				    is_gimple_val, fb_rvalue);
+		r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p, post_p,
+				    is_gimple_val, fb_rvalue);
+		r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p, post_p,
+				    is_gimple_val, fb_rvalue);
+		r3 = gimplify_expr (&TREE_OPERAND (*expr_p, 3), pre_p, post_p,
+				    is_gimple_val, fb_rvalue);
+
+		ret = MIN (MIN (r0, r1), MIN (r2, r3));
+		break;
+	      }
+
 	    case tcc_declaration:
 	    case tcc_constant:
 	      ret = GS_ALL_DONE;
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 35921e691f9..b9d540d935d 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5822,44 +5822,36 @@  expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
    three operands.  */
 
 rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
+expand_vec_cond_expr (tree vec_cond_type, tree_code tcode, tree cond_lhs,
+		      tree cond_rhs, tree if_true, tree if_false, rtx target)
 {
   class expand_operand ops[6];
   enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
   machine_mode mode = TYPE_MODE (vec_cond_type);
   machine_mode cmp_op_mode;
   bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
+  tcode = vec_cmp_to_cmp_code (tcode);
 
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
+  if (tcode == EQ_EXPR && TREE_CODE (cond_rhs) == VECTOR_CST
+      && integer_all_onesp (cond_rhs))
     {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (cond_lhs)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (cond_lhs)))
 	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
+	return expand_vec_cond_mask_expr (vec_cond_type, cond_lhs, if_true,
+					  if_false, target);
       /* Fake op0 < 0.  */
       else
 	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (cond_lhs)))
 		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  cond_rhs = build_zero_cst (TREE_TYPE (cond_lhs));
 	  tcode = LT_EXPR;
 	}
     }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
 
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (cond_lhs));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (cond_lhs));
 
   gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
 	      && known_eq (GET_MODE_NUNITS (mode),
@@ -5868,22 +5860,20 @@  expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
   icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
   if (icode == CODE_FOR_nothing)
     {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
+      if (tcode == LT_EXPR && TREE_CODE (cond_lhs) == VECTOR_CST)
 	{
 	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
 	     into a constant when only get_vcond_eq_icode is supported.
 	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
 	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	  if (!VECTOR_CST_NELTS (cond_lhs).is_constant (&nelts))
 	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
+	      if (VECTOR_CST_STEPPED_P (cond_lhs))
 		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
+	      nelts = vector_cst_encoded_nelts (cond_lhs);
 	    }
 	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	    if (tree_int_cst_sgn (vector_cst_elt (cond_lhs, i)) == 1)
 	      return 0;
 	  tcode = NE_EXPR;
 	}
@@ -5893,14 +5883,14 @@  expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
 	return 0;
     }
 
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
+  rtx comparison = vector_compare_rtx (VOIDmode, tcode, cond_lhs, cond_rhs,
+				       unsignedp, icode, 4);
+  rtx rtx_true = expand_normal (if_true);
+  rtx rtx_false = expand_normal (if_false);
 
   create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[1], rtx_true, mode);
+  create_input_operand (&ops[2], rtx_false, mode);
   create_fixed_operand (&ops[3], comparison);
   create_fixed_operand (&ops[4], XEXP (comparison, 0));
   create_fixed_operand (&ops[5], XEXP (comparison, 1));
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 897bb5d4443..df72cc7910b 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -315,7 +315,7 @@  extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
 /* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
+extern rtx expand_vec_cond_expr (tree, tree_code, tree, tree, tree, tree, rtx);
 
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index b75fdb2e63f..348dd8437db 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4152,20 +4152,9 @@  verify_gimple_assign_ternary (gassign *stmt)
 	  return true;
 	}
       break;
-
     case VEC_COND_EXPR:
-      if (!VECTOR_BOOLEAN_TYPE_P (rhs1_type)
-	  || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs1_type),
-		       TYPE_VECTOR_SUBPARTS (lhs_type)))
-	{
-	  error ("the first argument of a %qs must be of a "
-		 "boolean vector type of the same number of elements "
-		 "as the result", code_name);
-	  debug_generic_expr (lhs_type);
-	  debug_generic_expr (rhs1_type);
-	  return true;
-	}
-      /* Fallthrough.  */
+      error ("%qs in gimple IL", code_name);
+      return true;
     case COND_EXPR:
       if (!is_gimple_val (rhs1)
 	  && verify_gimple_comparison (TREE_TYPE (rhs1),
@@ -4364,6 +4353,87 @@  verify_gimple_assign_ternary (gassign *stmt)
   return false;
 }
 
+/* Verify a gimple assignment statement STMT with a quaternary rhs.
+   Returns true if anything is wrong.  */
+
+static bool
+verify_gimple_assign_quaternary (gassign *stmt)
+{
+  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
+  tree lhs = gimple_assign_lhs (stmt);
+  tree lhs_type = TREE_TYPE (lhs);
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs1_type = TREE_TYPE (rhs1);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+  tree rhs2_type = TREE_TYPE (rhs2);
+  tree rhs3 = gimple_assign_rhs3 (stmt);
+  tree rhs3_type = TREE_TYPE (rhs3);
+  tree rhs4 = gimple_assign_rhs4 (stmt);
+  tree rhs4_type = TREE_TYPE (rhs4);
+
+  const char *const code_name = get_tree_code_name (rhs_code);
+
+  if (!is_gimple_reg (lhs))
+    {
+      error ("non-register as LHS of quaternary operation");
+      return true;
+    }
+
+  /* First handle operations that involve different types.  */
+  switch (rhs_code)
+    {
+    CASE_VEC_COND_EXPR:
+      if (TREE_CODE (rhs1_type) != VECTOR_TYPE
+	  || TREE_CODE (rhs2_type) != VECTOR_TYPE
+	  || TREE_CODE (rhs3_type) != VECTOR_TYPE
+	  || TREE_CODE (rhs4_type) != VECTOR_TYPE)
+	{
+	  error ("vector types expected in %qs", code_name);
+	  debug_generic_expr (lhs_type);
+	  debug_generic_expr (rhs1_type);
+	  debug_generic_expr (rhs2_type);
+	  debug_generic_expr (rhs3_type);
+	  debug_generic_expr (rhs4_type);
+	  return true;
+	}
+      if (maybe_ne (TYPE_VECTOR_SUBPARTS (rhs1_type),
+		    TYPE_VECTOR_SUBPARTS (rhs2_type))
+	  || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs2_type),
+		       TYPE_VECTOR_SUBPARTS (rhs3_type))
+	  || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs3_type),
+		       TYPE_VECTOR_SUBPARTS (rhs4_type))
+	  || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs4_type),
+		       TYPE_VECTOR_SUBPARTS (lhs_type)))
+	{
+	  error ("vectors with different element number found in %qs",
+		 code_name);
+	  debug_generic_expr (lhs_type);
+	  debug_generic_expr (rhs1_type);
+	  debug_generic_expr (rhs2_type);
+	  debug_generic_expr (rhs3_type);
+	  debug_generic_expr (rhs4_type);
+	  return true;
+	}
+      if (!useless_type_conversion_p (lhs_type, rhs3_type)
+	  || !useless_type_conversion_p (lhs_type, rhs4_type)
+	  || !useless_type_conversion_p (rhs1_type, rhs2_type))
+	{
+	  error ("type mismatch in %qs", code_name);
+	  debug_generic_expr (lhs_type);
+	  debug_generic_expr (rhs1_type);
+	  debug_generic_expr (rhs2_type);
+	  debug_generic_expr (rhs3_type);
+	  debug_generic_expr (rhs4_type);
+	  return true;
+	}
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  return false;
+}
+
 /* Verify a gimple assignment statement STMT with a single rhs.
    Returns true if anything is wrong.  */
 
@@ -4617,6 +4687,9 @@  verify_gimple_assign (gassign *stmt)
     case GIMPLE_TERNARY_RHS:
       return verify_gimple_assign_ternary (stmt);
 
+    case GIMPLE_QUATERNARY_RHS:
+      return verify_gimple_assign_quaternary (stmt);
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index e4ae1b058fd..a5034dc862b 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4092,7 +4092,7 @@  estimate_operator_cost (enum tree_code code, eni_weights *weights,
     /* Assign cost of 1 to usual operations.
        ??? We may consider mapping RTL costs to this.  */
     case COND_EXPR:
-    case VEC_COND_EXPR:
+    CASE_VEC_COND_EXPR:
     case VEC_PERM_EXPR:
 
     case PLUS_EXPR:
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 221f140b356..a62e524d52f 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -349,6 +349,11 @@  rhs_to_tree (tree type, gimple *stmt)
   enum tree_code code = gimple_assign_rhs_code (stmt);
   switch (get_gimple_rhs_class (code))
     {
+    case GIMPLE_QUATERNARY_RHS:
+      // TODO: use fold_build4_loc
+      return build4_loc (loc, code, type, gimple_assign_rhs1 (stmt),
+			 gimple_assign_rhs2 (stmt), gimple_assign_rhs3 (stmt),
+			 gimple_assign_rhs4 (stmt));
     case GIMPLE_TERNARY_RHS:
       return fold_build3_loc (loc, code, type, gimple_assign_rhs1 (stmt),
 			      gimple_assign_rhs2 (stmt),
@@ -2166,8 +2171,7 @@  simplify_vector_constructor (gimple_stmt_iterator *gsi)
       if (conv_code == ERROR_MARK)
 	gimple_assign_set_rhs_from_tree (gsi, orig[0]);
       else
-	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
-					NULL_TREE, NULL_TREE);
+	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0], NULL_TREE);
     }
   else
     {
@@ -2227,8 +2231,7 @@  simplify_vector_constructor (gimple_stmt_iterator *gsi)
 				   VEC_PERM_EXPR, orig[0], orig[1], op2);
 	  orig[0] = gimple_assign_lhs (perm);
 	  gsi_insert_before (gsi, perm, GSI_SAME_STMT);
-	  gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
-					  NULL_TREE, NULL_TREE);
+	  gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0]);
 	}
     }
   update_stmt (gsi_stmt (*gsi));
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index cd2ced36971..7f2b96a1bf0 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -3177,9 +3177,9 @@  static widest_int
 derive_constant_upper_bound (tree val)
 {
   enum tree_code code;
-  tree op0, op1, op2;
+  tree op0, op1, op2, op3;
 
-  extract_ops_from_tree (val, &code, &op0, &op1, &op2);
+  extract_ops_from_tree (val, &code, &op0, &op1, &op2, &op3);
   return derive_constant_upper_bound_ops (TREE_TYPE (val), op0, code, op1);
 }
 
diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c
index e643b33a6b0..a09229c470e 100644
--- a/gcc/tree-ssa-operands.c
+++ b/gcc/tree-ssa-operands.c
@@ -797,7 +797,6 @@  get_expr_operands (struct function *fn, gimple *stmt, tree *expr_p, int flags)
       return;
 
     case COND_EXPR:
-    case VEC_COND_EXPR:
     case VEC_PERM_EXPR:
       get_expr_operands (fn, stmt, &TREE_OPERAND (expr, 0), uflags);
       get_expr_operands (fn, stmt, &TREE_OPERAND (expr, 1), uflags);
diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index c618601a184..b09a6188c90 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -3875,11 +3875,10 @@  compute_avail (void)
 		      enum tree_code code = gimple_assign_rhs_code (stmt);
 		      vn_nary_op_t nary;
 
-		      /* COND_EXPR and VEC_COND_EXPR are awkward in
+		      /* COND_EXPR and VEC_COND_*_EXPR are awkward in
 			 that they contain an embedded complex expression.
 			 Don't even try to shove those through PRE.  */
-		      if (code == COND_EXPR
-			  || code == VEC_COND_EXPR)
+		      if (code == COND_EXPR || vec_cond_expr_p (code))
 			continue;
 
 		      vn_nary_op_lookup_stmt (stmt, &nary);
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 510dfd1e188..01ec9701bfb 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -3747,7 +3747,7 @@  optimize_range_tests (enum tree_code opcode,
 }
 
 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
-   the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
+   the operands of the VEC_COND_*_EXPR.  Returns ERROR_MARK on failure,
    otherwise the comparison code.  TYPE is a return value that is set
    to type of comparison.  */
 
@@ -3763,7 +3763,7 @@  ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
 
   /* ??? If we start creating more COND_EXPR, we could perform
      this same optimization with them.	For now, simplify.  */
-  if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
+  if (!vec_cond_expr_p (gimple_assign_rhs_code (stmt)))
     return ERROR_MARK;
 
   tree cond = gimple_assign_rhs1 (stmt);
diff --git a/gcc/tree-ssa-scopedtables.c b/gcc/tree-ssa-scopedtables.c
index 574bc30eee1..91d1b10c142 100644
--- a/gcc/tree-ssa-scopedtables.c
+++ b/gcc/tree-ssa-scopedtables.c
@@ -432,6 +432,14 @@  add_hashable_expr (const struct hashable_expr *expr, hash &hstate)
       inchash::add_expr (expr->ops.ternary.opnd2, hstate);
       break;
 
+    case EXPR_QUATERNARY:
+      hstate.add_object (expr->ops.quaternary.op);
+      inchash::add_expr (expr->ops.quaternary.opnd0, hstate);
+      inchash::add_expr (expr->ops.quaternary.opnd1, hstate);
+      inchash::add_expr (expr->ops.quaternary.opnd2, hstate);
+      inchash::add_expr (expr->ops.quaternary.opnd3, hstate);
+      break;
+
     case EXPR_CALL:
       {
         size_t i;
@@ -643,6 +651,19 @@  hashable_expr_equal_p (const struct hashable_expr *expr0,
 	      && operand_equal_p (expr0->ops.ternary.opnd1,
 				  expr1->ops.ternary.opnd0, 0));
 
+    case EXPR_QUATERNARY:
+      if (expr0->ops.quaternary.op != expr1->ops.quaternary.op)
+	return false;
+
+      return (operand_equal_p (expr0->ops.quaternary.opnd0,
+			       expr1->ops.quaternary.opnd0, 0)
+	      && operand_equal_p (expr0->ops.quaternary.opnd1,
+				  expr1->ops.quaternary.opnd1, 0)
+	      && operand_equal_p (expr0->ops.quaternary.opnd2,
+				  expr1->ops.quaternary.opnd2, 0)
+	      && operand_equal_p (expr0->ops.quaternary.opnd3,
+				  expr1->ops.quaternary.opnd3, 0));
+
     case EXPR_CALL:
       {
         size_t i;
@@ -736,7 +757,17 @@  expr_hash_elt::expr_hash_elt (gimple *stmt, tree orig_lhs)
 	  expr->ops.ternary.opnd1 = gimple_assign_rhs2 (stmt);
 	  expr->ops.ternary.opnd2 = gimple_assign_rhs3 (stmt);
 	  break;
-        default:
+	case GIMPLE_QUATERNARY_RHS:
+	  expr->kind = EXPR_QUATERNARY;
+	  expr->type = TREE_TYPE (gimple_assign_lhs (stmt));
+	  expr->ops.quaternary.op = subcode;
+	  expr->ops.quaternary.opnd0 = gimple_assign_rhs1 (stmt);
+	  expr->ops.quaternary.opnd1 = gimple_assign_rhs2 (stmt);
+	  expr->ops.quaternary.opnd2 = gimple_assign_rhs3 (stmt);
+	  expr->ops.quaternary.opnd3 = gimple_assign_rhs4 (stmt);
+	  break;
+
+	default:
           gcc_unreachable ();
         }
     }
@@ -896,6 +927,19 @@  expr_hash_elt::print (FILE *stream)
 	fputs (">", stream);
 	break;
 
+      case EXPR_QUATERNARY:
+	fprintf (stream, " %s <",
+		 get_tree_code_name (m_expr.ops.quaternary.op));
+	print_generic_expr (stream, m_expr.ops.quaternary.opnd0);
+	fputs (", ", stream);
+	print_generic_expr (stream, m_expr.ops.quaternary.opnd1);
+	fputs (", ", stream);
+	print_generic_expr (stream, m_expr.ops.quaternary.opnd2);
+	fputs (", ", stream);
+	print_generic_expr (stream, m_expr.ops.quaternary.opnd3);
+	fputs (">", stream);
+	break;
+
       case EXPR_CALL:
         {
           size_t i;
diff --git a/gcc/tree-ssa-scopedtables.h b/gcc/tree-ssa-scopedtables.h
index 48185006823..e3ed5c5e753 100644
--- a/gcc/tree-ssa-scopedtables.h
+++ b/gcc/tree-ssa-scopedtables.h
@@ -29,6 +29,7 @@  enum expr_kind
   EXPR_UNARY,
   EXPR_BINARY,
   EXPR_TERNARY,
+  EXPR_QUATERNARY,
   EXPR_CALL,
   EXPR_PHI
 };
@@ -42,6 +43,7 @@  struct hashable_expr
     struct { enum tree_code op;  tree opnd; } unary;
     struct { enum tree_code op;  tree opnd0, opnd1; } binary;
     struct { enum tree_code op;  tree opnd0, opnd1, opnd2; } ternary;
+    struct { enum tree_code op;  tree opnd0, opnd1, opnd2, opnd3; } quaternary;
     struct { gcall *fn_from; bool pure; size_t nargs; tree *args; } call;
     struct { size_t nargs; tree *args; } phi;
   } ops;
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 5855653257b..01ab9423b3c 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -373,8 +373,9 @@  expand_vector_addition (gimple_stmt_iterator *gsi,
 
 /* Try to expand vector comparison expression OP0 CODE OP1 by
    querying optab if the following expression:
-	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
+	VEC_COND_CODE_EXPR< OP0, OP1, {-1,...}, {0,...}>
    can be expanded.  */
+
 static tree
 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                           tree op1, enum tree_code code)
@@ -691,12 +692,10 @@  expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
+	      tree zero, cst;
 	      gimple *stmt;
 
-	      mask_type = build_same_sized_truth_vector_type (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -704,7 +703,7 @@  expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
+	      stmt = gimple_build_assign (addend, VEC_COND_LT_EXPR, op0, zero,
 					  cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
@@ -909,41 +908,46 @@  expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 
 /* Expand a vector condition to scalars, by using many conditions
    on the vector's elements.  */
+
 static void
 expand_vector_condition (gimple_stmt_iterator *gsi)
 {
   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
+  tree_code code = vec_cmp_to_cmp_code (gimple_assign_rhs_code (stmt));
   tree type = gimple_expr_type (stmt);
-  tree a = gimple_assign_rhs1 (stmt);
-  tree a1 = a;
-  tree a2 = NULL_TREE;
-  bool a_is_comparison = false;
+  tree a = NULL_TREE;
+  tree a1 = gimple_assign_rhs1 (stmt);
+  tree a2 = gimple_assign_rhs2 (stmt);
+  bool a_is_comparison = true;
   bool a_is_scalar_bitmask = false;
-  tree b = gimple_assign_rhs2 (stmt);
-  tree c = gimple_assign_rhs3 (stmt);
+  tree b = gimple_assign_rhs3 (stmt);
+  tree c = gimple_assign_rhs4 (stmt);
   vec<constructor_elt, va_gc> *v;
   tree constr;
   tree inner_type = TREE_TYPE (type);
-  tree cond_type = TREE_TYPE (TREE_TYPE (a));
-  tree comp_inner_type = cond_type;
+  tree cond_type = TREE_TYPE (TREE_TYPE (a1));
+  tree comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
   tree width = TYPE_SIZE (inner_type);
   tree index = bitsize_int (0);
-  tree comp_width = width;
+  tree comp_width = TYPE_SIZE (comp_inner_type);
   tree comp_index = index;
   int i;
   location_t loc = gimple_location (gsi_stmt (*gsi));
 
-  if (!is_gimple_val (a))
+  if (code == EQ_EXPR
+      && TREE_CODE (a2) == VECTOR_CST
+      && integer_all_onesp (a2))
     {
-      gcc_assert (COMPARISON_CLASS_P (a));
-      a_is_comparison = true;
-      a1 = TREE_OPERAND (a, 0);
-      a2 = TREE_OPERAND (a, 1);
-      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
-      comp_width = TYPE_SIZE (comp_inner_type);
+      a_is_comparison = false;
+      a = a1;
+
+      comp_inner_type = cond_type;
+      comp_width = width;
     }
+  else
+    cond_type = truth_type_for (cond_type);
 
-  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
+  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code))
     return;
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
@@ -997,6 +1001,7 @@  expand_vector_condition (gimple_stmt_iterator *gsi)
     }
 
   int nunits = nunits_for_known_piecewise_op (type);
+
   vec_alloc (v, nunits);
   for (i = 0; i < nunits; i++)
     {
@@ -1009,7 +1014,7 @@  expand_vector_condition (gimple_stmt_iterator *gsi)
 				       comp_width, comp_index);
 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
 				       comp_width, comp_index);
-	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
+	  aa = fold_build2 (code, cond_type, aa1, aa2);
 	}
       else if (a_is_scalar_bitmask)
 	{
@@ -1964,7 +1969,7 @@  expand_vector_operations_1 (gimple_stmt_iterator *gsi)
       return;
     }
 
-  if (code == VEC_COND_EXPR)
+  if (vec_cond_expr_p (code))
     {
       expand_vector_condition (gsi);
       return;
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 9e399cdacee..b884089e086 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4463,7 +4463,7 @@  vect_create_epilog_for_reduction (vec<tree> vect_defs,
       poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
 
       gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info)->stmt;
-      gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
+      gcc_assert (vec_cond_expr_p (gimple_assign_rhs_code (vec_stmt)));
 
       int scalar_precision
 	= GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype)));
@@ -4511,15 +4511,13 @@  vect_create_epilog_for_reduction (vec<tree> vect_defs,
 	 Finally, we update the phi (NEW_PHI_TREE) to take the value of
 	 the new cond_expr (INDEX_COND_EXPR).  */
 
-      /* Duplicate the condition from vec_stmt.  */
-      tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt));
-
       /* Create a conditional, where the condition is taken from vec_stmt
 	 (CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and
 	 else is the phi (NEW_PHI_TREE).  */
-      tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
-				     ccompare, indx_before_incr,
-				     new_phi_tree);
+      tree index_cond_expr
+	= build4 (gimple_assign_rhs_code (vec_stmt), cr_index_vector_type,
+		  gimple_assign_rhs1 (vec_stmt), gimple_assign_rhs2 (vec_stmt),
+		  indx_before_incr, new_phi_tree);
       induction_index = make_ssa_name (cr_index_vector_type);
       gimple *index_condition = gimple_build_assign (induction_index,
 						     index_cond_expr);
@@ -4741,8 +4739,6 @@  vect_create_epilog_for_reduction (vec<tree> vect_defs,
       tree index_vec_type = TREE_TYPE (induction_index);
       gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
       tree index_scalar_type = TREE_TYPE (index_vec_type);
-      tree index_vec_cmp_type = build_same_sized_truth_vector_type
-	(index_vec_type);
 
       /* Get an unsigned integer version of the type of the data vector.  */
       int scalar_precision
@@ -4785,22 +4781,16 @@  vect_create_epilog_for_reduction (vec<tree> vect_defs,
 	 (VEC_COND) with one data value and the rest zeros.
 	 In the case where the loop never made any matches, every index will
 	 match, resulting in a vector with all data values (which will all be
-	 the default value).  */
-
-      /* Compare the max index vector to the vector of found indexes to find
+	 the default value).
+	 Compare the max index vector to the vector of found indexes to find
 	 the position of the max value.  */
-      tree vec_compare = make_ssa_name (index_vec_cmp_type);
-      gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
-						      induction_index,
-						      max_index_vec);
-      gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
 
       /* Use the compare to choose either values from the data vector or
 	 zero.  */
       tree vec_cond = make_ssa_name (vectype);
-      gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
-						   vec_compare, new_phi_result,
-						   zero_vec);
+      gimple *vec_cond_stmt
+	= gimple_build_assign (vec_cond, VEC_COND_EQ_EXPR, induction_index,
+			       max_index_vec, new_phi_result, zero_vec);
       gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
 
       /* Finally we need to extract the data value from the vector (VEC_COND)
@@ -5033,8 +5023,11 @@  vect_create_epilog_for_reduction (vec<tree> vect_defs,
 	     vec = seq ? new_phi_result : vector_identity;
 
 	     VEC is now suitable for a full vector reduction.  */
-	  tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype,
-				   sel, new_phi_result, vector_identity);
+	  tree vec = make_ssa_name (new_phi_result);
+	  gimple *cond_expr
+	    = gimple_build_vec_cond_expr (vec, sel, new_phi_result,
+					  vector_identity);
+	  gimple_seq_add_stmt (&seq, cond_expr);
 
 	  /* Do the reduction and convert it to the appropriate type.  */
 	  tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn),
@@ -5634,8 +5627,7 @@  merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype,
 		     tree vec, tree identity)
 {
   tree cond = make_temp_ssa_name (vectype, NULL, "cond");
-  gimple *new_stmt = gimple_build_assign (cond, VEC_COND_EXPR,
-					  mask, vec, identity);
+  gimple *new_stmt = gimple_build_vec_cond_expr (cond, mask, vec, identity);
   gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
   return cond;
 }
@@ -5936,8 +5928,8 @@  build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask,
 	tree vectype = TREE_TYPE (vop[1]);
 	tree zero = build_zero_cst (vectype);
 	tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
-	gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR,
-					       mask, vop[1], zero);
+	gassign *select
+	  = gimple_build_vec_cond_expr (masked_op1, mask, vop[1], zero);
 	gsi_insert_before (gsi, select, GSI_SAME_STMT);
 	vop[1] = masked_op1;
 	break;
@@ -5947,8 +5939,8 @@  build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask,
       {
 	tree vectype = TREE_TYPE (vop[1]);
 	tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
-	gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR,
-					       mask, vop[1], vop[0]);
+	gassign *select
+	  = gimple_build_vec_cond_expr (masked_op1, mask, vop[1], vop[0]);
 	gsi_insert_before (gsi, select, GSI_SAME_STMT);
 	vop[1] = masked_op1;
 	break;
@@ -6376,7 +6368,7 @@  vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
 	 we're interested in the last element in x_3 for which a_2 || a_3
 	 is true, whereas the current reduction chain handling would
-	 vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3
+	 vectorize x_2 as a normal VEC_COND_*_EXPR and only treat x_3
 	 as a reduction operation.  */
       if (reduc_index == -1)
 	{
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index baa9a4cb8fa..402879c8635 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -3460,7 +3460,7 @@  check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
 	  tree vecitype, comp_vectype;
 
 	  /* If the comparison can throw, then is_gimple_condexpr will be
-	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
+	     false and we can't make a COND_EXPR/VEC_COND_*_EXPR out of it.  */
 	  if (stmt_could_throw_p (cfun, def_stmt))
 	    return false;
 
@@ -3582,7 +3582,7 @@  adjust_bool_pattern (tree var, tree out_type,
 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
 	   S4'  f_T = c_T;
 
-	 At least when VEC_COND_EXPR is implemented using masks
+	 At least when VEC_COND_*_EXPR is implemented using masks
 	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
 	 computes the comparison masks and ands it, in one case with
 	 all ones vector, in the other case with a vector register.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index b1e97f85d96..9d291da705c 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -55,6 +55,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "gimple-fold.h"
 #include "regs.h"
 #include "attribs.h"
+#include "print-tree.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -6419,15 +6420,15 @@  scan_operand_equal_p (tree ref1, tree ref2)
   return true;
 }
 
-
-enum scan_store_kind {
+enum scan_store_kind
+{
   /* Normal permutation.  */
   scan_store_kind_perm,
 
   /* Whole vector left shift permutation with zero init.  */
   scan_store_kind_lshift_zero,
 
-  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
+  /* Whole vector left shift permutation and VEC_COND_*_EXPR.  */
   scan_store_kind_lshift_cond
 };
 
@@ -7095,8 +7096,8 @@  vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 			       ? boolean_false_node : boolean_true_node);
 
 	      tree new_temp2 = make_ssa_name (vectype);
-	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
-				       new_temp, vec_oprnd1);
+	      g = gimple_build_vec_cond_expr (new_temp2, vb.build (), new_temp,
+					      vec_oprnd1);
 	      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
 	      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
 	      prev_stmt_info = new_stmt_info;
@@ -9769,7 +9770,7 @@  vect_is_simple_cond (tree cond, vec_info *vinfo,
 
    Check if STMT_INFO is conditional modify expression that can be vectorized.
    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
-   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
+   stmt using VEC_COND_*_EXPR  to replace it, put it in VEC_STMT, and insert it
    at GSI.
 
    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
@@ -10158,8 +10159,8 @@  vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	    {
 	      new_temp = make_ssa_name (vec_dest);
 	      gassign *new_stmt
-		= gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
-				       vec_then_clause, vec_else_clause);
+		= gimple_build_vec_cond_expr (new_temp, vec_compare,
+					      vec_then_clause, vec_else_clause);
 	      new_stmt_info
 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
 	    }
diff --git a/gcc/tree.def b/gcc/tree.def
index fb6e7344fa6..da9d28359fd 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -551,6 +551,13 @@  DEFTREECODE (VEC_SERIES_EXPR, "vec_series_expr", tcc_binary, 2)
 */
 DEFTREECODE (VEC_COND_EXPR, "vec_cond_expr", tcc_expression, 3)
 
+DEFTREECODE (VEC_COND_LT_EXPR, "vec_cond_lt_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_LE_EXPR, "vec_cond_le_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_GT_EXPR, "vec_cond_gt_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_GE_EXPR, "vec_cond_ge_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_EQ_EXPR, "vec_cond_eq_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_NE_EXPR, "vec_cond_ne_expr", tcc_expression, 4)
+
 /* Vector permutation expression.  A = VEC_PERM_EXPR<v0, v1, mask> means
 
    N = length(mask)
diff --git a/gcc/tree.h b/gcc/tree.h
index c825109b5f7..5512e356b9b 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -5132,6 +5132,70 @@  complete_or_array_type_p (const_tree type)
 	     && COMPLETE_TYPE_P (TREE_TYPE (type)));
 }
 
+/* Map a scalar comparison code CODE (LT_EXPR, LE_EXPR, GT_EXPR, GE_EXPR,
+   EQ_EXPR or NE_EXPR) to the corresponding VEC_COND_*_EXPR tree code.
+   Aborts on any other code; the fancy float comparisons (LTGT_EXPR,
+   UNORDERED_EXPR, ...) are not supported yet.  */
+
+static inline tree_code
+cmp_to_vec_cmp_code (tree_code code)
+{
+  switch (code)
+    {
+    case LT_EXPR:
+      return VEC_COND_LT_EXPR;
+    case LE_EXPR:
+      return VEC_COND_LE_EXPR;
+    case GT_EXPR:
+      return VEC_COND_GT_EXPR;
+    case GE_EXPR:
+      return VEC_COND_GE_EXPR;
+    case EQ_EXPR:
+      return VEC_COND_EQ_EXPR;
+    case NE_EXPR:
+      return VEC_COND_NE_EXPR;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Inverse of cmp_to_vec_cmp_code: map a VEC_COND_*_EXPR tree code CODE
+   back to the scalar comparison code it embeds.  Aborts if CODE is not
+   one of the VEC_COND_*_EXPR codes.  */
+
+static inline tree_code
+vec_cmp_to_cmp_code (tree_code code)
+{
+  switch (code)
+    {
+    case VEC_COND_LT_EXPR:
+      return LT_EXPR;
+    case VEC_COND_LE_EXPR:
+      return LE_EXPR;
+    case VEC_COND_GT_EXPR:
+      return GT_EXPR;
+    case VEC_COND_GE_EXPR:
+      return GE_EXPR;
+    case VEC_COND_EQ_EXPR:
+      return EQ_EXPR;
+    case VEC_COND_NE_EXPR:
+      return NE_EXPR;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Case label sequence covering all VEC_COND_*_EXPR tree codes, for use
+   in switch statements as "CASE_VEC_COND_EXPR:".  Note it deliberately
+   omits the trailing colon so the use site supplies it.  */
+
+#define CASE_VEC_COND_EXPR                                                     \
+  case VEC_COND_LT_EXPR:                                                       \
+  case VEC_COND_LE_EXPR:                                                       \
+  case VEC_COND_GT_EXPR:                                                       \
+  case VEC_COND_GE_EXPR:                                                       \
+  case VEC_COND_EQ_EXPR:                                                       \
+  case VEC_COND_NE_EXPR
+
+/* Return true if CODE is one of the VEC_COND_*_EXPR tree codes.  */
+
+static inline bool
+vec_cond_expr_p (tree_code code)
+{
+  switch (code)
+    {
+    CASE_VEC_COND_EXPR:
+      return true;
+    default:
+      return false;
+    }
+}
+
 /* Return true if the value of T could be represented as a poly_widest_int.  */
 
 inline bool