diff mbox

Vector Comparison patch

Message ID CABYV9SUgzQNs78tsHAdumSacgi+9=1M=wLnFcdE319YvnuH71w@mail.gmail.com
State New
Headers show

Commit Message

Artem Shinkarov Aug. 17, 2011, 6:51 p.m. UTC
On Wed, Aug 17, 2011 at 4:28 PM, Artem Shinkarov
<artyom.shinkaroff@gmail.com> wrote:
> On Wed, Aug 17, 2011 at 3:58 PM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Wed, Aug 17, 2011 at 3:30 PM, Artem Shinkarov
>> <artyom.shinkaroff@gmail.com> wrote:
>>> Hi
>>>
>>> Several comments before the new version of the patch.
>>> 1) x != x
>>> I am happy to adjust constant_boolean_node, but look at the code
>>> around line 9074 in fold-const.c, you will see that x <op> x
>>> elimination, even with adjusted constant_boolean_node, will look about
>>> the same as my code. Because I need to check the parameters (!FLOAT_P,
>>>  HONOR_NANS) on TREE_TYPE (arg0) not arg0, and I need to construct
>>> constant_boolean_node (-1), not 1 in case of true.
>>> But I will change constant_boolean_node to accept vector types.
>>
>> Hm, that should be handled transparently if you look at the defines
>> of FLOAT_TYPE_P and the HONOR_* macros.
>>
>
> Ok, Currently I have this, what do you think:
>      int true_val = TREE_CODE (type) == VECTOR_TYPE ? -1 : 0;
>      tree arg0_type = TREE_CODE (type) == VECTOR_TYPE
>                       ? TREE_TYPE (TREE_TYPE (arg0)) : TREE_TYPE (arg0);
>        switch (code)
>          {
>          case EQ_EXPR:
>            if (! FLOAT_TYPE_P (arg0_type)
>                || ! HONOR_NANS (TYPE_MODE (arg0_type)))
>              return constant_boolean_node (true_val, type);
>            break;
>
>          case GE_EXPR:
>          case LE_EXPR:
>            if (! FLOAT_TYPE_P (arg0_type)
>                || ! HONOR_NANS (TYPE_MODE (arg0_type)))
>              return constant_boolean_node (true_val, type);
>            return fold_build2_loc (loc, EQ_EXPR, type, arg0, arg1);
>
>          case NE_EXPR:
>            /* For NE, we can only do this simplification if integer
>               or we don't honor IEEE floating point NaNs.  */
>            if (FLOAT_TYPE_P (arg0_type)
>                && HONOR_NANS (TYPE_MODE (arg0_type)))
>              break;
>            /* ... fall through ...  */
>          case GT_EXPR:
>          case LT_EXPR:
>            return constant_boolean_node (0, type);
>          default:
>            gcc_unreachable ();
>          }
>
> Works fine for both vector and scalar cases.

Please ignore this comment.

>
>>>
>>> 2) comparison vs vcond
>>> v = v1 < v2;
>>> v = v1 < v2 ? {-1,...} : {0,...};
>>>
>>> are not the same.
>>> 16,25c16,22
>>> <       movdqa  .LC1(%rip), %xmm1
>>> <       pshufd  $225, %xmm1, %xmm1
>>> <       pshufd  $39, %xmm0, %xmm0
>>> <       movss   %xmm2, %xmm1
>>> <       pshufd  $225, %xmm1, %xmm1
>>> <       pcmpgtd %xmm1, %xmm0
>>> <       pcmpeqd %xmm1, %xmm1
>>> <       pcmpeqd %xmm1, %xmm0
>>> <       pand    %xmm1, %xmm0
>>> <       movdqa  %xmm0, -24(%rsp)
>>> ---
>>>>       pshufd  $39, %xmm0, %xmm1
>>>>       movdqa  .LC1(%rip), %xmm0
>>>>       pshufd  $225, %xmm0, %xmm0
>>>>       movss   %xmm2, %xmm0
>>>>       pshufd  $225, %xmm0, %xmm0
>>>>       pcmpgtd %xmm0, %xmm1
>>>>       movdqa  %xmm1, -24(%rsp)
>>>
>>> So I would keep the hook, it could be removed at any time when the
>>> standard expansion will start to work fine.
>>
>> Which one is which?
>
> You must be joking. :)
> The first one (inefficient) is vec0 > vec1 ? {-1,...} : {0,...}
> The second is vec0 > vec1. expand_vec_cond_expr is stupid, which is
> fine, but it means that we need to construct it carefully.

This is still important.

>
>> I'd really like to make this patch simpler at first,
>> and removing that hook is an obvious thing that _should_ be possible,
>> even optimally (by fixing the targets).
>
> Ok, let's remove the hook, then could you provide some more
> information rather than we just need to do it?
>
> Simple in this case means inefficient -- I would hope to make it
> efficient as well.

This is very important.

>>> 3) mask ? vec0 : vec1
>>> So no, I don't think we need to convert {3, 4, -1, 5} to {0,0,-1,0}
>>> (that would surprise my anyway, I'd have expected {-1,-1,-1,-1} ;)).
>>>
>>> Does OpenCL somehow support you here?
>>>
>>> OpenCL says that vector operation mask ? vec0 : vec1 is the same as
>>> select (vec0, vec1, mask). The semantics of select operation is the
>>> following:
>>>
>>> gentype select (gentype a, gentype b, igentype c)
>>> For each component of a vector type,
>>> result[i] = if MSB of c[i] is set ? b[i] : a[i].
>>>
>>> I am not sure what they really understand using the term MSB. As far
>>> as I know MSB is Most Significant Bit, so does it mean that in case of
>>> 3-bit integer 100 would trigger true but 011 would be still false...
>>
>> Yes, MSB is Most Significant Bit - that's a somewhat odd definition ;)
>>
>>> My reading would be that if all bits set, then take the first element,
>>> otherwise the second.
>>>
>>> It is also confusing when  a ? vec0 : vec1, and a != 0 ? vec0 vec1
>>> produce different results. So I would stick to all bits set being true
>>> scenario.
>>
>> For the middle-end part definitely.  Thus I'd simply leave the mask alone.
>>
>
> Well, it seems very unnatural to me. In the case of scalars mask ?
> val0 : val1 would not work the same way as (mask & val0) | (~mask  &
> val1), why should we have the same behaviour for the vector stuff?
>

And that.

>>> 4) Backend stuff. Ok, we could always fall back to reject the cases
>>> when cond and operands have different type, and then fix the backend.
>>>
>>> Adjustments are coming.
>>>
>>>
>>> Artem.
>>>
>>
>
> New issue about transforming cond to cons == {-1, ..} in
> expand_vec_cond_expr. When I do this:
>  icode = get_vcond_icode (vec_cond_type, mode);
>  if (icode == CODE_FOR_nothing)
>    return 0;
>
>  /* If OP0 is not a comparison, adjust it by transforming to
>     the expression OP0 == {-1, -1, ...}  */
>  if (!COMPARISON_CLASS_P (op0))
>    op0 = build2 (EQ_EXPR, TREE_TYPE (op0), op0,
>                  build_vector_from_val (TREE_TYPE (op0),
>                  build_int_cst (TREE_TYPE (TREE_TYPE (op0)), -1)));
>
> I run into the trouble that the constant vector which I insert, cannot
> be expanded, and  compiler fails with assertion.
>
> This happens on my machine:
> Linux temanbk 2.6.38-gentoo-r4 #3 SMP Mon Aug 8 00:32:30 BST 2011
> x86_64 Intel(R) Core(TM)2 Duo CPU T7300 @ 2.00GHz GenuineIntel
> GNU/Linux
>
> When I run a comparison of vectors of 64-bit integers. They are
> lowered in the veclower, but if I insert them in expand_vec_cond_expr,
> I receive an error. However expand_vec_cond_expr_p happily accepts it.

This is solved as well.

And here is a new version of the patch. Tested and bootstrapped.


Artem.

Comments

Joseph Myers Aug. 17, 2011, 9:52 p.m. UTC | #1
On Wed, 17 Aug 2011, Artem Shinkarov wrote:

> +For the convenience condition in the vector conditional can be just a
> +vector of signed integer type. In that case this vector is implicitly
> +compared with vectors of zeroes. Consider an example:

Where is this bit tested in the testcases added?

> +      if (TREE_CODE (type1) != VECTOR_TYPE
> +	  || TREE_CODE (type2) != VECTOR_TYPE)
> +        {
> +          error_at (colon_loc, "vector comparisom arguments must be of "
> +                               "type vector");

"comparison"

> +      /* Avoid C_MAYBE_CONST in VEC_COND_EXPR.  */
> +      sc = c_fully_fold (ifexp, false, &maybe_const);
> +      sc = save_expr (sc);
> +      if (!maybe_const)
> +	ifexp = c_wrap_maybe_const (sc, true);
> +      else
> +	ifexp = sc;

This looks like it's duplicating c_save_expr; that is, like "ifexp = 
c_save_expr (ifexp);" would suffice.

But, it's not clear that it actually achieves the effect described in the 
comment; have you actually tried with function calls, assignments etc. in 
the operands?  The code in build_binary_op uses save_expr rather than 
c_save_expr because it does some intermediate operations before calling 
c_wrap_maybe_const, and if you really want to avoid C_MAYBE_CONST in 
VEC_COND_EXPR then you'll need to continue calling save_expr, as here, but 
delay the call to c_wrap_maybe_const so that the whole VEC_COND_EXPR is 
wrapped if required.
Richard Biener Aug. 18, 2011, 9:23 a.m. UTC | #2
On Wed, Aug 17, 2011 at 8:51 PM, Artem Shinkarov
<artyom.shinkaroff@gmail.com> wrote:
> On Wed, Aug 17, 2011 at 4:28 PM, Artem Shinkarov
> <artyom.shinkaroff@gmail.com> wrote:
>> On Wed, Aug 17, 2011 at 3:58 PM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>> On Wed, Aug 17, 2011 at 3:30 PM, Artem Shinkarov
>>> <artyom.shinkaroff@gmail.com> wrote:
>>>> Hi
>>>>
>>>> Several comments before the new version of the patch.
>>>> 1) x != x
>>>> I am happy to adjust constant_boolean_node, but look at the code
>>>> around line 9074 in fold-const.c, you will see that x <op> x
>>>> elimination, even with adjusted constant_boolean_node, will look about
>>>> the same as my code. Because I need to check the parameters (!FLOAT_P,
>>>>  HONOR_NANS) on TREE_TYPE (arg0) not arg0, and I need to construct
>>>> constant_boolean_node (-1), not 1 in case of true.
>>>> But I will change constant_boolean_node to accept vector types.
>>>
>>> Hm, that should be handled transparently if you look at the defines
>>> of FLOAT_TYPE_P and the HONOR_* macros.
>>>
>>
>> Ok, Currently I have this, what do you think:
>>      int true_val = TREE_CODE (type) == VECTOR_TYPE ? -1 : 0;
>>      tree arg0_type = TREE_CODE (type) == VECTOR_TYPE
>>                       ? TREE_TYPE (TREE_TYPE (arg0)) : TREE_TYPE (arg0);
>>        switch (code)
>>          {
>>          case EQ_EXPR:
>>            if (! FLOAT_TYPE_P (arg0_type)
>>                || ! HONOR_NANS (TYPE_MODE (arg0_type)))
>>              return constant_boolean_node (true_val, type);
>>            break;
>>
>>          case GE_EXPR:
>>          case LE_EXPR:
>>            if (! FLOAT_TYPE_P (arg0_type)
>>                || ! HONOR_NANS (TYPE_MODE (arg0_type)))
>>              return constant_boolean_node (true_val, type);
>>            return fold_build2_loc (loc, EQ_EXPR, type, arg0, arg1);
>>
>>          case NE_EXPR:
>>            /* For NE, we can only do this simplification if integer
>>               or we don't honor IEEE floating point NaNs.  */
>>            if (FLOAT_TYPE_P (arg0_type)
>>                && HONOR_NANS (TYPE_MODE (arg0_type)))
>>              break;
>>            /* ... fall through ...  */
>>          case GT_EXPR:
>>          case LT_EXPR:
>>            return constant_boolean_node (0, type);
>>          default:
>>            gcc_unreachable ();
>>          }
>>
>> Works fine for both vector and scalar cases.
>
> Please ignore this comment.
>
>>
>>>>
>>>> 2) comparison vs vcond
>>>> v = v1 < v2;
>>>> v = v1 < v2 ? {-1,...} : {0,...};
>>>>
>>>> are not the same.
>>>> 16,25c16,22
>>>> <       movdqa  .LC1(%rip), %xmm1
>>>> <       pshufd  $225, %xmm1, %xmm1
>>>> <       pshufd  $39, %xmm0, %xmm0
>>>> <       movss   %xmm2, %xmm1
>>>> <       pshufd  $225, %xmm1, %xmm1
>>>> <       pcmpgtd %xmm1, %xmm0
>>>> <       pcmpeqd %xmm1, %xmm1
>>>> <       pcmpeqd %xmm1, %xmm0
>>>> <       pand    %xmm1, %xmm0
>>>> <       movdqa  %xmm0, -24(%rsp)
>>>> ---
>>>>>       pshufd  $39, %xmm0, %xmm1
>>>>>       movdqa  .LC1(%rip), %xmm0
>>>>>       pshufd  $225, %xmm0, %xmm0
>>>>>       movss   %xmm2, %xmm0
>>>>>       pshufd  $225, %xmm0, %xmm0
>>>>>       pcmpgtd %xmm0, %xmm1
>>>>>       movdqa  %xmm1, -24(%rsp)
>>>>
>>>> So I would keep the hook, it could be removed at any time when the
>>>> standard expansion will start to work fine.
>>>
>>> Which one is which?
>>
>> You must be joking. :)

:)

>> The first one (inefficient) is vec0 > vec1 ? {-1,...} : {0,...}
>> The second is vec0 > vec1. expand_vec_cond_expr is stupid, which is
>> fine, but it means that we need to construct it carefully.
>
> This is still important.

Yes.  I think the backends need to handle optimizing this case,
esp. considering targets that do not have instructions to produce
a {-1,...}/{0,...} bitmask from a comparison but produce a vector
of condition codes.  With using vec0 > vec1 ? {-1...} : {0,...} for
mask = vec0 > vec1; we avoid exposing the result kind of
vector comparisons.

It should be easily possible for x86 for example to recognize
the -1 : 0 case.

>>> I'd really like to make this patch simpler at first,
>>> and removing that hook is an obvious thing that _should_ be possible,
>>> even optimally (by fixing the targets).
>>
>> Ok, let's remove the hook, then could you provide some more
>> information rather than we just need to do it?
>>
>> Simple in this case means inefficient -- I would hope to make it
>> efficient as well.
>
> This is very important.

Yes, and I think the fix is in the backends.  I still think we have to
sort out the best building blocks we want the targets to expose.
Currently we only have the vectorizer vcond patterns which should
be enough to get the C language support implemented.  After that
we should concentrate on generating efficient code for all variants.

>>>> 3) mask ? vec0 : vec1
>>>> So no, I don't think we need to convert {3, 4, -1, 5} to {0,0,-1,0}
>>>> (that would surprise my anyway, I'd have expected {-1,-1,-1,-1} ;)).
>>>>
>>>> Does OpenCL somehow support you here?
>>>>
>>>> OpenCL says that vector operation mask ? vec0 : vec1 is the same as
>>>> select (vec0, vec1, mask). The semantics of select operation is the
>>>> following:
>>>>
>>>> gentype select (gentype a, gentype b, igentype c)
>>>> For each component of a vector type,
>>>> result[i] = if MSB of c[i] is set ? b[i] : a[i].
>>>>
>>>> I am not sure what they really understand using the term MSB. As far
>>>> as I know MSB is Most Significant Bit, so does it mean that in case of
>>>> 3-bit integer 100 would trigger true but 011 would be still false...
>>>
>>> Yes, MSB is Most Significant Bit - that's a somewhat odd definition ;)
>>>
>>>> My reading would be that if all bits set, then take the first element,
>>>> otherwise the second.
>>>>
>>>> It is also confusing when  a ? vec0 : vec1, and a != 0 ? vec0 vec1
>>>> produce different results. So I would stick to all bits set being true
>>>> scenario.
>>>
>>> For the middle-end part definitely.  Thus I'd simply leave the mask alone.
>>>
>>
>> Well, it seems very unnatural to me. In the case of scalars mask ?
>> val0 : val1 would not work the same way as (mask & val0) | (~mask  &
>> val1), why should we have the same behaviour for the vector stuff?
>
> And that.

Yeah, well.  That's really a question for language lawyers ;)  I agree
that it would be nice to have mask ? val0 : val1 behave "the same"
for scalars and vectors.  The question is whether for vectors you
define it on the bit-level (which makes it equal to (mask & val0) |
(~mask & val1))
or on the vector component level.  The vector component level
is probably what people would expect.

Which means we have to treat mask ? val0 : val1 as
mask != {0,...} ? val0 : val1.

>>>> 4) Backend stuff. Ok, we could always fall back to reject the cases
>>>> when cond and operands have different type, and then fix the backend.
>>>>
>>>> Adjustments are coming.
>>>>
>>>>
>>>> Artem.
>>>>
>>>
>>
>> New issue about transforming cond to cons == {-1, ..} in
>> expand_vec_cond_expr. When I do this:
>>  icode = get_vcond_icode (vec_cond_type, mode);
>>  if (icode == CODE_FOR_nothing)
>>    return 0;
>>
>>  /* If OP0 is not a comparison, adjust it by transforming to
>>     the expression OP0 == {-1, -1, ...}  */
>>  if (!COMPARISON_CLASS_P (op0))
>>    op0 = build2 (EQ_EXPR, TREE_TYPE (op0), op0,
>>                  build_vector_from_val (TREE_TYPE (op0),
>>                  build_int_cst (TREE_TYPE (TREE_TYPE (op0)), -1)));
>>
>> I run into the trouble that the constant vector which I insert, cannot
>> be expanded, and  compiler fails with assertion.

I'd use != {0,0,...} as eventually a zero vector is cheaper to construct
and it supports the scalar ?: semantics - whenever the mask element
is non-zero it's true.

>> This happens on my machine:
>> Linux temanbk 2.6.38-gentoo-r4 #3 SMP Mon Aug 8 00:32:30 BST 2011
>> x86_64 Intel(R) Core(TM)2 Duo CPU T7300 @ 2.00GHz GenuineIntel
>> GNU/Linux
>>
>> When I run a comparison of vectors of 64-bit integers. They are
>> lowered in the veclower, but if I insert them in expand_vec_cond_expr,
>> I receive an error. However expand_vec_cond_expr_p happily accepts it.
>
> This is solved as well.
>
> And here is a new version of the patch. Tested and bootstrapped.

I'll look at it later.

Thanks,
Richard.

>
> Artem.
>
Artem Shinkarov Aug. 18, 2011, 10:10 a.m. UTC | #3
> Yes.  I think the backends need to handle optimizing this case,
> esp. considering targets that do not have instructions to produce
> a {-1,...}/{0,...} bitmask from a comparison but produce a vector
> of condition codes.  With using vec0 > vec1 ? {-1...} : {0,...} for
> mask = vec0 > vec1; we avoid exposing the result kind of
> vector comparisons.
>
> It should be easily possible for x86 for example to recognize
> the -1 : 0 case.

Ok, I am fine with this approach. Ho could we check if vector
comparison returns {-1..}/{0..} or something else. If I can check
that, I could adjust expand_vec_cond_exrp, and get rid of the hook.

> Yes, and I think the fix is in the backends.  I still think we have to
> sort out the best building blocks we want the targets to expose.
> Currently we only have the vectorizer vcond patterns which should
> be enough to get the C language support implemented.  After that
> we should concentrate on generating efficient code for all variants.

Ok, see my above comment.

> Yeah, well.  That's really a question for language lawyers ;)  I agree
> that it would be nice to have mask ? val0 : val1 behave "the same"
> for scalars and vectors.  The question is whether for vectors you
> define it on the bit-level (which makes it equal to (mask & val0) |
> (~mask & val1))
> or on the vector component level.  The vector component level
> is probably what people would expect.
>
> Which means we have to treat mask ? val0 : val1 as
> mask != {0,...} ? val0 : val1.

> I'd use != {0,0,...} as eventually a zero vector is cheaper to construct
> and it supports the scalar ?: semantics - whenever the mask element
> is non-zero it's true.

Ok, I am fine with x != {0,...}, I can adjust it in both cases.


Artem.
Artem Shinkarov Aug. 18, 2011, 10:20 a.m. UTC | #4
On Wed, Aug 17, 2011 at 10:52 PM, Joseph S. Myers
<joseph@codesourcery.com> wrote:
> On Wed, 17 Aug 2011, Artem Shinkarov wrote:
>
>> +For the convenience condition in the vector conditional can be just a
>> +vector of signed integer type. In that case this vector is implicitly
>> +compared with vectors of zeroes. Consider an example:
>
> Where is this bit tested in the testcases added?

In the gcc.c-torture/execute/vector-vcond-2.c at the end of test-case:

  /* Condition expressed with a single variable.  */
  dres = l0 ? d0 : d1;
  check_compare (2, dres, l0, ((vector (2, long)){-1,-1}), d0, d1, ==,
"%f", "%i");

  lres = l1 ? l0 : l1;
  check_compare (2, lres, l1, ((vector (2, long)){-1,-1}), l0, l1, ==,
"%i", "%i");

  fres = i0 ? f0 : f1;
  check_compare (4, fres, i0, ((vector (4, int)){-1,-1,-1,-1}),
		 f0, f1, ==, "%f", "%i");

  ires = i1 ? i0 : i1;
  check_compare (4, ires, i1, ((vector (4, int)){-1,-1,-1,-1}),
		 i0, i1, ==, "%i", "%i");

>
>> +      if (TREE_CODE (type1) != VECTOR_TYPE
>> +       || TREE_CODE (type2) != VECTOR_TYPE)
>> +        {
>> +          error_at (colon_loc, "vector comparisom arguments must be of "
>> +                               "type vector");
>
> "comparison"

Thanks, adjusted.

>> +      /* Avoid C_MAYBE_CONST in VEC_COND_EXPR.  */
>> +      sc = c_fully_fold (ifexp, false, &maybe_const);
>> +      sc = save_expr (sc);
>> +      if (!maybe_const)
>> +     ifexp = c_wrap_maybe_const (sc, true);
>> +      else
>> +     ifexp = sc;
>
> This looks like it's duplicating c_save_expr; that is, like "ifexp =
> c_save_expr (ifexp);" would suffice.
>
> But, it's not clear that it actually achieves the effect described in the
> comment; have you actually tried with function calls, assignments etc. in
> the operands?

I tested it with gcc.dg/vector-compare-2.c:
typedef int vec __attribute__((vector_size(16)));

vec i,j;
extern vec a, b, c;

vec
foo (int x)
{
  return (x ? i : j) ? a : b;
}

vec
bar (int x)
{
  return a ? (x ? i : j) : b;
}

vec
baz (int x)
{
  return a ? b : (x ? i : j);
}

Is it good enough?

> The code in build_binary_op uses save_expr rather than
> c_save_expr because it does some intermediate operations before calling
> c_wrap_maybe_const, and if you really want to avoid C_MAYBE_CONST in
> VEC_COND_EXPR then you'll need to continue calling save_expr, as here, but
> delay the call to c_wrap_maybe_const so that the whole VEC_COND_EXPR is
> wrapped if required.

Ok, but I need to wrap it at some point, where do you think it would
be appropriate to do?


Thanks,
Artem.
Joseph Myers Aug. 18, 2011, 2:03 p.m. UTC | #5
On Thu, 18 Aug 2011, Artem Shinkarov wrote:

> >> +      /* Avoid C_MAYBE_CONST in VEC_COND_EXPR.  */
> >> +      sc = c_fully_fold (ifexp, false, &maybe_const);
> >> +      sc = save_expr (sc);
> >> +      if (!maybe_const)
> >> +     ifexp = c_wrap_maybe_const (sc, true);
> >> +      else
> >> +     ifexp = sc;
> >
> > This looks like it's duplicating c_save_expr; that is, like "ifexp =
> > c_save_expr (ifexp);" would suffice.
> >
> > But, it's not clear that it actually achieves the effect described in the
> > comment; have you actually tried with function calls, assignments etc. in
> > the operands?
> 
> I tested it with gcc.dg/vector-compare-2.c:
> typedef int vec __attribute__((vector_size(16)));
> 
> vec i,j;
> extern vec a, b, c;
> 
> vec
> foo (int x)
> {
>   return (x ? i : j) ? a : b;
> }
> 
> vec
> bar (int x)
> {
>   return a ? (x ? i : j) : b;
> }
> 
> vec
> baz (int x)
> {
>   return a ? b : (x ? i : j);
> }
> 
> Is it good enough?

No, because none of the operands there involve assignment, increment, 
decrement, function call or comma operators (which are the main cases that 
would trigger the creation of C_MAYBE_CONST_EXPR).

> > The code in build_binary_op uses save_expr rather than
> > c_save_expr because it does some intermediate operations before calling
> > c_wrap_maybe_const, and if you really want to avoid C_MAYBE_CONST in
> > VEC_COND_EXPR then you'll need to continue calling save_expr, as here, but
> > delay the call to c_wrap_maybe_const so that the whole VEC_COND_EXPR is
> > wrapped if required.
> 
> Ok, but I need to wrap it at some point, where do you think it would
> be appropriate to do?

After the creation of the VEC_COND_EXPR.  I.e. don't just return the 
results of build3 or convert, wrap them as needed before returning them.
Richard Henderson Aug. 18, 2011, 2:31 p.m. UTC | #6
On 08/18/2011 02:23 AM, Richard Guenther wrote:
>>> >> The first one (inefficient) is vec0 > vec1 ? {-1,...} : {0,...}
>>> >> The second is vec0 > vec1. expand_vec_cond_expr is stupid, which is
>>> >> fine, but it means that we need to construct it carefully.
>> >
>> > This is still important.
> Yes.  I think the backends need to handle optimizing this case,
> esp. considering targets that do not have instructions to produce
> a {-1,...}/{0,...} bitmask from a comparison but produce a vector
> of condition codes.  With using vec0 > vec1 ? {-1...} : {0,...} for
> mask = vec0 > vec1; we avoid exposing the result kind of
> vector comparisons.
> 
> It should be easily possible for x86 for example to recognize
> the -1 : 0 case.
> 

I think you've been glossing over the hard part with "..." up there.
I challenge you to actually fill that in with something meaningful
in rtl.

I suspect that you simply have to add another named pattern that
will Do What You Want on mips and suchlike that produce a CCmode.



r~
Paolo Bonzini Aug. 29, 2011, 11:41 a.m. UTC | #7
On 08/18/2011 11:23 AM, Richard Guenther wrote:
> Yeah, well.  That's really a question for language lawyers;)   I agree
> that it would be nice to have mask ? val0 : val1 behave "the same"
> for scalars and vectors.  The question is whether for vectors you
> define it on the bit-level (which makes it equal to (mask&  val0) |
> (~mask&  val1))
> or on the vector component level.  The vector component level
> is probably what people would expect.
>
> Which means we have to treat mask ? val0 : val1 as
> mask != {0,...} ? val0 : val1.

The definition in OpenCL makes zero sense to me.  For byte operands it 
is custom-tailored after the SSE PMOVMSKB instruction, but there is no 
PMOVMSKW/PMOVMSKD instruction so you would need very slow bit shift 
operations before PMOVMSK.  On the other hand, bit selection is for 
example in Altivec.

Do we have some way to contact anyone in the OpenCL standards group 
(CCing Chris Lattner)?

If you wanted to implement it, it would be mask < {0,...} ? val0 : val1. 
  But really, since we're not implementing OpenCL C I would really 
prefer to have bit-level selection, and let a front-end implement the quirk.

Paolo
Richard Henderson Sept. 16, 2011, 5:05 p.m. UTC | #8
On 08/29/2011 04:41 AM, Paolo Bonzini wrote:
> The definition in OpenCL makes zero sense to me.  For byte operands
> it is custom-tailored after the SSE PMOVMSKB instruction, but there
> is no PMOVMSKW/PMOVMSKD instruction so you would need very slow bit
> shift operations before PMOVMSK.  On the other hand, bit selection is
> for example in Altivec.

Not PMOVMSKB, but the sse4.1 PBLENDVB.

With that, we don't need funny shift operations for wider integer
types, but only *because* the comparison produces -1, which means
that the MSB of each byte is in fact set.

Which means that the Perfect wording probably doesn't want to be
specific to bit selection, but include bit selection (aka and-andn-or)
as a valid implementation.



r~
diff mbox

Patch

Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi	(revision 177665)
+++ gcc/doc/extend.texi	(working copy)
@@ -6553,6 +6553,97 @@  invoke undefined behavior at runtime.  W
 accesses for vector subscription can be enabled with
 @option{-Warray-bounds}.
 
+In C vector comparison is supported within standard comparison operators:
+@code{==, !=, <, <=, >, >=}. Both integer-type and real-type vectors
+can be compared but only of the same type. The result of the
+comparison is a signed integer-type vector where the size of each
+element must be the same as the size of compared vectors element.
+Comparison is happening element by element. False value is 0, true
+value is -1 (constant of the appropriate type where all bits are set).
+Consider the following example.
+
+@smallexample
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si a = @{1,2,3,4@};
+v4si b = @{3,2,1,4@};
+v4si c;
+
+c = a >  b;     /* The result would be @{0, 0,-1, 0@}  */
+c = a == b;     /* The result would be @{0,-1, 0,-1@}  */
+@end smallexample
+
+In addition to the vector comparison C supports conditional expressions
+where the condition is a vector of signed integers. In that case result
+of the condition is used as a mask to select either from the first 
+operand or from the second. Consider the following example:
+
+@smallexample
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si a = @{1,2,3,4@};
+v4si b = @{3,2,1,7@};
+v4si c = @{2,3,4,5@};
+v4si d = @{6,7,8,9@};
+v4si res;
+
+res = a >= b ? c : d;  /* res would contain @{6, 3, 4, 9@}  */
+@end smallexample
+
+The number of elements in the condition must be the same as number of
+elements in the both operands. The same stands for the size of the type
+of the elements. The type of the vector conditional is determined by
+the types of the operands which must be the same. Consider an example:
+
+@smallexample
+typedef int  v4si __attribute__ ((vector_size (16)));
+typedef float v4f __attribute__ ((vector_size (16)));
+
+v4si a = @{1,2,3,4@};
+v4si b = @{2,3,4,5@};
+v4f f = @{1.,  5., 7., -8.@};
+v4f g = @{3., -2., 8.,  1.@};
+v4si ires;
+v4f fres;
+
+fres = a <= b ? f : g;  /* fres would contain @{1., 5., 7., -8.@}  */
+ires = f <= g ? a : b;  /* fres would contain @{1,  3,  3,   4@}  */
+@end smallexample
+
+For the convenience condition in the vector conditional can be just a
+vector of signed integer type. In that case this vector is implicitly
+compared with vectors of zeroes. Consider an example:
+
+@smallexample
+typedef int  v4si __attribute__ ((vector_size (16)));
+
+v4si a = @{1,0,3,0@};
+v4si b = @{2,3,4,5@};
+v4si ires;
+
+ires = a ? b : a;  /* synonym for ires = a != @{0,0,0,0@} ? a :b;  */
+@end smallexample
+
+Pleas note that the conditional where the operands are vectors and the
+condition is integer works in a standard way -- returns first operand
+if the condition is true and second otherwise. Consider an example:
+
+@smallexample
+typedef int  v4si __attribute__ ((vector_size (16)));
+
+v4si a = @{1,0,3,0@};
+v4si b = @{2,3,4,5@};
+v4si ires;
+int x,y;
+
+/* standard conditional returning A or B  */
+ires = x > y ? a : b;  
+
+/* vector conditional where the condition is (x > y ? a : b)  */
+ires = (x > y ? a : b) ? b : a; 
+@end smallexample
+
+
 You can declare variables and use them in function calls and returns, as
 well as in assignments and some casts.  You can specify a vector type as
 a return type for a function.  Vector types can also be used as function
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi	(revision 177665)
+++ gcc/doc/tm.texi	(working copy)
@@ -5738,6 +5738,10 @@  misalignment value (@var{misalign}).
 Return true if vector alignment is reachable (by peeling N iterations) for the given type.
 @end deftypefn
 
+@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VEC_COMPARE (gimple_stmt_iterator *@var{gsi}, tree @var{type}, tree @var{v0}, tree @var{v1}, enum tree_code @var{code})
+This hook should check whether it is possible to express vectorcomparison using the hardware-specific instructions and return resulttree. Hook should return NULL_TREE if expansion is impossible.
+@end deftypefn
+
 @deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VEC_PERM (tree @var{type}, tree *@var{mask_element_type})
 Target builtin that implements vector permute.
 @end deftypefn
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in	(revision 177665)
+++ gcc/doc/tm.texi.in	(working copy)
@@ -5676,6 +5676,8 @@  misalignment value (@var{misalign}).
 Return true if vector alignment is reachable (by peeling N iterations) for the given type.
 @end deftypefn
 
+@hook TARGET_VECTORIZE_BUILTIN_VEC_COMPARE
+
 @hook TARGET_VECTORIZE_BUILTIN_VEC_PERM
 Target builtin that implements vector permute.
 @end deftypefn
Index: gcc/targhooks.c
===================================================================
--- gcc/targhooks.c	(revision 177665)
+++ gcc/targhooks.c	(working copy)
@@ -969,6 +969,18 @@  default_builtin_vector_alignment_reachab
   return true;
 }
 
+/* Replaces vector comparison with the target-specific instructions 
+   and returns the resulting variable or NULL_TREE otherwise.  */
+tree 
+default_builtin_vec_compare (gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, 
+                             tree type ATTRIBUTE_UNUSED, 
+                             tree v0 ATTRIBUTE_UNUSED, 
+                             tree v1 ATTRIBUTE_UNUSED, 
+                             enum tree_code code ATTRIBUTE_UNUSED)
+{
+  return NULL_TREE;
+}
+
 /* By default, assume that a target supports any factor of misalignment
    memory access if it supports movmisalign patten.
    is_packed is true if the memory access is defined in a packed struct.  */
Index: gcc/targhooks.h
===================================================================
--- gcc/targhooks.h	(revision 177665)
+++ gcc/targhooks.h	(working copy)
@@ -86,6 +86,11 @@  extern int default_builtin_vectorization
 extern tree default_builtin_reciprocal (unsigned int, bool, bool);
 
 extern bool default_builtin_vector_alignment_reachable (const_tree, bool);
+
+extern tree default_builtin_vec_compare (gimple_stmt_iterator *gsi, 
+                                         tree type, tree v0, tree v1, 
+                                         enum tree_code code);
+
 extern bool
 default_builtin_support_vector_misalignment (enum machine_mode mode,
 					     const_tree,
Index: gcc/target.def
===================================================================
--- gcc/target.def	(revision 177665)
+++ gcc/target.def	(working copy)
@@ -988,6 +988,15 @@  DEFHOOK
  bool, (tree vec_type, tree mask),
  hook_bool_tree_tree_true)
 
+/* Implement hardware vector comparison or return false.  */
+DEFHOOK
+(builtin_vec_compare,
+ "This hook should check whether it is possible to express vector\
+comparison using the hardware-specific instructions and return result\
+tree. Hook should return NULL_TREE if expansion is impossible.",
+ tree, (gimple_stmt_iterator *gsi, tree type, tree v0, tree v1, enum tree_code code),
+ default_builtin_vec_compare)
+
 /* Return true if the target supports misaligned store/load of a
    specific factor denoted in the third parameter.  The last parameter
    is true if the access is defined in a packed struct.  */
Index: gcc/optabs.c
===================================================================
--- gcc/optabs.c	(revision 177665)
+++ gcc/optabs.c	(working copy)
@@ -6572,6 +6572,13 @@  expand_vec_cond_expr (tree vec_cond_type
   if (icode == CODE_FOR_nothing)
     return 0;
 
+  /* If OP0 is not a comparison, adjust it by transforming to 
+     the expression OP0 == {-1, -1, ...}  */
+  if (!COMPARISON_CLASS_P (op0))
+    op0 = build2 (EQ_EXPR, TREE_TYPE (op0), op0,
+		  build_vector_from_val (TREE_TYPE (op0),
+		  build_int_cst (TREE_TYPE (TREE_TYPE (op0)), -1)));
+  
   comparison = vector_compare_rtx (op0, unsignedp, icode);
   rtx_op1 = expand_normal (op1);
   rtx_op2 = expand_normal (op2);
Index: gcc/target.h
===================================================================
--- gcc/target.h	(revision 177665)
+++ gcc/target.h	(working copy)
@@ -51,6 +51,7 @@ 
 #define GCC_TARGET_H
 
 #include "insn-modes.h"
+#include "gimple.h"
 
 #ifdef ENABLE_CHECKING
 
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 177665)
+++ gcc/fold-const.c	(working copy)
@@ -5930,12 +5930,21 @@  extract_muldiv_1 (tree t, tree c, enum t
 }
 
 /* Return a node which has the indicated constant VALUE (either 0 or
-   1), and is of the indicated TYPE.  */
+   1 for scalars and is either {-1,-1,..} or {0,0,...} for vectors), 
+   and is of the indicated TYPE.  */
 
 tree
 constant_boolean_node (int value, tree type)
 {
-  if (type == integer_type_node)
+  if (TREE_CODE (type) == VECTOR_TYPE)
+    {
+      tree tval;
+      
+      gcc_assert (TREE_CODE (TREE_TYPE (type)) == INTEGER_TYPE);
+      tval = build_int_cst (TREE_TYPE (type), value);
+      return build_vector_from_val (type, tval);
+    }
+  else if (type == integer_type_node)
     return value ? integer_one_node : integer_zero_node;
   else if (type == boolean_type_node)
     return value ? boolean_true_node : boolean_false_node;
@@ -9073,26 +9082,29 @@  fold_comparison (location_t loc, enum tr
      floating-point, we can only do some of these simplifications.)  */
   if (operand_equal_p (arg0, arg1, 0))
     {
+      int true_val = TREE_CODE (type) == VECTOR_TYPE ? -1 : 0;
+      tree arg0_type = TREE_TYPE (arg0);
+      
       switch (code)
 	{
 	case EQ_EXPR:
-	  if (! FLOAT_TYPE_P (TREE_TYPE (arg0))
-	      || ! HONOR_NANS (TYPE_MODE (TREE_TYPE (arg0))))
-	    return constant_boolean_node (1, type);
+	  if (! FLOAT_TYPE_P (arg0_type)
+	      || ! HONOR_NANS (TYPE_MODE (arg0_type)))
+	    return constant_boolean_node (true_val, type);
 	  break;
 
 	case GE_EXPR:
 	case LE_EXPR:
-	  if (! FLOAT_TYPE_P (TREE_TYPE (arg0))
-	      || ! HONOR_NANS (TYPE_MODE (TREE_TYPE (arg0))))
-	    return constant_boolean_node (1, type);
+	  if (! FLOAT_TYPE_P (arg0_type)
+	      || ! HONOR_NANS (TYPE_MODE (arg0_type)))
+	    return constant_boolean_node (true_val, type);
 	  return fold_build2_loc (loc, EQ_EXPR, type, arg0, arg1);
 
 	case NE_EXPR:
 	  /* For NE, we can only do this simplification if integer
 	     or we don't honor IEEE floating point NaNs.  */
-	  if (FLOAT_TYPE_P (TREE_TYPE (arg0))
-	      && HONOR_NANS (TYPE_MODE (TREE_TYPE (arg0))))
+	  if (FLOAT_TYPE_P (arg0_type)
+	      && HONOR_NANS (TYPE_MODE (arg0_type)))
 	    break;
 	  /* ... fall through ...  */
 	case GT_EXPR:
Index: gcc/testsuite/gcc.c-torture/execute/vector-vcond-2.c
===================================================================
--- gcc/testsuite/gcc.c-torture/execute/vector-vcond-2.c	(revision 0)
+++ gcc/testsuite/gcc.c-torture/execute/vector-vcond-2.c	(revision 0)
@@ -0,0 +1,78 @@ 
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define check_compare(count, res, i0, i1, c0, c1, op, fmt0, fmt1) \
+do { \
+    int __i; \
+    for (__i = 0; __i < count; __i ++) { \
+        if ((res)[__i] != \
+                ((i0)[__i] op (i1)[__i]  \
+		? (c0)[__i] : (c1)[__i]))  \
+	{ \
+            __builtin_printf (fmt0 " != (" fmt1 " " #op " " fmt1 " ? " \
+			      fmt0 " : " fmt0 ")", \
+	    (res)[__i], (i0)[__i], (i1)[__i],\
+	    (c0)[__i], (c1)[__i]); \
+            __builtin_abort (); \
+        } \
+    } \
+} while (0)
+
+#define test(count, v0, v1, c0, c1, res, fmt0, fmt1); \
+do { \
+    res = (v0 > v1) ? c0: c1; \
+    check_compare (count, res, v0, v1, c0, c1, >, fmt0, fmt1); \
+    res = (v0 >= v1) ? c0: c1; \
+    check_compare (count, res, v0, v1, c0, c1, >=, fmt0, fmt1); \
+    res = (v0 < v1) ? c0: c1; \
+    check_compare (count, res, v0, v1, c0, c1, <, fmt0, fmt1); \
+    res = (v0 <= v1) ? c0: c1; \
+    check_compare (count, res, v0, v1, c0, c1, <=, fmt0, fmt1); \
+    res = (v0 == v1) ? c0: c1; \
+    check_compare (count, res, v0, v1, c0, c1, ==, fmt0, fmt1); \
+    res = (v0 != v1) ? c0: c1; \
+    check_compare (count, res, v0, v1, c0, c1, !=, fmt0, fmt1); \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+  vector (4, int) i0 = {argc, 1,  2,  10}; 
+  vector (4, int) i1 = {0, argc, 2, (int)-23};
+  vector (4, int) ires;
+  vector (4, float) f0 = {1., 7., (float)argc, 4.};
+  vector (4, float) f1 = {6., 2., 8., (float)argc};
+  vector (4, float) fres;
+
+  vector (2, double) d0 = {1., (double)argc};
+  vector (2, double) d1 = {6., 2.};
+  vector (2, double) dres;
+  vector (2, long) l0 = {argc, 3};
+  vector (2, long) l1 = {5,  8};
+  vector (2, long) lres;
+  
+  /* Thes tests work fine.  */
+  test (4, i0, i1, f0, f1, fres, "%f", "%i");
+  test (4, f0, f1, i0, i1, ires, "%i", "%f");
+  test (2, d0, d1, l0, l1, lres, "%i", "%f");
+  test (2, l0, l1, d0, d1, dres, "%f", "%i");
+
+  /* Condition expressed with a single variable.  */
+  dres = l0 ? d0 : d1;
+  check_compare (2, dres, l0, ((vector (2, long)){-1,-1}), d0, d1, ==, "%f", "%i");
+  
+  lres = l1 ? l0 : l1;
+  check_compare (2, lres, l1, ((vector (2, long)){-1,-1}), l0, l1, ==, "%i", "%i");
+ 
+  fres = i0 ? f0 : f1;
+  check_compare (4, fres, i0, ((vector (4, int)){-1,-1,-1,-1}), 
+		 f0, f1, ==, "%f", "%i");
+
+  ires = i1 ? i0 : i1;
+  check_compare (4, ires, i1, ((vector (4, int)){-1,-1,-1,-1}), 
+		 i0, i1, ==, "%i", "%i");
+
+  return 0;
+}
+
Index: gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c
===================================================================
--- gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c	(revision 0)
+++ gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c	(revision 0)
@@ -0,0 +1,123 @@ 
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define check_compare(count, res, i0, i1, op, fmt) \
+do { \
+    int __i; \
+    for (__i = 0; __i < count; __i ++) { \
+      if ((res)[__i] != ((i0)[__i] op (i1)[__i] ? -1 : 0)) \
+	{ \
+            __builtin_printf ("%i != ((" fmt " " #op " " fmt " ? -1 : 0) ", \
+			      (res)[__i], (i0)[__i], (i1)[__i]); \
+            __builtin_abort (); \
+        } \
+    } \
+} while (0)
+
+#define test(count, v0, v1, res, fmt); \
+do { \
+    res = (v0 > v1); \
+    check_compare (count, res, v0, v1, >, fmt); \
+    res = (v0 < v1); \
+    check_compare (count, res, v0, v1, <, fmt); \
+    res = (v0 >= v1); \
+    check_compare (count, res, v0, v1, >=, fmt); \
+    res = (v0 <= v1); \
+    check_compare (count, res, v0, v1, <=, fmt); \
+    res = (v0 == v1); \
+    check_compare (count, res, v0, v1, ==, fmt); \
+    res = (v0 != v1); \
+    check_compare (count, res, v0, v1, !=, fmt); \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+#define INT  int
+    vector (4, INT) i0;
+    vector (4, INT) i1;
+    vector (4, int) ires;
+    int i;
+
+    i0 = (vector (4, INT)){argc, 1,  2,  10};
+    i1 = (vector (4, INT)){0, 3, 2, (INT)-23};    
+    test (4, i0, i1, ires, "%i");
+#undef INT
+
+#define INT unsigned int 
+    vector (4, int) ures;
+    vector (4, INT) u0;
+    vector (4, INT) u1;
+
+    u0 = (vector (4, INT)){argc, 1,  2,  10};
+    u1 = (vector (4, INT)){0, 3, 2, (INT)-23};    
+    test (4, u0, u1, ures, "%u");
+#undef INT
+
+
+#define SHORT short
+    vector (8, SHORT) s0;
+    vector (8, SHORT) s1;
+    vector (8, short) sres;
+
+    s0 = (vector (8, SHORT)){argc, 1,  2,  10,  6, 87, (SHORT)-5, 2};
+    s1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0};    
+    test (8, s0, s1, sres, "%i");
+#undef SHORT
+
+#define SHORT unsigned short
+    vector (8, SHORT) us0;
+    vector (8, SHORT) us1;
+    vector (8, short) usres;
+
+    us0 = (vector (8, SHORT)){argc, 1,  2,  10,  6, 87, (SHORT)-5, 2};
+    us1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0};    
+    test (8, us0, us1, usres, "%u");
+#undef SHORT
+
+#define CHAR signed char
+    vector (16, CHAR) c0;
+    vector (16, CHAR) c1;
+    vector (16, signed char) cres;
+
+    c0 = (vector (16, CHAR)){argc, 1,  2,  10,  6, 87, (CHAR)-5, 2, \
+                             argc, 1,  2,  10,  6, 87, (CHAR)-5, 2 };
+
+    c1 = (vector (16, CHAR)){0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0, \
+                             0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0};
+    test (16, c0, c1, cres, "%i");
+#undef CHAR
+
+#define CHAR unsigned char
+    vector (16, CHAR) uc0;
+    vector (16, CHAR) uc1;
+    vector (16, signed char) ucres;
+
+    uc0 = (vector (16, CHAR)){argc, 1,  2,  10,  6, 87, (CHAR)-5, 2, \
+                             argc, 1,  2,  10,  6, 87, (CHAR)-5, 2 };
+
+    uc1 = (vector (16, CHAR)){0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0, \
+                             0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0};
+    test (16, uc0, uc1, ucres, "%u");
+#undef CHAR
+/* Float comparison.  */
+    vector (4, float) f0;
+    vector (4, float) f1;
+    vector (4, int) ifres;
+
+    f0 = (vector (4, float)){(float)argc, 1.,  2.,  10.};
+    f1 = (vector (4, float)){0., 3., 2., (float)-23};    
+    test (4, f0, f1, ifres, "%f");
+    
+/* Double comparison.  */
+    vector (2, double) d0;
+    vector (2, double) d1;
+    vector (2, long) idres;
+
+    d0 = (vector (2, double)){(double)argc,  10.};
+    d1 = (vector (2, double)){0., (double)-23};    
+    test (2, d0, d1, idres, "%f");
+
+
+    return 0;
+}
+
Index: gcc/testsuite/gcc.c-torture/execute/vector-vcond-1.c
===================================================================
--- gcc/testsuite/gcc.c-torture/execute/vector-vcond-1.c	(revision 0)
+++ gcc/testsuite/gcc.c-torture/execute/vector-vcond-1.c	(revision 0)
@@ -0,0 +1,154 @@ 
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define check_compare(type, count, res, i0, i1, c0, c1, op, fmt) \
+do { \
+    int __i; \
+    for (__i = 0; __i < count; __i ++) { \
+        if (vidx (type, res, __i) != \
+                ((vidx (type, i0, __i) op vidx (type, i1, __i))  \
+		? vidx (type, c0, __i) : vidx (type, c1, __i)))  \
+	{ \
+            __builtin_printf (fmt " != ((" fmt " " #op " " fmt ") ? " fmt " : " fmt ")", \
+	    vidx (type, res, __i), vidx (type, i0, __i), vidx (type, i1, __i),\
+	    vidx (type, c0, __i), vidx (type, c1, __i)); \
+            __builtin_abort (); \
+        } \
+    } \
+} while (0)
+
+#define test(type, count, v0, v1, c0, c1, res, fmt); \
+do { \
+    res = (v0 > v1) ? c0: c1; \
+    check_compare (type, count, res, v0, v1, c0, c1, >, fmt); \
+    res = (v0 >= v1) ? c0: c1; \
+    check_compare (type, count, res, v0, v1, c0, c1, >=, fmt); \
+    res = (v0 < v1) ? c0: c1; \
+    check_compare (type, count, res, v0, v1, c0, c1, <, fmt); \
+    res = (v0 <= v1) ? c0: c1; \
+    check_compare (type, count, res, v0, v1, c0, c1, <=, fmt); \
+    res = (v0 == v1) ? c0: c1; \
+    check_compare (type, count, res, v0, v1, c0, c1, ==, fmt); \
+    res = (v0 != v1) ? c0: c1; \
+    check_compare (type, count, res, v0, v1, c0, c1, !=, fmt); \
+} while (0)
+
+int main (int argc, char *argv[]) {
+#define INT  int
+    vector (4, INT) i0; vector (4, INT) i1;
+    vector (4, INT) ic0; vector (4, INT) ic1;
+    vector (4, INT) ires;
+
+    i0 = (vector (4, INT)){argc, 1,  2,  10};
+    i1 = (vector (4, INT)){0, 3, 2, (INT)-23};    
+    
+    ic0 = (vector (4, INT)){1, argc,  argc,  10};
+    ic1 = (vector (4, INT)){2, 3, argc, (INT)-23};    
+    test (INT, 4, i0, i1, ic0, ic1, ires, "%i");
+#undef INT
+
+#define INT  unsigned int
+    vector (4, INT) ui0; vector (4, INT) ui1;
+    vector (4, INT) uic0; vector (4, INT) uic1;
+    vector (4, INT) uires;
+
+    ui0 = (vector (4, INT)){argc, 1,  2,  10};
+    ui1 = (vector (4, INT)){0, 3, 2, (INT)-23};    
+    
+    uic0 = (vector (4, INT)){1, argc,  argc,  10};
+    uic1 = (vector (4, INT)){2, 3, argc, (INT)-23};    
+    test (INT, 4, ui0, ui1, uic0, uic1, uires, "%u");
+#undef INT
+
+#define SHORT short
+    vector (8, SHORT) s0;   vector (8, SHORT) s1;
+    vector (8, SHORT) sc0;   vector (8, SHORT) sc1;
+    vector (8, short) sres;
+
+    s0 = (vector (8, SHORT)){argc, 1,  2,  10,  6, 87, (SHORT)-5, 2};
+    s1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0};
+    
+    sc0 = (vector (8, SHORT)){argc, 1,  argc,  10,  6, 87, (SHORT)-5, argc};
+    sc1= (vector (8, SHORT)){0, 5, 2, (SHORT)-23, 2, 10, (SHORT)-2, argc};
+
+    test (SHORT, 8, s0, s1, sc0, sc1, sres, "%i");
+#undef SHORT
+
+#define SHORT unsigned short
+    vector (8, SHORT) us0;   vector (8, SHORT) us1;
+    vector (8, SHORT) usc0;   vector (8, SHORT) usc1;
+    vector (8, SHORT) usres;
+
+    us0 = (vector (8, SHORT)){argc, 1,  2,  10,  6, 87, (SHORT)-5, 2};
+    us1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0};
+    
+    usc0 = (vector (8, SHORT)){argc, 1,  argc,  10,  6, 87, (SHORT)-5, argc};
+    usc1= (vector (8, SHORT)){0, 5, 2, (SHORT)-23, 2, 10, (SHORT)-2, argc};
+
+    test (SHORT, 8, us0, us1, usc0, usc1, usres, "%u");
+#undef SHORT
+
+#define CHAR signed char
+    vector (16, CHAR) c0;   vector (16, CHAR) c1;
+    vector (16, CHAR) cc0;   vector (16, CHAR) cc1;
+    vector (16, CHAR) cres;
+
+    c0 = (vector (16, CHAR)){argc, 1,  2,  4,  7, 87, (CHAR)-5, 2, \
+                             argc, 1,  3,  18,  6, 87, (CHAR)-5, 2 };
+
+    c1 = (vector (16, CHAR)){0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0, \
+                             0, 3, 2, (CHAR)-5, 28, 10, (CHAR)-2, 0};
+    
+    cc0 = (vector (16, CHAR)){argc, 1,  argc,  4,  7, 87, (CHAR)-23, 2, \
+                             33, 8,  3,  18,  6, 87, (CHAR)-5, 41 };
+
+    cc1 = (vector (16, CHAR)){0, 27, 2, (CHAR)-1, 12, 10, (CHAR)-2, 0, \
+                             0, 3, 0x23, (CHAR)-5, 28, 1, (CHAR)-2, 0};
+
+    test (CHAR, 16, c0, c1, cc0, cc1, cres, "%i");
+#undef CHAR
+
+#define CHAR unsigned char
+    vector (16, CHAR) uc0;   vector (16, CHAR) uc1;
+    vector (16, CHAR) ucc0;   vector (16, CHAR) ucc1;
+    vector (16, CHAR) ucres;
+
+    uc0 = (vector (16, CHAR)){argc, 1,  2,  4,  7, 87, (CHAR)-5, 2, \
+                             argc, 1,  3,  18,  6, 87, (CHAR)-5, 2 };
+
+    uc1 = (vector (16, CHAR)){0, 3, 2, (CHAR)-23, 12, 10, (CHAR)-2, 0, \
+                             0, 3, 2, (CHAR)-5, 28, 10, (CHAR)-2, 0};
+    
+    ucc0 = (vector (16, CHAR)){argc, 1,  argc,  4,  7, 87, (CHAR)-23, 2, \
+                             33, 8,  3,  18,  6, 87, (CHAR)-5, 41 };
+
+    ucc1 = (vector (16, CHAR)){0, 27, 2, (CHAR)-1, 12, 10, (CHAR)-2, 0, \
+                             0, 3, 0x23, (CHAR)-5, 28, 1, (CHAR)-2, 0};
+
+    test (CHAR, 16, uc0, uc1, ucc0, ucc1, ucres, "%u");
+#undef CHAR
+
+/* Float version.  */
+   vector (4, float) f0 = {1., 7., (float)argc, 4.};
+   vector (4, float) f1 = {6., 2., 8., (float)argc};
+   vector (4, float) fc0 = {3., 12., 4., (float)argc};
+   vector (4, float) fc1 = {7., 5., (float)argc, 6.};
+   vector (4, float) fres;
+
+   test (float, 4, f0, f1, fc0, fc1, fres, "%f");
+
+/* Double version.  */
+   vector (2, double) d0 = {1., (double)argc};
+   vector (2, double) d1 = {6., 2.};
+   vector (2, double) dc0 = {(double)argc, 7.};
+   vector (2, double) dc1 = {7., 5.};
+   vector (2, double) dres;
+
+   //test (double, 2, d0, d1, dc0, dc1, dres, "%f");
+
+
+   return 0;
+}
+
Index: gcc/testsuite/gcc.c-torture/execute/vector-compare-2.c
===================================================================
--- gcc/testsuite/gcc.c-torture/execute/vector-compare-2.c	(revision 0)
+++ gcc/testsuite/gcc.c-torture/execute/vector-compare-2.c	(revision 0)
@@ -0,0 +1,27 @@ 
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+/* Check that constant folding in 
+   these simple cases works.  */
+vector (4, int)
+foo (vector (4, int) x)
+{
+  return   (x == x) + (x != x) + (x >  x) 
+	 + (x <  x) + (x >= x) + (x <= x);
+}
+
+int 
+main (int argc, char *argv[])
+{
+  vector (4, int) t = {argc, 2, argc, 42};
+  vector (4, int) r;
+  int i;
+
+  r = foo (t);
+
+  for (i = 0; i < 4; i++)
+    if (r[i] != -3)
+      __builtin_abort ();
+
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/vector-compare-1.c
===================================================================
--- gcc/testsuite/gcc.dg/vector-compare-1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vector-compare-1.c	(revision 0)
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+void
+foo (vector (4, int) x, vector (4, float) y)
+{
+  vector (4, int) p4;
+  vector (4, int) r4;
+  vector (4, unsigned int) q4;
+  vector (8, int) r8;
+  vector (4, float) f4;
+  
+  r4 = x > y;	    /* { dg-error "comparing vectors with different element types" } */
+  r8 = (x != p4);   /* { dg-error "incompatible types when assigning to type" } */
+  r8 == r4;	    /* { dg-error "comparing vectors with different number of elements" } */
+
+  r4 ? y : p4;	    /* { dg-error "vectors of different types involved in vector comparison" } */
+  r4 ? r4 : r8;	    /* { dg-error "vectors of different length found in vector comparison" } */
+  y ? f4 : y;	    /* { dg-error "non-integer type in vector condition" } */
+  
+  /* Do not trigger that  */
+  q4 ? p4 : r4;	    /* { "vector comparison must be of signed integer vector type" } */
+}
Index: gcc/testsuite/gcc.dg/vector-compare-2.c
===================================================================
--- gcc/testsuite/gcc.dg/vector-compare-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vector-compare-2.c	(revision 0)
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */   
+
+/* Test if C_MAYBE_CONST are folded correctly when 
+   creating VEC_COND_EXPR.  */
+
+typedef int vec __attribute__((vector_size(16)));
+
+vec i,j;
+extern vec a, b, c;
+
+vec 
+foo (int x)
+{
+  return (x ? i : j) ? a : b;
+}
+
+vec 
+bar (int x)
+{
+  return a ? (x ? i : j) : b;
+}
+
+vec 
+baz (int x)
+{
+  return a ? b : (x ? i : j);
+}
Index: gcc/c-typeck.c
===================================================================
--- gcc/c-typeck.c	(revision 177665)
+++ gcc/c-typeck.c	(working copy)
@@ -4058,6 +4058,94 @@  build_conditional_expr (location_t colon
   type2 = TREE_TYPE (op2);
   code2 = TREE_CODE (type2);
 
+  if (TREE_CODE (TREE_TYPE (ifexp)) == VECTOR_TYPE)
+    {
+      bool maybe_const = true;
+      tree sc;
+      
+      if (TREE_CODE (type1) != VECTOR_TYPE
+	  || TREE_CODE (type2) != VECTOR_TYPE)
+        {
+          error_at (colon_loc, "vector comparisom arguments must be of "
+                               "type vector");
+          return error_mark_node;
+        }
+
+      if (TREE_CODE (TREE_TYPE (TREE_TYPE (ifexp))) != INTEGER_TYPE)
+        {
+          error_at (colon_loc, "non-integer type in vector condition");
+          return error_mark_node;
+        }
+      
+      if (TYPE_VECTOR_SUBPARTS (type1) != TYPE_VECTOR_SUBPARTS (type2)
+          || TYPE_VECTOR_SUBPARTS (TREE_TYPE (ifexp))
+             != TYPE_VECTOR_SUBPARTS (type1))
+        {
+          error_at (colon_loc, "vectors of different length found in "
+                               "vector comparison");
+          return error_mark_node;
+        }
+      
+      if (TREE_TYPE (type1) != TREE_TYPE (type2))
+        {
+          error_at (colon_loc, "vectors of different types involved in "
+                               "vector comparison");
+          return error_mark_node;
+        }
+
+      if (TYPE_SIZE (TREE_TYPE (TREE_TYPE (ifexp))) 
+          != TYPE_SIZE (TREE_TYPE (type1)))
+        {
+          error_at (colon_loc, "vector-condition element type must be "
+                               "the same as result vector element type");
+          return error_mark_node;
+        }
+      
+      /* Avoid C_MAYBE_CONST in VEC_COND_EXPR.  */
+      sc = c_fully_fold (ifexp, false, &maybe_const);
+      sc = save_expr (sc);
+      if (!maybe_const)
+	ifexp = c_wrap_maybe_const (sc, true);
+      else
+	ifexp = sc;
+      
+      sc = c_fully_fold (op1, false, &maybe_const);
+      sc = save_expr (sc);
+      if (!maybe_const)
+	op1 = c_wrap_maybe_const (sc, true);
+      else
+	op1 = sc;
+      
+      sc = c_fully_fold (op2, false, &maybe_const);
+      sc = save_expr (sc);
+      if (!maybe_const)
+	op2 = c_wrap_maybe_const (sc, true);
+      else
+	op2 = sc;
+
+      /* Currently the expansion of VEC_COND_EXPR does not allow
+	 expessions where the type of vectors you compare differs
+	 form the type of vectors you select from. For the time
+	 being we insert implicit conversions.  */
+      if ((COMPARISON_CLASS_P (ifexp)
+	   && TREE_TYPE (TREE_OPERAND (ifexp, 0)) != type1)
+	  || TREE_TYPE (ifexp) != type1)
+	{
+	  tree comp_type = COMPARISON_CLASS_P (ifexp)
+			   ? TREE_TYPE (TREE_OPERAND (ifexp, 0))
+			   : TREE_TYPE (ifexp);
+	  tree vcond;
+	  
+	  op1 = convert (comp_type, op1);
+	  op2 = convert (comp_type, op2);
+	  vcond = build3 (VEC_COND_EXPR, comp_type, ifexp, op1, op2);
+	  vcond = convert (type1, vcond);
+	  return vcond;
+	}
+      else
+	return build3 (VEC_COND_EXPR, type1, ifexp, op1, op2);
+    }
+
   /* C90 does not permit non-lvalue arrays in conditional expressions.
      In C99 they will be pointers by now.  */
   if (code1 == ARRAY_TYPE || code2 == ARRAY_TYPE)
@@ -9906,6 +9994,29 @@  build_binary_op (location_t location, en
 
     case EQ_EXPR:
     case NE_EXPR:
+      if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE)
+        {
+          tree intt;
+          if (TREE_TYPE (type0) != TREE_TYPE (type1))
+            {
+              error_at (location, "comparing vectors with different "
+                                  "element types");
+              return error_mark_node;
+            }
+
+          if (TYPE_VECTOR_SUBPARTS (type0) != TYPE_VECTOR_SUBPARTS (type1))
+            {
+              error_at (location, "comparing vectors with different "
+                                  "number of elements");
+              return error_mark_node;
+            }
+
+          /* Always construct signed integer vector type.  */
+          intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE (type0)),0);
+          result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS (type0));
+          converted = 1;
+          break;
+        }
       if (FLOAT_TYPE_P (type0) || FLOAT_TYPE_P (type1))
 	warning_at (location,
 		    OPT_Wfloat_equal,
@@ -10018,6 +10129,29 @@  build_binary_op (location_t location, en
     case GE_EXPR:
     case LT_EXPR:
     case GT_EXPR:
+      if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE)
+        {
+          tree intt;
+          if (TREE_TYPE (type0) != TREE_TYPE (type1))
+            {
+              error_at (location, "comparing vectors with different "
+                                  "element types");
+              return error_mark_node;
+            }
+
+          if (TYPE_VECTOR_SUBPARTS (type0) != TYPE_VECTOR_SUBPARTS (type1))
+            {
+              error_at (location, "comparing vectors with different "
+                                  "number of elements");
+              return error_mark_node;
+            }
+
+          /* Always construct signed integer vector type.  */
+          intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE (type0)),0);
+          result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS (type0));
+          converted = 1;
+          break;
+        }
       build_type = integer_type_node;
       if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE
 	   || code0 == FIXED_POINT_TYPE)
@@ -10425,6 +10559,10 @@  c_objc_common_truthvalue_conversion (loc
     case FUNCTION_TYPE:
       gcc_unreachable ();
 
+    case VECTOR_TYPE:
+      error_at (location, "used vector type where scalar is required");
+      return error_mark_node;
+
     default:
       break;
     }
Index: gcc/gimplify.c
===================================================================
--- gcc/gimplify.c	(revision 177665)
+++ gcc/gimplify.c	(working copy)
@@ -7064,6 +7064,22 @@  gimplify_expr (tree *expr_p, gimple_seq
 	  }
 	  break;
 
+        case VEC_COND_EXPR:
+	  {
+	    enum gimplify_status r0, r1, r2;
+
+	    r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
+				post_p, is_gimple_condexpr, fb_rvalue);
+	    r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
+				post_p, is_gimple_val, fb_rvalue);
+	    r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
+				post_p, is_gimple_val, fb_rvalue);
+	    recalculate_side_effects (*expr_p);
+
+	    ret = MIN (r0, MIN (r1, r2));
+	  }
+	  break;
+
 	case TARGET_MEM_REF:
 	  {
 	    enum gimplify_status r0 = GS_ALL_DONE, r1 = GS_ALL_DONE;
@@ -7348,6 +7364,11 @@  gimplify_expr (tree *expr_p, gimple_seq
 		{
 		  tree type = TREE_TYPE (TREE_OPERAND (*expr_p, 1));
 
+		  /* Vector comparisons is a valid gimple expression
+		     which could be lowered down later.  */
+		  if (TREE_CODE (type) == VECTOR_TYPE)
+		    goto expr_2;
+
 		  if (!AGGREGATE_TYPE_P (type))
 		    {
 		      tree org_type = TREE_TYPE (*expr_p);
Index: gcc/tree.def
===================================================================
--- gcc/tree.def	(revision 177665)
+++ gcc/tree.def	(working copy)
@@ -704,7 +704,10 @@  DEFTREECODE (TRUTH_NOT_EXPR, "truth_not_
    The others are allowed only for integer (or pointer or enumeral)
    or real types.
    In all cases the operands will have the same type,
-   and the value is always the type used by the language for booleans.  */
+   and the value is either the type used by the language for booleans
+   or an integer vector type of the same size and with the same number
+   of elements as the comparison operands.  True for a vector of
+   comparison results has all bits set while false is equal to zero.  */
 DEFTREECODE (LT_EXPR, "lt_expr", tcc_comparison, 2)
 DEFTREECODE (LE_EXPR, "le_expr", tcc_comparison, 2)
 DEFTREECODE (GT_EXPR, "gt_expr", tcc_comparison, 2)
Index: gcc/tree-vect-generic.c
===================================================================
--- gcc/tree-vect-generic.c	(revision 177665)
+++ gcc/tree-vect-generic.c	(working copy)
@@ -30,11 +30,16 @@  along with GCC; see the file COPYING3.
 #include "tree-pass.h"
 #include "flags.h"
 #include "ggc.h"
+#include "target.h"
 
 /* Need to include rtl.h, expr.h, etc. for optabs.  */
 #include "expr.h"
 #include "optabs.h"
 
+
+static void expand_vector_operations_1 (gimple_stmt_iterator *);
+
+
 /* Build a constant of type TYPE, made of VALUE's bits replicated
    every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
 static tree
@@ -125,6 +130,21 @@  do_binop (gimple_stmt_iterator *gsi, tre
   return gimplify_build2 (gsi, code, inner_type, a, b);
 }
 
+
+/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0;  */
+static tree
+do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
+	  tree bitpos, tree bitsize, enum tree_code code)
+{
+  tree cond;
+  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
+  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
+  cond = gimplify_build2 (gsi, code, inner_type, a, b);
+  return gimplify_build3 (gsi, COND_EXPR, inner_type, cond, 
+                    build_int_cst (inner_type, -1),
+                    build_int_cst (inner_type, 0));
+}
+
 /* Expand vector addition to scalars.  This does bit twiddling
    in order to increase parallelism:
 
@@ -333,6 +353,24 @@  uniform_vector_p (tree vec)
   return NULL_TREE;
 }
 
+/* Try to expand vector comparison expression OP0 CODE OP1 using  
+   builtin_vec_compare hardware hook, in case target does not 
+   support comparison of type TYPE, extract comparison piecewise.  
+   GSI is used inside the target hook to create the code needed
+   for the given comparison.  */
+static tree
+expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
+                          tree op1, enum tree_code code)
+{
+ tree t = targetm.vectorize.builtin_vec_compare (gsi, type, op0, op1, code);
+
+  if (t == NULL_TREE)
+    t = expand_vector_piecewise (gsi, do_compare, type, 
+                    TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
+  return t;
+
+}
+
 static tree
 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
 			 gimple assign, enum tree_code code)
@@ -375,8 +413,24 @@  expand_vector_operation (gimple_stmt_ite
       case BIT_NOT_EXPR:
         return expand_vector_parallel (gsi, do_unop, type,
 		      		       gimple_assign_rhs1 (assign),
-				       NULL_TREE, code);
-
+        			       NULL_TREE, code);
+      case EQ_EXPR:
+      case NE_EXPR:
+      case GT_EXPR:
+      case LT_EXPR:
+      case GE_EXPR:
+      case LE_EXPR:
+      case UNEQ_EXPR:
+      case UNGT_EXPR:
+      case UNLT_EXPR:
+      case UNGE_EXPR:
+      case UNLE_EXPR:
+      case LTGT_EXPR:
+      case ORDERED_EXPR:
+      case UNORDERED_EXPR:
+        return expand_vector_comparison (gsi, type,
+                                      gimple_assign_rhs1 (assign),
+                                      gimple_assign_rhs2 (assign), code);
       default:
 	break;
       }
@@ -432,6 +486,64 @@  type_for_widest_vector_mode (enum machin
     }
 }
 
+
+
+/* Expand vector condition EXP which should have the form
+   VEC_COND_EXPR<cond, vec0, vec1> into the following
+   vector:
+     {cond[i] != 0 ? vec0[i] : vec1[i], ... }
+   i changes from 0 to TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec0)).  */
+static tree
+expand_vec_cond_expr_piecewise (gimple_stmt_iterator *gsi, tree exp)
+{
+  tree cond = TREE_OPERAND (exp, 0);
+  tree vec0 = TREE_OPERAND (exp, 1);
+  tree vec1 = TREE_OPERAND (exp, 2);
+  tree type = TREE_TYPE (vec0);
+  tree lhs, rhs, notmask;
+  tree var, new_rhs;
+  optab op = NULL;
+  gimple new_stmt;
+  gimple_stmt_iterator gsi_tmp;
+  tree t;
+
+  if (!COMPARISON_CLASS_P (cond))
+    cond = build2 (EQ_EXPR, TREE_TYPE (cond), cond,
+			    build_vector_from_val (TREE_TYPE (cond),
+			    build_int_cst (TREE_TYPE (TREE_TYPE (cond)), -1)));
+     
+  /* Expand vector condition inside of VEC_COND_EXPR.  */
+  op = optab_for_tree_code (TREE_CODE (cond), type, optab_default);
+  if (!op || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+    {
+      var = create_tmp_reg (TREE_TYPE (cond), "cond");
+      new_rhs = expand_vector_comparison (gsi, TREE_TYPE (cond),
+					  TREE_OPERAND (cond, 0),
+					  TREE_OPERAND (cond, 1),
+					  TREE_CODE (cond));
+      new_stmt = gimple_build_assign (var, new_rhs);
+      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+      update_stmt (gsi_stmt (*gsi));
+    }
+  else
+    var = cond;
+  
+  gsi_tmp = *gsi;
+  gsi_prev (&gsi_tmp);
+
+  /* Expand VCOND<mask, v0, v1> to ((v0 & mask) | (v1 & ~mask))  */
+  lhs = gimplify_build2 (gsi, BIT_AND_EXPR, type, var, vec0);
+  notmask = gimplify_build1 (gsi, BIT_NOT_EXPR, type, var);
+  rhs = gimplify_build2 (gsi, BIT_AND_EXPR, type, notmask, vec1);
+  t = gimplify_build2 (gsi, BIT_IOR_EXPR, type, lhs, rhs);
+
+  /* Run vecower on the expresisons we have introduced.  */
+  for (; gsi_tmp.ptr != gsi->ptr; gsi_next (&gsi_tmp))
+    expand_vector_operations_1 (&gsi_tmp);
+  
+  return t;
+}
+
 /* Process one statement.  If we identify a vector operation, expand it.  */
 
 static void
@@ -451,6 +563,23 @@  expand_vector_operations_1 (gimple_stmt_
   code = gimple_assign_rhs_code (stmt);
   rhs_class = get_gimple_rhs_class (code);
 
+  /* Check if VEC_COND_EXPR is supported in hardware within the
+     given types.  */
+  if (code == VEC_COND_EXPR)
+    {
+      tree exp = gimple_assign_rhs1 (stmt);
+      if (expand_vec_cond_expr_p (TREE_TYPE (exp), 
+                                  TYPE_MODE (TREE_TYPE (exp))))
+        {
+	  update_stmt (gsi_stmt (*gsi));
+	  return;
+        }
+        
+      new_rhs = expand_vec_cond_expr_piecewise (gsi, exp);
+      gimple_assign_set_rhs_from_tree (gsi, new_rhs);
+      update_stmt (gsi_stmt (*gsi));
+    }
+
   if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
     return;
 
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 177665)
+++ gcc/Makefile.in	(working copy)
@@ -888,7 +888,7 @@  EXCEPT_H = except.h $(HASHTAB_H) vecprim
 TARGET_DEF = target.def target-hooks-macros.h
 C_TARGET_DEF = c-family/c-target.def target-hooks-macros.h
 COMMON_TARGET_DEF = common/common-target.def target-hooks-macros.h
-TARGET_H = $(TM_H) target.h $(TARGET_DEF) insn-modes.h
+TGT = $(TM_H) target.h $(TARGET_DEF) insn-modes.h
 C_TARGET_H = c-family/c-target.h $(C_TARGET_DEF)
 COMMON_TARGET_H = common/common-target.h $(INPUT_H) $(COMMON_TARGET_DEF)
 MACHMODE_H = machmode.h mode-classes.def insn-modes.h
@@ -919,8 +919,9 @@  TREE_H = tree.h all-tree.def tree.def c-
 REGSET_H = regset.h $(BITMAP_H) hard-reg-set.h
 BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) cfghooks.h
 GIMPLE_H = gimple.h gimple.def gsstruct.def pointer-set.h $(VEC_H) \
-	vecir.h $(GGC_H) $(BASIC_BLOCK_H) $(TARGET_H) tree-ssa-operands.h \
+	vecir.h $(GGC_H) $(BASIC_BLOCK_H) $(TGT) tree-ssa-operands.h \
 	tree-ssa-alias.h $(INTERNAL_FN_H)
+TARGET_H = $(TGT) gimple.h
 GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h
 COVERAGE_H = coverage.h $(GCOV_IO_H)
 DEMANGLE_H = $(srcdir)/../include/demangle.h
@@ -3185,7 +3186,7 @@  tree-vect-generic.o : tree-vect-generic.
     $(TM_H) $(TREE_FLOW_H) $(GIMPLE_H) tree-iterator.h $(TREE_PASS_H) \
     $(FLAGS_H) $(OPTABS_H) $(MACHMODE_H) $(EXPR_H) \
     langhooks.h $(FLAGS_H) $(DIAGNOSTIC_H) gt-tree-vect-generic.h $(GGC_H) \
-    coretypes.h insn-codes.h
+    coretypes.h insn-codes.h target.h
 df-core.o : df-core.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(revision 177665)
+++ gcc/tree-cfg.c	(working copy)
@@ -3191,6 +3191,38 @@  verify_gimple_comparison (tree type, tre
       return true;
     }
 
+  if (TREE_CODE (type) == VECTOR_TYPE)
+    {
+      if (TREE_CODE (op0_type) != VECTOR_TYPE
+	  || TREE_CODE (op1_type) != VECTOR_TYPE)
+        {
+          error ("non-vector operands in vector comparison");
+          debug_generic_expr (op0_type);
+          debug_generic_expr (op1_type);
+          return true;
+        }
+      
+      if (!useless_type_conversion_p (op0_type, op1_type)
+	  && !useless_type_conversion_p (op1_type, op0_type))
+        {
+          error ("type mismatch in vector comparison");
+          debug_generic_expr (op0_type);
+          debug_generic_expr (op1_type);
+          return true;
+        }
+      
+      if (TYPE_VECTOR_SUBPARTS (type) != TYPE_VECTOR_SUBPARTS (op0_type)
+          && TYPE_PRECISION (TREE_TYPE (op0_type)) 
+             != TYPE_PRECISION (TREE_TYPE (type)))
+        {
+          error ("invalid vector comparison resulting type");
+          debug_generic_expr (type);
+          return true;
+        }
+        
+      return false;
+    }
+
   /* For comparisons we do not have the operations type as the
      effective type the comparison is carried out in.  Instead
      we require that either the first operand is trivially
Index: gcc/c-parser.c
===================================================================
--- gcc/c-parser.c	(revision 177665)
+++ gcc/c-parser.c	(working copy)
@@ -5339,6 +5339,15 @@  c_parser_conditional_expression (c_parse
       tree eptype = NULL_TREE;
 
       middle_loc = c_parser_peek_token (parser)->location;
+
+      if (TREE_CODE (TREE_TYPE (cond.value)) == VECTOR_TYPE)
+        {
+          error_at (middle_loc, "cannot ommit middle operator in "
+                                "vector comparison");
+          ret.value = error_mark_node;
+          return ret;
+        }
+      
       pedwarn (middle_loc, OPT_pedantic, 
 	       "ISO C forbids omitting the middle term of a ?: expression");
       warn_for_omitted_condop (middle_loc, cond.value);
@@ -5357,9 +5366,12 @@  c_parser_conditional_expression (c_parse
     }
   else
     {
-      cond.value
-	= c_objc_common_truthvalue_conversion
-	(cond_loc, default_conversion (cond.value));
+      if (TREE_CODE (TREE_TYPE (cond.value)) != VECTOR_TYPE)
+        {
+          cond.value
+            = c_objc_common_truthvalue_conversion
+            (cond_loc, default_conversion (cond.value));
+        }
       c_inhibit_evaluation_warnings += cond.value == truthvalue_false_node;
       exp1 = c_parser_expression_conv (parser);
       mark_exp_read (exp1.value);
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 177665)
+++ gcc/config/i386/i386.c	(working copy)
@@ -25,6 +25,7 @@  along with GCC; see the file COPYING3.
 #include "tm.h"
 #include "rtl.h"
 #include "tree.h"
+#include "tree-flow.h"
 #include "tm_p.h"
 #include "regs.h"
 #include "hard-reg-set.h"
@@ -32827,6 +32828,276 @@  ix86_vectorize_builtin_vec_perm (tree ve
   return ix86_builtins[(int) fcode];
 }
 
+/* Find target specific sequence for vector comparison of 
+   real-type vectors V0 and V1. Returns variable containing 
+   result of the comparison or NULL_TREE in other case.  */
+static tree
+vector_fp_compare (gimple_stmt_iterator *gsi, tree rettype, 
+                   enum machine_mode mode, tree v0, tree v1,
+                   enum tree_code code)
+{
+  enum ix86_builtins fcode;
+  int arg = -1;
+  tree fdef, frtype, tmp, var, t;
+  gimple new_stmt;
+  bool reverse = false;
+
+#define SWITCH_MODE(mode, fcode, code, value) \
+switch (mode) \
+  { \
+    case V2DFmode: \
+      if (!TARGET_SSE2) return NULL_TREE; \
+      fcode = IX86_BUILTIN_CMP ## code ## PD; \
+      break; \
+    case V4DFmode: \
+      if (!TARGET_AVX) return NULL_TREE; \
+      fcode = IX86_BUILTIN_CMPPD256; \
+      arg = value; \
+      break; \
+    case V4SFmode: \
+      if (!TARGET_SSE) return NULL_TREE; \
+      fcode = IX86_BUILTIN_CMP ## code ## PS; \
+      break; \
+    case V8SFmode: \
+      if (!TARGET_AVX) return NULL_TREE; \
+      fcode = IX86_BUILTIN_CMPPS256; \
+      arg = value; \
+      break; \
+    default: \
+      return NULL_TREE; \
+    /* FIXME: Similar instructions for MMX.  */ \
+  }
+
+  switch (code)
+    {
+      case EQ_EXPR:
+        SWITCH_MODE (mode, fcode, EQ, 0);
+        break;
+      
+      case NE_EXPR:
+        SWITCH_MODE (mode, fcode, NEQ, 4);
+        break;
+      
+      case GT_EXPR:
+        SWITCH_MODE (mode, fcode, LT, 1);
+        reverse = true;
+        break;
+      
+      case LT_EXPR:
+        SWITCH_MODE (mode, fcode, LT, 1);
+        break;
+      
+      case LE_EXPR:
+        SWITCH_MODE (mode, fcode, LE, 2);
+        break;
+
+      case GE_EXPR:
+        SWITCH_MODE (mode, fcode, LE, 2);
+        reverse = true;
+        break;
+
+      default:
+        return NULL_TREE;
+    }
+#undef SWITCH_MODE
+
+  fdef = ix86_builtins[(int)fcode];
+  frtype = TREE_TYPE (TREE_TYPE (fdef));
+ 
+  tmp = create_tmp_var (frtype, "tmp");
+  var = create_tmp_var (rettype, "tmp");
+
+  if (arg == -1)
+    if (reverse)
+      new_stmt = gimple_build_call (fdef, 2, v1, v0);
+    else
+      new_stmt = gimple_build_call (fdef, 2, v0, v1);
+  else
+    if (reverse)
+      new_stmt = gimple_build_call (fdef, 3, v0, v1, 
+                    build_int_cst (char_type_node, arg));
+    else
+      new_stmt = gimple_build_call (fdef, 3, v1, v0, 
+                    build_int_cst (char_type_node, arg));
+     
+  gimple_call_set_lhs (new_stmt, tmp); 
+  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, rettype, tmp);
+  new_stmt = gimple_build_assign (var, t);
+  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+  
+  return var;
+}
+
+/* Find target specific sequence for vector comparison of 
+   integer-type vectors V0 and V1. Returns variable containing 
+   result of the comparison or NULL_TREE in other case.  */
+static tree
+vector_int_compare (gimple_stmt_iterator *gsi, tree rettype, 
+                    enum machine_mode mode, tree v0, tree v1,
+                    enum tree_code code)
+{
+  enum ix86_builtins feq, fgt;
+  tree var, t, tmp, tmp1, tmp2, defeq, defgt, gtrtype, eqrtype;
+  gimple new_stmt;
+
+  switch (mode)
+    {
+      /* SSE integer-type vectors.  */
+      case V2DImode:
+        if (!TARGET_SSE4_2) return NULL_TREE;
+        feq = IX86_BUILTIN_PCMPEQQ;
+        fgt = IX86_BUILTIN_PCMPGTQ;
+        break;
+
+      case V4SImode:
+        if (!TARGET_SSE2) return NULL_TREE; 
+        feq = IX86_BUILTIN_PCMPEQD128;
+        fgt = IX86_BUILTIN_PCMPGTD128;
+        break;
+      
+      case V8HImode:
+        if (!TARGET_SSE2) return NULL_TREE;
+        feq = IX86_BUILTIN_PCMPEQW128;
+        fgt = IX86_BUILTIN_PCMPGTW128;
+        break;
+      
+      case V16QImode:
+        if (!TARGET_SSE2) return NULL_TREE;
+        feq = IX86_BUILTIN_PCMPEQB128;
+        fgt = IX86_BUILTIN_PCMPGTB128;
+        break;
+      
+      /* MMX integer-type vectors.  */
+      case V2SImode:
+        if (!TARGET_MMX) return NULL_TREE;
+        feq = IX86_BUILTIN_PCMPEQD;
+        fgt = IX86_BUILTIN_PCMPGTD;
+        break;
+
+      case V4HImode:
+        if (!TARGET_MMX) return NULL_TREE;
+        feq = IX86_BUILTIN_PCMPEQW;
+        fgt = IX86_BUILTIN_PCMPGTW;
+        break;
+
+      case V8QImode:
+        if (!TARGET_MMX) return NULL_TREE;
+        feq = IX86_BUILTIN_PCMPEQB;
+        fgt = IX86_BUILTIN_PCMPGTB;
+        break;
+      
+      /* FIXME: Similar instructions for AVX.  */
+      default:
+        return NULL_TREE;
+    }
+
+  
+  var = create_tmp_var (rettype, "ret");
+  defeq = ix86_builtins[(int)feq];
+  defgt = ix86_builtins[(int)fgt];
+  eqrtype = TREE_TYPE (TREE_TYPE (defeq));
+  gtrtype = TREE_TYPE (TREE_TYPE (defgt));
+
+#define EQGT_CALL(gsi, stmt, var, op0, op1, gteq) \
+do { \
+  var = create_tmp_var (gteq ## rtype, "tmp"); \
+  stmt = gimple_build_call (def ## gteq, 2, op0, op1); \
+  gimple_call_set_lhs (stmt, var); \
+  gsi_insert_before (gsi, stmt, GSI_SAME_STMT); \
+} while (0)
+   
+  switch (code)
+    {
+      case EQ_EXPR:
+        EQGT_CALL (gsi, new_stmt, tmp, v0, v1, eq);
+        break;
+
+      case NE_EXPR:
+        tmp = create_tmp_var (eqrtype, "tmp");
+
+        EQGT_CALL (gsi, new_stmt, tmp1, v0, v1, eq);
+        EQGT_CALL (gsi, new_stmt, tmp2, v0, v0, eq);
+
+        /* t = tmp1 ^ {-1, -1,...}  */
+        t = gimplify_build2 (gsi, BIT_XOR_EXPR, eqrtype, tmp1, tmp2);
+        new_stmt = gimple_build_assign (tmp, t);
+        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+        break;
+
+      case GT_EXPR:
+        EQGT_CALL (gsi, new_stmt, tmp, v0, v1, gt);
+        break;
+
+      case LT_EXPR:
+        EQGT_CALL (gsi, new_stmt, tmp, v1, v0, gt);
+        break;
+
+      case GE_EXPR:
+        if (eqrtype != gtrtype)
+          return NULL_TREE;
+        tmp = create_tmp_var (eqrtype, "tmp");
+        EQGT_CALL (gsi, new_stmt, tmp1, v0, v1, gt);
+        EQGT_CALL (gsi, new_stmt, tmp2, v0, v1, eq);
+        t = gimplify_build2 (gsi, BIT_IOR_EXPR, eqrtype, tmp1, tmp2);
+        new_stmt = gimple_build_assign (tmp, t);
+        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+        break;
+      
+      case LE_EXPR:
+         if (eqrtype != gtrtype)
+          return NULL_TREE;
+        tmp = create_tmp_var (eqrtype, "tmp");
+        EQGT_CALL (gsi, new_stmt, tmp1, v1, v0, gt);
+        EQGT_CALL (gsi, new_stmt, tmp2, v0, v1, eq);
+        t = gimplify_build2 (gsi, BIT_IOR_EXPR, eqrtype, tmp1, tmp2);
+        new_stmt = gimple_build_assign (tmp, t);
+        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+        break;
+     
+      default:
+        return NULL_TREE;
+    }
+#undef EQGT_CALL
+
+  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, rettype, tmp);
+  new_stmt = gimple_build_assign (var, t);
+  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+  return var;
+}
+
+/* Lower a comparison of two vectors V0 and V1, returning a 
+   variable with the result of comparison. Returns NULL_TREE
+   when it is impossible to find a target specific sequence.  */
+static tree 
+ix86_vectorize_builtin_vec_compare (gimple_stmt_iterator *gsi, tree rettype, 
+                                    tree v0, tree v1, enum tree_code code)
+{
+  tree type;
+
+  /* Make sure we are comparing the same types.  */
+  if (TREE_TYPE (v0) != TREE_TYPE (v1)
+      || TREE_TYPE (TREE_TYPE (v0)) != TREE_TYPE (TREE_TYPE (v1)))
+    return NULL_TREE;
+  
+  type = TREE_TYPE (v0);
+  
+  /* Cannot compare packed unsigned integers 
+     unless it is EQ or NEQ operations.  */
+  if (TREE_CODE (TREE_TYPE (type)) == INTEGER_TYPE 
+      && TYPE_UNSIGNED (TREE_TYPE (type)))
+    if (code != EQ_EXPR && code != NE_EXPR)
+      return NULL_TREE;
+
+
+  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
+    return vector_fp_compare (gsi, rettype, TYPE_MODE (type), v0, v1, code);
+  else if (TREE_CODE (TREE_TYPE (type)) == INTEGER_TYPE)
+    return vector_int_compare (gsi, rettype, TYPE_MODE (type), v0, v1, code);
+  else
+    return NULL_TREE;
+}
+
 /* Return a vector mode with twice as many elements as VMODE.  */
 /* ??? Consider moving this to a table generated by genmodes.c.  */
 
@@ -35270,6 +35541,11 @@  ix86_autovectorize_vector_sizes (void)
 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
   ix86_autovectorize_vector_sizes
 
+#undef TARGET_VECTORIZE_BUILTIN_VEC_COMPARE
+#define TARGET_VECTORIZE_BUILTIN_VEC_COMPARE \
+  ix86_vectorize_builtin_vec_compare
+
+
 #undef TARGET_SET_CURRENT_FUNCTION
 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function