Message ID | 20160620183159.GG7387@tucnak.redhat.com |
---|---|
State | New |
Headers | show |
On Mon, Jun 20, 2016 at 8:31 PM, Jakub Jelinek <jakub@redhat.com> wrote: > Hi! > > As discussed in the PR, this function is missing a lot of comparison codes > that can validly appear there, and gives wrong values for the others > except for NE. > This patch makes those values match what %D3 emits for the AVX vcmp*p{s,d}, > there is some controversy on whether UN{GT,GE,LT,LE,EQ} and/or LTGT should > raise exceptions or not, but that should be handled later on also together > with the scalar code (where we never raise exceptions), SSE, AVX and this. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/6.2? > > 2016-06-20 Jakub Jelinek <jakub@redhat.com> > > PR target/71559 > * config/i386/i386.c (ix86_fp_cmp_code_to_pcmp_immediate): Fix up > returned values and add UN*/LTGT/*ORDERED cases with values matching > D operand modifier on vcmp for AVX. > > * gcc.target/i386/sse2-pr71559.c: New test. > * gcc.target/i386/avx-pr71559.c: New test. > * gcc.target/i386/avx512f-pr71559.c: New test. OK for mainline and release branches after a week or so without problems in mainline. (I tried to review usage of all those bits, LGTM, but mistakes can happen...) Thanks, Uros. > --- gcc/config/i386/i386.c.jj 2016-06-20 10:36:29.489994876 +0200 > +++ gcc/config/i386/i386.c 2016-06-20 12:07:37.311006144 +0200 > @@ -23622,17 +23622,33 @@ ix86_fp_cmp_code_to_pcmp_immediate (enum > switch (code) > { > case EQ: > - return 0x08; > + return 0x00; > case NE: > return 0x04; > case GT: > - return 0x16; > + return 0x0e; > case LE: > - return 0x1a; > + return 0x02; > case GE: > - return 0x15; > + return 0x0d; > case LT: > - return 0x19; > + return 0x01; > + case UNLE: > + return 0x0a; > + case UNLT: > + return 0x09; > + case UNGE: > + return 0x05; > + case UNGT: > + return 0x06; > + case UNEQ: > + return 0x18; > + case LTGT: > + return 0x0c; > + case ORDERED: > + return 0x07; > + case UNORDERED: > + return 0x03; > default: > gcc_unreachable (); > } > --- gcc/testsuite/gcc.target/i386/sse2-pr71559.c.jj 2016-06-20 12:10:27.621795187 +0200 > +++ gcc/testsuite/gcc.target/i386/sse2-pr71559.c 2016-06-20 12:14:44.821457893 +0200 > @@ -0,0 +1,73 @@ > +/* PR target/71559 */ > +/* { dg-do run { target sse2 } } */ > +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ > + > +#ifndef PR71559_TEST > +#include "sse2-check.h" > +#define PR71559_TEST sse2_test > +#endif > + > +#define N 16 > +float a[N] = { 5.0f, -3.0f, 1.0f, __builtin_nanf (""), 9.0f, 7.0f, -3.0f, -9.0f, > + -3.0f, -5.0f, -9.0f, __builtin_nanf (""), 0.5f, -0.5f, 0.0f, 0.0f }; > +float b[N] = { -5.0f, 3.0f, 1.0f, 7.0f, 8.0f, 8.0f, -3.0f, __builtin_nanf (""), > + -4.0f, -4.0f, -9.0f, __builtin_nanf (""), 0.0f, 0.0f, 0.0f, __builtin_nanf ("") }; > +int c[N], d[N]; > + > +#define FN(name, op) \ > +void \ > +name (void) \ > +{ \ > + int i; \ > + for (i = 0; i < N; i++) \ > + c[i] = (op || d[i] > 37) ? 5 : 32; \ > +} > +FN (eq, a[i] == b[i]) > +FN (ne, a[i] != b[i]) > +FN (gt, a[i] > b[i]) > +FN (ge, a[i] >= b[i]) > +FN (lt, a[i] < b[i]) > +FN (le, a[i] <= b[i]) > +FN (unle, !__builtin_isgreater (a[i], b[i])) > +FN (unlt, !__builtin_isgreaterequal (a[i], b[i])) > +FN (unge, !__builtin_isless (a[i], b[i])) > +FN (ungt, !__builtin_islessequal (a[i], b[i])) > +FN (uneq, !__builtin_islessgreater (a[i], b[i])) > +FN (ordered, !__builtin_isunordered (a[i], b[i])) > +FN (unordered, __builtin_isunordered (a[i], b[i])) > + > +#define TEST(name, GT, LT, EQ, UO) \ > + name (); \ > + for (i = 0; i < N; i++) \ > + { \ > + int v; \ > + switch (i % 4) \ > + { \ > + case 0: v = GT ? 5 : 32; break; \ > + case 1: v = LT ? 5 : 32; break; \ > + case 2: v = EQ ? 5 : 32; break; \ > + case 3: v = UO ? 5 : 32; break; \ > + } \ > + if (c[i] != v) \ > + __builtin_abort (); \ > + } > + > +void > +PR71559_TEST (void) > +{ > + int i; > + asm volatile ("" : : "g" (a), "g" (b), "g" (c), "g" (d) : "memory"); > + TEST (eq, 0, 0, 1, 0) > + TEST (ne, 1, 1, 0, 1) > + TEST (gt, 1, 0, 0, 0) > + TEST (ge, 1, 0, 1, 0) > + TEST (lt, 0, 1, 0, 0) > + TEST (le, 0, 1, 1, 0) > + TEST (unle, 0, 1, 1, 1) > + TEST (unlt, 0, 1, 0, 1) > + TEST (unge, 1, 0, 1, 1) > + TEST (ungt, 1, 0, 0, 1) > + TEST (uneq, 0, 0, 1, 1) > + TEST (ordered, 1, 1, 1, 0) > + TEST (unordered, 0, 0, 0, 1) > +} > --- gcc/testsuite/gcc.target/i386/avx-pr71559.c.jj 2016-06-20 12:10:44.028582301 +0200 > +++ gcc/testsuite/gcc.target/i386/avx-pr71559.c 2016-06-20 12:14:32.627616114 +0200 > @@ -0,0 +1,8 @@ > +/* PR target/71559 */ > +/* { dg-do run { target avx } } */ > +/* { dg-options "-O2 -ftree-vectorize -mavx" } */ > + > +#include "avx-check.h" > +#define PR71559_TEST avx_test > + > +#include "sse2-pr71559.c" > --- gcc/testsuite/gcc.target/i386/avx512f-pr71559.c.jj 2016-06-20 12:11:32.812949299 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-pr71559.c 2016-06-20 12:14:51.070376810 +0200 > @@ -0,0 +1,8 @@ > +/* PR target/71559 */ > +/* { dg-do run { target avx512f } } */ > +/* { dg-options "-O2 -ftree-vectorize -mavx512f" } */ > + > +#include "avx512f-check.h" > +#define PR71559_TEST avx512f_test > + > +#include "sse2-pr71559.c" > > Jakub
On Mon, Jun 20, 2016 at 09:04:26PM +0200, Uros Bizjak wrote: > OK for mainline and release branches after a week or so without > problems in mainline. Ok, thanks. > (I tried to review usage of all those bits, LGTM, but mistakes can happen...) I really hope the testcase should double check all that, ix86_fp_cmp_code_to_pcmp_immediate is run with it on all the listed codes except LTGT, and the expected results were written independently from the tables and are also checked with SSE2 and AVX implementations. When I've limited the testcase to the functions with only EQ/NE/GT/LE/GE/LT, the testcase doesn't ICE, but aborted under the AVX512F emulator. Jakub
--- gcc/config/i386/i386.c.jj 2016-06-20 10:36:29.489994876 +0200 +++ gcc/config/i386/i386.c 2016-06-20 12:07:37.311006144 +0200 @@ -23622,17 +23622,33 @@ ix86_fp_cmp_code_to_pcmp_immediate (enum switch (code) { case EQ: - return 0x08; + return 0x00; case NE: return 0x04; case GT: - return 0x16; + return 0x0e; case LE: - return 0x1a; + return 0x02; case GE: - return 0x15; + return 0x0d; case LT: - return 0x19; + return 0x01; + case UNLE: + return 0x0a; + case UNLT: + return 0x09; + case UNGE: + return 0x05; + case UNGT: + return 0x06; + case UNEQ: + return 0x18; + case LTGT: + return 0x0c; + case ORDERED: + return 0x07; + case UNORDERED: + return 0x03; default: gcc_unreachable (); } --- gcc/testsuite/gcc.target/i386/sse2-pr71559.c.jj 2016-06-20 12:10:27.621795187 +0200 +++ gcc/testsuite/gcc.target/i386/sse2-pr71559.c 2016-06-20 12:14:44.821457893 +0200 @@ -0,0 +1,73 @@ +/* PR target/71559 */ +/* { dg-do run { target sse2 } } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +#ifndef PR71559_TEST +#include "sse2-check.h" +#define PR71559_TEST sse2_test +#endif + +#define N 16 +float a[N] = { 5.0f, -3.0f, 1.0f, __builtin_nanf (""), 9.0f, 7.0f, -3.0f, -9.0f, + -3.0f, -5.0f, -9.0f, __builtin_nanf (""), 0.5f, -0.5f, 0.0f, 0.0f }; +float b[N] = { -5.0f, 3.0f, 1.0f, 7.0f, 8.0f, 8.0f, -3.0f, __builtin_nanf (""), + -4.0f, -4.0f, -9.0f, __builtin_nanf (""), 0.0f, 0.0f, 0.0f, __builtin_nanf ("") }; +int c[N], d[N]; + +#define FN(name, op) \ +void \ +name (void) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] = (op || d[i] > 37) ? 5 : 32; \ +} +FN (eq, a[i] == b[i]) +FN (ne, a[i] != b[i]) +FN (gt, a[i] > b[i]) +FN (ge, a[i] >= b[i]) +FN (lt, a[i] < b[i]) +FN (le, a[i] <= b[i]) +FN (unle, !__builtin_isgreater (a[i], b[i])) +FN (unlt, !__builtin_isgreaterequal (a[i], b[i])) +FN (unge, !__builtin_isless (a[i], b[i])) +FN (ungt, !__builtin_islessequal (a[i], b[i])) +FN (uneq, !__builtin_islessgreater (a[i], b[i])) +FN (ordered, !__builtin_isunordered (a[i], b[i])) +FN (unordered, __builtin_isunordered (a[i], b[i])) + +#define TEST(name, GT, LT, EQ, UO) \ + name (); \ + for (i = 0; i < N; i++) \ + { \ + int v; \ + switch (i % 4) \ + { \ + case 0: v = GT ? 5 : 32; break; \ + case 1: v = LT ? 5 : 32; break; \ + case 2: v = EQ ? 5 : 32; break; \ + case 3: v = UO ? 5 : 32; break; \ + } \ + if (c[i] != v) \ + __builtin_abort (); \ + } + +void +PR71559_TEST (void) +{ + int i; + asm volatile ("" : : "g" (a), "g" (b), "g" (c), "g" (d) : "memory"); + TEST (eq, 0, 0, 1, 0) + TEST (ne, 1, 1, 0, 1) + TEST (gt, 1, 0, 0, 0) + TEST (ge, 1, 0, 1, 0) + TEST (lt, 0, 1, 0, 0) + TEST (le, 0, 1, 1, 0) + TEST (unle, 0, 1, 1, 1) + TEST (unlt, 0, 1, 0, 1) + TEST (unge, 1, 0, 1, 1) + TEST (ungt, 1, 0, 0, 1) + TEST (uneq, 0, 0, 1, 1) + TEST (ordered, 1, 1, 1, 0) + TEST (unordered, 0, 0, 0, 1) +} --- gcc/testsuite/gcc.target/i386/avx-pr71559.c.jj 2016-06-20 12:10:44.028582301 +0200 +++ gcc/testsuite/gcc.target/i386/avx-pr71559.c 2016-06-20 12:14:32.627616114 +0200 @@ -0,0 +1,8 @@ +/* PR target/71559 */ +/* { dg-do run { target avx } } */ +/* { dg-options "-O2 -ftree-vectorize -mavx" } */ + +#include "avx-check.h" +#define PR71559_TEST avx_test + +#include "sse2-pr71559.c" --- gcc/testsuite/gcc.target/i386/avx512f-pr71559.c.jj 2016-06-20 12:11:32.812949299 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-pr71559.c 2016-06-20 12:14:51.070376810 +0200 @@ -0,0 +1,8 @@ +/* PR target/71559 */ +/* { dg-do run { target avx512f } } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512f" } */ + +#include "avx512f-check.h" +#define PR71559_TEST avx512f_test + +#include "sse2-pr71559.c"