Message ID | 20190216183857.GE2135@tucnak |
---|---|
State | New |
Headers | show |
Series | Fix *r<noxa>sbg_sidi_srl pattern (PR target/89369) | expand |
On 16.02.19 19:38, Jakub Jelinek wrote: > Hi! > > The following patch fixes wrong-code on the following testcase extracted > from pseudo-RNG with e.g. -march=zEC12 -O2. > The problem is in the instruction emitted by the *r<noxa>sbg_sidi_srl > patterns. We have in *.final correct: > (insn 67 65 68 (parallel [ > (set (reg:SI 1 %r1 [189]) > (xor:SI (subreg:SI (zero_extract:DI (reg/v:DI 11 %r11 [orig:89 th ] [89]) > (const_int 32 [0x20]) > (const_int 24 [0x18])) 4) > (reg:SI 1 %r1 [187]))) > (clobber (reg:CC 33 %cc)) > ]) "pr89369.c":44:73 1419 {*rxsbg_sidi_srl} > (expr_list:REG_DEAD (reg/v:DI 11 %r11 [orig:89 th ] [89]) > (expr_list:REG_UNUSED (reg:CC 33 %cc) > (nil)))) > which is effectively (reg:SI %r1) ^= (unsigned) ((reg:DI %r11) >> 8). > But the pattern emits rxsbg %r1,%r11,40,63,56 which is effectively > (reg:SI %r1) ^= ((unsigned) ((reg:DI %r11) >> 8) & 0xffffff) > or equivalently > (reg:SI %r1) ^= ((reg:SI %r11) >> 8). Even in the pattern one can see > that it wants to extract exactly 32 bits always, no matter what the shift > count is. Fixed by always emitting 32,63,(32+pos from zero extract). > On that pr89369.c testcase, the patch also changes > - rxsbg %r12,%r9,64,63,32 > - rxsbg %r12,%r1,64,63,32 > + rxsbg %r12,%r9,32,63,32 > + rxsbg %r12,%r1,32,63,32 > and > - rxsbg %r1,%r3,64,63,32 > + rxsbg %r1,%r3,32,63,32 > which are all with zero_extract with len 32 and pos 0, so again, it wants > to extract the low 32 bits. I3 64 larger than I4 63 is just weird. > The patch also changes the instructions emitted in rXsbg_mode_sXl.c: > - rosbg %r2,%r3,34,63,62 > + rosbg %r2,%r3,32,63,62 > and > - rxsbg %r2,%r3,34,63,62 > + rxsbg %r2,%r3,32,63,62 > Here, it is > __attribute__ ((noinline)) unsigned int > rosbg_si_srl (unsigned int a, unsigned int b) > { > return a | (b >> 2); > } > __attribute__ ((noinline)) unsigned int > rxsbg_si_srl (unsigned int a, unsigned int b) > { > return a ^ (b >> 2); > } > so from quick POV, one might think 34,63,62 is better, as we want to or in > just the 30 bits from b. Both should actually work the same though, because > (subreg/s/v:SI (reg/v:DI 64 [ b+-4 ]) 4) - the b argument is passed zero > extended and so it doesn't really matter how many bits we extract, as long > as it is 30 or more. If I try instead: > __attribute__ ((noinline, noipa)) unsigned int > rosbg_si_srl (unsigned int a, unsigned long long b) > { > return a | ((unsigned) b >> 2); > } > __attribute__ ((noinline, noipa)) unsigned int > rxsbg_si_srl (unsigned int a, unsigned long long b) > { > return a ^ ((unsigned) b >> 2); > } > then both the unpatched and patched compiler emit properly > rosbg %r2,%r3,34,63,62 > and > rxsbg %r2,%r3,34,63,62 > through a different pattern, because in that case we must not rely on the > upper 32 bits of b being zero. > > In addition to this change, the patch adds a cleanup, there is no reason to > use a static buffer in each instruction and increase global state, we can > just tweak the arguments and let the caller deal with it. That is something > normally done in other parts of the s390.md as well. As small CONST_INTs > are hashed, it shouldn't increase compile time memory. > > Bootstrapped/regtested on s390x-linux, ok for trunk? > > 2019-02-16 Jakub Jelinek <jakub@redhat.com> > > PR target/89369 > * config/s390/s390.md (*r<noxa>sbg_<mode>_srl_bitmask, > *r<noxa>sbg_<mode>_sll, *r<noxa>sbg_<mode>_srl): Don't construct > pattern in a temporary buffer. > (*r<noxa>sbg_sidi_srl): Likewise. Always use 32 as I3 rather > than 64-operands[2]. > > * gcc.c-torture/execute/pr89369.c: New test. > * gcc.target/s390/md/rXsbg_mode_sXl.c (rosbg_si_srl, > rxsbg_si_srl): Expect last 3 operands 32,63,62 rather than > 34,63,62. Ok. Thanks! Andreas
--- gcc/config/s390/s390.md.jj 2019-02-05 22:59:04.883503954 +0100 +++ gcc/config/s390/s390.md 2019-02-15 18:54:35.037131906 +0100 @@ -4263,10 +4263,8 @@ (define_insn "*r<noxa>sbg_<mode>_srl_bit && s390_extzv_shift_ok (<bitsize>, 64 - INTVAL (operands[3]), INTVAL (operands[2]))" { - static char buffer[256]; - sprintf (buffer, "r<noxa>sbg\t%%0,%%1,%%<bfstart>2,%%<bfend>2,%ld", - 64 - INTVAL (operands[3])); - return buffer; + operands[3] = GEN_INT (64 - INTVAL (operands[3])); + return "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,%3"; } [(set_attr "op_type" "RIE")]) @@ -4301,10 +4299,8 @@ (define_insn "*r<noxa>sbg_<mode>_sll" (clobber (reg:CC CC_REGNUM))] "TARGET_Z10" { - static char buffer[256]; - sprintf (buffer, "r<noxa>sbg\t%%0,%%1,<bitoff>,%ld,%%2", - 63 - INTVAL (operands[2])); - return buffer; + operands[3] = GEN_INT (63 - INTVAL (operands[2])); + return "r<noxa>sbg\t%0,%1,<bitoff>,%3,%2"; } [(set_attr "op_type" "RIE")]) @@ -4322,10 +4318,9 @@ (define_insn "*r<noxa>sbg_<mode>_srl" (clobber (reg:CC CC_REGNUM))] "TARGET_Z10" { - static char buffer[256]; - sprintf (buffer, "r<noxa>sbg\t%%0,%%1,%ld,63,%ld", - <bitoff_plus> INTVAL (operands[2]), 64 - INTVAL (operands[2])); - return buffer; + operands[3] = GEN_INT (64 - INTVAL (operands[2])); + operands[2] = GEN_INT (<bitoff_plus> INTVAL (operands[2])); + return "r<noxa>sbg\t%0,%1,%2,63,%3"; } [(set_attr "op_type" "RIE")]) @@ -4343,10 +4338,8 @@ (define_insn "*r<noxa>sbg_sidi_srl" (clobber (reg:CC CC_REGNUM))] "TARGET_Z10" { - static char buffer[256]; - sprintf (buffer, "r<noxa>sbg\t%%0,%%1,%ld,63,%ld", - 64 - INTVAL (operands[2]), 32 + INTVAL (operands[2])); - return buffer; + operands[2] = GEN_INT (32 + INTVAL (operands[2])); + return "r<noxa>sbg\t%0,%1,32,63,%2"; } [(set_attr "op_type" "RIE")]) --- gcc/testsuite/gcc.c-torture/execute/pr89369.c.jj 2019-02-15 18:54:05.425620085 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr89369.c 2019-02-15 18:53:40.801026052 +0100 @@ -0,0 +1,69 @@ +/* PR target/89369 */ + +#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 && __CHAR_BIT__ == 8 +struct S { unsigned int u[4]; }; + +static void +foo (struct S *out, struct S const *in, int shift) +{ + unsigned long long th, tl, oh, ol; + th = ((unsigned long long) in->u[3] << 32) | in->u[2]; + tl = ((unsigned long long) in->u[1] << 32) | in->u[0]; + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 - shift * 8); + out->u[1] = ol >> 32; + out->u[0] = ol; + out->u[3] = oh >> 32; + out->u[2] = oh; +} + +static void +bar (struct S *out, struct S const *in, int shift) +{ + unsigned long long th, tl, oh, ol; + th = ((unsigned long long) in->u[3] << 32) | in->u[2]; + tl = ((unsigned long long) in->u[1] << 32) | in->u[0]; + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[1] = ol >> 32; + out->u[0] = ol; + out->u[3] = oh >> 32; + out->u[2] = oh; +} + +__attribute__((noipa)) static void +baz (struct S *r, struct S *a, struct S *b, struct S *c, struct S *d) +{ + struct S x, y; + bar (&x, a, 1); + foo (&y, c, 1); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> 11) & 0xdfffffefU) ^ y.u[0] ^ (d->u[0] << 18); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> 11) & 0xddfecb7fU) ^ y.u[1] ^ (d->u[1] << 18); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> 11) & 0xbffaffffU) ^ y.u[2] ^ (d->u[2] << 18); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> 11) & 0xbffffff6U) ^ y.u[3] ^ (d->u[3] << 18); +} + +int +main () +{ + struct S a[] = { { 0x000004d3, 0xbc5448db, 0xf22bde9f, 0xebb44f8f }, + { 0x03a32799, 0x60be8246, 0xa2d266ed, 0x7aa18536 }, + { 0x15a38518, 0xcf655ce1, 0xf3e09994, 0x50ef69fe }, + { 0x88274b07, 0xe7c94866, 0xc0ea9f47, 0xb6a83c43 }, + { 0xcd0d0032, 0x5d47f5d7, 0x5a0afbf6, 0xaea87b24 }, + { 0, 0, 0, 0 } }; + baz (&a[5], &a[0], &a[1], &a[2], &a[3]); + if (a[4].u[0] != a[5].u[0] || a[4].u[1] != a[5].u[1] + || a[4].u[2] != a[5].u[2] || a[4].u[3] != a[5].u[3]) + __builtin_abort (); + return 0; +} +#else +int +main () +{ + return 0; +} +#endif --- gcc/testsuite/gcc.target/s390/md/rXsbg_mode_sXl.c.jj 2018-11-16 17:33:44.126195406 +0100 +++ gcc/testsuite/gcc.target/s390/md/rXsbg_mode_sXl.c 2019-02-16 19:16:54.372512707 +0100 @@ -46,7 +46,7 @@ rosbg_si_srl (unsigned int a, unsigned i { return a | (b >> 2); } -/* { dg-final { scan-assembler-times "rosbg\t%r.,%r.,34,63,62" 1 } } */ +/* { dg-final { scan-assembler-times "rosbg\t%r.,%r.,32,63,62" 1 } } */ __attribute__ ((noinline)) unsigned int rxsbg_si_sll (unsigned int a, unsigned int b) @@ -60,7 +60,7 @@ rxsbg_si_srl (unsigned int a, unsigned i { return a ^ (b >> 2); } -/* { dg-final { scan-assembler-times "rxsbg\t%r.,%r.,34,63,62" 1 } } */ +/* { dg-final { scan-assembler-times "rxsbg\t%r.,%r.,32,63,62" 1 } } */ __attribute__ ((noinline)) unsigned long long di_sll (unsigned long long x)