Message ID | Zf6JT9gWK/kc2Sqz@tucnak |
---|---|
State | New |
Headers | show |
Series | predcom: Punt for steps which aren't multiples of access size [PR111683] | expand |
> Am 23.03.2024 um 08:49 schrieb Jakub Jelinek <jakub@redhat.com>: > > Hi! > > On the following testcases, there is no overlap between data references > within a single iteration, but the data references have size which is twice > as large as the step, which means the data references overlap with the next > iteration which predcom doesn't take into account. > As discussed in the PR, even if the reference size is smaller than step, > if step isn't a multiple of the reference size, there could be overlaps with > some other iteration later on. > The initial version of the patch regressed (test still passed, but predcom > didn't optimize anymore) pr71083.c which has a packed char, short structure > and was reading/writing the short 2 bytes in there with step 3. > The following patch deals with that by retrying for COMPONENT_REFs also the > aggregate sizes etc., so that it then compares 3 bytes against step 3. > In make check-gcc/check-g++ this patch I believe affects code generation > for only the 2 new testcases according to statistics I've gathered. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Ok Thanks, Richard > 2024-03-23 Jakub Jelinek <jakub@redhat.com> > > PR middle-end/111683 > * tree-predcom.cc (pcom_worker::suitable_component_p): If has_write > and comp_step is RS_NONZERO, return false if any reference in the > component doesn't have DR_STEP a multiple of access size. > > * gcc.dg/pr111683-1.c: New test. > * gcc.dg/pr111683-2.c: New test. > > --- gcc/tree-predcom.cc.jj 2024-03-22 09:19:27.700756950 +0100 > +++ gcc/tree-predcom.cc 2024-03-22 14:01:21.758978338 +0100 > @@ -1102,8 +1102,39 @@ pcom_worker::suitable_component_p (struc > gcc_assert (ok); > first->offset = 0; > > - for (i = 1; comp->refs.iterate (i, &a); i++) > + FOR_EACH_VEC_ELT (comp->refs, i, a) > { > + if (has_write && comp->comp_step == RS_NONZERO) > + { > + /* Punt for non-invariant references where step isn't a multiple > + of reference size. If step is smaller than reference size, > + it overlaps the access in next iteration, if step is larger, > + but not multiple of the access size, there could be overlap > + in some later iteration. There might be more latent issues > + about this in predcom or data reference analysis. If the > + reference is a COMPONENT_REF, also check if step isn't a > + multiple of the containg aggregate size. See PR111683. */ > + tree ref = DR_REF (a->ref); > + tree step = DR_STEP (a->ref); > + if (TREE_CODE (ref) == COMPONENT_REF > + && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))) > + ref = TREE_OPERAND (ref, 0); > + do > + { > + tree sz = TYPE_SIZE_UNIT (TREE_TYPE (ref)); > + if (TREE_CODE (sz) != INTEGER_CST) > + return false; > + if (wi::multiple_of_p (wi::to_offset (step), > + wi::to_offset (sz), SIGNED)) > + break; > + if (TREE_CODE (ref) != COMPONENT_REF) > + return false; > + ref = TREE_OPERAND (ref, 0); > + } > + while (1); > + } > + if (i == 0) > + continue; > /* Polynomial offsets are no use, since we need to know the > gap between iteration numbers at compile time. */ > poly_widest_int offset; > --- gcc/testsuite/gcc.dg/pr111683-1.c.jj 2024-03-22 11:14:29.292908760 +0100 > +++ gcc/testsuite/gcc.dg/pr111683-1.c 2024-03-22 11:14:29.292908760 +0100 > @@ -0,0 +1,22 @@ > +/* PR middle-end/111683 */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +long long b[6] = { 3, 4, 5, 6, 7, 8 }, c[16]; > +long long d[9] = { 3, 7, 12, 18, 22, 26, 21, 15, 8 }; > +typedef long long U __attribute__ ((vector_size(16), may_alias, aligned(1))); > +typedef long long V __attribute__ ((vector_size(16), may_alias)); > + > +int > +main () > +{ > + for (int f = 0; f < 6; f++) > + { > + *(U *) &c[f] = *(U *) &c[f] + (V) { b[f], b[f] }; > + *(U *) &c[f + 2] = *(U *) &c[f + 2] + (V) { b[f], b[f] }; > + } > + for (int f = 0; f < 9; f++) > + if (c[f] != d[f]) > + __builtin_abort (); > + return 0; > +} > --- gcc/testsuite/gcc.dg/pr111683-2.c.jj 2024-03-22 11:14:29.292908760 +0100 > +++ gcc/testsuite/gcc.dg/pr111683-2.c 2024-03-22 11:14:29.292908760 +0100 > @@ -0,0 +1,27 @@ > +/* PR middle-end/111683 */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +int b[6] = { 3, 4, 5, 6, 7, 8 }, c[12]; > +int d[16] = { 0, 1, 3, 6, 10, 14, 12, 9, 5, 0, 0, 0 }; > + > +int > +main () > +{ > + int i; > + if (sizeof (int) * 2 != sizeof (long long)) > + return 0; > + for (i = 0; i < 6; i++) > + { > + long long a; > + __builtin_memcpy (&a, &c[i], sizeof (a)); > + a += (((long long) i) << (sizeof (int) * __CHAR_BIT__)) + i; > + __builtin_memcpy (&c[i], &a, sizeof (a)); > + __builtin_memcpy (&a, &c[i + 2], sizeof (a)); > + a += (((long long) i) << (sizeof (int) * __CHAR_BIT__)) + i; > + __builtin_memcpy (&c[i + 2], &a, sizeof (a)); > + } > + if (__builtin_memcmp (&c[0], &d[0], sizeof (c))) > + __builtin_abort (); > + return 0; > +} > > Jakub >
--- gcc/tree-predcom.cc.jj 2024-03-22 09:19:27.700756950 +0100 +++ gcc/tree-predcom.cc 2024-03-22 14:01:21.758978338 +0100 @@ -1102,8 +1102,39 @@ pcom_worker::suitable_component_p (struc gcc_assert (ok); first->offset = 0; - for (i = 1; comp->refs.iterate (i, &a); i++) + FOR_EACH_VEC_ELT (comp->refs, i, a) { + if (has_write && comp->comp_step == RS_NONZERO) + { + /* Punt for non-invariant references where step isn't a multiple + of reference size. If step is smaller than reference size, + it overlaps the access in next iteration, if step is larger, + but not multiple of the access size, there could be overlap + in some later iteration. There might be more latent issues + about this in predcom or data reference analysis. If the + reference is a COMPONENT_REF, also check if step isn't a + multiple of the containg aggregate size. See PR111683. */ + tree ref = DR_REF (a->ref); + tree step = DR_STEP (a->ref); + if (TREE_CODE (ref) == COMPONENT_REF + && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))) + ref = TREE_OPERAND (ref, 0); + do + { + tree sz = TYPE_SIZE_UNIT (TREE_TYPE (ref)); + if (TREE_CODE (sz) != INTEGER_CST) + return false; + if (wi::multiple_of_p (wi::to_offset (step), + wi::to_offset (sz), SIGNED)) + break; + if (TREE_CODE (ref) != COMPONENT_REF) + return false; + ref = TREE_OPERAND (ref, 0); + } + while (1); + } + if (i == 0) + continue; /* Polynomial offsets are no use, since we need to know the gap between iteration numbers at compile time. */ poly_widest_int offset; --- gcc/testsuite/gcc.dg/pr111683-1.c.jj 2024-03-22 11:14:29.292908760 +0100 +++ gcc/testsuite/gcc.dg/pr111683-1.c 2024-03-22 11:14:29.292908760 +0100 @@ -0,0 +1,22 @@ +/* PR middle-end/111683 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +long long b[6] = { 3, 4, 5, 6, 7, 8 }, c[16]; +long long d[9] = { 3, 7, 12, 18, 22, 26, 21, 15, 8 }; +typedef long long U __attribute__ ((vector_size(16), may_alias, aligned(1))); +typedef long long V __attribute__ ((vector_size(16), may_alias)); + +int +main () +{ + for (int f = 0; f < 6; f++) + { + *(U *) &c[f] = *(U *) &c[f] + (V) { b[f], b[f] }; + *(U *) &c[f + 2] = *(U *) &c[f + 2] + (V) { b[f], b[f] }; + } + for (int f = 0; f < 9; f++) + if (c[f] != d[f]) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.dg/pr111683-2.c.jj 2024-03-22 11:14:29.292908760 +0100 +++ gcc/testsuite/gcc.dg/pr111683-2.c 2024-03-22 11:14:29.292908760 +0100 @@ -0,0 +1,27 @@ +/* PR middle-end/111683 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +int b[6] = { 3, 4, 5, 6, 7, 8 }, c[12]; +int d[16] = { 0, 1, 3, 6, 10, 14, 12, 9, 5, 0, 0, 0 }; + +int +main () +{ + int i; + if (sizeof (int) * 2 != sizeof (long long)) + return 0; + for (i = 0; i < 6; i++) + { + long long a; + __builtin_memcpy (&a, &c[i], sizeof (a)); + a += (((long long) i) << (sizeof (int) * __CHAR_BIT__)) + i; + __builtin_memcpy (&c[i], &a, sizeof (a)); + __builtin_memcpy (&a, &c[i + 2], sizeof (a)); + a += (((long long) i) << (sizeof (int) * __CHAR_BIT__)) + i; + __builtin_memcpy (&c[i + 2], &a, sizeof (a)); + } + if (__builtin_memcmp (&c[0], &d[0], sizeof (c))) + __builtin_abort (); + return 0; +}