Message ID | 1513107641.30918.8.camel@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
Series | [rs6000,PR,target/82190] fix mem size info in rtl generated by memcmp and strncmp/strcmp builtin expansion | expand |
On Tue, Dec 12, 2017 at 01:40:41PM -0600, Aaron Sawdey wrote: > 2017-12-12 Aaron Sawdey <acsawdey@linux.vnet.ibm.com> > > PR target/82190 > * config/rs6000/rs6000-string.c (expand_block_move, > expand_strn_compare): fix set_mem_size() calls. That should be capitalized: Fix instead of fix > --- gcc/config/rs6000/rs6000-string.c (revision 255585) > +++ gcc/config/rs6000/rs6000-string.c (working copy) > @@ -1247,6 +1247,9 @@ > if (bytes > rs6000_block_move_inline_limit) > return 0; > > + bool isP8 = (rs6000_cpu == PROCESSOR_POWER8); > + bool isP9 = (rs6000_cpu == PROCESSOR_POWER9); > + > for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes) > { > union { > @@ -1258,7 +1261,7 @@ > > /* Altivec first, since it will be faster than a string move > when it applies, and usually not significantly larger. */ > - if (TARGET_ALTIVEC && bytes >= 16 && align >= 128) > + if (TARGET_ALTIVEC && bytes >= 16 && (isP8 || isP9 || align >= 128)) > { > move_bytes = 16; > mode = V4SImode; Is this the patch you meant to attach? First of all, it only changes expand_block_move, not expand_strn_compare, and the change seems more like an optimization of P8/P9 rather than actual fix (otherwise, wouldn't it fail on say P7?). > + return 0; > +} > + > + Please avoid unnecessary trailing whitespace. Jakub
On Tue, 2017-12-12 at 20:50 +0100, Jakub Jelinek wrote: > On Tue, Dec 12, 2017 at 01:40:41PM -0600, Aaron Sawdey wrote: > > 2017-12-12 Aaron Sawdey <acsawdey@linux.vnet.ibm.com> > > > > PR target/82190 > > * config/rs6000/rs6000-string.c (expand_block_move, > > expand_strn_compare): fix set_mem_size() calls. > > That should be capitalized: Fix instead of fix [wrong patch deleted] > Is this the patch you meant to attach? First of all, it only changes > expand_block_move, not expand_strn_compare, and the change seems more > like an optimization of P8/P9 rather than actual fix (otherwise, > wouldn't > it fail on say P7?). > > > + return 0; > > +} > > + > > + > > Please avoid unnecessary trailing whitespace. > Jakub, Yes that is a different patch unrelated to the 82190 fix. I've attached the correct patch. Thanks! Aaron
On Tue, Dec 12, 2017 at 02:01:16PM -0600, Aaron Sawdey wrote: > > > 2017-12-12 Aaron Sawdey <acsawdey@linux.vnet.ibm.com> > > > > > > PR target/82190 > > > * config/rs6000/rs6000-string.c (expand_block_move, > > > expand_strn_compare): fix set_mem_size() calls. > > > > That should be capitalized: Fix instead of fix > > [wrong patch deleted] > Yes that is a different patch unrelated to the 82190 fix. I've > attached the correct patch. That looks better :-) Okay for trunk, thanks! Segher
On Tue, 2017-12-12 at 14:12 -0600, Segher Boessenkool wrote:
> That looks better :-) Okay for trunk, thanks!
As we discussed on IRC before christmas, I've simplified this to use
TARGET_EFFICIENT_UNALIGNED_VSX instead of checking explicitly for P8/P9
processors. Has the same effect but is cleaner/clearer. Committed in
256112.
Aaron
Index: gcc/config/rs6000/rs6000-string.c
===================================================================
--- gcc/config/rs6000/rs6000-string.c (revision 256110)
+++ gcc/config/rs6000/rs6000-string.c (working copy)
@@ -73,7 +73,7 @@
When optimize_size, avoid any significant code bloat; calling
memset is about 4 instructions, so allow for one instruction to
load zero and three to do clearing. */
- if (TARGET_ALTIVEC && align >= 128)
+ if (TARGET_ALTIVEC && (align >= 128 || TARGET_EFFICIENT_UNALIGNED_VSX))
clear_step = 16;
else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
clear_step = 8;
@@ -90,7 +90,7 @@
machine_mode mode = BLKmode;
rtx dest;
- if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
+ if (bytes >= 16 && TARGET_ALTIVEC && (align >= 128 || TARGET_EFFICIENT_UNALIGNED_VSX))
{
clear_bytes = 16;
mode = V4SImode;
@@ -1260,7 +1260,7 @@
/* Altivec first, since it will be faster than a string move
when it applies, and usually not significantly larger. */
- if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
+ if (TARGET_ALTIVEC && bytes >= 16 && (TARGET_EFFICIENT_UNALIGNED_VSX || align >= 128))
{
move_bytes = 16;
mode = V4SImode;
Index: gcc/config/rs6000/rs6000-string.c =================================================================== --- gcc/config/rs6000/rs6000-string.c (revision 255585) +++ gcc/config/rs6000/rs6000-string.c (working copy) @@ -1247,6 +1247,9 @@ if (bytes > rs6000_block_move_inline_limit) return 0; + bool isP8 = (rs6000_cpu == PROCESSOR_POWER8); + bool isP9 = (rs6000_cpu == PROCESSOR_POWER9); + for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes) { union { @@ -1258,7 +1261,7 @@ /* Altivec first, since it will be faster than a string move when it applies, and usually not significantly larger. */ - if (TARGET_ALTIVEC && bytes >= 16 && align >= 128) + if (TARGET_ALTIVEC && bytes >= 16 && (isP8 || isP9 || align >= 128)) { move_bytes = 16; mode = V4SImode; Index: gcc/testsuite/gcc.dg/pr82190.c =================================================================== --- gcc/testsuite/gcc.dg/pr82190.c (nonexistent) +++ gcc/testsuite/gcc.dg/pr82190.c (working copy) @@ -0,0 +1,22 @@ +/* PR target/82190 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-optimize-strlen -fweb" } */ + +char src[64] __attribute__ ((aligned)) = "aaaaaaa"; +char dst[64] __attribute__ ((aligned)); + +int +main () +{ + __builtin_memcpy (dst, src, 6); + if (__builtin_memcmp (dst, src, 6)) + __builtin_abort (); + + __builtin_memcpy (dst, src, 7); + if (__builtin_memcmp (dst, src, 7)) + __builtin_abort (); + + return 0; +} + +