From patchwork Fri Jul 16 17:49:23 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [rs6000] make inline block move amount configurable Date: Fri, 16 Jul 2010 07:49:23 -0000 From: Nathan Froyd X-Patchwork-Id: 59120 Message-Id: <20100716174922.GB12333@codesourcery.com> To: gcc-patches@gcc.gnu.org Cc: dje.gcc@gmail.com rs6000's expand_block_move only expands a move of up to 32 bytes (64 for 64-bit targets) into inline loads and stores. For some processors and some applications, this hard-coded limit is not useful and inhibits performance tuning. The patch below adds a new, rs6000-specific option to control that, with appropriate defaulting for Freescale's e500mc and e5500 (aka e500mc64) processors. There are testcases and documentation included. Tested with cross to powerpc-eabispe. OK to commit? -Nathan gcc/ * config/rs6000/rs6000.opt (mblock-move-inline-limit): New option. * config/rs6000/rs6000.c (rs6000_override_options): Set rs6000_block_move_inline_limit appropriately. (expand_block_move): Use rs6000_block_move_inline_limit. * doc/invoke.texi (mblock-move-inline-limit): Document. gcc/testsuite/ * gcc.target/powerpc/block-move-1.c: New test. * gcc.target/powerpc/block-move-2.c: New test. 
Index: gcc.target/powerpc/block-move-1.c =================================================================== --- gcc.target/powerpc/block-move-1.c (revision 0) +++ gcc.target/powerpc/block-move-1.c (revision 0) @@ -0,0 +1,14 @@ +/* Test that we bump up low values of -mblock-move-inline-limit. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mblock-move-inline-limit=8" } */ + +typedef __SIZE_TYPE__ size_t; +extern void *memcpy (void *, const void *, size_t); + +void +cpy16 (void *x, void *y) +{ + memcpy (x, y, 16); +} + +/* { dg-final { scan-assembler-not "memcpy" } } */ Index: gcc.target/powerpc/block-move-2.c =================================================================== --- gcc.target/powerpc/block-move-2.c (revision 0) +++ gcc.target/powerpc/block-move-2.c (revision 0) @@ -0,0 +1,14 @@ +/* Test that we honor -mblock-move-inline-limit. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mblock-move-inline-limit=128" } */ + +typedef __SIZE_TYPE__ size_t; +extern void *memcpy (void *, const void *, size_t); + +void +cpy128 (void *x, void *y) +{ + memcpy (x, y, 128); +} + +/* { dg-final { scan-assembler-not "memcpy" } } */ Index: ChangeLog =================================================================== Index: doc/invoke.texi =================================================================== --- doc/invoke.texi (revision 162262) +++ doc/invoke.texi (working copy) @@ -772,6 +772,7 @@ See RS/6000 and PowerPC Options. -mcall-sysv -mcall-netbsd @gol -maix-struct-return -msvr4-struct-return @gol -mabi=@var{abi-type} -msecure-plt -mbss-plt @gol +-mblock-move-inline-limit=@var{num} @gol -misel -mno-isel @gol -misel=yes -misel=no @gol -mspe -mno-spe @gol @@ -15626,6 +15627,13 @@ On embedded PowerPC systems, put all ini in the @samp{.data} section, and all uninitialized data in the @samp{.bss} section. 
+@item -mblock-move-inline-limit=@var{num} +@opindex mblock-move-inline-limit +Inline all block moves (such as calls to @code{memcpy} or structure +copies) less than or equal to @var{num} bytes. The minimum value for +@var{num} is 32 bytes on 32-bit targets and 64 bytes on 64-bit +targets; smaller values are silently raised to that minimum. The default value is target-specific. + @item -G @var{num} @opindex G @cindex smaller data references (PowerPC) Index: config/rs6000/rs6000.opt =================================================================== --- config/rs6000/rs6000.opt (revision 162262) +++ config/rs6000/rs6000.opt (working copy) @@ -245,6 +245,10 @@ mvrsave= Target RejectNegative Joined -mvrsave=yes/no Deprecated option. Use -mvrsave/-mno-vrsave instead +mblock-move-inline-limit= +Target Report Var(rs6000_block_move_inline_limit) Init(0) RejectNegative Joined UInteger +Specify how many bytes should be moved inline before calling out to memcpy/memmove + misel Target Report Mask(ISEL) Generate isel instructions Index: config/rs6000/rs6000.c =================================================================== --- config/rs6000/rs6000.c (revision 162263) +++ config/rs6000/rs6000.c (working copy) @@ -2696,6 +2696,19 @@ rs6000_override_options (const char *def else if (TARGET_ALTIVEC) target_flags |= (MASK_PPC_GFXOPT & ~target_flags_explicit); + /* E500mc does "better" if we inline more aggressively. Respect the + user's opinion, though. */ + if (rs6000_block_move_inline_limit == 0 + && (rs6000_cpu == PROCESSOR_PPCE500MC + || rs6000_cpu == PROCESSOR_PPCE500MC64)) + rs6000_block_move_inline_limit = 128; + + /* store_one_arg depends on expand_block_move to handle at least the + size of reg_parm_stack_space. */ + if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32)) + rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 
64 : 32); + + /* Set debug flags */ if (rs6000_debug_name) { @@ -13208,9 +13221,7 @@ expand_block_move (rtx operands[]) if (bytes <= 0) return 1; - /* store_one_arg depends on expand_block_move to handle at least the size of - reg_parm_stack_space. */ - if (bytes > (TARGET_POWERPC64 ? 64 : 32)) + if (bytes > rs6000_block_move_inline_limit) return 0; for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)