===================================================================
@@ -67,6 +67,9 @@
#define NULL_BLOCK ((basic_block) NULL)
+/* TRUE if after combine pass. */
+static bool ifcvt_after_combine;
+
/* # of IF-THEN or IF-THEN-ELSE blocks we looked at */
static int num_possible_if_blocks;
@@ -144,8 +147,14 @@ cheap_bb_rtx_cost_p (const_basic_block bb, int sca
/* Our branch probability/scaling factors are just estimates and don't
account for cases where we can get speculation for free and other
secondary benefits. So we fudge the scale factor to make speculating
- appear a little more profitable. */
+ appear a little more profitable when optimizing for performance. */
scale += REG_BR_PROB_BASE / 8;
+
+  /* Set the scale to REG_BR_PROB_BASE to be more aggressive when
+     optimizing for size and after the combine pass. */
+ if (!optimize_function_for_speed_p (cfun) && ifcvt_after_combine)
+ scale = REG_BR_PROB_BASE;
+
max_cost *= scale;
while (1)
@@ -4445,6 +4454,7 @@ gate_handle_if_conversion (void)
static unsigned int
rest_of_handle_if_conversion (void)
{
+ ifcvt_after_combine = false;
if (flag_if_conversion)
{
if (dump_file)
@@ -4494,6 +4504,7 @@ gate_handle_if_after_combine (void)
static unsigned int
rest_of_handle_if_after_combine (void)
{
+ ifcvt_after_combine = true;
if_convert ();
return 0;
}
Wrong list. -----Original Message----- From: Bin Cheng [mailto:bin.cheng@arm.com] Sent: Monday, March 25, 2013 3:01 PM To: gcc@gcc.gnu.org Subject: [PATCH GCC]Relax the probability condition in CE pass when optimizing for code size Hi, The CE pass has been adapted to work with the probability of then/else branches. Now the transformation is done only when it's profitable. Problem is the change affects both performance and size, causing size regression in many cases (especially in C library like Newlib). So this patch relaxes the probability condition when we are optimizing for size. Below is an example from Newlib: unsigned int strlen (const char *); void * realloc (void * __r, unsigned int __size) ; void * memcpy (void *, const void *, unsigned int); int argz_add(char **argz , unsigned int *argz_len , const char *str) { int len_to_add = 0; unsigned int last = *argz_len; if (str == ((void *)0)) return 0; len_to_add = strlen(str) + 1; *argz_len += len_to_add; if(!(*argz = (char *)realloc(*argz, *argz_len))) return 12; memcpy(*argz + last, str, len_to_add); return 0; } The generated assembly for Os/cortex-m0 is like: argz_add: push {r0, r1, r2, r4, r5, r6, r7, lr} mov r6, r0 mov r7, r1 mov r4, r2 ldr r5, [r1] beq .L3 mov r0, r2 bl strlen add r0, r0, #1 add r1, r0, r5 str r0, [sp, #4] str r1, [r7] ldr r0, [r6] bl realloc mov r3, #12 str r0, [r6] cmp r0, #0 beq .L2 add r0, r0, r5 mov r1, r4 ldr r2, [sp, #4] bl memcpy mov r3, #0 b .L2 .L3: mov r3, r2 .L2: mov r0, r3 In which branch/mov instructions around .L3 can be CEed with this patch. During the work I observed passes before combine might interfere with CE pass, so this patch is enabled for ce2/ce3 after combination pass. It is tested on x86/thumb2 for both normal and Os. Is it ok for trunk? 2013-03-25 Bin Cheng <bin.cheng@arm.com> * ifcvt.c (ifcvt_after_combine): New static variable. (cheap_bb_rtx_cost_p): Set scale to REG_BR_PROB_BASE when optimizing for size. 
(rest_of_handle_if_conversion, rest_of_handle_if_after_combine): Clear/set the variable ifcvt_after_combine.