Patchwork Relax the probability condition in CE pass when optimizing for code size

login
register
mail settings
Submitter Bin Cheng
Date March 25, 2013, 7:15 a.m.
Message ID <000d01ce2928$8ca50a20$a5ef1e60$@cheng@arm.com>
Download mbox | patch
Permalink /patch/230576/
State New
Headers show

Comments

Bin Cheng - March 25, 2013, 7:15 a.m.
Wrong list.

-----Original Message-----
From: Bin Cheng [mailto:bin.cheng@arm.com] 
Sent: Monday, March 25, 2013 3:01 PM
To: gcc@gcc.gnu.org
Subject: [PATCH GCC]Relax the probability condition in CE pass when
optimizing for code size

Hi,
The CE pass has been adapted to work with the probability of then/else
branches. Now the transformation is done only when it's profitable.
The problem is that the change affects both performance and size, causing size
regressions in many cases (especially in C libraries like Newlib).
So this patch relaxes the probability condition when we are optimizing for
size.

Below is an example from Newlib:

unsigned int strlen (const char *);
void * realloc (void * __r, unsigned int __size) ; void * memcpy (void *,
const void *, unsigned int); int argz_add(char **argz , unsigned int
*argz_len , const char *str) {
  int len_to_add = 0;
  unsigned int last = *argz_len;

  if (str == ((void *)0))
    return 0;

  len_to_add = strlen(str) + 1;
  *argz_len += len_to_add;

  if(!(*argz = (char *)realloc(*argz, *argz_len)))
    return 12;

  memcpy(*argz + last, str, len_to_add);
  return 0;
}

The generated assembly for Os/cortex-m0 is like:

argz_add:
	push	{r0, r1, r2, r4, r5, r6, r7, lr}
	mov	r6, r0
	mov	r7, r1
	mov	r4, r2
	ldr	r5, [r1]
	beq	.L3
	mov	r0, r2
	bl	strlen
	add	r0, r0, #1
	add	r1, r0, r5
	str	r0, [sp, #4]
	str	r1, [r7]
	ldr	r0, [r6]
	bl	realloc
	mov	r3, #12
	str	r0, [r6]
	cmp	r0, #0
	beq	.L2
	add	r0, r0, r5
	mov	r1, r4
	ldr	r2, [sp, #4]
	bl	memcpy
	mov	r3, #0
	b	.L2
.L3:
	mov	r3, r2
.L2:
	mov	r0, r3

With this patch, the branch/mov instructions around .L3 can be converted by the CE pass.

During this work I observed that passes before combine might interfere with the CE
pass, so this patch is enabled for ce2/ce3, after the combine pass.

It is tested on x86/thumb2 for both normal and Os. Is it ok for trunk?


2013-03-25  Bin Cheng  <bin.cheng@arm.com>

	* ifcvt.c (ifcvt_after_combine): New static variable.
	(cheap_bb_rtx_cost_p): Set scale to REG_BR_PROB_BASE when optimizing
	for size.
	(rest_of_handle_if_conversion, rest_of_handle_if_after_combine):
	Clear/set the variable ifcvt_after_combine.

Patch

Index: gcc/ifcvt.c
===================================================================
--- gcc/ifcvt.c	(revision 197029)
+++ gcc/ifcvt.c	(working copy)
@@ -67,6 +67,9 @@ 
 
 #define NULL_BLOCK	((basic_block) NULL)
 
+/* TRUE if after combine pass.  */
+static bool ifcvt_after_combine;
+
 /* # of IF-THEN or IF-THEN-ELSE blocks we looked at  */
 static int num_possible_if_blocks;
 
@@ -144,8 +147,14 @@  cheap_bb_rtx_cost_p (const_basic_block bb, int sca
   /* Our branch probability/scaling factors are just estimates and don't
      account for cases where we can get speculation for free and other
      secondary benefits.  So we fudge the scale factor to make speculating
-     appear a little more profitable.  */
+     appear a little more profitable when optimizing for performance.  */
   scale += REG_BR_PROB_BASE / 8;
+
+  /* Set the scale to REG_BR_PROB_BASE to be more aggressive when
+     optimizing for size and after combine pass.  */
+  if (!optimize_function_for_speed_p (cfun) && ifcvt_after_combine)
+    scale = REG_BR_PROB_BASE;
+
   max_cost *= scale;
 
   while (1)
@@ -4445,6 +4454,7 @@  gate_handle_if_conversion (void)
 static unsigned int
 rest_of_handle_if_conversion (void)
 {
+  ifcvt_after_combine = false;
   if (flag_if_conversion)
     {
       if (dump_file)
@@ -4494,6 +4504,7 @@  gate_handle_if_after_combine (void)
 static unsigned int
 rest_of_handle_if_after_combine (void)
 {
+  ifcvt_after_combine = true;
   if_convert ();
   return 0;
 }