diff mbox

[3/3] Fix PR78120, in ifcvt/rtlanal/i386.

Message ID 36753dee-a20d-7c91-da3d-2394b29727e1@redhat.com
State New
Headers show

Commit Message

Bernd Schmidt Nov. 23, 2016, 7:02 p.m. UTC
On 11/23/2016 07:57 PM, Bernd Schmidt wrote:
> 3. ifcvt computes the sum of costs for the involved blocks, but only
> makes a before/after comparison when optimizing for size. When
> optimizing for speed, it uses max_seq_cost, which is an estimate
> computed from BRANCH_COST, which in turn can be zero for predictable
> branches on x86.

This is the final patch and has the testcase. It also happens to be the 
least risky of the series so it could be applied on its own (without the 
test).


Bernd

Comments

Jeff Law Nov. 23, 2016, 7:37 p.m. UTC | #1
On 11/23/2016 12:02 PM, Bernd Schmidt wrote:
> On 11/23/2016 07:57 PM, Bernd Schmidt wrote:
>> 3. ifcvt computes the sum of costs for the involved blocks, but only
>> makes a before/after comparison when optimizing for size. When
>> optimizing for speed, it uses max_seq_cost, which is an estimate
>> computed from BRANCH_COST, which in turn can be zero for predictable
>> branches on x86.
>
> This is the final patch and has the testcase. It also happens to be the
> least risky of the series so it could be applied on its own (without the
> test).
>
>
> Bernd
>
>
> 71280-3.diff
>
>
> 	PR rtl-optimization/78120
> 	* ifcvt.c (noce_conversion_profitable_p): Check original cost in all
> 	cases, and additionally test against max_seq_cost for speed
> 	optimization.
> 	(noce_process_if_block): Compute an estimate for the original cost when
> 	optimizing for speed, using the minimum of then and else block costs.
>
> 	PR rtl-optimization/78120
> 	* gcc.target/i386/pr78120.c: New test.
Also OK.  Obviously Uros has the call on the x86 target change.  Stage 
the series in as you see fit given the dependencies.

jeff
diff mbox

Patch

	PR rtl-optimization/78120
	* ifcvt.c (noce_conversion_profitable_p): Check original cost in all
	cases, and additionally test against max_seq_cost for speed
	optimization.
	(noce_process_if_block): Compute an estimate for the original cost when
	optimizing for speed, using the minimum of then and else block costs.

	PR rtl-optimization/78120
	* gcc.target/i386/pr78120.c: New test.

Index: gcc/ifcvt.c
===================================================================
--- gcc/ifcvt.c	(revision 242038)
+++ gcc/ifcvt.c	(working copy)
@@ -812,8 +812,10 @@  struct noce_if_info
      we're optimizing for size.  */
   bool speed_p;
 
-  /* The combined cost of COND, JUMP and the costs for THEN_BB and
-     ELSE_BB.  */
+  /* An estimate of the original costs.  When optimizing for size, this is the
+     combined cost of COND, JUMP and the costs for THEN_BB and ELSE_BB.
+     When optimizing for speed, we use the costs of COND plus the minimum of
+     the costs for THEN_BB and ELSE_BB, as computed in the next field.  */
   unsigned int original_cost;
 
   /* Maximum permissible cost for the unconditional sequence we should
@@ -852,12 +857,12 @@  noce_conversion_profitable_p (rtx_insn *
   /* Cost up the new sequence.  */
   unsigned int cost = seq_cost (seq, speed_p);
 
+  if (cost <= if_info->original_cost)
+    return true;
+
   /* When compiling for size, we can make a reasonably accurately guess
-     at the size growth.  */
-  if (!speed_p)
-    return cost <= if_info->original_cost;
-  else
-    return cost <= if_info->max_seq_cost;
+     at the size growth.  When compiling for speed, use the maximum.  */
+  return speed_p && cost <= if_info->max_seq_cost;
 }
 
 /* Helper function for noce_try_store_flag*.  */
@@ -3441,15 +3446,24 @@  noce_process_if_block (struct noce_if_in
 	}
     }
 
-  if (! bb_valid_for_noce_process_p (then_bb, cond, &if_info->original_cost,
+  bool speed_p = optimize_bb_for_speed_p (test_bb);
+  unsigned int then_cost = 0, else_cost = 0;
+  if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost,
 				    &if_info->then_simple))
     return false;
 
   if (else_bb
-      && ! bb_valid_for_noce_process_p (else_bb, cond, &if_info->original_cost,
-				      &if_info->else_simple))
+      && !bb_valid_for_noce_process_p (else_bb, cond, &else_cost,
+				       &if_info->else_simple))
     return false;
 
+  if (else_bb == NULL)
+    if_info->original_cost += then_cost;
+  else if (speed_p)
+    if_info->original_cost += MIN (then_cost, else_cost);
+  else
+    if_info->original_cost += then_cost + else_cost;
+
   insn_a = last_active_insn (then_bb, FALSE);
   set_a = single_set (insn_a);
   gcc_assert (set_a);
Index: gcc/testsuite/gcc.target/i386/pr78120.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr78120.c	(nonexistent)
+++ gcc/testsuite/gcc.target/i386/pr78120.c	(working copy)
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic" } */
+/* { dg-final { scan-assembler "adc" } } */
+/* { dg-final { scan-assembler-not "jmp" } } */
+
+typedef unsigned long u64;
+
+typedef struct {
+  u64 hi, lo;
+} u128;
+
+static inline u128 add_u128 (u128 a, u128 b)
+{
+  a.lo += b.lo;
+  if (a.lo < b.lo)
+    a.hi++;
+
+  return a;
+}
+
+extern u128 t1, t2, t3;
+
+void foo (void)
+{
+  t1 = add_u128 (t1, t2);
+  t1 = add_u128 (t1, t3);
+}