diff mbox

PR78634: ifcvt/i386 cost updates

Message ID b390fe8e-479a-b302-0c25-dc10f25d0cb3@redhat.com
State New
Headers show

Commit Message

Bernd Schmidt Dec. 2, 2016, 4 p.m. UTC
With the i386 backend no longer double-counting the cost of a SET, the 
default implementation default_max_noce_ifcvt_seq_cost now provides too 
high a bound for if conversion, allowing very costly substitutions.

The following patch fixes this by making an i386 variant of the hook, 
but first - James, do you recall how you arrived at the COSTS_N_INSNS(3) 
magic number? What happens on arm if you lower that in the default hook?

The change in ifcvt.c seems to have no bearing on the PR, I just noticed 
we were missing a cost check in one place.

Lightly tested so far, will bootstrap & test on x86_64-linux.


Bernd

Comments

James Greenhalgh Dec. 2, 2016, 7:58 p.m. UTC | #1
On Fri, Dec 02, 2016 at 05:00:05PM +0100, Bernd Schmidt wrote:
> With the i386 backend no longer double-counting the cost of a SET,
> the default implementation default_max_noce_ifcvt_seq_cost now
> provides too high a bound for if conversion, allowing very costly
> substitutions.
> 
> The following patch fixes this by making an i386 variant of the
> hook, but first - James, do you recall how you arrived at the
> COSTS_N_INSNS(3) magic number? What happens on arm if you lower that
> in the default hook?

Hi Bernd,

Given the magic numbers in BRANCH_COST already, 3 was chosen to give a
resonable chance of allowing if-conversion of blocks containing multiple
sets. iWe need to do this because for AArch64 and x86 the conditional
move pattern will expand to two further patterns, each of which will get
a cost (before my cost model patches you would simply count the total
number of expansions you would make, so the multiplication by three is
to compensate)

I wouldn't say there was much scientific to choosing between 2 and 3
beyond looking at cases which worked on x86_64 and AArch64 before I
modified the cost model, and again after, and trying to maintain the
number of such cases which were if-converted.

Setting this to 2 in the generic hook and forcing AArch64 to run with
a custom hook would be just as correct as this patch (though arguably
yours is the better Stage 3 patch as it has more limited scope).

Thanks,
James

> The change in ifcvt.c seems to have no bearing on the PR, I just
> noticed we were missing a cost check in one place.
> 
> Lightly tested so far, will bootstrap & test on x86_64-linux.
> 
> 
> Bernd

> 	PR rtl-optimization/78634
> 	* config/i386/i386.c (ix86_max_noce_ifcvt_seq_cost): New function.
> 	(TARGET_MAX_NOCE_IFCVT_SEQ_COST): Define.
> 	* ifcvt.c (noce_try_cmove): Add missing cost check.
> 
> Index: gcc/config/i386/i386.c
> ===================================================================
> --- gcc/config/i386/i386.c	(revision 242958)
> +++ gcc/config/i386/i386.c	(working copy)
> @@ -50301,6 +50301,28 @@ ix86_spill_class (reg_class_t rclass, ma
>    return NO_REGS;
>  }
>  
> +/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
> +   but returns a lower bound.  */
> +
> +static unsigned int
> +ix86_max_noce_ifcvt_seq_cost (edge e)
> +{
> +  bool predictable_p = predictable_edge_p (e);
> +
> +  enum compiler_param param
> +    = (predictable_p
> +       ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
> +       : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
> +
> +  /* If we have a parameter set, use that, otherwise take a guess using
> +     BRANCH_COST.  */
> +  if (global_options_set.x_param_values[param])
> +    return PARAM_VALUE (param);
> +  else
> +    return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
> +}
> +
> +
>  /* Implement targetm.vectorize.init_cost.  */
>  
>  static void *
> @@ -51615,6 +51637,8 @@ ix86_run_selftests (void)
>  #undef TARGET_EXPAND_DIVMOD_LIBFUNC
>  #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
>  
> +#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
> +#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
>  #if CHECKING_P
>  #undef TARGET_RUN_TARGET_SELFTESTS
>  #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
> Index: gcc/ifcvt.c
> ===================================================================
> --- gcc/ifcvt.c	(revision 242958)
> +++ gcc/ifcvt.c	(working copy)
> @@ -1826,7 +1826,7 @@ noce_try_cmove (struct noce_if_info *if_
>  	    noce_emit_move_insn (if_info->x, target);
>  
>  	  seq = end_ifcvt_sequence (if_info);
> -	  if (!seq)
> +	  if (!seq || !noce_conversion_profitable_p (seq, if_info))
>  	    return FALSE;
>  
>  	  emit_insn_before_setloc (seq, if_info->jump,
Uros Bizjak Dec. 3, 2016, 9:49 a.m. UTC | #2
On Fri, Dec 2, 2016 at 8:58 PM, James Greenhalgh
<james.greenhalgh@arm.com> wrote:
> On Fri, Dec 02, 2016 at 05:00:05PM +0100, Bernd Schmidt wrote:
>> With the i386 backend no longer double-counting the cost of a SET,
>> the default implementation default_max_noce_ifcvt_seq_cost now
>> provides too high a bound for if conversion, allowing very costly
>> substitutions.
>>
>> The following patch fixes this by making an i386 variant of the
>> hook, but first - James, do you recall how you arrived at the
>> COSTS_N_INSNS(3) magic number? What happens on arm if you lower that
>> in the default hook?
>
> Hi Bernd,
>
> Given the magic numbers in BRANCH_COST already, 3 was chosen to give a
> resonable chance of allowing if-conversion of blocks containing multiple
> sets. iWe need to do this because for AArch64 and x86 the conditional
> move pattern will expand to two further patterns, each of which will get
> a cost (before my cost model patches you would simply count the total
> number of expansions you would make, so the multiplication by three is
> to compensate)
>
> I wouldn't say there was much scientific to choosing between 2 and 3
> beyond looking at cases which worked on x86_64 and AArch64 before I
> modified the cost model, and again after, and trying to maintain the
> number of such cases which were if-converted.
>
> Setting this to 2 in the generic hook and forcing AArch64 to run with
> a custom hook would be just as correct as this patch (though arguably
> yours is the better Stage 3 patch as it has more limited scope).

Based on the above explanation, the patch is OK.

Thanks,
Uros.
Bernd Schmidt Dec. 9, 2016, 11:49 a.m. UTC | #3
On 12/03/2016 10:49 AM, Uros Bizjak wrote:

> Based on the above explanation, the patch is OK.

I'll be treating the ifcvt part of it as obvious. However, testing 
showed an issue with the i386 funcspec-11 test:

/* PR target/36936 */
/* { dg-do compile } */
/* { dg-require-effective-target ia32 } */
/* { dg-options "-O2 -march=i386" } */
/* { dg-final { scan-assembler "cmov" } } */

extern int foo (int) __attribute__((__target__("arch=i686")));

int
foo (int x)
{
   if (x < 0)
     x = 1;
   return x;
}

It wants to test that we can temporarily switch the arch, but we're 
still using the i386 cost table, with a branch cost of 1. This means the 
cmov now won't get generated for cost reasons.

It seems like whether we're generating cmov when tuning for i386 isn't 
the most critical thing to worry about, so I propose amending the test 
to also pass in -mtune=i686. Ok with that change?


Bernd
Bernd Schmidt Jan. 18, 2017, 12:10 p.m. UTC | #4
On 12/09/2016 12:49 PM, Bernd Schmidt wrote:
> On 12/03/2016 10:49 AM, Uros Bizjak wrote:
>
>> Based on the above explanation, the patch is OK.
>
> I'll be treating the ifcvt part of it as obvious. However, testing
> showed an issue with the i386 funcspec-11 test:
>
> /* PR target/36936 */
> /* { dg-do compile } */
> /* { dg-require-effective-target ia32 } */
> /* { dg-options "-O2 -march=i386" } */
> /* { dg-final { scan-assembler "cmov" } } */
>
> extern int foo (int) __attribute__((__target__("arch=i686")));
>
> int
> foo (int x)
> {
>   if (x < 0)
>     x = 1;
>   return x;
> }
>
> It wants to test that we can temporarily switch the arch, but we're
> still using the i386 cost table, with a branch cost of 1. This means the
> cmov now won't get generated for cost reasons.
>
> It seems like whether we're generating cmov when tuning for i386 isn't
> the most critical thing to worry about, so I propose amending the test
> to also pass in -mtune=i686. Ok with that change?

Ping?


Bernd
Uros Bizjak Jan. 18, 2017, 12:19 p.m. UTC | #5
On Wed, Jan 18, 2017 at 1:10 PM, Bernd Schmidt <bschmidt@redhat.com> wrote:
> On 12/09/2016 12:49 PM, Bernd Schmidt wrote:
>>
>> On 12/03/2016 10:49 AM, Uros Bizjak wrote:
>>
>>> Based on the above explanation, the patch is OK.
>>
>>
>> I'll be treating the ifcvt part of it as obvious. However, testing
>> showed an issue with the i386 funcspec-11 test:
>>
>> /* PR target/36936 */
>> /* { dg-do compile } */
>> /* { dg-require-effective-target ia32 } */
>> /* { dg-options "-O2 -march=i386" } */
>> /* { dg-final { scan-assembler "cmov" } } */
>>
>> extern int foo (int) __attribute__((__target__("arch=i686")));
>>
>> int
>> foo (int x)
>> {
>>   if (x < 0)
>>     x = 1;
>>   return x;
>> }
>>
>> It wants to test that we can temporarily switch the arch, but we're
>> still using the i386 cost table, with a branch cost of 1. This means the
>> cmov now won't get generated for cost reasons.
>>
>> It seems like whether we're generating cmov when tuning for i386 isn't
>> the most critical thing to worry about, so I propose amending the test
>> to also pass in -mtune=i686. Ok with that change?
>
>
> Ping?

LGTM.

Thanks,
Uros.
diff mbox

Patch

	PR rtl-optimization/78634
	* config/i386/i386.c (ix86_max_noce_ifcvt_seq_cost): New function.
	(TARGET_MAX_NOCE_IFCVT_SEQ_COST): Define.
	* ifcvt.c (noce_try_cmove): Add missing cost check.

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 242958)
+++ gcc/config/i386/i386.c	(working copy)
@@ -50301,6 +50301,28 @@  ix86_spill_class (reg_class_t rclass, ma
   return NO_REGS;
 }
 
+/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
+   but returns a lower bound.  */
+
+static unsigned int
+ix86_max_noce_ifcvt_seq_cost (edge e)
+{
+  bool predictable_p = predictable_edge_p (e);
+
+  enum compiler_param param
+    = (predictable_p
+       ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
+       : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
+
+  /* If we have a parameter set, use that, otherwise take a guess using
+     BRANCH_COST.  */
+  if (global_options_set.x_param_values[param])
+    return PARAM_VALUE (param);
+  else
+    return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
+}
+
+
 /* Implement targetm.vectorize.init_cost.  */
 
 static void *
@@ -51615,6 +51637,8 @@  ix86_run_selftests (void)
 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
 
+#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
+#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
Index: gcc/ifcvt.c
===================================================================
--- gcc/ifcvt.c	(revision 242958)
+++ gcc/ifcvt.c	(working copy)
@@ -1826,7 +1826,7 @@  noce_try_cmove (struct noce_if_info *if_
 	    noce_emit_move_insn (if_info->x, target);
 
 	  seq = end_ifcvt_sequence (if_info);
-	  if (!seq)
+	  if (!seq || !noce_conversion_profitable_p (seq, if_info))
 	    return FALSE;
 
 	  emit_insn_before_setloc (seq, if_info->jump,