diff mbox

[avr] : Increase branch costs after reload.

Message ID 65530b17-1c1a-ca6b-e6b9-5f4dcb7b006d@gjlay.de
State New
Headers show

Commit Message

Georg-Johann Lay Jan. 13, 2017, 1:28 p.m. UTC
This adds a penalty of 4 to the post-reload branch costs.

The purpose is to reduce the number of out-of-line blocks, as in

unsigned long variant5 (unsigned in)
{
     unsigned long out = 0;
     if (in & (1 << 0)) out |= 0xful << (4*0);
     if (in & (1 << 1)) out |= 0xful << (4*1);
     if (in & (1 << 2)) out |= 0xful << (4*2);
     if (in & (1 << 3)) out |= 0xful << (4*3);
     return out;
}

Without the patch, the code is


variant5:
	mov r18,r24	 ;  67	movqi_insn/1	[length = 1]
	sbrs r24,0	 ;  10	*sbrx_branchhi	[length = 2]
	rjmp .L6
	ldi r22,lo8(15)	 ;  5	*movsi/5	[length = 4]
	ldi r23,0
	ldi r24,0
	ldi r25,0
.L2:
	<some_code_and_epilogue>
.L6:
	ldi r22,0	 ;  4	*movsi/2	[length = 3]
	ldi r23,0
	movw r24,r22
	rjmp .L2	 ;  74	jump	[length = 1]


and with the patch it reads:

variant5:
	mov r18,r24	 ;  67	movqi_insn/1	[length = 1]
	ldi r22,lo8(15)	 ;  5	*movsi/5	[length = 4]
	ldi r23,0
	ldi r24,0
	ldi r25,0
	sbrc r18,0	 ;  10	*sbrx_branchhi	[length = 2]
	rjmp .L2
	ldi r22,0	 ;  4	*movsi/2	[length = 3]
	ldi r23,0
	movw r24,r22
.L2:
	<some_code_and_epilogue>


Using the fall-through edge saves insn 74.

The main blocker for increasing the default branch costs in general
is do_store_flag, which is a heap of assertions not using
rtx_costs and which gives the best results with the old
default of 0; that default is therefore left unchanged.

Tested without regressions.

Ok for trunk?

Johann

	* config/avr/avr.h (BRANCH_COST) [reload_completed]: Increase by 4.

Comments

Denis Chertykov Jan. 16, 2017, 8:05 a.m. UTC | #1
2017-01-13 17:28 GMT+04:00 Georg-Johann Lay <avr@gjlay.de>:
> This adds a penalty of 4 to the post-reload branch costs.
>
> Purpose is reduce the number of out-of-line blocks like in
>
> unsigned long variant5 (unsigned in)
> {
>     unsigned long out = 0;
>     if (in & (1 << 0)) out |= 0xful << (4*0);
>     if (in & (1 << 1)) out |= 0xful << (4*1);
>     if (in & (1 << 2)) out |= 0xful << (4*2);
>     if (in & (1 << 3)) out |= 0xful << (4*3);
>     return out;
> }
>
> without the patch, code is
>
>
> variant5:
>         mov r18,r24      ;  67  movqi_insn/1    [length = 1]
>         sbrs r24,0       ;  10  *sbrx_branchhi  [length = 2]
>         rjmp .L6
>         ldi r22,lo8(15)  ;  5   *movsi/5        [length = 4]
>         ldi r23,0
>         ldi r24,0
>         ldi r25,0
> .L2:
>         <some_code_and_epilogue>
> .L6:
>         ldi r22,0        ;  4   *movsi/2        [length = 3]
>         ldi r23,0
>         movw r24,r22
>         rjmp .L2         ;  74  jump    [length = 1]
>
>
> and with the patch it reads:
>
> variant5:
>         mov r18,r24      ;  67  movqi_insn/1    [length = 1]
>         ldi r22,lo8(15)  ;  5   *movsi/5        [length = 4]
>         ldi r23,0
>         ldi r24,0
>         ldi r25,0
>         sbrc r18,0       ;  10  *sbrx_branchhi  [length = 2]
>         rjmp .L2
>         ldi r22,0        ;  4   *movsi/2        [length = 3]
>         ldi r23,0
>         movw r24,r22
> .L2:
>         <some_code_and_epilogue>
>
>
> Using fall-through safes insn 74.
>
> Main blocker for not increasing default branch costs in general
> is do_store_flag which is a heap of assertions not using
> rtx_costs and which gives the best results with the old
> default of 0, which is not changed.
>
> Tested without regressions.
>
> Ok for trunk?
>
> Johann
>
>         * config/avr/avr.h (BRANCH_COST) [reload_completed]: Increase by 4.


Approved.
Please apply.
diff mbox

Patch

Index: config/avr/avr.h
===================================================================
--- config/avr/avr.h	(revision 244001)
+++ config/avr/avr.h	(working copy)
@@ -360,7 +360,12 @@  typedef struct avr_args
       }                                                                 \
   } while (0)
 
-#define BRANCH_COST(speed_p, predictable_p) avr_branch_cost
+/* We increase branch costs after reload in order to keep basic-block
+   reordering from introducing out-of-line jumps and to prefer fall-through
+   edges instead.  The default branch costs are 0, mainly because otherwise
+   do_store_flag might come up with bloated code.  */
+#define BRANCH_COST(speed_p, predictable_p)     \
+  (avr_branch_cost + (reload_completed ? 4 : 0))
 
 #define SLOW_BYTE_ACCESS 0