Power/GCC: Remove trailing NOP from byte-swap code
diff mbox

Message ID alpine.DEB.1.10.1406200120330.25395@tp.orcam.me.uk
State Accepted
Headers show

Commit Message

Maciej W. Rozycki June 20, 2014, 3:22 a.m. UTC
Hi,

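 This change removes an extraneous NOP instruction placed at the end of 
the code produced by each of the byte-swap patterns.  The NOP results 
from the expansion of the (const_int 0) RTL that the `define_split' 
definitions produce unnecessarily: their preparation statements emit the 
complete replacement sequence themselves, but do not finish with `DONE', 
so the placeholder pattern gets emitted as well.  The updated patterns 
follow what other targets do in corresponding situations.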

 The change in output produced can be illustrated with the following 
simple example:

$ cat bswap.c
long long
bswap (long long i)
{
  return __builtin_bswap64 (i);
}
$ powerpc-linux-gnu-gcc -S -dp -o bswap.s bswap.c

This currently produces the following code:

	.file	"bswap.c"
	.section	".text"
	.align 2
	.globl bswap
	.type	bswap, @function
bswap:
	stwu 1,-32(1)	 # 20	movsi_update/2	[length = 4]
	stw 31,28(1)	 # 21	*movsi_internal1/4	[length = 4]
	mr 31,1	 # 22	*movsi_internal1/1	[length = 4]
	stw 3,8(31)	 # 31	*movsi_internal1/4	[length = 4]
	stw 4,12(31)	 # 32	*movsi_internal1/4	[length = 4]
	lwz 9,8(31)	 # 33	*movsi_internal1/3	[length = 4]
	lwz 10,12(31)	 # 34	*movsi_internal1/3	[length = 4]
	rlwinm 7,10,8,0xffffffff	 # 38	rotlsi3/2	[length = 4]
	rlwimi 7,10,24,0,7	 # 39	insvsi_internal	[length = 4]
	rlwimi 7,10,24,16,23	 # 40	*insvsi_internal1	[length = 4]
	rlwinm 8,9,8,0xffffffff	 # 41	rotlsi3/2	[length = 4]
	rlwimi 8,9,24,0,7	 # 42	insvsi_internal	[length = 4]
	rlwimi 8,9,24,16,23	 # 43	*insvsi_internal1	[length = 4]
	nop	 # 37	nop	[length = 4]
	mr 10,8	 # 44	*movsi_internal1/1	[length = 4]
	mr 9,7	 # 45	*movsi_internal1/1	[length = 4]
	mr 3,9	 # 46	*movsi_internal1/1	[length = 4]
	mr 4,10	 # 47	*movsi_internal1/1	[length = 4]
	addi 11,31,32	 # 25	*addsi3_internal1/2	[length = 4]
	lwz 31,-4(11)	 # 26	*movsi_internal1/3	[length = 4]
	mr 1,11	 # 28	*movsi_internal1/1	[length = 4]
	blr	 # 29	*return_internal_si	[length = 4]
	.size	bswap,.-bswap

Notice the NOP in the middle of the function, immediately after the 
byte-swap sequence.  With this change applied, the following code is 
produced instead:

	.file	"bswap.c"
	.section	".text"
	.align 2
	.globl bswap
	.type	bswap, @function
bswap:
	stwu 1,-32(1)	 # 20	movsi_update/2	[length = 4]
	stw 31,28(1)	 # 21	*movsi_internal1/4	[length = 4]
	mr 31,1	 # 22	*movsi_internal1/1	[length = 4]
	stw 3,8(31)	 # 31	*movsi_internal1/4	[length = 4]
	stw 4,12(31)	 # 32	*movsi_internal1/4	[length = 4]
	lwz 9,8(31)	 # 33	*movsi_internal1/3	[length = 4]
	lwz 10,12(31)	 # 34	*movsi_internal1/3	[length = 4]
	rlwinm 7,10,8,0xffffffff	 # 37	rotlsi3/2	[length = 4]
	rlwimi 7,10,24,0,7	 # 38	insvsi_internal	[length = 4]
	rlwimi 7,10,24,16,23	 # 39	*insvsi_internal1	[length = 4]
	rlwinm 8,9,8,0xffffffff	 # 40	rotlsi3/2	[length = 4]
	rlwimi 8,9,24,0,7	 # 41	insvsi_internal	[length = 4]
	rlwimi 8,9,24,16,23	 # 42	*insvsi_internal1	[length = 4]
	mr 10,8	 # 43	*movsi_internal1/1	[length = 4]
	mr 9,7	 # 44	*movsi_internal1/1	[length = 4]
	mr 3,9	 # 45	*movsi_internal1/1	[length = 4]
	mr 4,10	 # 46	*movsi_internal1/1	[length = 4]
	addi 11,31,32	 # 25	*addsi3_internal1/2	[length = 4]
	lwz 31,-4(11)	 # 26	*movsi_internal1/3	[length = 4]
	mr 1,11	 # 28	*movsi_internal1/1	[length = 4]
	blr	 # 29	*return_internal_si	[length = 4]
	.size	bswap,.-bswap

 This has been regression tested with the powerpc-eabi target and the 
following multilibs:

-mcpu=603e
-mcpu=603e -msoft-float
-mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe
-mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe -msoft-float
-mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe
-mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe -mlittle
-mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe -msoft-float
-mcpu=7400 -maltivec -mabi=altivec

as well as the powerpc-linux-gnu target and the following multilibs:

-mcpu=603e
-mcpu=603e -msoft-float
-mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe
-mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe
-mcpu=7400 -maltivec -mabi=altivec
-mcpu=e5500 -m64

 OK to apply?

2014-06-20  Maciej W. Rozycki  <macro@codesourcery.com>

	gcc/
	* config/rs6000/rs6000.md: Append `DONE' to preparation
	statements of `bswap' pattern splitters.

  Maciej

gcc-ppc-bswap-done.diff

Patch
diff mbox

Index: gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.md
===================================================================
--- gcc-fsf-trunk-quilt.orig/gcc/config/rs6000/rs6000.md	2014-06-10 21:46:36.000000000 +0100
+++ gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.md	2014-06-11 02:40:42.028572744 +0100
@@ -2475,6 +2475,7 @@ 
   emit_insn (gen_bswapsi2 (op4_32, word_high));
   emit_insn (gen_ashldi3 (dest, op3, GEN_INT (32)));
   emit_insn (gen_iordi3 (dest, dest, op4));
+  DONE;
 }")
 
 (define_split
@@ -2536,6 +2537,7 @@ 
     }
   emit_insn (gen_bswapsi2 (word_high, src_si));
   emit_insn (gen_bswapsi2 (word_low, op3_si));
+  DONE;
 }")
 
 (define_split
@@ -2563,6 +2565,7 @@ 
   emit_insn (gen_bswapsi2 (op3_si, op2_si));
   emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32)));
   emit_insn (gen_iordi3 (dest, dest, op3));
+  DONE;
 }")
 
 (define_insn "bswapdi2_32bit"
@@ -2619,6 +2622,7 @@ 
 
   emit_insn (gen_bswapsi2 (dest2, word1));
   emit_insn (gen_bswapsi2 (dest1, word2));
+  DONE;
 }")
 
 (define_split
@@ -2667,6 +2671,7 @@ 
 
   emit_insn (gen_bswapsi2 (word2, src1));
   emit_insn (gen_bswapsi2 (word1, src2));
+  DONE;
 }")
 
 (define_split
@@ -2686,6 +2691,7 @@ 
 
   emit_insn (gen_bswapsi2 (dest1, src2));
   emit_insn (gen_bswapsi2 (dest2, src1));
+  DONE;
 }")
 
 (define_insn "mulsi3"