Patchwork [libitm] : Optimize x86_64 gtm_jmpbuf layout

login
register
mail settings
Submitter Uros Bizjak
Date Jan. 24, 2012, 12:05 a.m.
Message ID <CAFULd4aEt3CzaDi6SL9b997d_U7BGuAwdUqjmv9X_b2m3BLCQA@mail.gmail.com>
Download mbox | patch
Permalink /patch/137497/
State New
Headers show

Comments

Uros Bizjak - Jan. 24, 2012, 12:05 a.m.
Hello!

The attached patch optimizes the x86_64 gtm_jmpbuf layout to avoid copying
the return address. The optimized layout uses the same trick as x86_32 — the
call-saved registers are stored just below the return address, and there is
just enough space in the call alignment hole to fit all of them there.

2012-01-24  Uros Bizjak  <ubizjak@gmail.com>

	* config/x86/target.h (gtm_jmpbuf) [__x86_64__]: Move rip to the
	end of struct.
	* config/x86/sjlj.S (_ITM_beginTransaction) [__x86_64__]: Update
	offset values.  Do not copy return address.  Decrement stack
	by 56 bytes only.
	(GTM_longjump) [__x86_64__]: Update offset values.

Tested on x86_64-pc-linux-gnu.

OK for mainline?

BTW: Do we really need the move at the end of GTM_longjmp:

	movq	48(%rsi), %r15
	movq	56(%rsi), %rdx
>>	movl	%edi, %eax
	cfi_def_cfa(%rcx, 0)
	cfi_register(%rip, %rdx)
	movq	%rcx, %rsp
	jmp	*%rdx

Uros.
Richard Henderson - Jan. 24, 2012, 12:25 a.m.
On 01/24/2012 11:05 AM, Uros Bizjak wrote:
> Hello!
> 
> The attached patch optimizes the x86_64 gtm_jmpbuf layout to avoid copying
> the return address. The optimized layout uses the same trick as x86_32 — the
> call-saved registers are stored just below the return address, and there is
> just enough space in the call alignment hole to fit all of them there.
> 
> 2012-01-24  Uros Bizjak  <ubizjak@gmail.com>
> 
> 	* config/x86/target.h (gtm_jmpbuf) [__x86_64__]: Move rip to the
> 	end of struct.
> 	* config/x86/sjlj.S (_ITM_beginTransaction) [__x86_64__]: Update
> 	offset values.  Do not copy return address.  Decrement stack
> 	by 56 bytes only.
> 	(GTM_longjmp) [__x86_64__]: Update offset values.

Ok.

> BTW: Do we really need the move at the end of GTM_longjmp:
> 
> 	movq	48(%rsi), %r15
> 	movq	56(%rsi), %rdx
>>> 	movl	%edi, %eax

Yes, that copies the first argument to the return register.


r~

Patch

Index: config/x86/sjlj.S
===================================================================
--- config/x86/sjlj.S	(revision 183449)
+++ config/x86/sjlj.S	(working copy)
@@ -61,20 +61,18 @@  SYM(_ITM_beginTransaction):
 	cfi_startproc
 #ifdef __x86_64__
 	leaq	8(%rsp), %rax
-	movq	(%rsp), %r8
-	subq	$72, %rsp
-	cfi_def_cfa_offset(80)
+	subq	$56, %rsp
+	cfi_def_cfa_offset(64)
 	movq	%rax, (%rsp)
-	movq	%r8, 8(%rsp)
-	movq	%rbx, 16(%rsp)
-	movq	%rbp, 24(%rsp)
-	movq	%r12, 32(%rsp)
-	movq	%r13, 40(%rsp)
-	movq	%r14, 48(%rsp)
-	movq	%r15, 56(%rsp)
+	movq	%rbx, 8(%rsp)
+	movq	%rbp, 16(%rsp)
+	movq	%r12, 24(%rsp)
+	movq	%r13, 32(%rsp)
+	movq	%r14, 40(%rsp)
+	movq	%r15, 48(%rsp)
 	movq	%rsp, %rsi
 	call	SYM(GTM_begin_transaction)
-	addq	$72, %rsp
+	addq	$56, %rsp
 	cfi_def_cfa_offset(8)
 	ret
 #else
@@ -115,13 +113,13 @@  SYM(GTM_longjmp):
 	cfi_startproc
 #ifdef __x86_64__
 	movq	(%rsi), %rcx
-	movq	8(%rsi), %rdx
-	movq	16(%rsi), %rbx
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %r12
-	movq	40(%rsi), %r13
-	movq	48(%rsi), %r14
-	movq	56(%rsi), %r15
+	movq	8(%rsi), %rbx
+	movq	16(%rsi), %rbp
+	movq	24(%rsi), %r12
+	movq	32(%rsi), %r13
+	movq	40(%rsi), %r14
+	movq	48(%rsi), %r15
+	movq	56(%rsi), %rdx
 	movl	%edi, %eax
 	cfi_def_cfa(%rcx, 0)
 	cfi_register(%rip, %rdx)
Index: config/x86/target.h
===================================================================
--- config/x86/target.h	(revision 183449)
+++ config/x86/target.h	(working copy)
@@ -29,13 +29,13 @@  namespace GTM HIDDEN {
 typedef struct gtm_jmpbuf
 {
   void *cfa;
-  unsigned long rip;
   unsigned long rbx;
   unsigned long rbp;
   unsigned long r12;
   unsigned long r13;
   unsigned long r14;
   unsigned long r15;
+  unsigned long rip;
 } gtm_jmpbuf;
 #else
 typedef struct gtm_jmpbuf