Patchwork Add faster HTM fastpath for libitm TSX v2

login
register
mail settings
Submitter Uros Bizjak
Date Jan. 25, 2013, 1:37 p.m.
Message ID <CAFULd4Z+wvnf9wiL5OcvCTDBrXp4ReCLCgqyzzLnjMUnD-Ztjg@mail.gmail.com>
Download mbox | patch
Permalink /patch/215679/
State New
Headers show

Comments

Uros Bizjak - Jan. 25, 2013, 1:37 p.m.
Hello!

> The libitm TSX hardware transaction fast path currently does quite a bit of
> unnecessary work (saving registers etc.) before even trying to start
> a hardware transaction. This patch moves the initial attempt at a
> transaction early into the assembler stub. Complicated work like retries
> is still done in C. So this is essentially a fast path for the fast
> path.
>
> The assembler code doesn't understand the layout of "serial_lock", but
> it needs to check that serial_lock is free. We export just the lock
> variable as a separate pointer for this.

The attached (RFC) patch may be useful in this case. The
patch allows specifying the abort label for xbegin, so it is possible to
implement code like the following (nonsensical) example:

--cut here--
/* Two externally defined variables, used only so that the example has
   visible stores on the fall-through path and on the abort path.  */
extern int lock1;
extern int lock2;

/* Illustrative (deliberately non-sensical) use of the proposed
   __builtin_ia32_xbegin_label builtin: the address of a local label is
   passed as the target for xbegin.  No transaction end is issued here;
   the function only demonstrates the label plumbing.  Always returns 0.  */
int test ()
{
  /* RES is pinned to %eax and is never assigned in C, so the test below
     reads whatever that register holds when control reaches the label.
     NOTE(review): presumably this is the xbegin abort status -- the
     patch's expander passes AX_REG as the status operand -- confirm.  */
  register unsigned int res asm ("eax");

  /* Emit xbegin with __txn_abort as its label operand.  */
  __builtin_ia32_xbegin_label (&&__txn_abort);

  lock1 = 0;

  /* Reached by plain fall-through from the store above as well as
     through the label handed to the builtin.  */
 __txn_abort:
  /* Test bit 4 (0x10) of RES.  NOTE(review): the meaning of this bit is
     not established by the example -- check the RTM abort-status
     encoding before relying on it.  */
  if (res & 0x10)
    lock2 = 1;

  return 0;
}
--cut here--

gcc -O2 -mrtm:

test:
        xbegin  .L2
        movl    $0, lock1(%rip)
.L2:
        testb   $16, %al
        je      .L3
        movl    $1, lock2(%rip)
.L3:
        xorl    %eax, %eax
        ret

Please note that the edge from __builtin_ia32_xbegin_label is not
known to tree optimizers, so someone familiar with this part of the
compiler should enhance/fix the patch. With the (fixed) patch, you can
implement your assembly using plain C++, probably also using ifunc
relocations.

Uros.
Andi Kleen - Jan. 25, 2013, 3:54 p.m.
> Probably the attached (RFC) patch can be useful in this case. The
> patch allows to specify the label for xbegin, so it is possible to
> implement code like following (non-sensical) example:

It can actually be implemented using asm goto. I have some macros
for this.  And the tree optimizers should even support it.

#define XBEGIN(label)   \
     asm volatile goto(".byte 0xc7,0xf8 ; .long %l0-1f\n1:" ::: "eax" : label)
#define XEND()    asm volatile(".byte 0x0f,0x01,0xd5")
#define XFAIL(label) label: asm volatile("" ::: "eax")
#define XFAIL_STATUS(label, status) label: asm volatile("" : "=a" (status))

But the assembler code is still needed for this because the non-TSX path needs
to save all registers (it's like a setjmp/longjmp), and that cannot
be implemented in C.

The goal of the assembler code was not to use the label, but to move
the initial transaction before the setjmp code.

-Andi

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 195460)
+++ i386.md	(working copy)
@@ -18093,6 +18093,17 @@ 
   DONE;
 })
 
+(define_expand "xbegin_label"
+  [(parallel
+    [(set (pc)
+	  (if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
+			    (const_int 0))
+			(match_operand 1)
+			(pc)))
+     (set (match_operand:SI 0 "register_operand")
+	  (unspec_volatile:SI [(match_dup 0)] UNSPECV_XBEGIN))])]
+  "TARGET_RTM")
+
 (define_insn "xbegin_1"
   [(set (pc)
 	(if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
Index: i386.c
===================================================================
--- i386.c	(revision 195460)
+++ i386.c	(working copy)
@@ -26570,6 +26570,7 @@  enum ix86_builtins
 
   /* RTM */
   IX86_BUILTIN_XBEGIN,
+  IX86_BUILTIN_XBEGIN_LABEL,
   IX86_BUILTIN_XEND,
   IX86_BUILTIN_XABORT,
   IX86_BUILTIN_XTEST,
@@ -26942,6 +26943,7 @@  static const struct builtin_description bdesc_spec
 
   /* RTM */
   { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin_label, "__builtin_ia32_xbegin_label", IX86_BUILTIN_XBEGIN_LABEL, UNKNOWN, (int) VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
   { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
 };
@@ -32239,6 +32241,17 @@  addcarryx:
 
       return target;
 
+    case IX86_BUILTIN_XBEGIN_LABEL:
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      op0 = expand_normal (arg0);
+      if (GET_CODE (op0) != LABEL_REF)
+	{
+	  error ("the xbegin's argument must be a label");
+	  return const0_rtx;
+	}
+      emit_jump_insn (gen_xbegin_label (gen_rtx_REG (SImode, AX_REG), op0));
+      return 0;
+
     case IX86_BUILTIN_XABORT:
       icode = CODE_FOR_xabort;
       arg0 = CALL_EXPR_ARG (exp, 0);