diff mbox

PATCH: Update x86 rdrand intrinsics

Message ID AANLkTi=4X7KaNa4TDQmFE6mE3CP3sVi2SO=wxeuE4hWq@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak Dec. 19, 2010, 3:53 p.m. UTC
On Thu, Dec 16, 2010 at 2:30 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:

> Intell will update rdrand intrinsic spec to replace _rdrand_uXX with
> _rdrandXX_step.  This patch implements it.  OK for trunk?

Are these specs available somewhere?

I have some comments on your approach:

- I believe that RDRAND does not need to be defined as
unspec_volatile, since its internal state is communicated through
carry flag.
- It is possible to get rid of extra rdrand<mode>_step patterns by
integrating cmove generation directly into ix86_expand_builtin.
- rdrand mnemonic should use tab insted of space to separate its argument.
- the pointer arg in immintrin.h should be uglified to __P, see many
examples in various *intrin.h header files.

I have attached to this message a patch that implements all above suggestions.

Uros.
diff mbox

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 168050)
+++ i386.md	(working copy)
@@ -232,6 +232,9 @@ 
 
   ;; For BMI support
   UNSPEC_BEXTR
+
+  ;; For RDRAND support
+  UNSPEC_RDRAND
 ])
 
 (define_c_enum "unspecv" [
@@ -265,7 +268,6 @@ 
   UNSPECV_RDGSBASE
   UNSPECV_WRFSBASE
   UNSPECV_WRGSBASE
-  UNSPECV_RDRAND
   UNSPECV_SPLIT_STACK_RETURN
 ])
 
@@ -18284,36 +18286,13 @@ 
   [(set_attr "type" "other")
    (set_attr "prefix_extra" "2")])
 
-(define_expand "rdrand<mode>"
-  [(set (match_operand:SWI248 0 "register_operand" "=r")
-	(unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))]
-  "TARGET_RDRND"
-{
-  rtx retry_label, insn, ccc;
-
-  retry_label = gen_label_rtx ();
-
-  emit_label (retry_label);
-
-  /* Generate rdrand.  */
-  emit_insn (gen_rdrand<mode>_1 (operands[0]));
-
-  /* Retry if the carry flag isn't valid.  */
-  ccc = gen_rtx_REG (CCCmode, FLAGS_REG);
-  ccc = gen_rtx_EQ (VOIDmode, ccc, const0_rtx);
-  ccc = gen_rtx_IF_THEN_ELSE (VOIDmode, ccc, pc_rtx,
-			      gen_rtx_LABEL_REF (VOIDmode, retry_label));
-  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, ccc));
-  JUMP_LABEL (insn) = retry_label;
-
-  DONE;
-})
-
 (define_insn "rdrand<mode>_1"
   [(set (match_operand:SWI248 0 "register_operand" "=r")
-	(unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))]
+	(unspec:SWI248 [(const_int 0)] UNSPEC_RDRAND))
+   (set (reg:CCC FLAGS_REG)
+	(unspec:CCC [(const_int 0)] UNSPEC_RDRAND))]
   "TARGET_RDRND"
-  "rdrand %0"
+  "rdrand\t%0"
   [(set_attr "type" "other")
    (set_attr "prefix_extra" "1")])
 
Index: i386-builtin-types.def
===================================================================
--- i386-builtin-types.def	(revision 168050)
+++ i386-builtin-types.def	(working copy)
@@ -107,6 +107,7 @@  DEF_POINTER_TYPE (PCVOID, VOID, CONST)
 DEF_POINTER_TYPE (PVOID, VOID)
 DEF_POINTER_TYPE (PDOUBLE, DOUBLE)
 DEF_POINTER_TYPE (PFLOAT, FLOAT)
+DEF_POINTER_TYPE (PUSHORT, USHORT)
 DEF_POINTER_TYPE (PINT, INT)
 DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
 DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)
@@ -128,7 +129,6 @@  DEF_POINTER_TYPE (PCV8SF, V8SF, CONST)
 DEF_FUNCTION_TYPE (FLOAT128)
 DEF_FUNCTION_TYPE (UINT64)
 DEF_FUNCTION_TYPE (UNSIGNED)
-DEF_FUNCTION_TYPE (UINT16)
 DEF_FUNCTION_TYPE (VOID)
 DEF_FUNCTION_TYPE (PVOID)
 
@@ -203,6 +203,9 @@  DEF_FUNCTION_TYPE (VOID, PCVOID)
 DEF_FUNCTION_TYPE (VOID, PVOID)
 DEF_FUNCTION_TYPE (VOID, UINT64)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED)
+DEF_FUNCTION_TYPE (INT, PUSHORT)
+DEF_FUNCTION_TYPE (INT, PUNSIGNED)
+DEF_FUNCTION_TYPE (INT, PULONGLONG)
 
 DEF_FUNCTION_TYPE (DI, V2DI, INT)
 DEF_FUNCTION_TYPE (DOUBLE, V2DF, INT)
Index: immintrin.h
===================================================================
--- immintrin.h	(revision 168050)
+++ immintrin.h	(working copy)
@@ -57,18 +57,18 @@ 
 #endif
 
 #ifdef __RDRND__
-extern __inline unsigned short
+extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand_u16 (void)
+_rdrand16_step (unsigned short *__P)
 {
-  return __builtin_ia32_rdrand16 ();
+  return __builtin_ia32_rdrand16_step (__P);
 }
 
-extern __inline unsigned int
+extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand_u32 (void)
+_rdrand32_step (unsigned int *__P)
 {
-  return __builtin_ia32_rdrand32 ();
+  return __builtin_ia32_rdrand32_step (__P);
 }
 #endif /* __RDRND__ */
 
@@ -132,11 +132,11 @@  _writegsbase_u64 (unsigned long long __B
 #endif /* __FSGSBASE__ */
 
 #ifdef __RDRND__
-extern __inline unsigned long long
+extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand_u64 (void)
+_rdrand64_step (unsigned long long *__P)
 {
-  return __builtin_ia32_rdrand64 ();
+  return __builtin_ia32_rdrand64_step (__P);
 }
 #endif /* __RDRND__ */
 #endif /* __x86_64__  */
Index: i386.c
===================================================================
--- i386.c	(revision 168050)
+++ i386.c	(working copy)
@@ -24142,9 +24142,9 @@  enum ix86_builtins
   IX86_BUILTIN_WRGSBASE64,
 
   /* RDRND instructions.  */
-  IX86_BUILTIN_RDRAND16,
-  IX86_BUILTIN_RDRAND32,
-  IX86_BUILTIN_RDRAND64,
+  IX86_BUILTIN_RDRAND16_STEP,
+  IX86_BUILTIN_RDRAND32_STEP,
+  IX86_BUILTIN_RDRAND64_STEP,
 
   /* F16C instructions.  */
   IX86_BUILTIN_CVTPH2PS,
@@ -24435,11 +24435,6 @@  static const struct builtin_description 
   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
-
-  /* RDRND */
-  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
-  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
-  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
 };
 
 /* Builtins with variable number of arguments.  */
@@ -25448,6 +25443,15 @@  ix86_init_mmx_sse_builtins (void)
   def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
 		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
 
+  /* RDRND */
+  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
+	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
+  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
+	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
+  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
+	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
+	       IX86_BUILTIN_RDRAND64_STEP);
+
   /* MMX access to the vec_init patterns.  */
   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
 		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
@@ -26703,7 +26707,6 @@  ix86_expand_special_args_builtin (const 
       break;
     case UINT64_FTYPE_VOID:
     case UNSIGNED_FTYPE_VOID:
-    case UINT16_FTYPE_VOID:
       nargs = 0;
       klass = load;
       memory = 0;
@@ -27215,6 +27218,51 @@  ix86_expand_builtin (tree exp, rtx targe
           return target;
         }
 
+    case IX86_BUILTIN_RDRAND16_STEP:
+      icode = CODE_FOR_rdrandhi_1;
+      mode0 = HImode;
+      goto rdrand_step;
+
+    case IX86_BUILTIN_RDRAND32_STEP:
+      icode = CODE_FOR_rdrandsi_1;
+      mode0 = SImode;
+      goto rdrand_step;
+
+    case IX86_BUILTIN_RDRAND64_STEP:
+      icode = CODE_FOR_rdranddi_1;
+      mode0 = DImode;
+
+rdrand_step:
+      op0 = gen_reg_rtx (mode0);
+      emit_insn (GEN_FCN (icode) (op0));
+
+      op1 = gen_reg_rtx (SImode);
+      emit_move_insn (op1, CONST1_RTX (SImode));
+
+      /* Emit SImode conditional move.  */
+      if (mode0 == HImode)
+	{
+	  op2 = gen_reg_rtx (SImode);
+	  emit_insn (gen_zero_extendhisi2 (op2, op0));
+	}
+      else if (mode0 == SImode)
+	op2 = op0;
+      else
+	op2 = gen_rtx_SUBREG (SImode, op0, 0);
+
+      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+			 const0_rtx);
+      emit_insn (gen_rtx_SET (VOIDmode, op1,
+			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
+      emit_move_insn (target, op1);
+
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      op1 = expand_normal (arg0);
+      if (!address_operand (op1, VOIDmode))
+	op1 = copy_addr_to_reg (op1);
+      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
+      return target;
+
     default:
       break;
     }