diff mbox

PATCH: Update x86 rdrand intrinsics

Message ID AANLkTimVMzfWTgBxMdxL-Riaw8A0d9Gf_OG0aTCJHGJ+@mail.gmail.com
State New
Headers show

Commit Message

H.J. Lu Dec. 28, 2010, 4:50 p.m. UTC
On Sun, Dec 19, 2010 at 7:53 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Dec 16, 2010 at 2:30 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>
>> Intell will update rdrand intrinsic spec to replace _rdrand_uXX with
>> _rdrandXX_step.  This patch implements it.  OK for trunk?
>
> Are these specs available somewhere?
>
> I have some comments on your approach:
>
> - I believe that RDRAND does not need to be defined as
> unspec_volatile, since its internal state is communicated through
> carry flag.
> - It is possible to get rid of extra rdrand<mode>_step patterns by
> integrating cmove generation directly into ix86_expand_builtin.
> - rdrand mnemonic should use tab insted of space to separate its argument.
> - the pointer arg in immintrin.h should be uglified to __P, see many
> examples in various *intrin.h header files.
>
> I have attached to this message a patch that implements all above suggestions.
>
> Uros.
>

Here is the updated patch.  I will check it in after testing.

Thanks.
diff mbox

Patch

diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 09dd9eb..079c8ec 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -107,6 +107,7 @@  DEF_POINTER_TYPE (PCVOID, VOID, CONST)
 DEF_POINTER_TYPE (PVOID, VOID)
 DEF_POINTER_TYPE (PDOUBLE, DOUBLE)
 DEF_POINTER_TYPE (PFLOAT, FLOAT)
+DEF_POINTER_TYPE (PUSHORT, USHORT)
 DEF_POINTER_TYPE (PINT, INT)
 DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
 DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)
@@ -128,7 +129,6 @@  DEF_POINTER_TYPE (PCV8SF, V8SF, CONST)
 DEF_FUNCTION_TYPE (FLOAT128)
 DEF_FUNCTION_TYPE (UINT64)
 DEF_FUNCTION_TYPE (UNSIGNED)
-DEF_FUNCTION_TYPE (UINT16)
 DEF_FUNCTION_TYPE (VOID)
 DEF_FUNCTION_TYPE (PVOID)
 
@@ -203,6 +203,9 @@  DEF_FUNCTION_TYPE (VOID, PCVOID)
 DEF_FUNCTION_TYPE (VOID, PVOID)
 DEF_FUNCTION_TYPE (VOID, UINT64)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED)
+DEF_FUNCTION_TYPE (INT, PUSHORT)
+DEF_FUNCTION_TYPE (INT, PUNSIGNED)
+DEF_FUNCTION_TYPE (INT, PULONGLONG)
 
 DEF_FUNCTION_TYPE (DI, V2DI, INT)
 DEF_FUNCTION_TYPE (DOUBLE, V2DF, INT)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 40999c8..9156d89 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -24142,9 +24142,9 @@  enum ix86_builtins
   IX86_BUILTIN_WRGSBASE64,
 
   /* RDRND instructions.  */
-  IX86_BUILTIN_RDRAND16,
-  IX86_BUILTIN_RDRAND32,
-  IX86_BUILTIN_RDRAND64,
+  IX86_BUILTIN_RDRAND16_STEP,
+  IX86_BUILTIN_RDRAND32_STEP,
+  IX86_BUILTIN_RDRAND64_STEP,
 
   /* F16C instructions.  */
   IX86_BUILTIN_CVTPH2PS,
@@ -24435,11 +24435,6 @@  static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
-
-  /* RDRND */
-  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
-  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
-  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
 };
 
 /* Builtins with variable number of arguments.  */
@@ -25448,6 +25443,15 @@  ix86_init_mmx_sse_builtins (void)
   def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
 		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
 
+  /* RDRND */
+  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
+	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
+  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
+	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
+  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
+	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
+	       IX86_BUILTIN_RDRAND64_STEP);
+
   /* MMX access to the vec_init patterns.  */
   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
 		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
@@ -26703,7 +26707,6 @@  ix86_expand_special_args_builtin (const struct builtin_description *d,
       break;
     case UINT64_FTYPE_VOID:
     case UNSIGNED_FTYPE_VOID:
-    case UINT16_FTYPE_VOID:
       nargs = 0;
       klass = load;
       memory = 0;
@@ -27215,6 +27218,51 @@  ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
           return target;
         }
 
+    case IX86_BUILTIN_RDRAND16_STEP:
+      icode = CODE_FOR_rdrandhi_1;
+      mode0 = HImode;
+      goto rdrand_step;
+
+    case IX86_BUILTIN_RDRAND32_STEP:
+      icode = CODE_FOR_rdrandsi_1;
+      mode0 = SImode;
+      goto rdrand_step;
+
+    case IX86_BUILTIN_RDRAND64_STEP:
+      icode = CODE_FOR_rdranddi_1;
+      mode0 = DImode;
+
+rdrand_step:
+      op0 = gen_reg_rtx (mode0);
+      emit_insn (GEN_FCN (icode) (op0));
+
+      op1 = gen_reg_rtx (SImode);
+      emit_move_insn (op1, CONST1_RTX (SImode));
+
+      /* Emit SImode conditional move.  */
+      if (mode0 == HImode)
+	{
+	  op2 = gen_reg_rtx (SImode);
+	  emit_insn (gen_zero_extendhisi2 (op2, op0));
+	}
+      else if (mode0 == SImode)
+	op2 = op0;
+      else
+	op2 = gen_rtx_SUBREG (SImode, op0, 0);
+
+      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+			 const0_rtx);
+      emit_insn (gen_rtx_SET (VOIDmode, op1,
+			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
+      emit_move_insn (target, op1);
+
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      op1 = expand_normal (arg0);
+      if (!address_operand (op1, VOIDmode))
+	op1 = copy_addr_to_reg (op1);
+      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
+      return target;
+
     default:
       break;
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 88e6245..05937e6 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -232,6 +232,9 @@ 
 
   ;; For BMI support
   UNSPEC_BEXTR
+
+  ;; For RDRAND support
+  UNSPEC_RDRAND
 ])
 
 (define_c_enum "unspecv" [
@@ -265,7 +268,6 @@ 
   UNSPECV_RDGSBASE
   UNSPECV_WRFSBASE
   UNSPECV_WRGSBASE
-  UNSPECV_RDRAND
   UNSPECV_SPLIT_STACK_RETURN
 ])
 
@@ -18284,36 +18286,13 @@ 
   [(set_attr "type" "other")
    (set_attr "prefix_extra" "2")])
 
-(define_expand "rdrand<mode>"
-  [(set (match_operand:SWI248 0 "register_operand" "=r")
-	(unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))]
-  "TARGET_RDRND"
-{
-  rtx retry_label, insn, ccc;
-
-  retry_label = gen_label_rtx ();
-
-  emit_label (retry_label);
-
-  /* Generate rdrand.  */
-  emit_insn (gen_rdrand<mode>_1 (operands[0]));
-
-  /* Retry if the carry flag isn't valid.  */
-  ccc = gen_rtx_REG (CCCmode, FLAGS_REG);
-  ccc = gen_rtx_EQ (VOIDmode, ccc, const0_rtx);
-  ccc = gen_rtx_IF_THEN_ELSE (VOIDmode, ccc, pc_rtx,
-			      gen_rtx_LABEL_REF (VOIDmode, retry_label));
-  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, ccc));
-  JUMP_LABEL (insn) = retry_label;
-
-  DONE;
-})
-
 (define_insn "rdrand<mode>_1"
   [(set (match_operand:SWI248 0 "register_operand" "=r")
-	(unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))]
+	(unspec:SWI248 [(const_int 0)] UNSPEC_RDRAND))
+   (set (reg:CCC FLAGS_REG)
+	(unspec:CCC [(const_int 0)] UNSPEC_RDRAND))]
   "TARGET_RDRND"
-  "rdrand %0"
+  "rdrand\t%0"
   [(set_attr "type" "other")
    (set_attr "prefix_extra" "1")])
 
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 3e69060..055e49b 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -57,18 +57,18 @@ 
 #endif
 
 #ifdef __RDRND__
-extern __inline unsigned short
+extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand_u16 (void)
+_rdrand16_step (unsigned short *__P)
 {
-  return __builtin_ia32_rdrand16 ();
+  return __builtin_ia32_rdrand16_step (__P);
 }
 
-extern __inline unsigned int
+extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand_u32 (void)
+_rdrand32_step (unsigned int *__P)
 {
-  return __builtin_ia32_rdrand32 ();
+  return __builtin_ia32_rdrand32_step (__P);
 }
 #endif /* __RDRND__ */
 
@@ -132,11 +132,11 @@  _writegsbase_u64 (unsigned long long __B)
 #endif /* __FSGSBASE__ */
 
 #ifdef __RDRND__
-extern __inline unsigned long long
+extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand_u64 (void)
+_rdrand64_step (unsigned long long *__P)
 {
-  return __builtin_ia32_rdrand64 ();
+  return __builtin_ia32_rdrand64_step (__P);
 }
 #endif /* __RDRND__ */
 #endif /* __x86_64__  */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1ac1d8d..958541b 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9381,9 +9381,9 @@  used.  All of them generate the machine instruction that is part of the
 name.
 
 @smallexample
-unsigned short __builtin_ia32_rdrand16 (void)
-unsigned int __builtin_ia32_rdrand32 (void)
-unsigned long long __builtin_ia32_rdrand64 (void)
+unsigned int __builtin_ia32_rdrand16_step (unsigned short *)
+unsigned int __builtin_ia32_rdrand32_step (unsigned int *)
+unsigned int __builtin_ia32_rdrand64_step (unsigned long long *)
 @end smallexample
 
 The following built-in functions are available when @option{-msse4a} is used.
diff --git a/gcc/testsuite/gcc.target/i386/rdrand-1.c b/gcc/testsuite/gcc.target/i386/rdrand-1.c
index 4f6b9e1..beec9f1 100644
--- a/gcc/testsuite/gcc.target/i386/rdrand-1.c
+++ b/gcc/testsuite/gcc.target/i386/rdrand-1.c
@@ -1,12 +1,12 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mrdrnd " } */
-/* { dg-final { scan-assembler "rdrand\[ \t]+(%|)ax" } } */
-/* { dg-final { scan-assembler "jnc\[ \t]+" } } */
+/* { dg-options "-O2 -mrdrnd -dp" } */
+/* { dg-final { scan-assembler-times "rdrandhi_1" 1 } } */
+/* { dg-final { scan-assembler-times "\\*movsicc_noc" 1 } } */
 
 #include <immintrin.h>
 
-unsigned short
-read_rdrand16 (void)
+int
+foo (unsigned short *x)
 {
-  return _rdrand_u16 ();
+  return _rdrand16_step (x);
 }
diff --git a/gcc/testsuite/gcc.target/i386/rdrand-2.c b/gcc/testsuite/gcc.target/i386/rdrand-2.c
index 2297383..ea8e906 100644
--- a/gcc/testsuite/gcc.target/i386/rdrand-2.c
+++ b/gcc/testsuite/gcc.target/i386/rdrand-2.c
@@ -1,12 +1,12 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mrdrnd " } */
-/* { dg-final { scan-assembler "rdrand\[ \t]+(%|)eax" } } */
-/* { dg-final { scan-assembler "jnc\[ \t]+" } } */
+/* { dg-options "-O2 -mrdrnd -dp" } */
+/* { dg-final { scan-assembler-times "rdrandsi_1" 1 } } */
+/* { dg-final { scan-assembler-times "\\*movsicc_noc" 1 } } */
 
 #include <immintrin.h>
 
-unsigned int
-read_rdrand32 (void)
+int
+foo (unsigned int *x)
 {
-  return _rdrand_u32 ();
+  return _rdrand32_step (x);
 }
diff --git a/gcc/testsuite/gcc.target/i386/rdrand-3.c b/gcc/testsuite/gcc.target/i386/rdrand-3.c
index 17c7c6f..c494d3b 100644
--- a/gcc/testsuite/gcc.target/i386/rdrand-3.c
+++ b/gcc/testsuite/gcc.target/i386/rdrand-3.c
@@ -1,13 +1,13 @@ 
 /* { dg-do compile } */
 /* { dg-require-effective-target lp64 } */
-/* { dg-options "-O2 -mrdrnd " } */
-/* { dg-final { scan-assembler "rdrand\[ \t]+(%|)rax" } } */
-/* { dg-final { scan-assembler "jnc\[ \t]+" } } */
+/* { dg-options "-O2 -mrdrnd -dp" } */
+/* { dg-final { scan-assembler-times "rdranddi_1" 1 } } */
+/* { dg-final { scan-assembler-times "\\*movsicc_noc" 1 } } */
 
 #include <immintrin.h>
 
-unsigned long long
-read_rdrand64 (void)
+int
+foo (unsigned long long *x)
 {
-  return _rdrand_u64 ();
+  return _rdrand64_step (x);
 }