From patchwork Sun Dec 19 15:53:45 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Uros Bizjak X-Patchwork-Id: 76135 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 37D8EB6EF2 for ; Mon, 20 Dec 2010 02:53:56 +1100 (EST) Received: (qmail 31567 invoked by alias); 19 Dec 2010 15:53:54 -0000 Received: (qmail 31558 invoked by uid 22791); 19 Dec 2010 15:53:53 -0000 X-SWARE-Spam-Status: No, hits=-2.2 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW X-Spam-Check-By: sourceware.org Received: from mail-px0-f176.google.com (HELO mail-px0-f176.google.com) (209.85.212.176) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Sun, 19 Dec 2010 15:53:47 +0000 Received: by pxi11 with SMTP id 11so536023pxi.21 for ; Sun, 19 Dec 2010 07:53:46 -0800 (PST) MIME-Version: 1.0 Received: by 10.142.163.20 with SMTP id l20mr2532730wfe.265.1292774025122; Sun, 19 Dec 2010 07:53:45 -0800 (PST) Received: by 10.142.252.8 with HTTP; Sun, 19 Dec 2010 07:53:45 -0800 (PST) In-Reply-To: <20101216133007.GA22084@intel.com> References: <20101216133007.GA22084@intel.com> Date: Sun, 19 Dec 2010 16:53:45 +0100 Message-ID: Subject: Re: PATCH: Update x86 rdrand intrinsics From: Uros Bizjak To: "H.J. Lu" Cc: gcc-patches@gcc.gnu.org Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On Thu, Dec 16, 2010 at 2:30 PM, H.J. Lu wrote: > Intell will update rdrand intrinsic spec to replace _rdrand_uXX with > _rdrandXX_step.  This patch implements it.  OK for trunk? Are these specs available somewhere? I have some comments on your approach: - I believe that RDRAND does not need to be defined as unspec_volatile, since its internal state is communicated through carry flag. - It is possible to get rid of extra rdrand_step patterns by integrating cmove generation directly into ix86_expand_builtin. - rdrand mnemonic should use tab insted of space to separate its argument. - the pointer arg in immintrin.h should be uglified to __P, see many examples in various *intrin.h header files. I have attached to this message a patch that implements all above suggestions. Uros. Index: i386.md =================================================================== --- i386.md (revision 168050) +++ i386.md (working copy) @@ -232,6 +232,9 @@ ;; For BMI support UNSPEC_BEXTR + + ;; For RDRAND support + UNSPEC_RDRAND ]) (define_c_enum "unspecv" [ @@ -265,7 +268,6 @@ UNSPECV_RDGSBASE UNSPECV_WRFSBASE UNSPECV_WRGSBASE - UNSPECV_RDRAND UNSPECV_SPLIT_STACK_RETURN ]) @@ -18284,36 +18286,13 @@ [(set_attr "type" "other") (set_attr "prefix_extra" "2")]) -(define_expand "rdrand" - [(set (match_operand:SWI248 0 "register_operand" "=r") - (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))] - "TARGET_RDRND" -{ - rtx retry_label, insn, ccc; - - retry_label = gen_label_rtx (); - - emit_label (retry_label); - - /* Generate rdrand. */ - emit_insn (gen_rdrand_1 (operands[0])); - - /* Retry if the carry flag isn't valid. */ - ccc = gen_rtx_REG (CCCmode, FLAGS_REG); - ccc = gen_rtx_EQ (VOIDmode, ccc, const0_rtx); - ccc = gen_rtx_IF_THEN_ELSE (VOIDmode, ccc, pc_rtx, - gen_rtx_LABEL_REF (VOIDmode, retry_label)); - insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, ccc)); - JUMP_LABEL (insn) = retry_label; - - DONE; -}) - (define_insn "rdrand_1" [(set (match_operand:SWI248 0 "register_operand" "=r") - (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))] + (unspec:SWI248 [(const_int 0)] UNSPEC_RDRAND)) + (set (reg:CCC FLAGS_REG) + (unspec:CCC [(const_int 0)] UNSPEC_RDRAND))] "TARGET_RDRND" - "rdrand %0" + "rdrand\t%0" [(set_attr "type" "other") (set_attr "prefix_extra" "1")]) Index: i386-builtin-types.def =================================================================== --- i386-builtin-types.def (revision 168050) +++ i386-builtin-types.def (working copy) @@ -107,6 +107,7 @@ DEF_POINTER_TYPE (PCVOID, VOID, CONST) DEF_POINTER_TYPE (PVOID, VOID) DEF_POINTER_TYPE (PDOUBLE, DOUBLE) DEF_POINTER_TYPE (PFLOAT, FLOAT) +DEF_POINTER_TYPE (PUSHORT, USHORT) DEF_POINTER_TYPE (PINT, INT) DEF_POINTER_TYPE (PULONGLONG, ULONGLONG) DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED) @@ -128,7 +129,6 @@ DEF_POINTER_TYPE (PCV8SF, V8SF, CONST) DEF_FUNCTION_TYPE (FLOAT128) DEF_FUNCTION_TYPE (UINT64) DEF_FUNCTION_TYPE (UNSIGNED) -DEF_FUNCTION_TYPE (UINT16) DEF_FUNCTION_TYPE (VOID) DEF_FUNCTION_TYPE (PVOID) @@ -203,6 +203,9 @@ DEF_FUNCTION_TYPE (VOID, PCVOID) DEF_FUNCTION_TYPE (VOID, PVOID) DEF_FUNCTION_TYPE (VOID, UINT64) DEF_FUNCTION_TYPE (VOID, UNSIGNED) +DEF_FUNCTION_TYPE (INT, PUSHORT) +DEF_FUNCTION_TYPE (INT, PUNSIGNED) +DEF_FUNCTION_TYPE (INT, PULONGLONG) DEF_FUNCTION_TYPE (DI, V2DI, INT) DEF_FUNCTION_TYPE (DOUBLE, V2DF, INT) Index: immintrin.h =================================================================== --- immintrin.h (revision 168050) +++ immintrin.h (working copy) @@ -57,18 +57,18 @@ #endif #ifdef __RDRND__ -extern __inline unsigned short +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_rdrand_u16 (void) +_rdrand16_step (unsigned short *__P) { - return __builtin_ia32_rdrand16 (); + return __builtin_ia32_rdrand16_step (__P); } -extern __inline unsigned int +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_rdrand_u32 (void) +_rdrand32_step (unsigned int *__P) { - return __builtin_ia32_rdrand32 (); + return __builtin_ia32_rdrand32_step (__P); } #endif /* __RDRND__ */ @@ -132,11 +132,11 @@ _writegsbase_u64 (unsigned long long __B #endif /* __FSGSBASE__ */ #ifdef __RDRND__ -extern __inline unsigned long long +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_rdrand_u64 (void) +_rdrand64_step (unsigned long long *__P) { - return __builtin_ia32_rdrand64 (); + return __builtin_ia32_rdrand64_step (__P); } #endif /* __RDRND__ */ #endif /* __x86_64__ */ Index: i386.c =================================================================== --- i386.c (revision 168050) +++ i386.c (working copy) @@ -24142,9 +24142,9 @@ enum ix86_builtins IX86_BUILTIN_WRGSBASE64, /* RDRND instructions. */ - IX86_BUILTIN_RDRAND16, - IX86_BUILTIN_RDRAND32, - IX86_BUILTIN_RDRAND64, + IX86_BUILTIN_RDRAND16_STEP, + IX86_BUILTIN_RDRAND32_STEP, + IX86_BUILTIN_RDRAND64_STEP, /* F16C instructions. */ IX86_BUILTIN_CVTPH2PS, @@ -24435,11 +24435,6 @@ static const struct builtin_description { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 }, { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }, { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 }, - - /* RDRND */ - { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID }, - { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, - { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID }, }; /* Builtins with variable number of arguments. */ @@ -25448,6 +25443,15 @@ ix86_init_mmx_sse_builtins (void) def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128); + /* RDRND */ + def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step", + INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP); + def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step", + INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP); + def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, + "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG, + IX86_BUILTIN_RDRAND64_STEP); + /* MMX access to the vec_init patterns. */ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI); @@ -26703,7 +26707,6 @@ ix86_expand_special_args_builtin (const break; case UINT64_FTYPE_VOID: case UNSIGNED_FTYPE_VOID: - case UINT16_FTYPE_VOID: nargs = 0; klass = load; memory = 0; @@ -27215,6 +27218,51 @@ ix86_expand_builtin (tree exp, rtx targe return target; } + case IX86_BUILTIN_RDRAND16_STEP: + icode = CODE_FOR_rdrandhi_1; + mode0 = HImode; + goto rdrand_step; + + case IX86_BUILTIN_RDRAND32_STEP: + icode = CODE_FOR_rdrandsi_1; + mode0 = SImode; + goto rdrand_step; + + case IX86_BUILTIN_RDRAND64_STEP: + icode = CODE_FOR_rdranddi_1; + mode0 = DImode; + +rdrand_step: + op0 = gen_reg_rtx (mode0); + emit_insn (GEN_FCN (icode) (op0)); + + op1 = gen_reg_rtx (SImode); + emit_move_insn (op1, CONST1_RTX (SImode)); + + /* Emit SImode conditional move. */ + if (mode0 == HImode) + { + op2 = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendhisi2 (op2, op0)); + } + else if (mode0 == SImode) + op2 = op0; + else + op2 = gen_rtx_SUBREG (SImode, op0, 0); + + pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, op1, + gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1))); + emit_move_insn (target, op1); + + arg0 = CALL_EXPR_ARG (exp, 0); + op1 = expand_normal (arg0); + if (!address_operand (op1, VOIDmode)) + op1 = copy_addr_to_reg (op1); + emit_move_insn (gen_rtx_MEM (mode0, op1), op0); + return target; + default: break; }