[rs6000] Add built-in function support for Power9 byte instructions

Message ID	ab3a654d-f151-a37c-3c52-5c667f19dc34@linux.vnet.ibm.com
State	New
Headers	show Return-Path: <gcc-patches-return-441411-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:to:cc :from:subject:date:mime-version:content-type :content-transfer-encoding:message-id; q=dns; s=default; b=D9oNO 4JlfFVOYr1XIUUyVto0oNBcQCr6WKWiFkIrUxX51qG3oSw0Es6TDpRikD8h+4MzI LGvaBwr8Dd7GVNwo3ylmbDlV/uIbxNEUHchwvBRNfGxgTO45/V12Yj9oVFIetI2J C01oXh0oXDtGI/OuKZmll6cXPIPAmHxSVcYTvM= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org Gateway: Authorized Use Only! Violators will be prosecuted for <gcc-patches@gcc.gnu.org> from <kdnilsen@linux.vnet.ibm.com>; Mon, 14 Nov 2016 18:43:39 -0500 Gateway: Authorized Use Only! Violators will be prosecuted; Mon, 14 Nov 2016 18:43:37 -0500 To: gcc-patches@gcc.gnu.org Cc: Segher Boessenkool <segher@gcc.gnu.org> From: Kelvin Nilsen <kdnilsen@linux.vnet.ibm.com> Subject: [PATCH, rs6000] Add built-in function support for Power9 byte instructions Date: Mon, 14 Nov 2016 16:43:35 -0700 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.4.0 MIME-Version: 1.0 Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 7bit Message-Id: <ab3a654d-f151-a37c-3c52-5c667f19dc34@linux.vnet.ibm.com>

Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 241245) +++ gcc/config/rs6000/altivec.md (working copy) @@ -153,6 +153,10 @@ UNSPEC_BCDADD UNSPEC_BCDSUB UNSPEC_BCD_OVERFLOW + UNSPEC_CMPRB + UNSPEC_CMPRB2 + UNSPEC_CMPEQB + UNSPEC_SETB ]) (define_c_enum "unspecv" @@ -3709,6 +3713,116 @@ "darn %0,1" [(set_attr "type" "integer")]) +;; Predicate: test byte within range. +;; Return in target register operand 0 a non-zero value iff the byte +;; held in bits 24:31 of operand 1 is within the inclusive range +;; bounded below by operand 2's bits 0:7 and above by operand 2's +;; bits 8:15. +(define_expand "cmprb_p" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(match_dup 3)] + UNSPEC_SETB)) + ] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; Set bit 1 (the GT bit, 0x2) of CR register operand 0 to 1 iff the +;; byte found in bits 24:31 of register operand 1 is within the +;; inclusive range bounded below by operand 2's bits 0:7 and above by +;; operand 2's bits 8:15. The other 3 bits of the target CR register +;; are set to 0. +(define_insn "*cmprb" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB))] + "TARGET_P9_MISC" + "cmprb %0,0,%1,%2" + [(set_attr "type" "logical")]) + +;; Set operand 0 register to non-zero value iff the CR register named +;; by operand 1 has its GT bit (0x2) or its LT bit (0x1) set. +(define_insn "*setb" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(match_operand:CC 1 "cc_reg_operand" "y")] + UNSPEC_SETB))] + "TARGET_P9_MISC" + "setb %0,%1" + [(set_attr "type" "logical")]) + +;; Predicate: test byte within two ranges. 
+;; Return in target register operand 0 a non-zero value iff the byte +;; held in bits 24:31 of operand 1 is within the inclusive range +;; bounded below by operand 2's bits 0:7 and above by operand 2's +;; bits 8:15 or if the byte is within the inclusive range bounded +;; below by operand 2's bits 16:23 and above by operand 2's bits 24:31. +(define_expand "cmprb2_p" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB2)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(match_dup 3)] + UNSPEC_SETB)) + ] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; Set bit 1 (the GT bit, 0x2) of CR register operand 0 to 1 iff the +;; byte found in bits 24:31 of register operand 1 is within the +;; inclusive range bounded below by operand 2's bits 0:7 and above by +;; operand 2's bits 8:15 or within the inclusive range bounded below +;; by operand 2's bits 16:23 and above by operand 2's bits 24:31. The +;; other 3 bits of the target CR register are set to 0. +(define_insn "*cmprb2" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB2))] + "TARGET_P9_MISC" + "cmprb %0,1,%1,%2" + [(set_attr "type" "logical")]) + +;; Predicate: test byte membership within set of 8 bytes. +;; Return in target register operand 0 a non-zero value iff the byte +;; held in bits 24:31 of operand 1 equals at least one of the eight +;; byte values represented by the 64-bit register supplied as operand +;; 2. Note that the 8 byte values held within operand 2 need not be +;; unique. 
+(define_expand "cmpeqb_p" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPEQB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (unspec:SI [(match_dup 3)] + UNSPEC_SETB)) + ] + "TARGET_P9_MISC && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; Set bit 1 (the GT bit, 0x2) of CR register operand 0 to 1 iff the +;; byte found in bits 24:31 of register operand 1 equals one of the 8 +;; bytes found within register operand 2. +(define_insn "*cmpeqb" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPEQB))] + "TARGET_P9_MISC && TARGET_64BIT" + "cmpeqb %0,%1,%2" + [(set_attr "type" "logical")]) + (define_expand "bcd<bcd_add_sub>_<code>" [(parallel [(set (reg:CCFP CR6_REGNO) (compare:CCFP Index: gcc/config/rs6000/rs6000-builtin.def =================================================================== --- gcc/config/rs6000/rs6000-builtin.def (revision 241245) +++ gcc/config/rs6000/rs6000-builtin.def (working copy) @@ -773,6 +773,15 @@ | RS6000_BTC_BINARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +#define BU_P9V_64BIT_AV_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR \ + | RS6000_BTM_64BIT, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_P9V_AV_3(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_3 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_altivec_" NAME, /* NAME */ \ @@ -848,6 +857,15 @@ (RS6000_BTC_OVERLOADED /* ATTR */ \ | RS6000_BTC_TERNARY), \ CODE_FOR_nothing) /* ICODE */ + +#define BU_P9_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_SCALAR_ ## ENUM, /* ENUM */ \ + "__builtin_scalar_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ 
\ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + #endif @@ -2004,6 +2022,16 @@ BU_P9V_OVERLOAD_1 (VPRTYBD, "vprtybd") BU_P9V_OVERLOAD_1 (VPRTYBQ, "vprtybq") BU_P9V_OVERLOAD_1 (VPRTYBW, "vprtybw") +/* 2 argument functions added in ISA 3.0 (power9). */ +BU_P9V_AV_2 (CMPRB, "byte_in_range", CONST, cmprb_p) +BU_P9V_AV_2 (CMPRB2, "byte_in_either_range", CONST, cmprb2_p) +BU_P9V_64BIT_AV_2 (CMPEQB, "byte_in_set", CONST, cmpeqb_p) + +/* 2 argument overloaded functions added in ISA 3.0 (power9). */ +BU_P9_OVERLOAD_2 (CMPRB, "byte_in_range") +BU_P9_OVERLOAD_2 (CMPRB2, "byte_in_either_range") +BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set") + /* 1 argument IEEE 128-bit floating-point functions. */ BU_FLOAT128_1 (FABSQ, "fabsq", CONST, abskf2) Index: gcc/config/rs6000/rs6000-c.c =================================================================== --- gcc/config/rs6000/rs6000-c.c (revision 241245) +++ gcc/config/rs6000/rs6000-c.c (working copy) @@ -4556,6 +4556,13 @@ const struct altivec_builtin_types altivec_overloa { P9V_BUILTIN_VEC_VPRTYBQ, P9V_BUILTIN_VPRTYBQ, RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0, 0 }, + { P9V_BUILTIN_SCALAR_CMPRB, P9V_BUILTIN_CMPRB, + RS6000_BTI_INTSI, RS6000_BTI_UINTQI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_SCALAR_CMPRB2, P9V_BUILTIN_CMPRB2, + RS6000_BTI_INTSI, RS6000_BTI_UINTQI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_SCALAR_CMPEQB, P9V_BUILTIN_CMPEQB, + RS6000_BTI_INTSI, RS6000_BTI_UINTQI, RS6000_BTI_UINTDI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, Index: gcc/doc/extend.texi =================================================================== --- gcc/doc/extend.texi (revision 241245) +++ gcc/doc/extend.texi (working copy) @@ -15015,6 +15015,27 @@ long long __builtin_darn (void); long long __builtin_darn_raw (void); int __builtin_darn_32 (void); +unsigned int scalar_extract_exp (double source); +unsigned long long int 
scalar_extract_sig (double source); + +double +scalar_insert_exp (unsigned long long int significand, unsigned long long int exponent); + +int scalar_cmp_exp_gt (double arg1, double arg2); +int scalar_cmp_exp_lt (double arg1, double arg2); +int scalar_cmp_exp_eq (double arg1, double arg2); +int scalar_cmp_exp_unordered (double arg1, double arg2); + +int scalar_test_data_class (float source, unsigned int condition); +int scalar_test_data_class (double source, unsigned int condition); + +int scalar_test_neg (float source); +int scalar_test_neg (double source); + +int __builtin_scalar_byte_in_set (unsigned char u, unsigned long long set); +int __builtin_scalar_byte_in_range (unsigned char u, unsigned int range); +int __builtin_scalar_byte_in_either_range (unsigned char u, unsigned int ranges); + int __builtin_dfp_dtstsfi_lt (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_lt (unsigned int comparison, _Decimal128 value); int __builtin_dfp_dtstsfi_lt_dd (unsigned int comparison, _Decimal64 value); @@ -15034,23 +15055,6 @@ int __builtin_dfp_dtstsfi_ov (unsigned int compari int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal128 value); int __builtin_dfp_dtstsfi_ov_dd (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_ov_td (unsigned int comparison, _Decimal128 value); - -unsigned int scalar_extract_exp (double source); -unsigned long long int scalar_extract_sig (double source); - -double -scalar_insert_exp (unsigned long long int significand, unsigned long long int exponent); - -int scalar_cmp_exp_gt (double arg1, double arg2); -int scalar_cmp_exp_lt (double arg1, double arg2); -int scalar_cmp_exp_eq (double arg1, double arg2); -int scalar_cmp_exp_unordered (double arg1, double arg2); - -int scalar_test_data_class (float source, unsigned int condition); -int scalar_test_data_class (double source, unsigned int condition); - -int scalar_test_neg (float source); -int scalar_test_neg (double source); @end smallexample The 
@code{__builtin_darn} and @code{__builtin_darn_raw} @@ -15105,6 +15109,22 @@ If all of the enabled test conditions are false, t The @code{scalar_test_neg} built-in functions return a non-zero value if their @code{source} argument holds a negative value. +The @code{__builtin_scalar_byte_in_set} function requires a +64-bit environment supporting ISA 3.0 or later. This function returns +a non-zero value if and only if its @code{u} argument exactly equals one of +the eight bytes contained within its 64-bit @code{set} argument. + +The @code{__builtin_scalar_byte_in_range} and +@code{__builtin_scalar_byte_in_either_range} require an environment +supporting ISA 3.0 or later. The first of these functions returns a +non-zero value if and only if its @code{u} argument is within the +range bounded between @code{((range >> 8) & 0xff)} and @code{(range & 0xff)} +inclusive. The second of these functions returns non-zero if and only +if its @code{u} argument is either within the range bounded between +@code{(ranges >> 24)} and @code{((ranges >> 16) & 0xff)} +inclusive or is within the range bounded between +@code{((ranges >> 8) & 0xff)} and @code{(ranges & 0xff)} inclusive. + The @code{__builtin_dfp_dtstsfi_lt} function returns a non-zero value if and only if the number of signficant digits of its @code{value} argument is less than its @code{comparison} argument. 
The Index: gcc/testsuite/gcc.target/powerpc/byte-in-either-range-0.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/byte-in-either-range-0.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/byte-in-either-range-0.c (working copy) @@ -0,0 +1,25 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-aix* } } */ +/* { dg-options "-mcpu=power9" } */ + +/* This test should succeed on both 32- and 64-bit configurations. */ +#include <altivec.h> + +int +test_byte_in_either_range (unsigned char b, + unsigned char first_lo_bound, + unsigned char first_hi_bound, + unsigned char second_lo_bound, + unsigned char second_hi_bound) +{ + unsigned int range_encoding; + range_encoding = ((first_hi_bound << 24) | (first_lo_bound << 16) + | (second_hi_bound << 8) | second_lo_bound); + + return __builtin_scalar_byte_in_either_range (b, range_encoding); +} + +/* { dg-final { scan-assembler "cmprb" } } */ +/* { dg-final { scan-assembler "setb" } } */ Index: gcc/testsuite/gcc.target/powerpc/byte-in-either-range-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/byte-in-either-range-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/byte-in-either-range-1.c (working copy) @@ -0,0 +1,22 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-aix* } } */ +/* { dg-options "-mcpu=power8" } */ + +/* This test should succeed on both 32- and 64-bit configurations. 
*/ +#include <altivec.h> + +int +test_byte_in_either_range (unsigned char b, + unsigned char first_lo_bound, + unsigned char first_hi_bound, + unsigned char second_lo_bound, + unsigned char second_hi_bound) +{ + unsigned int range_encoding; + range_encoding = ((first_hi_bound << 24) | (first_lo_bound << 16) + | (second_hi_bound << 8) | second_lo_bound); + + return __builtin_scalar_byte_in_either_range (b, range_encoding); /* { dg-error "Builtin function __builtin_altivec_byte_in_either_range requires" } */ +} Index: gcc/testsuite/gcc.target/powerpc/byte-in-range-0.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/byte-in-range-0.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/byte-in-range-0.c (working copy) @@ -0,0 +1,19 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-aix* } } */ +/* { dg-options "-mcpu=power9" } */ + +/* This test should succeed on both 32- and 64-bit configurations. 
*/ +#include <altivec.h> + +int +test_byte_in_range (unsigned char b, + unsigned char low_range, unsigned char high_range) +{ + unsigned int range_encoding = (high_range << 24) | (low_range << 16); + return __builtin_scalar_byte_in_range (b, range_encoding); +} + +/* { dg-final { scan-assembler "cmprb" } } */ +/* { dg-final { scan-assembler "setb" } } */ Index: gcc/testsuite/gcc.target/powerpc/byte-in-range-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/byte-in-range-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/byte-in-range-1.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-aix* } } */ +/* { dg-options "-mcpu=power8" } */ + +#include <altivec.h> + +int +test_byte_in_range (unsigned char b, + unsigned char low_range, unsigned char high_range) +{ + unsigned int range_encoding = (high_range << 24) | (low_range << 16); + return __builtin_scalar_byte_in_range (b, range_encoding); /* { dg-error "Builtin function __builtin_altivec_byte_in_range requires" } */ +} + Index: gcc/testsuite/gcc.target/powerpc/byte-in-set-0.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/byte-in-set-0.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/byte-in-set-0.c (working copy) @@ -0,0 +1,18 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-aix* } } */ +/* { dg-options "-mcpu=power9" } */ + +/* This test should succeed only on 64-bit configurations. 
*/ +#include <altivec.h> + +int +test_byte_in_set (unsigned char b, unsigned long long set_members) +{ + return __builtin_scalar_byte_in_set (b, set_members); +} + +/* { dg-final { scan-assembler "cmpeqb" } } */ +/* { dg-final { scan-assembler "setb" } } */ Index: gcc/testsuite/gcc.target/powerpc/byte-in-set-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/byte-in-set-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/byte-in-set-1.c (working copy) @@ -0,0 +1,14 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-aix* } } */ +/* { dg-options "-mcpu=power8" } */ + +#include <altivec.h> + +int +test_byte_in_set (unsigned char b, unsigned long long set_members) +{ + return __builtin_scalar_byte_in_set (b, set_members); /* { dg-error "Builtin function __builtin_altivec_byte_in_set requires" } */ +} Index: gcc/testsuite/gcc.target/powerpc/byte-in-set-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/byte-in-set-2.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/byte-in-set-2.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-aix* } } */ +/* { dg-options "-mcpu=power9" } */ + +#include <altivec.h> + +/* This test should succeed only on 32-bit configurations. 
*/ + +int +test_byte_in_set (unsigned char b, unsigned long long set_members) +{ + return __builtin_scalar_byte_in_set (b, set_members); /* { dg-error "Builtin function __builtin_scalar_byte_in_set not supported in this compiler configuration" } */ +}

[rs6000] Add built-in function support for Power9 byte instructions

Commit Message

Comments

Patch