Message ID | 570413B4.2010004@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On Tue, Apr 5, 2016 at 3:36 PM, Bill Seurer <seurer@linux.vnet.ibm.com> wrote: > This patch adds support for the signed and unsigned int versions of the > vec_adde altivec builtins from the Power Architecture 64-Bit ELF V2 ABI > OpenPOWER ABI for Linux Supplement (16 July 2015 Version 1.1). There are > many of the builtins that are missing and this is the first of a series > of patches to add them. > > There aren't instructions for the int versions of vec_adde so the > output code is built from other built-ins that do have instructions > which in this case is just two vec_adds. > > The new test cases are executable tests which verify that the generated > code produces expected values. C macros were used so that the same > test case could be used for both the signed and unsigned versions. An > extra executable test case is also included to ensure that the modified > support for the __int128 versions of vec_adde is not broken. The same > test case could not be used for both int and __int128 because of some > differences in loading and storing the vectors. > > Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no > regressions. Is this ok for trunk? > > [gcc] > > 2016-04-06 Bill Seurer <seurer@linux.vnet.ibm.com> > > * config/rs6000/rs6000-builtin.def (vec_adde): Change vec_adde to a > special case builtin. > * config/rs6000/rs6000-c.c (altivec_overloaded_builtins, > altivec_resolve_overloaded_builtin): Remove ALTIVEC_BUILTIN_VEC_ADDE > from altivec_overloaded_builtins structure. Add support for it to > altivec_resolve_overloaded_builtin function. > * config/rs6000/rs6000.c (altivec_init_builtins): Add definition > for __builtin_vec_adde. > > [gcc/testsuite] > > 2016-04-06 Bill Seurer <seurer@linux.vnet.ibm.com> > > * gcc.target/powerpc/vec-adde.c: New test. > * gcc.target/powerpc/vec-adde-int128.c: New test. 
> > Index: gcc/config/rs6000/rs6000-builtin.def > =================================================================== > --- gcc/config/rs6000/rs6000-builtin.def (revision 234745) > +++ gcc/config/rs6000/rs6000-builtin.def (working copy) > @@ -951,7 +951,6 @@ BU_ALTIVEC_X (VEC_EXT_V4SF, "vec_ext_v4sf", CO > before we get to the point about classifying the builtin type. */ > > /* 3 argument Altivec overloaded builtins. */ > -BU_ALTIVEC_OVERLOAD_3 (ADDE, "adde") > BU_ALTIVEC_OVERLOAD_3 (ADDEC, "addec") > BU_ALTIVEC_OVERLOAD_3 (MADD, "madd") > BU_ALTIVEC_OVERLOAD_3 (MADDS, "madds") > @@ -1137,6 +1136,7 @@ BU_ALTIVEC_OVERLOAD_P (VCMPGT_P, "vcmpgt_p") > BU_ALTIVEC_OVERLOAD_P (VCMPGE_P, "vcmpge_p") > > /* Overloaded Altivec builtins that are handled as special cases. */ > +BU_ALTIVEC_OVERLOAD_X (ADDE, "adde") > BU_ALTIVEC_OVERLOAD_X (CTF, "ctf") > BU_ALTIVEC_OVERLOAD_X (CTS, "cts") > BU_ALTIVEC_OVERLOAD_X (CTU, "ctu") > Index: gcc/config/rs6000/rs6000-c.c > =================================================================== > --- gcc/config/rs6000/rs6000-c.c (revision 234745) > +++ gcc/config/rs6000/rs6000-c.c (working copy) > @@ -842,11 +842,6 @@ const struct altivec_builtin_types altivec_overloa > RS6000_BTI_unsigned_V1TI, 0 }, > { ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ, > RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, > - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM, > - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, > - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, > - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM, > - RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, > { ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ, > RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, > RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, > @@ -4515,6 +4510,59 @@ assignment for unaligned loads and stores"); > warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \ > assignment for unaligned loads and 
stores"); > > + if (fcode == ALTIVEC_BUILTIN_VEC_ADDE) > + { > + /* vec_adde needs to be special cased because there is no instruction > + for the {un}signed int version */ End comment sentence with period and two spaces > + if (nargs != 3) > + { > + error ("vec_adde only accepts 3 arguments"); > + return error_mark_node; > + } > + > + tree arg0 = (*arglist)[0]; > + tree arg0_type = TREE_TYPE (arg0); > + tree arg1 = (*arglist)[1]; > + tree arg1_type = TREE_TYPE (arg1); > + tree arg2 = (*arglist)[2]; > + tree arg2_type = TREE_TYPE (arg2); > + > + /* All 3 arguments must be vectors of (signed or unsigned) (int or > + __int128) and the types must match */ Same. > + if ((arg0_type != arg1_type) || (arg1_type != arg2_type)) > + goto bad; > + if (TREE_CODE (arg0_type) != VECTOR_TYPE) > + goto bad; > + > + switch (TYPE_MODE (TREE_TYPE (arg0_type))) > + { > + /* for {un}signed ints, > + vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), carryv) */ Same. > + case SImode: > + { > + vec<tree, va_gc> *params = make_tree_vector(); > + vec_safe_push (params, arg0); > + vec_safe_push (params, arg1); > + tree call = altivec_resolve_overloaded_builtin > + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params); > + params = make_tree_vector(); > + vec_safe_push (params, call); > + vec_safe_push (params, arg2); > + return altivec_resolve_overloaded_builtin > + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params); > + } > + /* for {un}signed __int128s use the vaddeuqm instruction directly */ Same. > + case TImode: > + return altivec_resolve_overloaded_builtin > + (loc, rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM], arglist); > + > + /* Types other than {un}signed int and {un}signed __int128 > + are errors */ Same. > + default: > + goto bad; > + } > + } > + > /* For now treat vec_splats and vec_promote as the same. 
*/ > if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS > || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE) > Index: gcc/config/rs6000/rs6000.c > =================================================================== > --- gcc/config/rs6000/rs6000.c (revision 234745) > +++ gcc/config/rs6000/rs6000.c (working copy) > @@ -15582,6 +15582,10 @@ altivec_init_builtins (void) > = build_function_type_list (opaque_V4SI_type_node, > opaque_V4SI_type_node, opaque_V4SI_type_node, > integer_type_node, NULL_TREE); > + tree opaque_ftype_opaque_opaque_opaque > + = build_function_type_list (opaque_V4SI_type_node, > + opaque_V4SI_type_node, opaque_V4SI_type_node, > + opaque_V4SI_type_node, NULL_TREE); > tree int_ftype_int_opaque_opaque > = build_function_type_list (integer_type_node, > integer_type_node, opaque_V4SI_type_node, > @@ -15818,6 +15822,8 @@ altivec_init_builtins (void) > def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS); > def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU); > > + def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque, ALTIVEC_BUILTIN_VEC_ADDE); > + > /* Cell builtins. */ > def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX); > def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL); > Index: gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (working copy) > @@ -0,0 +1,78 @@ > +/* { dg-do run { target { powerpc64le-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ > +/* { dg-options "-mcpu=power8 -O3" } */ > + > +/* Test that the vec_adde builtin works as expected */ Same. 
> + > +#include "altivec.h" > + > +#define N 4096 > + > +void abort (); > + > +#define define_test_functions(STYPE, NAMESUFFIX) \ > +\ > +STYPE result_##NAMESUFFIX[N]; \ > +STYPE addend1_##NAMESUFFIX[N]; \ > +STYPE addend2_##NAMESUFFIX[N]; \ > +STYPE carry_##NAMESUFFIX[N]; \ > +STYPE expected_##NAMESUFFIX[N]; \ > +\ > +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \ > +{ \ > + int i; \ > + vector STYPE v1, v2, v3, tmp; \ > + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \ > + /* result=addend1+addend2+carry */ \ > + v1 = (vector STYPE) { addend1_##NAMESUFFIX[i] }; \ > + v2 = (vector STYPE) { addend2_##NAMESUFFIX[i] }; \ > + v3 = (vector STYPE) { carry_##NAMESUFFIX[i] }; \ > +\ > + tmp = vec_adde (v1, v2, v3); \ > + result_##NAMESUFFIX[i] = tmp[0]; \ > + } \ > +} \ > +\ > +__attribute__((noinline)) void init_##NAMESUFFIX () \ > +{ \ > + int i; \ > + for (i = 0; i < N; ++i) { \ > + result_##NAMESUFFIX[i] = 0; \ > + addend1_##NAMESUFFIX[i] = 1; \ > + addend2_##NAMESUFFIX[i] = 2; \ > + carry_##NAMESUFFIX[i] = (i%2==0)? 
1: 0; \ > + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \ > + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \ > + } \ > +} \ > +\ > +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \ > +{ \ > + int i; \ > + for (i = 0; i < N; ++i) { \ > + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \ > + abort(); \ > + } \ > +} > + > + > +#define execute_test_functions(STYPE, NAMESUFFIX) \ > +{ \ > + init_##NAMESUFFIX (); \ > + vector_tests_##NAMESUFFIX (); \ > + verify_results_##NAMESUFFIX (); \ > +} > + > + > +define_test_functions(signed __int128, si128); > +define_test_functions(unsigned __int128, ui128); > + > +int main () > +{ > + execute_test_functions(signed __int128, si128); > + execute_test_functions(unsigned __int128, ui128); > + > + return 0; > +} > + > + > Index: gcc/testsuite/gcc.target/powerpc/vec-adde.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vec-adde.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vec-adde.c (working copy) > @@ -0,0 +1,78 @@ > +/* { dg-do run { target { powerpc64le-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ > +/* { dg-options "-mcpu=power8 -O3" } */ > + > +/* Test that the vec_adde builtin works as expected */ Same. 
> + > +#include "altivec.h" > + > +#define N 4096 > + > +void abort (); > + > +#define define_test_functions(STYPE, NAMESUFFIX) \ > +\ > +STYPE result_##NAMESUFFIX[N]; \ > +STYPE addend1_##NAMESUFFIX[N]; \ > +STYPE addend2_##NAMESUFFIX[N]; \ > +STYPE carry_##NAMESUFFIX[N]; \ > +STYPE expected_##NAMESUFFIX[N]; \ > +\ > +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \ > +{ \ > + int i; \ > + vector STYPE v1, v2, v3, tmp; \ > + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \ > + /* result=addend1+addend2+carry */ \ > + v1 = vec_vsx_ld (0, &addend1_##NAMESUFFIX[i]); \ > + v2 = vec_vsx_ld (0, &addend2_##NAMESUFFIX[i]); \ > + v3 = vec_vsx_ld (0, &carry_##NAMESUFFIX[i]); \ > +\ > + tmp = vec_adde (v1, v2, v3); \ > + vec_vsx_st (tmp, 0, &result_##NAMESUFFIX[i]); \ > + } \ > +} \ > +\ > +__attribute__((noinline)) void init_##NAMESUFFIX () \ > +{ \ > + int i; \ > + for (i = 0; i < N; ++i) { \ > + result_##NAMESUFFIX[i] = 0; \ > + addend1_##NAMESUFFIX[i] = 1; \ > + addend2_##NAMESUFFIX[i] = 2; \ > + carry_##NAMESUFFIX[i] = (i%2==0)? 1: 0; \ > + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \ > + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \ > + } \ > +} \ > +\ > +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \ > +{ \ > + int i; \ > + for (i = 0; i < N; ++i) { \ > + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \ > + abort(); \ > + } \ > +} > + > + > +#define execute_test_functions(STYPE, NAMESUFFIX) \ > +{ \ > + init_##NAMESUFFIX (); \ > + vector_tests_##NAMESUFFIX (); \ > + verify_results_##NAMESUFFIX (); \ > +} > + > + > +define_test_functions(signed int, si); > +define_test_functions(unsigned int, ui); > + > +int main () > +{ > + execute_test_functions(signed int, si); > + execute_test_functions(unsigned int, ui); > + > + return 0; > +} > + > + > -- > > -Bill Seurer >
On 04/05/16 21:27, David Edelsohn wrote: > On Tue, Apr 5, 2016 at 3:36 PM, Bill Seurer <seurer@linux.vnet.ibm.com> wrote: >> This patch adds support for the signed and unsigned int versions of the >> vec_adde altivec builtins from the Power Architecture 64-Bit ELF V2 ABI >> OpenPOWER ABI for Linux Supplement (16 July 2015 Version 1.1). There are >> many of the builtins that are missing and this is the first of a series >> of patches to add them. >> >> There aren't instructions for the int versions of vec_adde so the >> output code is built from other built-ins that do have instructions >> which in this case is just two vec_adds. >> >> The new test cases are executable tests which verify that the generated >> code produces expected values. C macros were used so that the same >> test case could be used for both the signed and unsigned versions. An >> extra executable test case is also included to ensure that the modified >> support for the __int128 versions of vec_adde is not broken. The same >> test case could not be used for both int and __int128 because of some >> differences in loading and storing the vectors. >> >> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no >> regressions. Is this ok for trunk? >> >> [gcc] >> >> 2016-04-06 Bill Seurer <seurer@linux.vnet.ibm.com> >> >> * config/rs6000/rs6000-builtin.def (vec_adde): Change vec_adde to a >> special case builtin. >> * config/rs6000/rs6000-c.c (altivec_overloaded_builtins, >> altivec_resolve_overloaded_builtin): Remove ALTIVEC_BUILTIN_VEC_ADDE >> from altivec_overloaded_builtins structure. Add support for it to >> altivec_resolve_overloaded_builtin function. >> * config/rs6000/rs6000.c (altivec_init_builtins): Add definition >> for __builtin_vec_adde. >> >> [gcc/testsuite] >> >> 2016-04-06 Bill Seurer <seurer@linux.vnet.ibm.com> >> >> * gcc.target/powerpc/vec-adde.c: New test. >> * gcc.target/powerpc/vec-adde-int128.c: New test. 
>> >> Index: gcc/config/rs6000/rs6000-builtin.def >> =================================================================== >> --- gcc/config/rs6000/rs6000-builtin.def (revision 234745) >> +++ gcc/config/rs6000/rs6000-builtin.def (working copy) >> @@ -951,7 +951,6 @@ BU_ALTIVEC_X (VEC_EXT_V4SF, "vec_ext_v4sf", CO >> before we get to the point about classifying the builtin type. */ >> >> /* 3 argument Altivec overloaded builtins. */ >> -BU_ALTIVEC_OVERLOAD_3 (ADDE, "adde") >> BU_ALTIVEC_OVERLOAD_3 (ADDEC, "addec") >> BU_ALTIVEC_OVERLOAD_3 (MADD, "madd") >> BU_ALTIVEC_OVERLOAD_3 (MADDS, "madds") >> @@ -1137,6 +1136,7 @@ BU_ALTIVEC_OVERLOAD_P (VCMPGT_P, "vcmpgt_p") >> BU_ALTIVEC_OVERLOAD_P (VCMPGE_P, "vcmpge_p") >> >> /* Overloaded Altivec builtins that are handled as special cases. */ >> +BU_ALTIVEC_OVERLOAD_X (ADDE, "adde") >> BU_ALTIVEC_OVERLOAD_X (CTF, "ctf") >> BU_ALTIVEC_OVERLOAD_X (CTS, "cts") >> BU_ALTIVEC_OVERLOAD_X (CTU, "ctu") >> Index: gcc/config/rs6000/rs6000-c.c >> =================================================================== >> --- gcc/config/rs6000/rs6000-c.c (revision 234745) >> +++ gcc/config/rs6000/rs6000-c.c (working copy) >> @@ -842,11 +842,6 @@ const struct altivec_builtin_types altivec_overloa >> RS6000_BTI_unsigned_V1TI, 0 }, >> { ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ, >> RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, >> - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM, >> - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, >> - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, >> - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM, >> - RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, >> { ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ, >> RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, >> RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, >> @@ -4515,6 +4510,59 @@ assignment for unaligned loads and stores"); >> warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \ >> 
assignment for unaligned loads and stores"); >> >> + if (fcode == ALTIVEC_BUILTIN_VEC_ADDE) >> + { >> + /* vec_adde needs to be special cased because there is no instruction >> + for the {un}signed int version */ > > End comment sentence with period and two spaces > >> + if (nargs != 3) >> + { >> + error ("vec_adde only accepts 3 arguments"); >> + return error_mark_node; >> + } >> + >> + tree arg0 = (*arglist)[0]; >> + tree arg0_type = TREE_TYPE (arg0); >> + tree arg1 = (*arglist)[1]; >> + tree arg1_type = TREE_TYPE (arg1); >> + tree arg2 = (*arglist)[2]; >> + tree arg2_type = TREE_TYPE (arg2); >> + >> + /* All 3 arguments must be vectors of (signed or unsigned) (int or >> + __int128) and the types must match */ > > Same. > >> + if ((arg0_type != arg1_type) || (arg1_type != arg2_type)) >> + goto bad; >> + if (TREE_CODE (arg0_type) != VECTOR_TYPE) >> + goto bad; >> + >> + switch (TYPE_MODE (TREE_TYPE (arg0_type))) >> + { >> + /* for {un}signed ints, >> + vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), carryv) */ > > Same. > >> + case SImode: >> + { >> + vec<tree, va_gc> *params = make_tree_vector(); >> + vec_safe_push (params, arg0); >> + vec_safe_push (params, arg1); >> + tree call = altivec_resolve_overloaded_builtin >> + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params); >> + params = make_tree_vector(); >> + vec_safe_push (params, call); >> + vec_safe_push (params, arg2); >> + return altivec_resolve_overloaded_builtin >> + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params); >> + } >> + /* for {un}signed __int128s use the vaddeuqm instruction directly */ > > Same. > >> + case TImode: >> + return altivec_resolve_overloaded_builtin >> + (loc, rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM], arglist); >> + >> + /* Types other than {un}signed int and {un}signed __int128 >> + are errors */ > > Same. > >> + default: >> + goto bad; >> + } >> + } >> + >> /* For now treat vec_splats and vec_promote as the same. 
*/ >> if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS >> || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE) >> Index: gcc/config/rs6000/rs6000.c >> =================================================================== >> --- gcc/config/rs6000/rs6000.c (revision 234745) >> +++ gcc/config/rs6000/rs6000.c (working copy) >> @@ -15582,6 +15582,10 @@ altivec_init_builtins (void) >> = build_function_type_list (opaque_V4SI_type_node, >> opaque_V4SI_type_node, opaque_V4SI_type_node, >> integer_type_node, NULL_TREE); >> + tree opaque_ftype_opaque_opaque_opaque >> + = build_function_type_list (opaque_V4SI_type_node, >> + opaque_V4SI_type_node, opaque_V4SI_type_node, >> + opaque_V4SI_type_node, NULL_TREE); >> tree int_ftype_int_opaque_opaque >> = build_function_type_list (integer_type_node, >> integer_type_node, opaque_V4SI_type_node, >> @@ -15818,6 +15822,8 @@ altivec_init_builtins (void) >> def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS); >> def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU); >> >> + def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque, ALTIVEC_BUILTIN_VEC_ADDE); >> + >> /* Cell builtins. */ >> def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX); >> def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL); >> Index: gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c >> =================================================================== >> --- gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (revision 0) >> +++ gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (working copy) >> @@ -0,0 +1,78 @@ >> +/* { dg-do run { target { powerpc64le-*-* } } } */ >> +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ >> +/* { dg-options "-mcpu=power8 -O3" } */ >> + >> +/* Test that the vec_adde builtin works as expected */ > > Same. 
> >> + >> +#include "altivec.h" >> + >> +#define N 4096 >> + >> +void abort (); >> + >> +#define define_test_functions(STYPE, NAMESUFFIX) \ >> +\ >> +STYPE result_##NAMESUFFIX[N]; \ >> +STYPE addend1_##NAMESUFFIX[N]; \ >> +STYPE addend2_##NAMESUFFIX[N]; \ >> +STYPE carry_##NAMESUFFIX[N]; \ >> +STYPE expected_##NAMESUFFIX[N]; \ >> +\ >> +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \ >> +{ \ >> + int i; \ >> + vector STYPE v1, v2, v3, tmp; \ >> + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \ >> + /* result=addend1+addend2+carry */ \ >> + v1 = (vector STYPE) { addend1_##NAMESUFFIX[i] }; \ >> + v2 = (vector STYPE) { addend2_##NAMESUFFIX[i] }; \ >> + v3 = (vector STYPE) { carry_##NAMESUFFIX[i] }; \ >> +\ >> + tmp = vec_adde (v1, v2, v3); \ >> + result_##NAMESUFFIX[i] = tmp[0]; \ >> + } \ >> +} \ >> +\ >> +__attribute__((noinline)) void init_##NAMESUFFIX () \ >> +{ \ >> + int i; \ >> + for (i = 0; i < N; ++i) { \ >> + result_##NAMESUFFIX[i] = 0; \ >> + addend1_##NAMESUFFIX[i] = 1; \ >> + addend2_##NAMESUFFIX[i] = 2; \ >> + carry_##NAMESUFFIX[i] = (i%2==0)? 
1: 0; \ >> + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \ >> + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \ >> + } \ >> +} \ >> +\ >> +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \ >> +{ \ >> + int i; \ >> + for (i = 0; i < N; ++i) { \ >> + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \ >> + abort(); \ >> + } \ >> +} >> + >> + >> +#define execute_test_functions(STYPE, NAMESUFFIX) \ >> +{ \ >> + init_##NAMESUFFIX (); \ >> + vector_tests_##NAMESUFFIX (); \ >> + verify_results_##NAMESUFFIX (); \ >> +} >> + >> + >> +define_test_functions(signed __int128, si128); >> +define_test_functions(unsigned __int128, ui128); >> + >> +int main () >> +{ >> + execute_test_functions(signed __int128, si128); >> + execute_test_functions(unsigned __int128, ui128); >> + >> + return 0; >> +} >> + >> + >> Index: gcc/testsuite/gcc.target/powerpc/vec-adde.c >> =================================================================== >> --- gcc/testsuite/gcc.target/powerpc/vec-adde.c (revision 0) >> +++ gcc/testsuite/gcc.target/powerpc/vec-adde.c (working copy) >> @@ -0,0 +1,78 @@ >> +/* { dg-do run { target { powerpc64le-*-* } } } */ >> +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ >> +/* { dg-options "-mcpu=power8 -O3" } */ >> + >> +/* Test that the vec_adde builtin works as expected */ > > Same. 
> >> + >> +#include "altivec.h" >> + >> +#define N 4096 >> + >> +void abort (); >> + >> +#define define_test_functions(STYPE, NAMESUFFIX) \ >> +\ >> +STYPE result_##NAMESUFFIX[N]; \ >> +STYPE addend1_##NAMESUFFIX[N]; \ >> +STYPE addend2_##NAMESUFFIX[N]; \ >> +STYPE carry_##NAMESUFFIX[N]; \ >> +STYPE expected_##NAMESUFFIX[N]; \ >> +\ >> +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \ >> +{ \ >> + int i; \ >> + vector STYPE v1, v2, v3, tmp; \ >> + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \ >> + /* result=addend1+addend2+carry */ \ >> + v1 = vec_vsx_ld (0, &addend1_##NAMESUFFIX[i]); \ >> + v2 = vec_vsx_ld (0, &addend2_##NAMESUFFIX[i]); \ >> + v3 = vec_vsx_ld (0, &carry_##NAMESUFFIX[i]); \ >> +\ >> + tmp = vec_adde (v1, v2, v3); \ >> + vec_vsx_st (tmp, 0, &result_##NAMESUFFIX[i]); \ >> + } \ >> +} \ >> +\ >> +__attribute__((noinline)) void init_##NAMESUFFIX () \ >> +{ \ >> + int i; \ >> + for (i = 0; i < N; ++i) { \ >> + result_##NAMESUFFIX[i] = 0; \ >> + addend1_##NAMESUFFIX[i] = 1; \ >> + addend2_##NAMESUFFIX[i] = 2; \ >> + carry_##NAMESUFFIX[i] = (i%2==0)? 1: 0; \ >> + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \ >> + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \ >> + } \ >> +} \ >> +\ >> +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \ >> +{ \ >> + int i; \ >> + for (i = 0; i < N; ++i) { \ >> + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \ >> + abort(); \ >> + } \ >> +} >> + >> + >> +#define execute_test_functions(STYPE, NAMESUFFIX) \ >> +{ \ >> + init_##NAMESUFFIX (); \ >> + vector_tests_##NAMESUFFIX (); \ >> + verify_results_##NAMESUFFIX (); \ >> +} >> + >> + >> +define_test_functions(signed int, si); >> +define_test_functions(unsigned int, ui); >> + >> +int main () >> +{ >> + execute_test_functions(signed int, si); >> + execute_test_functions(unsigned int, ui); >> + >> + return 0; >> +} >> + >> + >> -- >> >> -Bill Seurer >> > Thanks. I have fixed all the comments in my local copy. 
Any more comments, especially about the code or test cases?
Hi Bill, On Thu, Apr 07, 2016 at 04:04:30PM -0500, Bill Seurer wrote: > On 04/05/16 21:27, David Edelsohn wrote: > >On Tue, Apr 5, 2016 at 3:36 PM, Bill Seurer <seurer@linux.vnet.ibm.com> > >> * config/rs6000/rs6000-c.c (altivec_overloaded_builtins, > >> altivec_resolve_overloaded_builtin): Remove > >> ALTIVEC_BUILTIN_VEC_ADDE > >> from altivec_overloaded_builtins structure. Add support for it > >> to > >> altivec_resolve_overloaded_builtin function. A total nit, but... If you change one thing in one place, and another in another, put them in the changelog separately. > >>+ /* for {un}signed ints, > >>+ vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), > >>carryv) */ > > > >Same. Also sentences start with a capital letter. What is the definition of the builtin? The machine insn does va + vb + (carryv & 1); your implementation skips the AND. > >>--- gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (revision 0) > >>+++ gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (working copy) > >>@@ -0,0 +1,78 @@ > >>+/* { dg-do run { target { powerpc64le-*-* } } } */ Will powerpc64 (i.e. BE) not work? Segher
On 04/08/16 09:50, Segher Boessenkool wrote: > Hi Bill, > > On Thu, Apr 07, 2016 at 04:04:30PM -0500, Bill Seurer wrote: >> On 04/05/16 21:27, David Edelsohn wrote: >>> On Tue, Apr 5, 2016 at 3:36 PM, Bill Seurer <seurer@linux.vnet.ibm.com> >>>> * config/rs6000/rs6000-c.c (altivec_overloaded_builtins, >>>> altivec_resolve_overloaded_builtin): Remove >>>> ALTIVEC_BUILTIN_VEC_ADDE >>>> from altivec_overloaded_builtins structure. Add support for it >>>> to >>>> altivec_resolve_overloaded_builtin function. > > A total nit, but... If you change one thing in one places, and another > in another, put them in the changelog separately. OK. >>>> + /* for {un}signed ints, >>>> + vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), >>>> carryv) */ >>> >>> Same. > > Also sentences start with a capital letter. > > What is the definition of the builtin? The machine insn does > va + vb + (carryv & 1); your implementation skips the AND. From the ABI document: Purpose: Returns a vector containing the result of adding each set of the corresponding elements of ARG1 and ARG2 with a carry (that has a value of either 0 or 1) specified as the ARG3 vector. Result value: The value of each element of the result is produced by adding the corresponding elements of ARG1 and ARG2 and a carry specified in ARG3 (1 if there is a carry, 0 otherwise). It's not really clear what should be done if the carry is not 1 or 0 from that description because it specifies that the carry IS 1 or 0. I tried it and the instruction (for __int128's only) does indeed do the AND, which means it will add a 0 if the "carry" is something like 2. I will change the test case to include "bad" values. I can add an AND easily enough. >>>> --- gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (revision 0) >>>> +++ gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (working copy) >>>> @@ -0,0 +1,78 @@ >>>> +/* { dg-do run { target { powerpc64le-*-* } } } */ > > Will powerpc64 (i.e. BE) not work? No, it does work.
The other test cases I was using as models only targeted LE (though on inspection most if not all of them would also work on BE). I suppose there's no reason to do that here, though.
Index: gcc/config/rs6000/rs6000-builtin.def =================================================================== --- gcc/config/rs6000/rs6000-builtin.def (revision 234745) +++ gcc/config/rs6000/rs6000-builtin.def (working copy) @@ -951,7 +951,6 @@ BU_ALTIVEC_X (VEC_EXT_V4SF, "vec_ext_v4sf", CO before we get to the point about classifying the builtin type. */ /* 3 argument Altivec overloaded builtins. */ -BU_ALTIVEC_OVERLOAD_3 (ADDE, "adde") BU_ALTIVEC_OVERLOAD_3 (ADDEC, "addec") BU_ALTIVEC_OVERLOAD_3 (MADD, "madd") BU_ALTIVEC_OVERLOAD_3 (MADDS, "madds") @@ -1137,6 +1136,7 @@ BU_ALTIVEC_OVERLOAD_P (VCMPGT_P, "vcmpgt_p") BU_ALTIVEC_OVERLOAD_P (VCMPGE_P, "vcmpge_p") /* Overloaded Altivec builtins that are handled as special cases. */ +BU_ALTIVEC_OVERLOAD_X (ADDE, "adde") BU_ALTIVEC_OVERLOAD_X (CTF, "ctf") BU_ALTIVEC_OVERLOAD_X (CTS, "cts") BU_ALTIVEC_OVERLOAD_X (CTU, "ctu") Index: gcc/config/rs6000/rs6000-c.c =================================================================== --- gcc/config/rs6000/rs6000-c.c (revision 234745) +++ gcc/config/rs6000/rs6000-c.c (working copy) @@ -842,11 +842,6 @@ const struct altivec_builtin_types altivec_overloa RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM, - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM, - RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, { ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, @@ -4515,6 +4510,59 @@ assignment for unaligned loads and stores"); warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \ assignment for unaligned loads and stores"); + if (fcode == ALTIVEC_BUILTIN_VEC_ADDE) + { + /* vec_adde needs to be 
special cased because there is no instruction + for the {un}signed int version */ + if (nargs != 3) + { + error ("vec_adde only accepts 3 arguments"); + return error_mark_node; + } + + tree arg0 = (*arglist)[0]; + tree arg0_type = TREE_TYPE (arg0); + tree arg1 = (*arglist)[1]; + tree arg1_type = TREE_TYPE (arg1); + tree arg2 = (*arglist)[2]; + tree arg2_type = TREE_TYPE (arg2); + + /* All 3 arguments must be vectors of (signed or unsigned) (int or + __int128) and the types must match */ + if ((arg0_type != arg1_type) || (arg1_type != arg2_type)) + goto bad; + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + + switch (TYPE_MODE (TREE_TYPE (arg0_type))) + { + /* for {un}signed ints, + vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), carryv) */ + case SImode: + { + vec<tree, va_gc> *params = make_tree_vector(); + vec_safe_push (params, arg0); + vec_safe_push (params, arg1); + tree call = altivec_resolve_overloaded_builtin + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params); + params = make_tree_vector(); + vec_safe_push (params, call); + vec_safe_push (params, arg2); + return altivec_resolve_overloaded_builtin + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params); + } + /* for {un}signed __int128s use the vaddeuqm instruction directly */ + case TImode: + return altivec_resolve_overloaded_builtin + (loc, rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM], arglist); + + /* Types other than {un}signed int and {un}signed __int128 + are errors */ + default: + goto bad; + } + } + /* For now treat vec_splats and vec_promote as the same. 
*/ if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE) Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 234745) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -15582,6 +15582,10 @@ altivec_init_builtins (void) = build_function_type_list (opaque_V4SI_type_node, opaque_V4SI_type_node, opaque_V4SI_type_node, integer_type_node, NULL_TREE); + tree opaque_ftype_opaque_opaque_opaque + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, opaque_V4SI_type_node, + opaque_V4SI_type_node, NULL_TREE); tree int_ftype_int_opaque_opaque = build_function_type_list (integer_type_node, integer_type_node, opaque_V4SI_type_node, @@ -15818,6 +15822,8 @@ altivec_init_builtins (void) def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS); def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU); + def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque, ALTIVEC_BUILTIN_VEC_ADDE); + /* Cell builtins. 
*/ def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX); def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL); Index: gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (working copy) @@ -0,0 +1,78 @@ +/* { dg-do run { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O3" } */ + +/* Test that the vec_adde builtin works as expected */ + +#include "altivec.h" + +#define N 4096 + +void abort (); + +#define define_test_functions(STYPE, NAMESUFFIX) \ +\ +STYPE result_##NAMESUFFIX[N]; \ +STYPE addend1_##NAMESUFFIX[N]; \ +STYPE addend2_##NAMESUFFIX[N]; \ +STYPE carry_##NAMESUFFIX[N]; \ +STYPE expected_##NAMESUFFIX[N]; \ +\ +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \ +{ \ + int i; \ + vector STYPE v1, v2, v3, tmp; \ + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \ + /* result=addend1+addend2+carry */ \ + v1 = (vector STYPE) { addend1_##NAMESUFFIX[i] }; \ + v2 = (vector STYPE) { addend2_##NAMESUFFIX[i] }; \ + v3 = (vector STYPE) { carry_##NAMESUFFIX[i] }; \ +\ + tmp = vec_adde (v1, v2, v3); \ + result_##NAMESUFFIX[i] = tmp[0]; \ + } \ +} \ +\ +__attribute__((noinline)) void init_##NAMESUFFIX () \ +{ \ + int i; \ + for (i = 0; i < N; ++i) { \ + result_##NAMESUFFIX[i] = 0; \ + addend1_##NAMESUFFIX[i] = 1; \ + addend2_##NAMESUFFIX[i] = 2; \ + carry_##NAMESUFFIX[i] = (i%2==0)? 
1: 0; \ + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \ + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \ + } \ +} \ +\ +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \ +{ \ + int i; \ + for (i = 0; i < N; ++i) { \ + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \ + abort(); \ + } \ +} + + +#define execute_test_functions(STYPE, NAMESUFFIX) \ +{ \ + init_##NAMESUFFIX (); \ + vector_tests_##NAMESUFFIX (); \ + verify_results_##NAMESUFFIX (); \ +} + + +define_test_functions(signed __int128, si128); +define_test_functions(unsigned __int128, ui128); + +int main () +{ + execute_test_functions(signed __int128, si128); + execute_test_functions(unsigned __int128, ui128); + + return 0; +} + + Index: gcc/testsuite/gcc.target/powerpc/vec-adde.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-adde.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vec-adde.c (working copy) @@ -0,0 +1,78 @@ +/* { dg-do run { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O3" } */ + +/* Test that the vec_adde builtin works as expected */ + +#include "altivec.h" + +#define N 4096 + +void abort (); + +#define define_test_functions(STYPE, NAMESUFFIX) \ +\ +STYPE result_##NAMESUFFIX[N]; \ +STYPE addend1_##NAMESUFFIX[N]; \ +STYPE addend2_##NAMESUFFIX[N]; \ +STYPE carry_##NAMESUFFIX[N]; \ +STYPE expected_##NAMESUFFIX[N]; \ +\ +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \ +{ \ + int i; \ + vector STYPE v1, v2, v3, tmp; \ + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \ + /* result=addend1+addend2+carry */ \ + v1 = vec_vsx_ld (0, &addend1_##NAMESUFFIX[i]); \ + v2 = vec_vsx_ld (0, &addend2_##NAMESUFFIX[i]); \ + v3 = vec_vsx_ld (0, &carry_##NAMESUFFIX[i]); \ +\ + tmp = vec_adde (v1, v2, v3); \ + vec_vsx_st (tmp, 0, &result_##NAMESUFFIX[i]); \ + } \ +} \ +\ +__attribute__((noinline)) 
void init_##NAMESUFFIX () \ +{ \ + int i; \ + for (i = 0; i < N; ++i) { \ + result_##NAMESUFFIX[i] = 0; \ + addend1_##NAMESUFFIX[i] = 1; \ + addend2_##NAMESUFFIX[i] = 2; \ + carry_##NAMESUFFIX[i] = (i%2==0)? 1: 0; \ + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \ + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \ + } \ +} \ +\ +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \ +{ \ + int i; \ + for (i = 0; i < N; ++i) { \ + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \ + abort(); \ + } \ +} + + +#define execute_test_functions(STYPE, NAMESUFFIX) \ +{ \ + init_##NAMESUFFIX (); \ + vector_tests_##NAMESUFFIX (); \ + verify_results_##NAMESUFFIX (); \ +} + + +define_test_functions(signed int, si); +define_test_functions(unsigned int, ui); + +int main () +{ + execute_test_functions(signed int, si); + execute_test_functions(unsigned int, ui); + + return 0; +} + +