Message ID | 20190524093335.22241-4-david@redhat.com |
---|---|
State | New |
Headers | show |
Series | [v2,1/5] s390x/tcg: Implement VECTOR FIND ANY ELEMENT EQUAL | expand |
On 5/24/19 4:33 AM, David Hildenbrand wrote:
> +    /* identify the smaller element */
> +    if (first_inequal < 16) {
> +        uint8_t enr = first_inequal / (1 << es);
> +        uint32_t a = s390_vec_read_element(v2, enr, es);
> +        uint32_t b = s390_vec_read_element(v3, enr, es);
> +
> +        smaller = a < b;
> +    }
> +
> +    if (zs) {
> +        z0 = zero_search(a0, mask);
> +        z1 = zero_search(a1, mask);
> +        first_zero = match_index(z0, z1);
> +    }
> +
> +    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
> +    s390_vec_write_element64(v1, 1, 0);
> +    if (first_zero == 16 && first_inequal == 16) {
> +        return 3;
> +    } else if (first_zero < first_inequal) {
> +        return 0;
> +    }
> +    return smaller ? 1 : 2;

Perhaps move the computation of smaller down here where it is used.

Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
On 28.05.19 14:55, Richard Henderson wrote:
> On 5/24/19 4:33 AM, David Hildenbrand wrote:
>> +    /* identify the smaller element */
>> +    if (first_inequal < 16) {
>> +        uint8_t enr = first_inequal / (1 << es);
>> +        uint32_t a = s390_vec_read_element(v2, enr, es);
>> +        uint32_t b = s390_vec_read_element(v3, enr, es);
>> +
>> +        smaller = a < b;
>> +    }
>> +
>> +    if (zs) {
>> +        z0 = zero_search(a0, mask);
>> +        z1 = zero_search(a1, mask);
>> +        first_zero = match_index(z0, z1);
>> +    }
>> +
>> +    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
>> +    s390_vec_write_element64(v1, 1, 0);
>> +    if (first_zero == 16 && first_inequal == 16) {
>> +        return 3;
>> +    } else if (first_zero < first_inequal) {
>> +        return 0;
>> +    }
>> +    return smaller ? 1 : 2;
>
> Perhaps move the computation of smaller down here where it is used.

Wanted to do that but then I realized that I would have to move
s390_vec_write_element64() as well, because v1 and v2/v3 could overlap.

Thanks!

>
> Otherwise,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
>
> r~
>
On 5/28/19 8:02 AM, David Hildenbrand wrote:
> On 28.05.19 14:55, Richard Henderson wrote:
>> On 5/24/19 4:33 AM, David Hildenbrand wrote:
>>> +    /* identify the smaller element */
>>> +    if (first_inequal < 16) {
>>> +        uint8_t enr = first_inequal / (1 << es);
>>> +        uint32_t a = s390_vec_read_element(v2, enr, es);
>>> +        uint32_t b = s390_vec_read_element(v3, enr, es);
>>> +
>>> +        smaller = a < b;
>>> +    }
>>> +
>>> +    if (zs) {
>>> +        z0 = zero_search(a0, mask);
>>> +        z1 = zero_search(a1, mask);
>>> +        first_zero = match_index(z0, z1);
>>> +    }
>>> +
>>> +    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
>>> +    s390_vec_write_element64(v1, 1, 0);
>>> +    if (first_zero == 16 && first_inequal == 16) {
>>> +        return 3;
>>> +    } else if (first_zero < first_inequal) {
>>> +        return 0;
>>> +    }
>>> +    return smaller ? 1 : 2;
>>
>> Perhaps move the computation of smaller down here where it is used.
>
> Wanted to do that but then I realized that I would have to move
> s390_vec_write_element64() as well, because v1 and v2/v3 could overlap.

Oh, yes of course. R-B without any changes. ;-)


r~
On 28.05.19 15:03, Richard Henderson wrote:
> On 5/28/19 8:02 AM, David Hildenbrand wrote:
>> On 28.05.19 14:55, Richard Henderson wrote:
>>> On 5/24/19 4:33 AM, David Hildenbrand wrote:
>>>> +    /* identify the smaller element */
>>>> +    if (first_inequal < 16) {
>>>> +        uint8_t enr = first_inequal / (1 << es);
>>>> +        uint32_t a = s390_vec_read_element(v2, enr, es);
>>>> +        uint32_t b = s390_vec_read_element(v3, enr, es);
>>>> +
>>>> +        smaller = a < b;
>>>> +    }
>>>> +
>>>> +    if (zs) {
>>>> +        z0 = zero_search(a0, mask);
>>>> +        z1 = zero_search(a1, mask);
>>>> +        first_zero = match_index(z0, z1);
>>>> +    }
>>>> +
>>>> +    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
>>>> +    s390_vec_write_element64(v1, 1, 0);
>>>> +    if (first_zero == 16 && first_inequal == 16) {
>>>> +        return 3;
>>>> +    } else if (first_zero < first_inequal) {
>>>> +        return 0;
>>>> +    }
>>>> +    return smaller ? 1 : 2;
>>>
>>> Perhaps move the computation of smaller down here where it is used.
>>
>> Wanted to do that but then I realized that I would have to move
>> s390_vec_write_element64() as well, because v1 and v2/v3 could overlap.
>
> Oh, yes of course. R-B without any changes. ;-)
>

Thanks Richard, will send a pull request to Conny for this part soon.

I'll start getting the vector floating-point instructions into shape this
week. So don't start to relax ;)

Cheers!

>
> r~
>
diff --git a/target/s390x/helper.h b/target/s390x/helper.h index a1b169b666..fb50b404db 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -224,6 +224,12 @@ DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfene8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfene16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32) #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index d8907ef6a5..d03c1ee0b3 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -1197,6 +1197,8 @@ F(0xe782, VFAE, VRR_b, V, 0, 0, 0, 0, vfae, 0, IF_VEC) /* VECTOR FIND ELEMENT EQUAL */ F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC) +/* VECTOR FIND ELEMENT NOT EQUAL */ + F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC) #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c index b25afbc011..1ad0b62517 100644 --- a/target/s390x/translate_vx.inc.c +++ b/target/s390x/translate_vx.inc.c @@ -2414,3 +2414,34 @@ static DisasJumpType op_vfee(DisasContext *s, DisasOps *o) } return DISAS_NEXT; } + +static DisasJumpType op_vfene(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfene8, + gen_helper_gvec_vfene16, + gen_helper_gvec_vfene32, + }; 
+ static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfene_cc8, + gen_helper_gvec_vfene_cc16, + gen_helper_gvec_vfene_cc32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), m5, g[es]); + } + return DISAS_NEXT; +} diff --git a/target/s390x/vec.h b/target/s390x/vec.h index 3313fb43ee..affc62874c 100644 --- a/target/s390x/vec.h +++ b/target/s390x/vec.h @@ -12,6 +12,8 @@ #ifndef S390X_VEC_H #define S390X_VEC_H +#include "tcg/tcg.h" + typedef union S390Vector { uint64_t doubleword[2]; uint32_t word[4]; @@ -70,6 +72,23 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr) return v->doubleword[enr]; } +static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr, + uint8_t es) +{ + switch (es) { + case MO_8: + return s390_vec_read_element8(v, enr); + case MO_16: + return s390_vec_read_element16(v, enr); + case MO_32: + return s390_vec_read_element32(v, enr); + case MO_64: + return s390_vec_read_element64(v, enr); + default: + g_assert_not_reached(); + } +} + static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr, uint8_t data) { diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c index 05ad99e173..0ee3470112 100644 --- a/target/s390x/vec_string_helper.c +++ b/target/s390x/vec_string_helper.c @@ -27,6 +27,15 @@ static inline uint64_t zero_search(uint64_t a, uint64_t mask) return ~(((a & mask) + mask) | a | mask); } +/* + * Returns a bit set in the MSB of each element that is not zero, + * as defined by the mask. 
+ */ +static inline uint64_t nonzero_search(uint64_t a, uint64_t mask) +{ + return (((a & mask) + mask) | a) & ~mask; +} + /* * Returns the byte offset for the first match, or 16 for no match. */ @@ -209,3 +218,68 @@ void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ DEF_VFEE_CC_HELPER(8) DEF_VFEE_CC_HELPER(16) DEF_VFEE_CC_HELPER(32) + +static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_inequal; + bool smaller = false; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = nonzero_search(a0 ^ b0, mask); + e1 = nonzero_search(a1 ^ b1, mask); + first_inequal = match_index(e0, e1); + + /* identify the smaller element */ + if (first_inequal < 16) { + uint8_t enr = first_inequal / (1 << es); + uint32_t a = s390_vec_read_element(v2, enr, es); + uint32_t b = s390_vec_read_element(v3, enr, es); + + smaller = a < b; + } + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + if (first_zero == 16 && first_inequal == 16) { + return 3; + } else if (first_zero < first_inequal) { + return 0; + } + return smaller ? 
1 : 2; +} + +#define DEF_VFENE_HELPER(BITS) \ +void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_HELPER(8) +DEF_VFENE_HELPER(16) +DEF_VFENE_HELPER(32) + +#define DEF_VFENE_CC_HELPER(BITS) \ +void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_CC_HELPER(8) +DEF_VFENE_CC_HELPER(16) +DEF_VFENE_CC_HELPER(32)
Similar to VECTOR FIND ELEMENT EQUAL. Core logic courtesy of Richard H.

Add s390_vec_read_element() that can deal with element sizes.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h            |  6 +++
 target/s390x/insn-data.def       |  2 +
 target/s390x/translate_vx.inc.c  | 31 +++++++++++++
 target/s390x/vec.h               | 19 ++++++++
 target/s390x/vec_string_helper.c | 74 ++++++++++++++++++++++++++++++++
 5 files changed, 132 insertions(+)