Message ID | 20190515203112.506-4-david@redhat.com |
---|---|
State | New |
Headers | show |
Series | s390x/tcg: Vector Instruction Support Part 3 | expand |
On 5/15/19 1:31 PM, David Hildenbrand wrote: > Similar to VECTOR FIND ELEMENT EQUAL, however the search also stops on > any inequality. A match for inequality seems to have precedence over > a match for zero, because both elements have to be zero. > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > target/s390x/helper.h | 6 ++++ > target/s390x/insn-data.def | 2 ++ > target/s390x/translate_vx.inc.c | 31 +++++++++++++++++++ > target/s390x/vec_string_helper.c | 53 ++++++++++++++++++++++++++++++++ > 4 files changed, 92 insertions(+) Like the previous, only with static inline uint64_t nonzero_search(uint64_t a, uint64_t m) { return (((a & m) + m) | a) & ~m; } for the inequality. r~
On 17.05.19 19:56, Richard Henderson wrote: > On 5/15/19 1:31 PM, David Hildenbrand wrote: >> Similar to VECTOR FIND ELEMENT EQUAL, however the search also stops on >> any inequality. A match for inequality seems to have precedence over >> a match for zero, because both elements have to be zero. >> >> Signed-off-by: David Hildenbrand <david@redhat.com> >> --- >> target/s390x/helper.h | 6 ++++ >> target/s390x/insn-data.def | 2 ++ >> target/s390x/translate_vx.inc.c | 31 +++++++++++++++++++ >> target/s390x/vec_string_helper.c | 53 ++++++++++++++++++++++++++++++++ >> 4 files changed, 92 insertions(+) > > Like the previous, only with > > static inline uint64_t nonzero_search(uint64_t a, uint64_t m) > { > return (((a & m) + m) | a) & ~m; > } > > for the inequality. > > > r~ > It's a little bit more tricky, because we have to identify the smaller element. Right now I have this: +static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = dup_const(es, -1ull >> (65 - (1 << es) * 8)); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_inequal; + bool smaller = false; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = nonzero_search(a0 ^ b0, mask); + e1 = nonzero_search(a1 ^ b1, mask); + first_inequal = match_index(e0, e1); + + /* identify the smaller element */ + if (first_inequal < 16) { + uint8_t enr = first_inequal / (1 << es); + uint32_t a = s390_vec_read_element(v2, enr, es); + uint32_t b = s390_vec_read_element(v3, enr, es); + smaller = a < b; + } + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + /* zero out the destination vector */ + s390_vec_write_element64(v1, 0, 0); + s390_vec_write_element64(v1, 1, 0); + + if (first_zero == 16 && first_inequal == 16) { + s390_vec_write_element8(v1, 7, 16); + return 3; 
+ } else if (first_zero < first_inequal) { + s390_vec_write_element8(v1, 7, first_zero); + return 0; + } + s390_vec_write_element8(v1, 7, first_inequal); + return smaller ? 1 : 2; +}
diff --git a/target/s390x/helper.h b/target/s390x/helper.h index a1b169b666..fb50b404db 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -224,6 +224,12 @@ DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfene8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfene16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32) #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index d8907ef6a5..d03c1ee0b3 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -1197,6 +1197,8 @@ F(0xe782, VFAE, VRR_b, V, 0, 0, 0, 0, vfae, 0, IF_VEC) /* VECTOR FIND ELEMENT EQUAL */ F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC) +/* VECTOR FIND ELEMENT NOT EQUAL */ + F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC) #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c index 848f6d7163..e36cc5c401 100644 --- a/target/s390x/translate_vx.inc.c +++ b/target/s390x/translate_vx.inc.c @@ -2415,3 +2415,34 @@ static DisasJumpType op_vfee(DisasContext *s, DisasOps *o) } return DISAS_NEXT; } + +static DisasJumpType op_vfene(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + static gen_helper_gvec_3_ptr * const cc[3] = { + gen_helper_gvec_vfene_cc8, + gen_helper_gvec_vfene_cc16, + 
gen_helper_gvec_vfene_cc32, + }; + static gen_helper_gvec_3 * const nocc[3] = { + gen_helper_gvec_vfene8, + gen_helper_gvec_vfene16, + gen_helper_gvec_vfene32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (m5 & 1) { + gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), cpu_env, m5, cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), m5, nocc[es]); + } + return DISAS_NEXT; +} diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c index 6a5d05271c..181f044fe5 100644 --- a/target/s390x/vec_string_helper.c +++ b/target/s390x/vec_string_helper.c @@ -154,3 +154,56 @@ void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ DEF_VFEE_CC_HELPER(8) DEF_VFEE_CC_HELPER(16) DEF_VFEE_CC_HELPER(32) + +#define DEF_VFENE(BITS) \ +static int vfene##BITS(void *v1, const void *v2, const void *v3, uint8_t m5) \ +{ \ + const bool zs = extract32(m5, 1, 1); \ + S390Vector tmp = {}; \ + int first_byte = 16; \ + int cc = 3; /* no match */ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t data1 = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t data2 = s390_vec_read_element##BITS(v3, i); \ + \ + if (data1 != data2) { \ + first_byte = i * (BITS / 8); \ + cc = data1 < data2 ? 
1 : 2; /* inequality found */ \ + break; \ + } \ + \ + if (zs && !data1) { \ + first_byte = i * (BITS / 8); \ + cc = 0; /* match for zero */ \ + break; \ + } \ + } \ + s390_vec_write_element8(&tmp, 7, first_byte); \ + *(S390Vector *)v1 = tmp; \ + return cc; \ +} +DEF_VFENE(8) +DEF_VFENE(16) +DEF_VFENE(32) + +#define DEF_VFENE_HELPER(BITS) \ +void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vfene##BITS(v1, v2, v3, simd_data(desc)); \ +} +DEF_VFENE_HELPER(8) +DEF_VFENE_HELPER(16) +DEF_VFENE_HELPER(32) + +#define DEF_VFENE_CC_HELPER(BITS) \ +void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + env->cc_op = vfene##BITS(v1, v2, v3, simd_data(desc)); \ +} +DEF_VFENE_CC_HELPER(8) +DEF_VFENE_CC_HELPER(16) +DEF_VFENE_CC_HELPER(32)
Similar to VECTOR FIND ELEMENT EQUAL, however the search also stops on any inequality. A match for inequality seems to have precedence over a match for zero, because both elements have to be zero. Signed-off-by: David Hildenbrand <david@redhat.com> --- target/s390x/helper.h | 6 ++++ target/s390x/insn-data.def | 2 ++ target/s390x/translate_vx.inc.c | 31 +++++++++++++++++++ target/s390x/vec_string_helper.c | 53 ++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+)