
[v1,07/41] s390x/tcg: Implement VECTOR AVERAGE

Message ID 20190411100836.646-8-david@redhat.com
State New
Series s390x/tcg: Vector Instruction Support Part 2

Commit Message

David Hildenbrand April 11, 2019, 10:08 a.m. UTC
Handle 32/64-bit elements via gvec expansion and 8/16-bit elements via
out-of-line (ool) helpers.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c | 63 +++++++++++++++++++++++++++++++++
 target/s390x/vec_int_helper.c   | 16 +++++++++
 4 files changed, 83 insertions(+)

Comments

Richard Henderson April 12, 2019, 10:34 p.m. UTC | #1
On 4/11/19 12:08 AM, David Hildenbrand wrote:
> +}
> +static DisasJumpType op_vavg(DisasContext *s, DisasOps *o)
> +{

Watch your spacing.


> +    static const GVecGen3 g[4] = {
> +        { .fno = gen_helper_gvec_vavg8, },
> +        { .fno = gen_helper_gvec_vavg16, },
> +        { .fni4 = gen_avg_i32, },
> +        { .fni8 = gen_avg_i64, },
> +    };

Pondering possible vector expansions.  I think one possibility is

  t1 = (a >> 1) + (b >> 1);

We still have the two "0.5 bits" to add back in, plus we round up by adding
another 0.5.  This means if either lsb is set, then we have carry in to the 1's
bit.  So:

  t1 = t1 + ((a | b) & 1);

Which leads to

  tcg_gen_sari_vec(vece, t0, a, 1);
  tcg_gen_sari_vec(vece, t1, b, 1);
  tcg_gen_or_vec(vece, t2, a, b);
  tcg_gen_add_vec(vece, t0, t0, t1);
  tcg_gen_dupi_vec(vece, t1, 1);
  tcg_gen_and_vec(vece, t2, t2, t1);
  tcg_gen_add_vec(vece, t0, t0, t2);

  { .fnv = gen_avg_vec,
    .fno = gen_helper_gvec_vavg8,
    .opc = INDEX_op_sari_vec },
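
Spelled out as a complete expander, that would be roughly the following
(an untested sketch; allocating the temporaries with
tcg_temp_new_vec_matching() is an assumption, not part of the patch):

  static void gen_avg_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
  {
      TCGv_vec t0 = tcg_temp_new_vec_matching(d);
      TCGv_vec t1 = tcg_temp_new_vec_matching(d);
      TCGv_vec t2 = tcg_temp_new_vec_matching(d);

      /* t0 = (a >> 1) + (b >> 1) */
      tcg_gen_sari_vec(vece, t0, a, 1);
      tcg_gen_sari_vec(vece, t1, b, 1);
      tcg_gen_or_vec(vece, t2, a, b);
      tcg_gen_add_vec(vece, t0, t0, t1);
      /* add 1 back in if either lsb was set (the rounding carry) */
      tcg_gen_dupi_vec(vece, t1, 1);
      tcg_gen_and_vec(vece, t2, t2, t1);
      tcg_gen_add_vec(vece, d, t0, t2);

      tcg_temp_free_vec(t0);
      tcg_temp_free_vec(t1);
      tcg_temp_free_vec(t2);
  }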

But what you have here is correct and the above is mere optimization so,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
David Hildenbrand April 16, 2019, 8:52 a.m. UTC | #2
On 13.04.19 00:34, Richard Henderson wrote:
> On 4/11/19 12:08 AM, David Hildenbrand wrote:
>> +}
>> +static DisasJumpType op_vavg(DisasContext *s, DisasOps *o)
>> +{
> 
> Watch your spacing.

Whoops.

> 
> 
>> +    static const GVecGen3 g[4] = {
>> +        { .fno = gen_helper_gvec_vavg8, },
>> +        { .fno = gen_helper_gvec_vavg16, },
>> +        { .fni4 = gen_avg_i32, },
>> +        { .fni8 = gen_avg_i64, },
>> +    };
> 
> Pondering possible vector expansions.  I think one possibility is
> 
>   t1 = (a >> 1) + (b >> 1);
> 
> We still have the two "0.5 bits" to add back in, plus we round up by adding
> another 0.5.  This means if either lsb is set, then we have carry in to the 1's
> bit.  So:
> 
>   t1 = t1 + ((a | b) & 1);
> 
> Which leads to
> 
>   tcg_gen_sari_vec(vece, t0, a, 1);
>   tcg_gen_sari_vec(vece, t1, b, 1);
>   tcg_gen_or_vec(vece, t2, a, b);
>   tcg_gen_add_vec(vece, t0, t0, t1);
>   tcg_gen_dupi_vec(vece, t1, 1);
>   tcg_gen_and_vec(vece, t2, t2, t1);
>   tcg_gen_add_vec(vece, t0, t0, t2);
> 
>   { .fnv = gen_avg_vec,
>     .fno = gen_helper_gvec_vavg8,
>     .opc = INDEX_op_sari_vec },
> 
> But what you have here is correct and the above is mere optimization so,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 

Looks sane; as discussed, let's handle vector expansions later.

Thanks!

> 
> r~
>

Patch

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index e1847e8877..2b6b716909 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -148,6 +148,8 @@  DEF_HELPER_FLAGS_4(vstl, TCG_CALL_NO_WG, void, env, cptr, i64, i64)
 /* === Vector Integer Instructions === */
 DEF_HELPER_FLAGS_4(gvec_vacc128, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_5(gvec_vaccc128, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vavg8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vavg16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 456d5597ca..6f8b42e327 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1068,6 +1068,8 @@ 
     F(0xe768, VN,      VRR_c, V,   0, 0, 0, 0, vn, 0, IF_VEC)
 /* VECTOR AND WITH COMPLEMENT */
     F(0xe769, VNC,     VRR_c, V,   0, 0, 0, 0, vnc, 0, IF_VEC)
+/* VECTOR AVERAGE */
+    F(0xe7f2, VAVG,    VRR_c, V,   0, 0, 0, 0, vavg, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index aaa247e855..50e03bf151 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -256,6 +256,17 @@  static void zero_vec(uint8_t reg)
     tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
 }
 
+static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
+                          uint64_t b)
+{
+    TCGv_i64 bl = tcg_const_i64(b);
+    TCGv_i64 bh = tcg_const_i64(0);
+
+    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
+    tcg_temp_free_i64(bl);
+    tcg_temp_free_i64(bh);
+}
+
 static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
 {
     const uint8_t es = s->insn->data;
@@ -1149,3 +1160,55 @@  static DisasJumpType op_vnc(DisasContext *s, DisasOps *o)
                   get_field(s->fields, v2), get_field(s->fields, v3));
     return DISAS_NEXT;
 }
+
+static void gen_avg_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(t0, a);
+    tcg_gen_ext_i32_i64(t1, b);
+    tcg_gen_add_i64(t0, t0, t1);
+    tcg_gen_addi_i64(t0, t0, 1);
+    tcg_gen_shri_i64(t0, t0, 1);
+    tcg_gen_extrl_i64_i32(d, t0);
+
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+}
+
+static void gen_avg_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
+{
+    TCGv_i64 dh = tcg_temp_new_i64();
+    TCGv_i64 ah = tcg_temp_new_i64();
+    TCGv_i64 bh = tcg_temp_new_i64();
+
+    /* extending the sign by one bit is sufficient */
+    tcg_gen_extract_i64(ah, al, 63, 1);
+    tcg_gen_extract_i64(bh, bl, 63, 1);
+    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
+    gen_addi2_i64(dl, dh, dl, dh, 1);
+    tcg_gen_extract2_i64(dl, dl, dh, 1);
+
+    tcg_temp_free_i64(dh);
+    tcg_temp_free_i64(ah);
+    tcg_temp_free_i64(bh);
+}
+static DisasJumpType op_vavg(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    static const GVecGen3 g[4] = {
+        { .fno = gen_helper_gvec_vavg8, },
+        { .fno = gen_helper_gvec_vavg16, },
+        { .fni4 = gen_avg_i32, },
+        { .fni8 = gen_avg_i64, },
+    };
+
+    if (es > ES_64) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
+               get_field(s->fields, v3), &g[es]);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_int_helper.c b/target/s390x/vec_int_helper.c
index 97fc559da0..149cfaaeae 100644
--- a/target/s390x/vec_int_helper.c
+++ b/target/s390x/vec_int_helper.c
@@ -61,3 +61,19 @@  void HELPER(gvec_vaccc128)(void *v1, const void *v2, const void *v3,
     dst->doubleword[0] = 0;
     dst->doubleword[1] = carry;
 }
+
+#define DEF_VAVG(BITS)                                                         \
+void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
+                             uint32_t desc)                                    \
+{                                                                              \
+    int i;                                                                     \
+                                                                               \
+    for (i = 0; i < (128 / BITS); i++) {                                       \
+        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
+        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
+                                                                               \
+        s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
+    }                                                                          \
+}
+DEF_VAVG(8)
+DEF_VAVG(16)
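
For reference, DEF_VAVG(8) above expands to roughly the following
(hand-expanded here for illustration only; it is not part of the patch):

void HELPER(gvec_vavg8)(void *v1, const void *v2, const void *v3, uint32_t desc)
{
    int i;

    for (i = 0; i < 16; i++) {                  /* 128 / 8 elements */
        const int32_t a = (int8_t)s390_vec_read_element8(v2, i);
        const int32_t b = (int8_t)s390_vec_read_element8(v3, i);

        /* signed average, rounded by adding 1 before the shift */
        s390_vec_write_element8(v1, i, (a + b + 1) >> 1);
    }
}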