diff mbox series

[v1,4/6] s390x/tcg: Fix VECTOR SUBTRACT COMPUTE BORROW INDICATION

Message ID 20191018161044.6983-5-david@redhat.com
State New
Headers show
Series s390x/tcg: Vector instruction fixes | expand

Commit Message

David Hildenbrand Oct. 18, 2019, 4:10 p.m. UTC
Looks like my idea of what a "borrow" is was wrong. We are dealing with
unsigned numbers. A subtraction is simply an addition with the bitwise
complement. If we get a carry during the addition, that's the borrow.
"The operands are treated as unsigned binary integers."

This is nice, as we can reuse the VECTOR ADD COMPUTE CARRY functions
and avoid helpers, all we have to do is compute the bitwise complement.

Fixes: 1ee2d7ba72f6 ("s390x/tcg: Implement VECTOR SUBTRACT COMPUTE BORROW INDICATION")
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 --
 target/s390x/translate_vx.inc.c | 45 ++++++++++++++++++++++++---------
 target/s390x/vec_int_helper.c   | 16 ------------
 3 files changed, 33 insertions(+), 30 deletions(-)

Comments

David Hildenbrand Oct. 18, 2019, 5:41 p.m. UTC | #1
On 18.10.19 18:10, David Hildenbrand wrote:
> Looks like my idea of what a "borrow" is was wrong. We are dealing with
> unsigned numbers. A subtraction is simply an addition with the bitwise
> complement. If we get a carry during the addition, that's the borrow.
> "The operands are treated as unsigned binary integers."
> 
> This is nice, as we can reuse the VECTOR ADD COMPUTE CARRY functions
> and avoid helpers, all we have to do is compute the bitwise complement.
> 
> Fixes: 1ee2d7ba72f6 ("s390x/tcg: Implement VECTOR SUBTRACT COMPUTE BORROW INDICATION")
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>   target/s390x/helper.h           |  2 --
>   target/s390x/translate_vx.inc.c | 45 ++++++++++++++++++++++++---------
>   target/s390x/vec_int_helper.c   | 16 ------------
>   3 files changed, 33 insertions(+), 30 deletions(-)
> 
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index 56e8149866..ca1e08100a 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -207,8 +207,6 @@ DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
>   DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
>   DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
>   DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
> -DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
> -DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
>   DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
>   
>   /* === Vector String Instructions === */
> diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
> index 5ce7bfb0af..40bcc1604e 100644
> --- a/target/s390x/translate_vx.inc.c
> +++ b/target/s390x/translate_vx.inc.c
> @@ -2130,14 +2130,40 @@ static DisasJumpType op_vs(DisasContext *s, DisasOps *o)
>       return DISAS_NEXT;
>   }
>   
> +static void gen_scbi8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
> +{
> +    TCGv_i64 t = tcg_temp_new_i64();
> +
> +    tcg_gen_not_i64(t, b);
> +    gen_acc(d, a, t, ES_8);
> +    tcg_temp_free_i64(t);
> +}

BTW, I would have thought that we need the 2nd complement in all these 
cases. However, the description of the other functions confused me 
(VECTOR SUBTRACT WITH BORROW INDICATION) - add bitwise complement and 
add the borrow.

This passes my test cases (that are verified against real HW), but I am 
not sure if I check all the corner cases.

@Richard, do you have any idea how to do it the right way for this 
instruction?
David Hildenbrand Oct. 18, 2019, 6:18 p.m. UTC | #2
On 18.10.19 19:41, David Hildenbrand wrote:
> On 18.10.19 18:10, David Hildenbrand wrote:
>> Looks like my idea of what a "borrow" is was wrong. We are dealing with
>> unsigned numbers. A subtraction is simply an addition with the bitwise
>> complement. If we get a carry during the addition, that's the borrow.
>> "The operands are treated as unsigned binary integers."
>>
>> This is nice, as we can reuse the VECTOR ADD COMPUTE CARRY functions
>> and avoid helpers, all we have to do is compute the bitwise complement.
>>
>> Fixes: 1ee2d7ba72f6 ("s390x/tcg: Implement VECTOR SUBTRACT COMPUTE BORROW INDICATION")
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>    target/s390x/helper.h           |  2 --
>>    target/s390x/translate_vx.inc.c | 45 ++++++++++++++++++++++++---------
>>    target/s390x/vec_int_helper.c   | 16 ------------
>>    3 files changed, 33 insertions(+), 30 deletions(-)
>>
>> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
>> index 56e8149866..ca1e08100a 100644
>> --- a/target/s390x/helper.h
>> +++ b/target/s390x/helper.h
>> @@ -207,8 +207,6 @@ DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
>>    DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
>>    DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
>>    DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
>> -DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
>> -DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
>>    DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
>>    
>>    /* === Vector String Instructions === */
>> diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
>> index 5ce7bfb0af..40bcc1604e 100644
>> --- a/target/s390x/translate_vx.inc.c
>> +++ b/target/s390x/translate_vx.inc.c
>> @@ -2130,14 +2130,40 @@ static DisasJumpType op_vs(DisasContext *s, DisasOps *o)
>>        return DISAS_NEXT;
>>    }
>>    
>> +static void gen_scbi8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
>> +{
>> +    TCGv_i64 t = tcg_temp_new_i64();
>> +
>> +    tcg_gen_not_i64(t, b);
>> +    gen_acc(d, a, t, ES_8);
>> +    tcg_temp_free_i64(t);
>> +}
> 
> BTW, I would have thought that we need the 2nd complement in all these
> cases. However, the description of the other functions confused me
> (VECTOR SUBTRACT WITH BORROW INDICATION) - add bitwise complement and
> add the borrow.
> 
> This passes my test cases (that are verified against real HW), but I am
> not sure if I check all the corner cases.
> 
> @Richard, do you have any idea how to do it the right way for this
> instruction?
> 

My impression was right. A simple "0-0" test makes this visible. The 
other two fixes seem to be correct, though.

Will rework.
Richard Henderson Oct. 18, 2019, 6:52 p.m. UTC | #3
On 10/18/19 11:18 AM, David Hildenbrand wrote:
> On 18.10.19 19:41, David Hildenbrand wrote:
>> On 18.10.19 18:10, David Hildenbrand wrote:
>>> Looks like my idea of what a "borrow" is was wrong. We are dealing with
>>> unsigned numbers. A subtraction is simply an addition with the bitwise
>>> complement. If we get a carry during the addition, that's the borrow.
>>> "The operands are treated as unsigned binary integers."
>>>
>>> This is nice, as we can reuse the VECTOR ADD COMPUTE CARRY functions
>>> and avoid helpers, all we have to do is compute the bitwise complement.
>>>
>>> Fixes: 1ee2d7ba72f6 ("s390x/tcg: Implement VECTOR SUBTRACT COMPUTE BORROW
>>> INDICATION")
>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>> ---
>>>    target/s390x/helper.h           |  2 --
>>>    target/s390x/translate_vx.inc.c | 45 ++++++++++++++++++++++++---------
>>>    target/s390x/vec_int_helper.c   | 16 ------------
>>>    3 files changed, 33 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
>>> index 56e8149866..ca1e08100a 100644
>>> --- a/target/s390x/helper.h
>>> +++ b/target/s390x/helper.h
>>> @@ -207,8 +207,6 @@ DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void,
>>> ptr, cptr, cptr, i32)
>>>    DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
>>>    DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
>>>    DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
>>> -DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
>>> -DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
>>>    DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
>>>       /* === Vector String Instructions === */
>>> diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
>>> index 5ce7bfb0af..40bcc1604e 100644
>>> --- a/target/s390x/translate_vx.inc.c
>>> +++ b/target/s390x/translate_vx.inc.c
>>> @@ -2130,14 +2130,40 @@ static DisasJumpType op_vs(DisasContext *s, DisasOps
>>> *o)
>>>        return DISAS_NEXT;
>>>    }
>>>    +static void gen_scbi8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
>>> +{
>>> +    TCGv_i64 t = tcg_temp_new_i64();
>>> +
>>> +    tcg_gen_not_i64(t, b);
>>> +    gen_acc(d, a, t, ES_8);
>>> +    tcg_temp_free_i64(t);
>>> +}
>>
>> BTW, I would have thought that we need the 2nd complement in all these
>> cases. However, the description of the other functions confused me
>> (VECTOR SUBTRACT WITH BORROW INDICATION) - add bitwise complement and
>> add the borrow.
>>
>> This passes my test cases (that are verified against real HW), but I am
>> not sure if I check all the corner cases.
>>
>> @Richard, do you have any idea how to do it the right way for this
>> instruction?
>>
> 
> My impression was right. A simple "0-0" test makes this visible. The other two
> fixes seem to be correct, though.

Your description seems to indicate that you want carry output, which is
!borrow.  ARM represents things this way, but I didn't recall it for S390.

If you want to implement sub r,x,y with add r,x,~y, you also have to add one --
often times with the carry-in.  But since we don't have a carry-in here, I
wonder if it isn't easier to invert your result:

     tcg_gen_sub2_i64(tl, th, al, zero, bl, zero);
     tcg_gen_andi_i64(th, th, 1);
     tcg_gen_sub2_i64(tl, th, ah, zero, th, zero);
     tcg_gen_sub2_i64(tl, th, tl, th, bh, zero);
-    tcg_gen_andi_i64(dl, th, 1);
+    /* "invert" the result: -1 -> 0; 0 -> 1 */
+    tcg_gen_addi_i64(dl, th, 1);


r~
diff mbox series

Patch

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 56e8149866..ca1e08100a 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -207,8 +207,6 @@  DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
 DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
 DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
-DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
 
 /* === Vector String Instructions === */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 5ce7bfb0af..40bcc1604e 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2130,14 +2130,40 @@  static DisasJumpType op_vs(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
+static void gen_scbi8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_not_i64(t, b);
+    gen_acc(d, a, t, ES_8);
+    tcg_temp_free_i64(t);
+}
+
+static void gen_scbi16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_not_i64(t, b);
+    gen_acc(d, a, t, ES_16);
+    tcg_temp_free_i64(t);
+}
+
 static void gen_scbi_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
 {
-    tcg_gen_setcond_i32(TCG_COND_LTU, d, a, b);
+    TCGv_i32 t = tcg_temp_new_i32();
+
+    tcg_gen_not_i32(t, b);
+    gen_acc_i32(d, a, t);
+    tcg_temp_free_i32(t);
 }
 
 static void gen_scbi_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
-    tcg_gen_setcond_i64(TCG_COND_LTU, d, a, b);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_not_i64(t, b);
+    gen_acc_i64(d, a, t);
+    tcg_temp_free_i64(t);
 }
 
 static void gen_scbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
@@ -2145,26 +2171,21 @@  static void gen_scbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
 {
     TCGv_i64 th = tcg_temp_new_i64();
     TCGv_i64 tl = tcg_temp_new_i64();
-    TCGv_i64 zero = tcg_const_i64(0);
 
-    tcg_gen_sub2_i64(tl, th, al, zero, bl, zero);
-    tcg_gen_andi_i64(th, th, 1);
-    tcg_gen_sub2_i64(tl, th, ah, zero, th, zero);
-    tcg_gen_sub2_i64(tl, th, tl, th, bh, zero);
-    tcg_gen_andi_i64(dl, th, 1);
-    tcg_gen_mov_i64(dh, zero);
+    tcg_gen_not_i64(tl, bl);
+    tcg_gen_not_i64(th, bh);
+    gen_acc2_i64(dl, dh, al, ah, tl, th);
 
     tcg_temp_free_i64(th);
     tcg_temp_free_i64(tl);
-    tcg_temp_free_i64(zero);
 }
 
 static DisasJumpType op_vscbi(DisasContext *s, DisasOps *o)
 {
     const uint8_t es = get_field(s->fields, m4);
     static const GVecGen3 g[4] = {
-        { .fno = gen_helper_gvec_vscbi8, },
-        { .fno = gen_helper_gvec_vscbi16, },
+        { .fni8 = gen_scbi8_i64, },
+        { .fni8 = gen_scbi16_i64, },
         { .fni4 = gen_scbi_i32, },
         { .fni8 = gen_scbi_i64, },
     };
diff --git a/target/s390x/vec_int_helper.c b/target/s390x/vec_int_helper.c
index d38405848f..e8fe7af496 100644
--- a/target/s390x/vec_int_helper.c
+++ b/target/s390x/vec_int_helper.c
@@ -583,22 +583,6 @@  void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
     s390_vec_shr(v1, v2, count);
 }
 
-#define DEF_VSCBI(BITS)                                                        \
-void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
-                              uint32_t desc)                                   \
-{                                                                              \
-    int i;                                                                     \
-                                                                               \
-    for (i = 0; i < (128 / BITS); i++) {                                       \
-        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
-        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
-                                                                               \
-        s390_vec_write_element##BITS(v1, i, a < b);                            \
-    }                                                                          \
-}
-DEF_VSCBI(8)
-DEF_VSCBI(16)
-
 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
                       uint32_t desc)
 {