Patchwork [2/3] target-sparc: Simplify ICC generation; fix ADDX carry generation.

login
register
mail settings
Submitter Richard Henderson
Date May 10, 2010, 10:23 p.m.
Message ID <ee128ef834123110459491a8d75d6dd0d667d50a.1273529974.git.rth@twiddle.net>
Download mbox | patch
Permalink /patch/52242/
State New
Headers show

Comments

Richard Henderson - May 10, 2010, 10:23 p.m.
Use int32 types instead of target_ulong when computing ICC.  This
simplifies the generated code for 32-bit host and 64-bit guest.
Use the same simplified expressions for ICC as were already used
for XCC in carry flag generation.

ADDX ICC carry generation was using the same routines as ADD ICC,
which is incorrect if the input carry bit produces the output carry.
Use the algorithms already in place for ADDX XCC carry generation.
Similarly for SUBX.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/op_helper.c |  106 ++++++++++++++++++++++++++++++----------------
 1 files changed, 69 insertions(+), 37 deletions(-)
Blue Swirl - May 11, 2010, 7:14 p.m.
On 5/11/10, Richard Henderson <rth@twiddle.net> wrote:
> Use int32 types instead of target_ulong when computing ICC.  This
>  simplifies the generated code for 32-bit host and 64-bit guest.
>  Use the same simplified expressions for ICC as were already used
>  for XCC in carry flag generation.
>
>  ADDX ICC carry generation was using the same routines as ADD ICC,
>  which is incorrect if the input carry bit produces the output carry.
>  Use the algorithms already in place for ADDX XCC carry generation.
>  Similarly for SUBX.
>
>  Signed-off-by: Richard Henderson <rth@twiddle.net>
>  ---
>   target-sparc/op_helper.c |  106 ++++++++++++++++++++++++++++++----------------
>   1 files changed, 69 insertions(+), 37 deletions(-)
>
>  diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
>  index 09449c5..c36bc54 100644
>  --- a/target-sparc/op_helper.c
>  +++ b/target-sparc/op_helper.c
>  @@ -896,13 +896,13 @@ static uint32_t compute_C_flags(void)
>      return env->psr & PSR_CARRY;
>   }
>
>  -static inline uint32_t get_NZ_icc(target_ulong dst)
>  +static inline uint32_t get_NZ_icc(int32_t dst)
>   {
>      uint32_t ret = 0;
>
>  -    if (!(dst & 0xffffffffULL))
>  +    if (dst == 0)

Could you add the braces to fix CODING_STYLE while at it?

>          ret |= PSR_ZERO;
>  -    if ((int32_t) (dst & 0xffffffffULL) < 0)
>  +    if (dst < 0)
>          ret |= PSR_NEG;
>      return ret;
>   }
>  @@ -918,13 +918,13 @@ static uint32_t compute_C_flags_xcc(void)
>      return env->xcc & PSR_CARRY;
>   }
>
>  -static inline uint32_t get_NZ_xcc(target_ulong dst)
>  +static inline uint32_t get_NZ_xcc(target_long dst)
>   {
>      uint32_t ret = 0;
>
>      if (!dst)
>          ret |= PSR_ZERO;
>  -    if ((int64_t)dst < 0)
>  +    if (dst < 0)
>          ret |= PSR_NEG;
>      return ret;
>   }
>  @@ -953,25 +953,21 @@ static uint32_t compute_C_div(void)
>      return 0;
>   }
>
>  -/* carry = (src1[31] & src2[31]) | ( ~dst[31] & (src1[31] | src2[31])) */
>  -static inline uint32_t get_C_add_icc(target_ulong dst, target_ulong src1,
>  -                                     target_ulong src2)
>  +static inline uint32_t get_C_add_icc(uint32_t dst, uint32_t src1)
>   {
>      uint32_t ret = 0;
>
>  -    if (((src1 & (1ULL << 31)) & (src2 & (1ULL << 31)))
>  -        | ((~(dst & (1ULL << 31)))
>  -           & ((src1 & (1ULL << 31)) | (src2 & (1ULL << 31)))))
>  +    if (dst < src1)
>          ret |= PSR_CARRY;
>      return ret;
>   }
>
>  -static inline uint32_t get_V_add_icc(target_ulong dst, target_ulong src1,
>  -                                         target_ulong src2)
>  +static inline uint32_t get_V_add_icc(uint32_t dst, uint32_t src1,
>  +                                     uint32_t src2)
>   {
>      uint32_t ret = 0;
>
>  -    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1ULL << 31))
>  +    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1U << 31))
>          ret |= PSR_OVF;
>      return ret;
>   }
>  @@ -1017,14 +1013,14 @@ static uint32_t compute_all_add(void)
>      uint32_t ret;
>
>      ret = get_NZ_icc(CC_DST);
>  -    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    ret |= get_C_add_icc(CC_DST, CC_SRC);
>      ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
>      return ret;
>   }
>
>   static uint32_t compute_C_add(void)
>   {
>  -    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    return get_C_add_icc(CC_DST, CC_SRC);
>   }
>
>   #ifdef TARGET_SPARC64
>  @@ -1049,6 +1045,26 @@ static uint32_t compute_C_addx_xcc(void)
>   }
>   #endif
>
>  +static uint32_t compute_all_addx(void)
>  +{
>  +    uint32_t ret;
>  +
>  +    ret = get_NZ_icc(CC_DST);
>  +    ret |= get_C_add_icc(CC_DST - CC_SRC2, CC_SRC);
>  +    ret |= get_C_add_icc(CC_DST, CC_SRC);
>  +    ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    return ret;
>  +}
>  +
>  +static uint32_t compute_C_addx(void)
>  +{
>  +    uint32_t ret;
>  +
>  +    ret = get_C_add_icc(CC_DST - CC_SRC2, CC_SRC);
>  +    ret |= get_C_add_icc(CC_DST, CC_SRC);
>  +    return ret;
>  +}
>  +
>   static inline uint32_t get_V_tag_icc(target_ulong src1, target_ulong src2)
>   {
>      uint32_t ret = 0;
>  @@ -1063,7 +1079,7 @@ static uint32_t compute_all_tadd(void)
>      uint32_t ret;
>
>      ret = get_NZ_icc(CC_DST);
>  -    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    ret |= get_C_add_icc(CC_DST, CC_SRC);
>      ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
>      ret |= get_V_tag_icc(CC_SRC, CC_SRC2);
>      return ret;
>  @@ -1071,7 +1087,7 @@ static uint32_t compute_all_tadd(void)
>
>   static uint32_t compute_C_tadd(void)
>   {
>  -    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    return get_C_add_icc(CC_DST, CC_SRC);
>   }
>
>   static uint32_t compute_all_taddtv(void)
>  @@ -1079,34 +1095,30 @@ static uint32_t compute_all_taddtv(void)
>      uint32_t ret;
>
>      ret = get_NZ_icc(CC_DST);
>  -    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    ret |= get_C_add_icc(CC_DST, CC_SRC);
>      return ret;
>   }
>
>   static uint32_t compute_C_taddtv(void)
>   {
>  -    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    return get_C_add_icc(CC_DST, CC_SRC);
>   }
>
>  -/* carry = (~src1[31] & src2[31]) | ( dst[31]  & (~src1[31] | src2[31])) */
>  -static inline uint32_t get_C_sub_icc(target_ulong dst, target_ulong src1,
>  -                                     target_ulong src2)
>  +static inline uint32_t get_C_sub_icc(uint32_t src1, uint32_t src2)
>   {
>      uint32_t ret = 0;
>
>  -    if (((~(src1 & (1ULL << 31))) & (src2 & (1ULL << 31)))
>  -        | ((dst & (1ULL << 31)) & (( ~(src1 & (1ULL << 31)))
>  -                                   | (src2 & (1ULL << 31)))))
>  +    if (src1 < src2)
>          ret |= PSR_CARRY;
>      return ret;
>   }
>
>  -static inline uint32_t get_V_sub_icc(target_ulong dst, target_ulong src1,
>  -                                     target_ulong src2)
>  +static inline uint32_t get_V_sub_icc(uint32_t dst, uint32_t src1,
>  +                                     uint32_t src2)
>   {
>      uint32_t ret = 0;
>
>  -    if (((src1 ^ src2) & (src1 ^ dst)) & (1ULL << 31))
>  +    if (((src1 ^ src2) & (src1 ^ dst)) & (1U << 31))
>          ret |= PSR_OVF;
>      return ret;
>   }
>  @@ -1153,14 +1165,14 @@ static uint32_t compute_all_sub(void)
>      uint32_t ret;
>
>      ret = get_NZ_icc(CC_DST);
>  -    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
>      ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>      return ret;
>   }
>
>   static uint32_t compute_C_sub(void)
>   {
>  -    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    return get_C_sub_icc(CC_SRC, CC_SRC2);
>   }
>
>   #ifdef TARGET_SPARC64
>  @@ -1185,12 +1197,32 @@ static uint32_t compute_C_subx_xcc(void)
>   }
>   #endif
>
>  +static uint32_t compute_all_subx(void)
>  +{
>  +    uint32_t ret;
>  +
>  +    ret = get_NZ_icc(CC_DST);
>  +    ret |= get_C_sub_icc(CC_DST - CC_SRC2, CC_SRC);
>  +    ret |= get_C_sub_icc(CC_DST, CC_SRC2);
>  +    ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    return ret;
>  +}
>  +
>  +static uint32_t compute_C_subx(void)
>  +{
>  +    uint32_t ret;
>  +
>  +    ret = get_C_sub_icc(CC_DST - CC_SRC2, CC_SRC);
>  +    ret |= get_C_sub_icc(CC_DST, CC_SRC2);
>  +    return ret;
>  +}
>  +
>   static uint32_t compute_all_tsub(void)
>   {
>      uint32_t ret;
>
>      ret = get_NZ_icc(CC_DST);
>  -    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
>      ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>      ret |= get_V_tag_icc(CC_SRC, CC_SRC2);
>      return ret;
>  @@ -1198,7 +1230,7 @@ static uint32_t compute_all_tsub(void)
>
>   static uint32_t compute_C_tsub(void)
>   {
>  -    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    return get_C_sub_icc(CC_SRC, CC_SRC2);
>   }
>
>   static uint32_t compute_all_tsubtv(void)
>  @@ -1206,13 +1238,13 @@ static uint32_t compute_all_tsubtv(void)
>      uint32_t ret;
>
>      ret = get_NZ_icc(CC_DST);
>  -    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
>      return ret;
>   }
>
>   static uint32_t compute_C_tsubtv(void)
>   {
>  -    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>  +    return get_C_sub_icc(CC_SRC, CC_SRC2);
>   }
>
>   static uint32_t compute_all_logic(void)
>  @@ -1242,11 +1274,11 @@ static const CCTable icc_table[CC_OP_NB] = {
>      [CC_OP_FLAGS] = { compute_all_flags, compute_C_flags },
>      [CC_OP_DIV] = { compute_all_div, compute_C_div },
>      [CC_OP_ADD] = { compute_all_add, compute_C_add },
>  -    [CC_OP_ADDX] = { compute_all_add, compute_C_add },
>  +    [CC_OP_ADDX] = { compute_all_addx, compute_C_addx },
>      [CC_OP_TADD] = { compute_all_tadd, compute_C_tadd },
>      [CC_OP_TADDTV] = { compute_all_taddtv, compute_C_taddtv },
>      [CC_OP_SUB] = { compute_all_sub, compute_C_sub },
>  -    [CC_OP_SUBX] = { compute_all_sub, compute_C_sub },
>  +    [CC_OP_SUBX] = { compute_all_subx, compute_C_subx },
>      [CC_OP_TSUB] = { compute_all_tsub, compute_C_tsub },
>      [CC_OP_TSUBTV] = { compute_all_tsubtv, compute_C_tsubtv },
>      [CC_OP_LOGIC] = { compute_all_logic, compute_C_logic },
>
> --
>  1.7.0.1
>
>
Artyom Tarasenko - May 11, 2010, 9:31 p.m.
2010/5/11 Richard Henderson <rth@twiddle.net>:
> Use int32 types instead of target_ulong when computing ICC.  This
> simplifies the generated code for 32-bit host and 64-bit guest.
> Use the same simplified expressions for ICC as were already used
> for XCC in carry flag generation.
>
> ADDX ICC carry generation was using the same routines as ADD ICC,
> which is incorrect if the input carry bit produces the output carry.
> Use the algorithms already in place for ADDX XCC carry generation.
> Similarly for SUBX.

Nack. It looks like you reverted carry generation to the previous
(broken) behavior.
The current implementation is indeed not optimal regarding the performance,
but is [more] precise.

This patch breaks solaris boot at a very early stage, as it breaks subx .

Can you give a reference to the specification you are implementing?

> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target-sparc/op_helper.c |  106 ++++++++++++++++++++++++++++++----------------
>  1 files changed, 69 insertions(+), 37 deletions(-)
>
> diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
> index 09449c5..c36bc54 100644
> --- a/target-sparc/op_helper.c
> +++ b/target-sparc/op_helper.c
> @@ -896,13 +896,13 @@ static uint32_t compute_C_flags(void)
>     return env->psr & PSR_CARRY;
>  }
>
> -static inline uint32_t get_NZ_icc(target_ulong dst)
> +static inline uint32_t get_NZ_icc(int32_t dst)
>  {
>     uint32_t ret = 0;
>
> -    if (!(dst & 0xffffffffULL))
> +    if (dst == 0)
>         ret |= PSR_ZERO;
> -    if ((int32_t) (dst & 0xffffffffULL) < 0)
> +    if (dst < 0)
>         ret |= PSR_NEG;
>     return ret;
>  }
> @@ -918,13 +918,13 @@ static uint32_t compute_C_flags_xcc(void)
>     return env->xcc & PSR_CARRY;
>  }
>
> -static inline uint32_t get_NZ_xcc(target_ulong dst)
> +static inline uint32_t get_NZ_xcc(target_long dst)
>  {
>     uint32_t ret = 0;
>
>     if (!dst)
>         ret |= PSR_ZERO;
> -    if ((int64_t)dst < 0)
> +    if (dst < 0)
>         ret |= PSR_NEG;
>     return ret;
>  }
> @@ -953,25 +953,21 @@ static uint32_t compute_C_div(void)
>     return 0;
>  }
>
> -/* carry = (src1[31] & src2[31]) | ( ~dst[31] & (src1[31] | src2[31])) */

This is how it is specified in the sparc v8 documentation.

> -static inline uint32_t get_C_add_icc(target_ulong dst, target_ulong src1,
> -                                     target_ulong src2)
> +static inline uint32_t get_C_add_icc(uint32_t dst, uint32_t src1)
>  {
>     uint32_t ret = 0;
>
> -    if (((src1 & (1ULL << 31)) & (src2 & (1ULL << 31)))
> -        | ((~(dst & (1ULL << 31)))
> -           & ((src1 & (1ULL << 31)) | (src2 & (1ULL << 31)))))
> +    if (dst < src1)
>         ret |= PSR_CARRY;
>     return ret;
>  }
>
> -static inline uint32_t get_V_add_icc(target_ulong dst, target_ulong src1,
> -                                         target_ulong src2)
> +static inline uint32_t get_V_add_icc(uint32_t dst, uint32_t src1,
> +                                     uint32_t src2)
>  {
>     uint32_t ret = 0;
>
> -    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1ULL << 31))
> +    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1U << 31))
>         ret |= PSR_OVF;
>     return ret;
>  }
> @@ -1017,14 +1013,14 @@ static uint32_t compute_all_add(void)
>     uint32_t ret;
>
>     ret = get_NZ_icc(CC_DST);
> -    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
> +    ret |= get_C_add_icc(CC_DST, CC_SRC);
>     ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
>     return ret;
>  }
>
>  static uint32_t compute_C_add(void)
>  {
> -    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
> +    return get_C_add_icc(CC_DST, CC_SRC);
>  }
>
>  #ifdef TARGET_SPARC64
> @@ -1049,6 +1045,26 @@ static uint32_t compute_C_addx_xcc(void)
>  }
>  #endif
>
> +static uint32_t compute_all_addx(void)
> +{
> +    uint32_t ret;
> +
> +    ret = get_NZ_icc(CC_DST);
> +    ret |= get_C_add_icc(CC_DST - CC_SRC2, CC_SRC);
> +    ret |= get_C_add_icc(CC_DST, CC_SRC);
> +    ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
> +    return ret;
> +}
> +
> +static uint32_t compute_C_addx(void)
> +{
> +    uint32_t ret;
> +
> +    ret = get_C_add_icc(CC_DST - CC_SRC2, CC_SRC);
> +    ret |= get_C_add_icc(CC_DST, CC_SRC);
> +    return ret;
> +}
> +
>  static inline uint32_t get_V_tag_icc(target_ulong src1, target_ulong src2)
>  {
>     uint32_t ret = 0;
> @@ -1063,7 +1079,7 @@ static uint32_t compute_all_tadd(void)
>     uint32_t ret;
>
>     ret = get_NZ_icc(CC_DST);
> -    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
> +    ret |= get_C_add_icc(CC_DST, CC_SRC);
>     ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
>     ret |= get_V_tag_icc(CC_SRC, CC_SRC2);
>     return ret;
> @@ -1071,7 +1087,7 @@ static uint32_t compute_all_tadd(void)
>
>  static uint32_t compute_C_tadd(void)
>  {
> -    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
> +    return get_C_add_icc(CC_DST, CC_SRC);
>  }
>
>  static uint32_t compute_all_taddtv(void)
> @@ -1079,34 +1095,30 @@ static uint32_t compute_all_taddtv(void)
>     uint32_t ret;
>
>     ret = get_NZ_icc(CC_DST);
> -    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
> +    ret |= get_C_add_icc(CC_DST, CC_SRC);
>     return ret;
>  }
>
>  static uint32_t compute_C_taddtv(void)
>  {
> -    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
> +    return get_C_add_icc(CC_DST, CC_SRC);
>  }
>
> -/* carry = (~src1[31] & src2[31]) | ( dst[31]  & (~src1[31] | src2[31])) */

This was a copy & paste from the docs too.

> -static inline uint32_t get_C_sub_icc(target_ulong dst, target_ulong src1,
> -                                     target_ulong src2)
> +static inline uint32_t get_C_sub_icc(uint32_t src1, uint32_t src2)
>  {
>     uint32_t ret = 0;
>
> -    if (((~(src1 & (1ULL << 31))) & (src2 & (1ULL << 31)))
> -        | ((dst & (1ULL << 31)) & (( ~(src1 & (1ULL << 31)))
> -                                   | (src2 & (1ULL << 31)))))
> +    if (src1 < src2)
>         ret |= PSR_CARRY;
>     return ret;
>  }
>
> -static inline uint32_t get_V_sub_icc(target_ulong dst, target_ulong src1,
> -                                     target_ulong src2)
> +static inline uint32_t get_V_sub_icc(uint32_t dst, uint32_t src1,
> +                                     uint32_t src2)
>  {
>     uint32_t ret = 0;
>
> -    if (((src1 ^ src2) & (src1 ^ dst)) & (1ULL << 31))
> +    if (((src1 ^ src2) & (src1 ^ dst)) & (1U << 31))
>         ret |= PSR_OVF;
>     return ret;
>  }
> @@ -1153,14 +1165,14 @@ static uint32_t compute_all_sub(void)
>     uint32_t ret;
>
>     ret = get_NZ_icc(CC_DST);
> -    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
> +    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
>     ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>     return ret;
>  }
>
>  static uint32_t compute_C_sub(void)
>  {
> -    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
> +    return get_C_sub_icc(CC_SRC, CC_SRC2);
>  }
>
>  #ifdef TARGET_SPARC64
> @@ -1185,12 +1197,32 @@ static uint32_t compute_C_subx_xcc(void)
>  }
>  #endif
>
> +static uint32_t compute_all_subx(void)
> +{
> +    uint32_t ret;
> +
> +    ret = get_NZ_icc(CC_DST);
> +    ret |= get_C_sub_icc(CC_DST - CC_SRC2, CC_SRC);
> +    ret |= get_C_sub_icc(CC_DST, CC_SRC2);
> +    ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
> +    return ret;
> +}
> +
> +static uint32_t compute_C_subx(void)
> +{
> +    uint32_t ret;
> +
> +    ret = get_C_sub_icc(CC_DST - CC_SRC2, CC_SRC);
> +    ret |= get_C_sub_icc(CC_DST, CC_SRC2);
> +    return ret;
> +}
> +
>  static uint32_t compute_all_tsub(void)
>  {
>     uint32_t ret;
>
>     ret = get_NZ_icc(CC_DST);
> -    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
> +    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
>     ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
>     ret |= get_V_tag_icc(CC_SRC, CC_SRC2);
>     return ret;
> @@ -1198,7 +1230,7 @@ static uint32_t compute_all_tsub(void)
>
>  static uint32_t compute_C_tsub(void)
>  {
> -    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
> +    return get_C_sub_icc(CC_SRC, CC_SRC2);
>  }
>
>  static uint32_t compute_all_tsubtv(void)
> @@ -1206,13 +1238,13 @@ static uint32_t compute_all_tsubtv(void)
>     uint32_t ret;
>
>     ret = get_NZ_icc(CC_DST);
> -    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
> +    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
>     return ret;
>  }
>
>  static uint32_t compute_C_tsubtv(void)
>  {
> -    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
> +    return get_C_sub_icc(CC_SRC, CC_SRC2);
>  }
>
>  static uint32_t compute_all_logic(void)
> @@ -1242,11 +1274,11 @@ static const CCTable icc_table[CC_OP_NB] = {
>     [CC_OP_FLAGS] = { compute_all_flags, compute_C_flags },
>     [CC_OP_DIV] = { compute_all_div, compute_C_div },
>     [CC_OP_ADD] = { compute_all_add, compute_C_add },
> -    [CC_OP_ADDX] = { compute_all_add, compute_C_add },
> +    [CC_OP_ADDX] = { compute_all_addx, compute_C_addx },
>     [CC_OP_TADD] = { compute_all_tadd, compute_C_tadd },
>     [CC_OP_TADDTV] = { compute_all_taddtv, compute_C_taddtv },
>     [CC_OP_SUB] = { compute_all_sub, compute_C_sub },
> -    [CC_OP_SUBX] = { compute_all_sub, compute_C_sub },
> +    [CC_OP_SUBX] = { compute_all_subx, compute_C_subx },
>     [CC_OP_TSUB] = { compute_all_tsub, compute_C_tsub },
>     [CC_OP_TSUBTV] = { compute_all_tsubtv, compute_C_tsubtv },
>     [CC_OP_LOGIC] = { compute_all_logic, compute_C_logic },
> --
> 1.7.0.1
>
>
>
Richard Henderson - May 12, 2010, 2:56 p.m.
On 05/11/2010 02:31 PM, Artyom Tarasenko wrote:
> Nack. It looks like you reverted carry generation to the previous
> (broken) behavior.

Oh?  I suppose I should go back and look at the logs, but the way
it's written there sure seems to match 5.1.5.1 of the sparcv9 manual:
You'll only get carry into the high bit on X - Y if X < Y.

In any case, if you meant to fix carry computation for subtraction,
you missed changing the 64-bit carry computation too.  It is still
written the same way before and after my patch, and matches both 
the expectation I have from the v9 manual and the post-patch code
along the 32-bit path.



r~
Richard Henderson - May 12, 2010, 3:04 p.m.
On 05/11/2010 02:31 PM, Artyom Tarasenko wrote:
> Nack. It looks like you reverted carry generation to the previous
> (broken) behavior.

Perhaps you could point out the change I'm reverting?  I don't see
any change to the actual computation of the flags since 
f0f26a06d51b7e7764f8951cdbf67ac9ad507f6d, last year.


r~
Artyom Tarasenko - May 12, 2010, 3:18 p.m.
2010/5/12 Richard Henderson <rth@twiddle.net>:
> On 05/11/2010 02:31 PM, Artyom Tarasenko wrote:
>> Nack. It looks like you reverted carry generation to the previous
>> (broken) behavior.
>
> Oh?  I suppose I should go back and look at the logs, but the way
> it's written there sure seems to match 5.1.5.1 of the sparcv9 manual:
> You'll only get carry into the high bit on X - Y if X < Y.
>
> In any case, if you meant to fix carry computation for subtraction,
> you missed changing the 64-bit carry computation too.

May very well be the case. I cared only about sparcv8. Was sort of
expecting that someone would stop me telling I broke v9...

> It is still
> written the same way before and after my patch, and matches both
> the expectation I have from the v9 manual and the post-patch code
> along the 32-bit path.

Probably v9 differs from v8?

> Perhaps you could point out the change I'm reverting?  I don't
> see any change to the actual computation of the flags since
> f0f26a06d51b7e7764f8951cdbf67ac9ad507f6d, last year.

It is last year, but
 3e6ba503400c34cbe0f9ad6e289921688bf303a3
Richard Henderson - May 12, 2010, 5:08 p.m.
On 05/12/2010 08:18 AM, Artyom Tarasenko wrote:
> It is last year, but
>  3e6ba503400c34cbe0f9ad6e289921688bf303a3

>     The page 108 of the SPARC Version 8 Architecture Manual describes
>     that addcc and addxcc shall compute carry flag the same way.
>     The page 110 claims the same about subcc and subxcc instructions.
>     This patch fixes carry computation in corner cases and removes redundant cod
>     The most visible effect of the patch is enabling Solaris boot when using OBP

I'll contradict you right there by asserting that the manual
says no such thing about computing the carry flag "the same way".

What it says is:

p29:
# Carry is set on addition if there is a carry out of bit 31.
# Carry is set on subtraction if there is borrow into bit 31.

p108:
# ADDX and ADDXcc also add the PSR's carry (c) bit; that is
# they compute "r[rs1] + r[rs2] + c"...

How are the two computations "the same"?  First, it says that
carry is set if there is carry out.  Second, it says that there
is the extra addition of the (previous) C bit.  If there is an
extra addition, there must be an extra chance for carry, and
thus the carry flag *cannot* be computed in the same way.

Looking closely at the generation of the addx carry (with the
attached test program), I see that the original definition of
the ADDX carry, as still seen in the _xcc versions, does not
generate correct values, and your definition of the carry does.
However, that does not mean that we need to use your version
of the carry for plain ADD, only for ADDX.

I'll send a revised patch sequence shortly.


r~

Patch

diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index 09449c5..c36bc54 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -896,13 +896,13 @@  static uint32_t compute_C_flags(void)
     return env->psr & PSR_CARRY;
 }
 
-static inline uint32_t get_NZ_icc(target_ulong dst)
+static inline uint32_t get_NZ_icc(int32_t dst)
 {
     uint32_t ret = 0;
 
-    if (!(dst & 0xffffffffULL))
+    if (dst == 0)
         ret |= PSR_ZERO;
-    if ((int32_t) (dst & 0xffffffffULL) < 0)
+    if (dst < 0)
         ret |= PSR_NEG;
     return ret;
 }
@@ -918,13 +918,13 @@  static uint32_t compute_C_flags_xcc(void)
     return env->xcc & PSR_CARRY;
 }
 
-static inline uint32_t get_NZ_xcc(target_ulong dst)
+static inline uint32_t get_NZ_xcc(target_long dst)
 {
     uint32_t ret = 0;
 
     if (!dst)
         ret |= PSR_ZERO;
-    if ((int64_t)dst < 0)
+    if (dst < 0)
         ret |= PSR_NEG;
     return ret;
 }
@@ -953,25 +953,21 @@  static uint32_t compute_C_div(void)
     return 0;
 }
 
-/* carry = (src1[31] & src2[31]) | ( ~dst[31] & (src1[31] | src2[31])) */
-static inline uint32_t get_C_add_icc(target_ulong dst, target_ulong src1,
-                                     target_ulong src2)
+static inline uint32_t get_C_add_icc(uint32_t dst, uint32_t src1)
 {
     uint32_t ret = 0;
 
-    if (((src1 & (1ULL << 31)) & (src2 & (1ULL << 31)))
-        | ((~(dst & (1ULL << 31)))
-           & ((src1 & (1ULL << 31)) | (src2 & (1ULL << 31)))))
+    if (dst < src1)
         ret |= PSR_CARRY;
     return ret;
 }
 
-static inline uint32_t get_V_add_icc(target_ulong dst, target_ulong src1,
-                                         target_ulong src2)
+static inline uint32_t get_V_add_icc(uint32_t dst, uint32_t src1,
+                                     uint32_t src2)
 {
     uint32_t ret = 0;
 
-    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1ULL << 31))
+    if (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & (1U << 31))
         ret |= PSR_OVF;
     return ret;
 }
@@ -1017,14 +1013,14 @@  static uint32_t compute_all_add(void)
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_add_icc(CC_DST, CC_SRC);
     ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
     return ret;
 }
 
 static uint32_t compute_C_add(void)
 {
-    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    return get_C_add_icc(CC_DST, CC_SRC);
 }
 
 #ifdef TARGET_SPARC64
@@ -1049,6 +1045,26 @@  static uint32_t compute_C_addx_xcc(void)
 }
 #endif
 
+static uint32_t compute_all_addx(void)
+{
+    uint32_t ret;
+
+    ret = get_NZ_icc(CC_DST);
+    ret |= get_C_add_icc(CC_DST - CC_SRC2, CC_SRC);
+    ret |= get_C_add_icc(CC_DST, CC_SRC);
+    ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    return ret;
+}
+
+static uint32_t compute_C_addx(void)
+{
+    uint32_t ret;
+
+    ret = get_C_add_icc(CC_DST - CC_SRC2, CC_SRC);
+    ret |= get_C_add_icc(CC_DST, CC_SRC);
+    return ret;
+}
+
 static inline uint32_t get_V_tag_icc(target_ulong src1, target_ulong src2)
 {
     uint32_t ret = 0;
@@ -1063,7 +1079,7 @@  static uint32_t compute_all_tadd(void)
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_add_icc(CC_DST, CC_SRC);
     ret |= get_V_add_icc(CC_DST, CC_SRC, CC_SRC2);
     ret |= get_V_tag_icc(CC_SRC, CC_SRC2);
     return ret;
@@ -1071,7 +1087,7 @@  static uint32_t compute_all_tadd(void)
 
 static uint32_t compute_C_tadd(void)
 {
-    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    return get_C_add_icc(CC_DST, CC_SRC);
 }
 
 static uint32_t compute_all_taddtv(void)
@@ -1079,34 +1095,30 @@  static uint32_t compute_all_taddtv(void)
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_add_icc(CC_DST, CC_SRC);
     return ret;
 }
 
 static uint32_t compute_C_taddtv(void)
 {
-    return get_C_add_icc(CC_DST, CC_SRC, CC_SRC2);
+    return get_C_add_icc(CC_DST, CC_SRC);
 }
 
-/* carry = (~src1[31] & src2[31]) | ( dst[31]  & (~src1[31] | src2[31])) */
-static inline uint32_t get_C_sub_icc(target_ulong dst, target_ulong src1,
-                                     target_ulong src2)
+static inline uint32_t get_C_sub_icc(uint32_t src1, uint32_t src2)
 {
     uint32_t ret = 0;
 
-    if (((~(src1 & (1ULL << 31))) & (src2 & (1ULL << 31)))
-        | ((dst & (1ULL << 31)) & (( ~(src1 & (1ULL << 31)))
-                                   | (src2 & (1ULL << 31)))))
+    if (src1 < src2)
         ret |= PSR_CARRY;
     return ret;
 }
 
-static inline uint32_t get_V_sub_icc(target_ulong dst, target_ulong src1,
-                                     target_ulong src2)
+static inline uint32_t get_V_sub_icc(uint32_t dst, uint32_t src1,
+                                     uint32_t src2)
 {
     uint32_t ret = 0;
 
-    if (((src1 ^ src2) & (src1 ^ dst)) & (1ULL << 31))
+    if (((src1 ^ src2) & (src1 ^ dst)) & (1U << 31))
         ret |= PSR_OVF;
     return ret;
 }
@@ -1153,14 +1165,14 @@  static uint32_t compute_all_sub(void)
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
     ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
     return ret;
 }
 
 static uint32_t compute_C_sub(void)
 {
-    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    return get_C_sub_icc(CC_SRC, CC_SRC2);
 }
 
 #ifdef TARGET_SPARC64
@@ -1185,12 +1197,32 @@  static uint32_t compute_C_subx_xcc(void)
 }
 #endif
 
+static uint32_t compute_all_subx(void)
+{
+    uint32_t ret;
+
+    ret = get_NZ_icc(CC_DST);
+    ret |= get_C_sub_icc(CC_DST - CC_SRC2, CC_SRC);
+    ret |= get_C_sub_icc(CC_DST, CC_SRC2);
+    ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    return ret;
+}
+
+static uint32_t compute_C_subx(void)
+{
+    uint32_t ret;
+
+    ret = get_C_sub_icc(CC_DST - CC_SRC2, CC_SRC);
+    ret |= get_C_sub_icc(CC_DST, CC_SRC2);
+    return ret;
+}
+
 static uint32_t compute_all_tsub(void)
 {
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
     ret |= get_V_sub_icc(CC_DST, CC_SRC, CC_SRC2);
     ret |= get_V_tag_icc(CC_SRC, CC_SRC2);
     return ret;
@@ -1198,7 +1230,7 @@  static uint32_t compute_all_tsub(void)
 
 static uint32_t compute_C_tsub(void)
 {
-    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    return get_C_sub_icc(CC_SRC, CC_SRC2);
 }
 
 static uint32_t compute_all_tsubtv(void)
@@ -1206,13 +1238,13 @@  static uint32_t compute_all_tsubtv(void)
     uint32_t ret;
 
     ret = get_NZ_icc(CC_DST);
-    ret |= get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    ret |= get_C_sub_icc(CC_SRC, CC_SRC2);
     return ret;
 }
 
 static uint32_t compute_C_tsubtv(void)
 {
-    return get_C_sub_icc(CC_DST, CC_SRC, CC_SRC2);
+    return get_C_sub_icc(CC_SRC, CC_SRC2);
 }
 
 static uint32_t compute_all_logic(void)
@@ -1242,11 +1274,11 @@  static const CCTable icc_table[CC_OP_NB] = {
     [CC_OP_FLAGS] = { compute_all_flags, compute_C_flags },
     [CC_OP_DIV] = { compute_all_div, compute_C_div },
     [CC_OP_ADD] = { compute_all_add, compute_C_add },
-    [CC_OP_ADDX] = { compute_all_add, compute_C_add },
+    [CC_OP_ADDX] = { compute_all_addx, compute_C_addx },
     [CC_OP_TADD] = { compute_all_tadd, compute_C_tadd },
     [CC_OP_TADDTV] = { compute_all_taddtv, compute_C_taddtv },
     [CC_OP_SUB] = { compute_all_sub, compute_C_sub },
-    [CC_OP_SUBX] = { compute_all_sub, compute_C_sub },
+    [CC_OP_SUBX] = { compute_all_subx, compute_C_subx },
     [CC_OP_TSUB] = { compute_all_tsub, compute_C_tsub },
     [CC_OP_TSUBTV] = { compute_all_tsubtv, compute_C_tsubtv },
     [CC_OP_LOGIC] = { compute_all_logic, compute_C_logic },