Patchwork [V3,03/14] target-ppc: Add ISA2.06 divdeu[o] Instructions

login
register
mail settings
Submitter Tom Musta
Date Dec. 18, 2013, 8:48 p.m.
Message ID <1387399747-4994-4-git-send-email-tommusta@gmail.com>
Download mbox | patch
Permalink /patch/303087/
State New
Headers show

Comments

Tom Musta - Dec. 18, 2013, 8:48 p.m.
This patch adds the Divide Doubleword Extended Unsigned (divdeu[o])
instructions.  These instructions require dividing a 128-bit
value by a 64-bit value.  Since 128-bit integer division is
not supported in TCG, a helper is used, providing a
shift-and-subtract (repeated difference) algorithm.

V2: Moved the 128-bit divide routine into host-utils per Richard
Henderson's suggestion.

Signed-off-by: Tom Musta <tommusta@gmail.com>
---
 include/qemu/host-utils.h |   14 ++++++++++++++
 target-ppc/helper.h       |    1 +
 target-ppc/int_helper.c   |   27 +++++++++++++++++++++++++++
 target-ppc/translate.c    |   20 ++++++++++++++++++++
 util/host-utils.c         |   38 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 100 insertions(+), 0 deletions(-)
Richard Henderson - Dec. 24, 2013, 3:20 p.m.
On 12/18/2013 12:48 PM, Tom Musta wrote:
> This patch adds the Divide Doubleword Extended Unsigned
> instructions.  This instruction requires dividing a 128-bit
> value by a 64 bit value.  Since 128 bit integer division is
> not supported in TCG, a helper is used, providing a
> repeated difference algorithm.
> 
> V2: Moved the 128-bit divide routine into host-utils per Richard
> Henderson's suggestion.
> 
> Signed-off-by: Tom Musta <tommusta@gmail.com>
> ---
>  include/qemu/host-utils.h |   14 ++++++++++++++
>  target-ppc/helper.h       |    1 +
>  target-ppc/int_helper.c   |   27 +++++++++++++++++++++++++++
>  target-ppc/translate.c    |   20 ++++++++++++++++++++
>  util/host-utils.c         |   38 ++++++++++++++++++++++++++++++++++++++
>  5 files changed, 100 insertions(+), 0 deletions(-)

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~
Scott Wood - Dec. 28, 2013, 12:30 a.m.
On Wed, 2013-12-18 at 14:48 -0600, Tom Musta wrote:
> This patch adds the Divide Doubleword Extended Unsigned
> instructions.  This instruction requires dividing a 128-bit
> value by a 64 bit value.  Since 128 bit integer division is
> not supported in TCG, a helper is used, providing a
> repeated difference algorithm.
> 
> V2: Moved the 128-bit divide routine into host-utils per Richard
> Henderson's suggestion.
> 
> Signed-off-by: Tom Musta <tommusta@gmail.com>
> ---
>  include/qemu/host-utils.h |   14 ++++++++++++++
>  target-ppc/helper.h       |    1 +
>  target-ppc/int_helper.c   |   27 +++++++++++++++++++++++++++
>  target-ppc/translate.c    |   20 ++++++++++++++++++++
>  util/host-utils.c         |   38 ++++++++++++++++++++++++++++++++++++++
>  5 files changed, 100 insertions(+), 0 deletions(-)
> 
> diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
> index 0f688c1..0ca187d 100644
> --- a/include/qemu/host-utils.h
> +++ b/include/qemu/host-utils.h
> @@ -44,9 +44,23 @@ static inline void muls64(uint64_t *plow, uint64_t *phigh,
>      *plow = r;
>      *phigh = r >> 64;
>  }
> +
> +static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
> +{
> +    if (divisor == 0) {
> +        return 1;
> +    } else {
> +        __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
> +        __uint128_t result = dividend / divisor;
> +        *plow = result;
> +        *phigh = dividend % divisor;
> +        return result > UINT64_MAX;
> +    }
> +}
>  #else
>  void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b);
>  void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b);
> +int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
>  #endif
>  
>  /**
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 1ec9c65..3eff4df 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -31,6 +31,7 @@ DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
>  
>  #if defined(TARGET_PPC64)
>  DEF_HELPER_3(mulldo, i64, env, i64, i64)
> +DEF_HELPER_4(divdeu, i64, env, i64, i64, i32)
>  #endif
>  
>  DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index abc69a7..d6dcac9 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -41,6 +41,33 @@ uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
>  }
>  #endif
>  
> +#if defined(TARGET_PPC64)
> +
> +uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
> +{
> +    uint64_t rt = 0;
> +    int overflow = 0;
> +
> +    overflow = divu128(&rt, &ra, rb);
> +
> +    if (unlikely(overflow)) {
> +        rt = 0; /* Undefined */
> +    }
> +
> +    if (oe) {
> +        if (unlikely(overflow)) {
> +            env->so = env->ov = 1;
> +        } else {
> +            env->ov = 0;
> +        }
> +    }
> +
> +    return rt;
> +}
> +
> +#endif
> +
> +
>  target_ulong helper_cntlzw(target_ulong t)
>  {
>      return clz32(t);
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 0d39de2..7a51c6d 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -1032,6 +1032,23 @@ GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1);
>  /* divw  divw.  divwo  divwo.   */
>  GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0);
>  GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1);
> +
> +/* divdeu[o][.] */
> +#define GEN_DIVDE(name, hlpr, compute_ov)                                     \
> +static void gen_##name(DisasContext *ctx)                                     \
> +{                                                                             \
> +    TCGv_i32 t0 = tcg_const_i32(compute_ov);                                  \
> +    gen_helper_##hlpr(cpu_gpr[rD(ctx->opcode)], cpu_env,                      \
> +                     cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); \
> +    tcg_temp_free_i32(t0);                                                    \
> +    if (unlikely(Rc(ctx->opcode) != 0)) {                                     \
> +        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);                           \
> +    }                                                                         \
> +}
> +
> +GEN_DIVDE(divdeu, divdeu, 0);
> +GEN_DIVDE(divdeuo, divdeu, 1);
> +
>  #endif
>  
>  /* mulhw  mulhw. */
> @@ -9594,6 +9611,9 @@ GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1),
>  GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0),
>  GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1),
>  
> +GEN_HANDLER_E(divdeu, 0x1F, 0x09, 0x0C, 0x00000000, PPC_NONE, PPC2_ISA206),
> +GEN_HANDLER_E(divdeuo, 0x1F, 0x09, 0x1C, 0x00000000, PPC_NONE, PPC2_ISA206),

These instructions are phased-in on embedded, and unlike bpermd are not
present on e5500/e6500 which are 64-bit ISA 2.06 implementations.
Wasn't the conclusion in a previous thread to use separate flags for
these instruction groups?

-Scott
Tom Musta - Jan. 3, 2014, 7:24 p.m.
On 12/27/2013 6:30 PM, Scott Wood wrote:
> These instructions are phased-in on embedded, and unlike bpermd are not
> present on e5500/e6500 which are 64-bit ISA 2.06 implementations.
> Wasn't the conclusion in a previous thread to use separate flags for
> these instruction groups?
> 
> -Scott

Scott:

Yes ... but those comments were made *after* this revision was published
(http://lists.nongnu.org/archive/html/qemu-devel/2013-12/msg03681.html).

I have yet to produce V4 of this series which will include the new and
separate flags.
Scott Wood - Jan. 3, 2014, 7:43 p.m.
On Fri, 2014-01-03 at 13:24 -0600, Tom Musta wrote:
> On 12/27/2013 6:30 PM, Scott Wood wrote:
> > These instructions are phased-in on embedded, and unlike bpermd are not
> > present on e5500/e6500 which are 64-bit ISA 2.06 implementations.
> > Wasn't the conclusion in a previous thread to use separate flags for
> > these instruction groups?
> > 
> > -Scott
> 
> Scott:
> 
> Yes ... but those comments were made *after* this revision was published
> (http://lists.nongnu.org/archive/html/qemu-devel/2013-12/msg03681.html).
> 
> I have yet to produce V4 of this series which will include the new and
> separate flags.

Oops... for some reason I thought the patchset was newer.  Sorry about
that.

-Scott

Patch

diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 0f688c1..0ca187d 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -44,9 +44,23 @@  static inline void muls64(uint64_t *plow, uint64_t *phigh,
     *plow = r;
     *phigh = r >> 64;
 }
+
+static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+{ /* Unsigned divide of the 128-bit value (*phigh:*plow) by divisor; nonzero return = failure. */
+    if (divisor == 0) {
+        return 1; /* divide by zero: *plow and *phigh are not written */
+    } else {
+        __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
+        __uint128_t result = dividend / divisor;
+        *plow = result; /* low 64 bits of the quotient (truncated if it is wider) */
+        *phigh = dividend % divisor; /* remainder */
+        return result > UINT64_MAX; /* 1 when the quotient does not fit in 64 bits */
+    }
+}
 #else
 void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b);
 void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b);
+int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
 #endif
 
 /**
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 1ec9c65..3eff4df 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -31,6 +31,7 @@  DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
 DEF_HELPER_3(mulldo, i64, env, i64, i64)
+DEF_HELPER_4(divdeu, i64, env, i64, i64, i32)
 #endif
 
 DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index abc69a7..d6dcac9 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -41,6 +41,33 @@  uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
 }
 #endif
 
+#if defined(TARGET_PPC64)
+
+uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
+{ /* Returns (ra:0) / rb, or 0 when divu128 reports failure (rb == 0 or quotient too wide). */
+    uint64_t rt = 0; /* doubles as the all-zero low half of the 128-bit dividend */
+    int overflow = 0;
+
+    overflow = divu128(&rt, &ra, rb); /* in: ra = high half; out: rt = quotient, ra = remainder */
+
+    if (unlikely(overflow)) {
+        rt = 0; /* Undefined */
+    }
+
+    if (oe) { /* env->ov / env->so are only touched when the caller passes oe != 0 */
+        if (unlikely(overflow)) {
+            env->so = env->ov = 1;
+        } else {
+            env->ov = 0; /* env->so is left as it was */
+        }
+    }
+
+    return rt;
+}
+
+#endif
+
+
 target_ulong helper_cntlzw(target_ulong t)
 {
     return clz32(t);
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 0d39de2..7a51c6d 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1032,6 +1032,23 @@  GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1);
 /* divw  divw.  divwo  divwo.   */
 GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0);
 GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1);
+
+/* divdeu[o][.]: gen_<name> calls helper <hlpr> with rA, rB and compute_ov, writing rD; gen_set_Rc0 runs if Rc is set */
+#define GEN_DIVDE(name, hlpr, compute_ov)                                     \
+static void gen_##name(DisasContext *ctx)                                     \
+{                                                                             \
+    TCGv_i32 t0 = tcg_const_i32(compute_ov);                                  \
+    gen_helper_##hlpr(cpu_gpr[rD(ctx->opcode)], cpu_env,                      \
+                     cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); \
+    tcg_temp_free_i32(t0);                                                    \
+    if (unlikely(Rc(ctx->opcode) != 0)) {                                     \
+        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);                           \
+    }                                                                         \
+}
+
+GEN_DIVDE(divdeu, divdeu, 0); /* gen_divdeu: helper_divdeu is called with oe = 0 */
+GEN_DIVDE(divdeuo, divdeu, 1); /* gen_divdeuo: same helper, called with oe = 1 */
+
 #endif
 
 /* mulhw  mulhw. */
@@ -9594,6 +9611,9 @@  GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1),
 GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0),
 GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1),
 
+GEN_HANDLER_E(divdeu, 0x1F, 0x09, 0x0C, 0x00000000, PPC_NONE, PPC2_ISA206),
+GEN_HANDLER_E(divdeuo, 0x1F, 0x09, 0x1C, 0x00000000, PPC_NONE, PPC2_ISA206),
+
 #undef GEN_INT_ARITH_MUL_HELPER
 #define GEN_INT_ARITH_MUL_HELPER(name, opc3)                                  \
 GEN_HANDLER(name, 0x1F, 0x09, opc3, 0x00000000, PPC_64B)
diff --git a/util/host-utils.c b/util/host-utils.c
index f0784d6..b6f7a6e 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -86,4 +86,42 @@  void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
     }
     *phigh = rh;
 }
+
+/* Unsigned 128-by-64 division of (*phigh:*plow) by divisor.  Returns 1 on */
+/* divide by zero or when the quotient exceeds 64 bits (outputs untouched); */
+/* otherwise returns 0 with the quotient in *plow and remainder in *phigh. */
+int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+{
+    uint64_t dhi = *phigh;
+    uint64_t dlo = *plow;
+    unsigned i;
+    uint64_t carry = 0;
+
+    if (divisor == 0) {
+        return 1;
+    } else if (dhi == 0) {
+        *plow  = dlo / divisor;
+        *phigh = dlo % divisor;
+        return 0;
+    } else if (dhi >= divisor) { /* quotient >= 2^64; this includes dhi == divisor */
+        return 1;
+    } else {
+
+        for (i = 0; i < 64; i++) { /* shift-and-subtract: one quotient bit per pass */
+            carry = dhi >> 63; /* bit about to be shifted out of the partial remainder */
+            dhi = (dhi << 1) | (dlo >> 63);
+            if (carry || (dhi >= divisor)) {
+                dhi -= divisor; /* modulo-2^64 wrap gives the right value when carry is set */
+                carry = 1;
+            } else {
+                carry = 0;
+            }
+            dlo = (dlo << 1) | carry; /* quotient bits accumulate in dlo as the dividend drains */
+        }
+
+        *plow = dlo;
+        *phigh = dhi;
+        return 0;
+    }
+}
 #endif /* !CONFIG_INT128 */