diff mbox

[12/19] Add VSX ISA2.06 xtsqrt Instructions

Message ID 526949C6.7050403@gmail.com
State New
Headers show

Commit Message

Tom Musta Oct. 24, 2013, 4:24 p.m. UTC
This patch adds the VSX floating point test for software square
root instructions defined by V2.06 of the PowerPC ISA: xstsqrtdp,
xvtsqrtdp, xvtsqrtsp.

Signed-off-by: Tom Musta <tommusta@gmail.com>
---
  target-ppc/fpu_helper.c |   52 +++++++++++++++++++++++++++++++++++++++++++++++
  target-ppc/helper.h     |    3 ++
  target-ppc/translate.c  |    6 +++++
  3 files changed, 61 insertions(+), 0 deletions(-)

Comments

Richard Henderson Oct. 24, 2013, 8:34 p.m. UTC | #1
On 10/24/2013 09:24 AM, Tom Musta wrote:
> This patch adds the VSX floating point test for software square
> root instructions defined by V2.06 of the PowerPC ISA: xstsqrtdp,
> xvtsqrtdp, xvtsqrtsp.
> 
> Signed-off-by: Tom Musta <tommusta@gmail.com>
> ---

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~
diff mbox

Patch

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 0dc498c..4e484a3 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2126,3 +2126,55 @@  void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
  VSX_TDIV(xstdivdp, 1, float64, f64, -1022, 1023, 52)
  VSX_TDIV(xvtdivdp, 2, float64, f64, -1022, 1023, 52)
  VSX_TDIV(xvtdivsp, 4, float32, f32, -126, 127, 23)
+
+/* VSX_TSQRT - VSX floating point test for square root
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   emin  - minimum unbiased exponent
+ *   nbits - number of fraction bits
+ *
+ * Sets CR field BF to 0b1 || fg_flag || fe_flag || 0b0; the flags are
+ * the OR of the per-element conditions tested below.
+ */
+#define VSX_TSQRT(op, nels, tp, fld, emin, nbits)                       \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
+{                                                                       \
+    ppc_vsr_t xb;                                                       \
+    int i;                                                              \
+    int fe_flag = 0;                                                    \
+    int fg_flag = 0;                                                    \
+                                                                        \
+    /* XX2-form: XB is the only source operand. */                      \
+    getVSR(xB(opcode), &xb, env);                                       \
+                                                                        \
+    for (i = 0; i < nels; i++) {                                        \
+        if (unlikely(tp##_is_infinity(xb.fld[i]) ||                     \
+                     tp##_is_zero(xb.fld[i]))) {                        \
+            fe_flag = 1;                                                \
+            fg_flag = 1;                                                \
+        } else {                                                        \
+            int e_b = tp##_get_unbiased_exp(xb.fld[i]);                 \
+                                                                        \
+            /* Zero was handled above, so it is not re-tested here. */  \
+            if (unlikely(tp##_is_any_nan(xb.fld[i]))) {                 \
+                fe_flag = 1;                                            \
+            } else if (unlikely(tp##_is_neg(xb.fld[i]))) {              \
+                fe_flag = 1;                                            \
+            } else if (e_b <= ((emin) + (nbits))) {                     \
+                fe_flag = 1;                                            \
+            }                                                           \
+                                                                        \
+            if (unlikely(tp##_is_denormal(xb.fld[i]))) {                \
+                fg_flag = 1;                                            \
+            }                                                           \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+    env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
+}
+
+VSX_TSQRT(xstsqrtdp, 1, float64, f64, -1022, 52)
+VSX_TSQRT(xvtsqrtdp, 2, float64, f64, -1022, 52)
+VSX_TSQRT(xvtsqrtsp, 4, float32, f32, -126, 23)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 316b16f..e1abada 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -259,6 +259,7 @@  DEF_HELPER_2(xsredp, void, env, i32)
  DEF_HELPER_2(xssqrtdp, void, env, i32)
  DEF_HELPER_2(xsrsqrtedp, void, env, i32)
  DEF_HELPER_2(xstdivdp, void, env, i32)
+DEF_HELPER_2(xstsqrtdp, void, env, i32)

  DEF_HELPER_2(xvadddp, void, env, i32)
  DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -268,6 +269,7 @@  DEF_HELPER_2(xvredp, void, env, i32)
  DEF_HELPER_2(xvsqrtdp, void, env, i32)
  DEF_HELPER_2(xvrsqrtedp, void, env, i32)
  DEF_HELPER_2(xvtdivdp, void, env, i32)
+DEF_HELPER_2(xvtsqrtdp, void, env, i32)

  DEF_HELPER_2(xvaddsp, void, env, i32)
  DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -277,6 +279,7 @@  DEF_HELPER_2(xvresp, void, env, i32)
  DEF_HELPER_2(xvsqrtsp, void, env, i32)
  DEF_HELPER_2(xvrsqrtesp, void, env, i32)
  DEF_HELPER_2(xvtdivsp, void, env, i32)
+DEF_HELPER_2(xvtsqrtsp, void, env, i32)

  DEF_HELPER_2(efscfsi, i32, env, i32)
  DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index fe071f0..6978fe0 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7301,6 +7301,7 @@  GEN_VSX_HELPER_2(xsredp, 0x14, 0x05, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xsrsqrtedp, 0x14, 0x04, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xstdivdp, 0x14, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xstsqrtdp, 0x14, 0x06, 0, PPC2_VSX)

  GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7310,6 +7311,7 @@  GEN_VSX_HELPER_2(xvredp, 0x14, 0x0D, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvtdivdp, 0x14, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtsqrtdp, 0x14, 0x0E, 0, PPC2_VSX)

  GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7319,6 +7321,7 @@  GEN_VSX_HELPER_2(xvresp, 0x14, 0x09, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvtdivsp, 0x14, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtsqrtsp, 0x14, 0x0A, 0, PPC2_VSX)

  #define VSX_LOGICAL(name, tcg_op)                                    \
  static void glue(gen_, name)(DisasContext * ctx)                     \
@@ -10010,6 +10013,7 @@  GEN_XX2FORM(xsredp,  0x14, 0x05, PPC2_VSX),
  GEN_XX2FORM(xssqrtdp,  0x16, 0x04, PPC2_VSX),
  GEN_XX2FORM(xsrsqrtedp,  0x14, 0x04, PPC2_VSX),
  GEN_XX3FORM(xstdivdp,  0x14, 0x07, PPC2_VSX),
+GEN_XX2FORM(xstsqrtdp,  0x14, 0x06, PPC2_VSX),

  GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
  GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10019,6 +10023,7 @@  GEN_XX2FORM(xvredp,  0x14, 0x0D, PPC2_VSX),
  GEN_XX2FORM(xvsqrtdp,  0x16, 0x0C, PPC2_VSX),
  GEN_XX2FORM(xvrsqrtedp,  0x14, 0x0C, PPC2_VSX),
  GEN_XX3FORM(xvtdivdp, 0x14, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvtsqrtdp, 0x14, 0x0E, PPC2_VSX),

  GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
  GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10028,6 +10033,7 @@  GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX),
  GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX),
  GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX),
  GEN_XX3FORM(xvtdivsp, 0x14, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvtsqrtsp, 0x14, 0x0A, PPC2_VSX),

  #undef VSX_LOGICAL
  #define VSX_LOGICAL(name, opc2, opc3, fl2) \