Patchwork [V4,22/22] target-ppc: Add VSX Rounding Instructions

login
register
mail settings
Submitter Tom Musta
Date Dec. 18, 2013, 8:19 p.m.
Message ID <1387397961-4894-23-git-send-email-tommusta@gmail.com>
Download mbox | patch
Permalink /patch/303043/
State New
Headers show

Comments

Tom Musta - Dec. 18, 2013, 8:19 p.m.
This patch adds the VSX Round to Floating Point Integer instructions:

  - xsrdpi, xsrdpic, xsrdpim, xsrdpip, xsrdpiz
  - xvrdpi, xvrdpic, xvrdpim, xvrdpip, xvrdpiz
  - xvrspi, xvrspic, xvrspim, xvrspip, xvrspiz

Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
 target-ppc/fpu_helper.c |   68 +++++++++++++++++++++++++++++++++++++++++++++++
 target-ppc/helper.h     |   15 ++++++++++
 target-ppc/translate.c  |   30 ++++++++++++++++++++
 3 files changed, 113 insertions(+), 0 deletions(-)

Patch

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index f913ad7..f3d02cc 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2548,3 +2548,71 @@  VSX_CVT_INT_TO_FP(xvcvuxdsp, 2, uint64, float32, u64[i], f32[j], \
                   2*i + JOFFSET, 0)
 VSX_CVT_INT_TO_FP(xvcvsxwsp, 4, int32, float32, u32[j], f32[i], i, 0)
 VSX_CVT_INT_TO_FP(xvcvuxwsp, 4, uint32, float32, u32[j], f32[i], i, 0)
+
+/* For "use current rounding mode", define a value that will not be one of
+ * the existing rounding model enums.
+ */
+#define FLOAT_ROUND_CURRENT (float_round_nearest_even + float_round_down + \
+  float_round_up + float_round_to_zero)
+
+/* VSX_ROUND - VSX floating point round
+ *   op    - instruction mnemonic
+ *   nels  - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64)
+ *   fld   - vsr_t field (f32 or f64)
+ *   rmode - rounding mode
+ *   sfprf - set FPRF
+ */
+#define VSX_ROUND(op, nels, tp, fld, rmode, sfprf)                     \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                    \
+{                                                                      \
+    ppc_vsr_t xt, xb;                                                  \
+    int i;                                                             \
+    getVSR(xB(opcode), &xb, env);                                      \
+    getVSR(xT(opcode), &xt, env);                                      \
+                                                                       \
+    if (rmode != FLOAT_ROUND_CURRENT) {                                \
+        set_float_rounding_mode(rmode, &env->fp_status);               \
+    }                                                                  \
+                                                                       \
+    for (i = 0; i < nels; i++) {                                       \
+        if (unlikely(tp##_is_signaling_nan(xb.fld[i]))) {              \
+            fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);     \
+            xt.fld[i] = tp##_snan_to_qnan(xb.fld[i]);                  \
+        } else {                                                       \
+            xt.fld[i] = tp##_round_to_int(xb.fld[i], &env->fp_status); \
+        }                                                              \
+        if (sfprf) {                                                   \
+            helper_compute_fprf(env, xt.fld[i], sfprf);                \
+        }                                                              \
+    }                                                                  \
+                                                                       \
+    /* If this is not a "use current rounding mode" instruction,       \
+     * then inhibit setting of the XX bit and restore rounding         \
+     * mode from FPSCR */                                              \
+    if (rmode != FLOAT_ROUND_CURRENT) {                                \
+        fpscr_set_rounding_mode(env);                                  \
+        env->fp_status.float_exception_flags &= ~float_flag_inexact;   \
+    }                                                                  \
+                                                                       \
+    putVSR(xT(opcode), &xt, env);                                      \
+    helper_float_check_status(env);                                    \
+}
+
+VSX_ROUND(xsrdpi, 1, float64, f64, float_round_nearest_even, 1)
+VSX_ROUND(xsrdpic, 1, float64, f64, FLOAT_ROUND_CURRENT, 1)
+VSX_ROUND(xsrdpim, 1, float64, f64, float_round_down, 1)
+VSX_ROUND(xsrdpip, 1, float64, f64, float_round_up, 1)
+VSX_ROUND(xsrdpiz, 1, float64, f64, float_round_to_zero, 1)
+
+VSX_ROUND(xvrdpi, 2, float64, f64, float_round_nearest_even, 0)
+VSX_ROUND(xvrdpic, 2, float64, f64, FLOAT_ROUND_CURRENT, 0)
+VSX_ROUND(xvrdpim, 2, float64, f64, float_round_down, 0)
+VSX_ROUND(xvrdpip, 2, float64, f64, float_round_up, 0)
+VSX_ROUND(xvrdpiz, 2, float64, f64, float_round_to_zero, 0)
+
+VSX_ROUND(xvrspi, 4, float32, f32, float_round_nearest_even, 0)
+VSX_ROUND(xvrspic, 4, float32, f32, FLOAT_ROUND_CURRENT, 0)
+VSX_ROUND(xvrspim, 4, float32, f32, float_round_down, 0)
+VSX_ROUND(xvrspip, 4, float32, f32, float_round_up, 0)
+VSX_ROUND(xvrspiz, 4, float32, f32, float_round_to_zero, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index de46b6f..0276b02 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -280,6 +280,11 @@  DEF_HELPER_2(xscvdpuxds, void, env, i32)
 DEF_HELPER_2(xscvdpuxws, void, env, i32)
 DEF_HELPER_2(xscvsxddp, void, env, i32)
 DEF_HELPER_2(xscvuxddp, void, env, i32)
+DEF_HELPER_2(xsrdpi, void, env, i32)
+DEF_HELPER_2(xsrdpic, void, env, i32)
+DEF_HELPER_2(xsrdpim, void, env, i32)
+DEF_HELPER_2(xsrdpip, void, env, i32)
+DEF_HELPER_2(xsrdpiz, void, env, i32)
 
 DEF_HELPER_2(xvadddp, void, env, i32)
 DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -312,6 +317,11 @@  DEF_HELPER_2(xvcvsxddp, void, env, i32)
 DEF_HELPER_2(xvcvuxddp, void, env, i32)
 DEF_HELPER_2(xvcvsxwdp, void, env, i32)
 DEF_HELPER_2(xvcvuxwdp, void, env, i32)
+DEF_HELPER_2(xvrdpi, void, env, i32)
+DEF_HELPER_2(xvrdpic, void, env, i32)
+DEF_HELPER_2(xvrdpim, void, env, i32)
+DEF_HELPER_2(xvrdpip, void, env, i32)
+DEF_HELPER_2(xvrdpiz, void, env, i32)
 
 DEF_HELPER_2(xvaddsp, void, env, i32)
 DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -344,6 +354,11 @@  DEF_HELPER_2(xvcvsxdsp, void, env, i32)
 DEF_HELPER_2(xvcvuxdsp, void, env, i32)
 DEF_HELPER_2(xvcvsxwsp, void, env, i32)
 DEF_HELPER_2(xvcvuxwsp, void, env, i32)
+DEF_HELPER_2(xvrspi, void, env, i32)
+DEF_HELPER_2(xvrspic, void, env, i32)
+DEF_HELPER_2(xvrspim, void, env, i32)
+DEF_HELPER_2(xvrspip, void, env, i32)
+DEF_HELPER_2(xvrspiz, void, env, i32)
 
 DEF_HELPER_2(efscfsi, i32, env, i32)
 DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index bb36e8f..52d7165 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7324,6 +7324,11 @@  GEN_VSX_HELPER_2(xscvdpuxds, 0x10, 0x14, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xscvdpuxws, 0x10, 0x04, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xscvsxddp, 0x10, 0x17, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xscvuxddp, 0x10, 0x16, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpi, 0x12, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpic, 0x16, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpim, 0x12, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpip, 0x12, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpiz, 0x12, 0x05, 0, PPC2_VSX)
 
 GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7356,6 +7361,11 @@  GEN_VSX_HELPER_2(xvcvsxddp, 0x10, 0x1F, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvcvuxddp, 0x10, 0x1E, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvcvsxwdp, 0x10, 0x0F, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvcvuxwdp, 0x10, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpi, 0x12, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpic, 0x16, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpim, 0x12, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpip, 0x12, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpiz, 0x12, 0x0D, 0, PPC2_VSX)
 
 GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7388,6 +7398,11 @@  GEN_VSX_HELPER_2(xvcvsxdsp, 0x10, 0x1B, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvcvuxdsp, 0x10, 0x1A, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvcvsxwsp, 0x10, 0x0B, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvcvuxwsp, 0x10, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspi, 0x12, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX)
 
 #define VSX_LOGICAL(name, tcg_op)                                    \
 static void glue(gen_, name)(DisasContext * ctx)                     \
@@ -10111,6 +10126,11 @@  GEN_XX2FORM(xscvdpuxds, 0x10, 0x14, PPC2_VSX),
 GEN_XX2FORM(xscvdpuxws, 0x10, 0x04, PPC2_VSX),
 GEN_XX2FORM(xscvsxddp, 0x10, 0x17, PPC2_VSX),
 GEN_XX2FORM(xscvuxddp, 0x10, 0x16, PPC2_VSX),
+GEN_XX2FORM(xsrdpi, 0x12, 0x04, PPC2_VSX),
+GEN_XX2FORM(xsrdpic, 0x16, 0x06, PPC2_VSX),
+GEN_XX2FORM(xsrdpim, 0x12, 0x07, PPC2_VSX),
+GEN_XX2FORM(xsrdpip, 0x12, 0x06, PPC2_VSX),
+GEN_XX2FORM(xsrdpiz, 0x12, 0x05, PPC2_VSX),
 
 GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
 GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10143,6 +10163,11 @@  GEN_XX2FORM(xvcvsxddp, 0x10, 0x1F, PPC2_VSX),
 GEN_XX2FORM(xvcvuxddp, 0x10, 0x1E, PPC2_VSX),
 GEN_XX2FORM(xvcvsxwdp, 0x10, 0x0F, PPC2_VSX),
 GEN_XX2FORM(xvcvuxwdp, 0x10, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvrdpi, 0x12, 0x0C, PPC2_VSX),
+GEN_XX2FORM(xvrdpic, 0x16, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvrdpim, 0x12, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvrdpip, 0x12, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvrdpiz, 0x12, 0x0D, PPC2_VSX),
 
 GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
 GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10175,6 +10200,11 @@  GEN_XX2FORM(xvcvsxdsp, 0x10, 0x1B, PPC2_VSX),
 GEN_XX2FORM(xvcvuxdsp, 0x10, 0x1A, PPC2_VSX),
 GEN_XX2FORM(xvcvsxwsp, 0x10, 0x0B, PPC2_VSX),
 GEN_XX2FORM(xvcvuxwsp, 0x10, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvrspi, 0x12, 0x08, PPC2_VSX),
+GEN_XX2FORM(xvrspic, 0x16, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvrspim, 0x12, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvrspip, 0x12, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvrspiz, 0x12, 0x09, PPC2_VSX),
 
 #undef VSX_LOGICAL
 #define VSX_LOGICAL(name, opc2, opc3, fl2) \