diff mbox

[v2,07/13] Add VSX Scalar Move Instructions

Message ID 5257F6E6.9050503@gmail.com
State New
Headers show

Commit Message

Tom Musta Oct. 11, 2013, 1:02 p.m. UTC
This patch adds the VSX scalar move instructions:

   - xsabsdp (Scalar Absolute Value Double-Precision)
   - xsnabsdp (Scalar Negative Absolute Value Double-Precision)
   - xsnegdp (Scalar Negate Double-Precision)
   - xscpsgndp (Scalar Copy Sign Double-Precision)

A common generator macro (VSX_SCALAR_MOVE) is added since these
instructions vary only slightly from each other.

Macros to support VSX XX2 and XX3 form opcodes are also added.
These macros handle the overloading of "opcode 2" space (instruction
bits 26:30) caused by AX and BX bits (29 and 30, respectively).

Signed-off-by: Tom Musta <tommusta@gmail.com>
---
  target-ppc/translate.c |   66 ++++++++++++++++++++++++++++++++++++++++++++++++
  1 files changed, 66 insertions(+), 0 deletions(-)

  #undef GEN_SPE

Comments

Paolo Bonzini Oct. 22, 2013, 6:31 a.m. UTC | #1
Il 11/10/2013 14:02, Tom Musta ha scritto:
> 
> +            case OP_CPSGN: {                                      \
> +                TCGv_i64 xa = tcg_temp_new();                     \
> +                tcg_gen_mov_i64(xa, cpu_vsrh(xA(ctx->opcode)));   \
> +                tcg_gen_andi_i64(xa, xa, (sgn_mask));             \
> +                tcg_gen_andi_i64(xb, xb, ~(sgn_mask));            \
> +                tcg_gen_or_i64(xb, xb, xa);                       \
> +                tcg_temp_free(xa);                                \
> +                break;                                            \

You might get slightly better generated code if you move the sgn_mask
immediate to a temporary and then use and+andc.

Paolo
Tom Musta Oct. 22, 2013, 12:57 p.m. UTC | #2
On 10/22/2013 1:31 AM, Paolo Bonzini wrote:
> Il 11/10/2013 14:02, Tom Musta ha scritto:
>>
>> +            case OP_CPSGN: {                                      \
>> +                TCGv_i64 xa = tcg_temp_new();                     \
>> +                tcg_gen_mov_i64(xa, cpu_vsrh(xA(ctx->opcode)));   \
>> +                tcg_gen_andi_i64(xa, xa, (sgn_mask));             \
>> +                tcg_gen_andi_i64(xb, xb, ~(sgn_mask));            \
>> +                tcg_gen_or_i64(xb, xb, xa);                       \
>> +                tcg_temp_free(xa);                                \
>> +                break;                                            \
>
> You might get slightly better generated code if you move the sgn_mask
> immediate to a temporary and then use and+andc.
>
> Paolo
>
Thank you for the suggestion, Paolo.  I will recode, retest and resubmit this one and patch 8.
diff mbox

Patch

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 8d53cb5..36e04b0 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7167,6 +7167,55 @@  static void gen_xxpermdi(DisasContext *ctx)
      tcg_temp_free(xh);
      tcg_temp_free(xl);
  }
+#define OP_ABS 1    /* AND with ~sgn_mask: clear the masked sign bit(s) */
+#define OP_NABS 2   /* OR with sgn_mask: set the masked sign bit(s) */
+#define OP_NEG 3    /* XOR with sgn_mask: flip the masked sign bit(s) */
+#define OP_CPSGN 4  /* masked bit(s) taken from xA, all other bits from xB */
+#define SGN_MASK_DP  0x8000000000000000ull /* bit 63; ull = 64-bit everywhere */
+#define SGN_MASK_SP 0x8000000080000000ull  /* bits 63 and 31 */
+
+#define VSX_SCALAR_MOVE(name, op, sgn_mask) /* emits gen_<name>() */ \
+static void glue(gen_, name)(DisasContext * ctx)                  \
+    {                                                             \
+        TCGv_i64 xb; /* working copy of cpu_vsrh(xB) */           \
+        if (unlikely(!ctx->vsx_enabled)) {                        \
+            gen_exception(ctx, POWERPC_EXCP_VSXU);                \
+            return;                                               \
+        }                                                         \
+        xb = tcg_temp_new_i64(); /* i64 temp to match TCGv_i64 */ \
+        tcg_gen_mov_i64(xb, cpu_vsrh(xB(ctx->opcode)));           \
+        switch (op) {                                             \
+            case OP_ABS: { /* clear the masked sign bit(s) */     \
+                tcg_gen_andi_i64(xb, xb, ~(sgn_mask));            \
+                break;                                            \
+            }                                                     \
+            case OP_NABS: { /* set the masked sign bit(s) */      \
+                tcg_gen_ori_i64(xb, xb, (sgn_mask));              \
+                break;                                            \
+            }                                                     \
+            case OP_NEG: { /* flip the masked sign bit(s) */      \
+                tcg_gen_xori_i64(xb, xb, (sgn_mask));             \
+                break;                                            \
+            }                                                     \
+            case OP_CPSGN: { /* sign from xA, the rest from xB */ \
+                TCGv_i64 xa = tcg_temp_new_i64();                 \
+                tcg_gen_mov_i64(xa, cpu_vsrh(xA(ctx->opcode)));   \
+                tcg_gen_andi_i64(xa, xa, (sgn_mask));             \
+                tcg_gen_andi_i64(xb, xb, ~(sgn_mask));            \
+                tcg_gen_or_i64(xb, xb, xa);                       \
+                tcg_temp_free_i64(xa);                            \
+                break;                                            \
+            }                                                     \
+        }                                                         \
+        tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xb);           \
+        tcg_temp_free_i64(xb); /* release with the i64 variant */ \
+    }
+
+VSX_SCALAR_MOVE(xsabsdp, OP_ABS, SGN_MASK_DP)     /* absolute value */
+VSX_SCALAR_MOVE(xsnabsdp, OP_NABS, SGN_MASK_DP)   /* negative absolute value */
+VSX_SCALAR_MOVE(xsnegdp, OP_NEG, SGN_MASK_DP)     /* negate */
+VSX_SCALAR_MOVE(xscpsgndp, OP_CPSGN, SGN_MASK_DP) /* copy sign */
+

  /***                           SPE extension                               ***/
  /* Register moves */
@@ -9626,6 +9675,18 @@  GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX),
  GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX),
  GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 0x1C, 0, PPC_NONE, PPC2_VSX),

+#undef GEN_XX2FORM /* XX2 form: same handler listed under opc2|0 and opc2|1 */
+#define GEN_XX2FORM(name, opc2, opc3, fl2)                           \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
+
+#undef GEN_XX3FORM /* XX3 form: same handler listed under opc2|0 .. opc2|3 */
+#define GEN_XX3FORM(name, opc2, opc3, fl2)                           \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 0, PPC_NONE, fl2)
+
  #undef GEN_XX3FORM_DM
  #define GEN_XX3FORM_DM(name, opc2, opc3) \
  GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\
@@ -9645,6 +9706,11 @@  GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\
  GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\
  GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x0C, 0, PPC_NONE, PPC2_VSX)

+GEN_XX2FORM(xsabsdp, 0x12, 0x15, PPC2_VSX),
+GEN_XX2FORM(xsnabsdp, 0x12, 0x16, PPC2_VSX),
+GEN_XX2FORM(xsnegdp, 0x12, 0x17, PPC2_VSX),
+GEN_XX3FORM(xscpsgndp, 0x00, 0x16, PPC2_VSX),
+
  GEN_XX3FORM_DM(xxpermdi, 0x08, 0x01),