diff mbox

[v2,1/5] target-ppc: add vector insert instructions

Message ID 1470737540-5526-2-git-send-email-raji@linux.vnet.ibm.com
State New
Headers show

Commit Message

Rajalakshmi Srinivasaraghavan Aug. 9, 2016, 10:12 a.m. UTC
The following vector insert instructions are added from ISA 3.0.

vinsertb - Vector Insert Byte
vinserth - Vector Insert Halfword
vinsertw - Vector Insert Word
vinsertd - Vector Insert Doubleword

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
 target-ppc/helper.h             |    4 +++
 target-ppc/int_helper.c         |   41 +++++++++++++++++++++++++++++++++++++++
 target-ppc/translate/vmx-impl.c |   10 +++++++++
 target-ppc/translate/vmx-ops.c  |   18 ++++++++++++----
 4 files changed, 68 insertions(+), 5 deletions(-)

Comments

Richard Henderson Aug. 9, 2016, 6:20 p.m. UTC | #1
On 08/09/2016 03:42 PM, Rajalakshmi Srinivasaraghavan wrote:
> +        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
> +            result.element[i] = r->element[i];                              \
> +        }                                                                   \

memcpy, or assignment.

> +        for (i = 0; i < sizeof(r->element[0]); i++) {                       \
> +            result.u8[SPLAT(element) + i] = b->u8[s + i];                   \
> +        }                                                                   \

Also memcpy.

I think your mistake is in your definition of SPLAT, as pointed out by David(?) 
elsewhere.  Any conditional should take place at translate time.

If an exception isn't legal for halfword splat=15, then forcing splat=14 (or 0, 
or...) would be legal, since splat=15 is undefined.


r~
Rajalakshmi Srinivasaraghavan Aug. 10, 2016, 4:05 a.m. UTC | #2
On 08/09/2016 11:50 PM, Richard Henderson wrote:
> On 08/09/2016 03:42 PM, Rajalakshmi Srinivasaraghavan wrote:
>> +        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
>> +            result.element[i] = r->element[i];                              \
>> +        }                                                                   \
>
> memcpy, or assignment.
>
>> +        for (i = 0; i < sizeof(r->element[0]); i++) {                       \
>> +            result.u8[SPLAT(element) + i] = b->u8[s + i];                   \
>> +        }                                                                   \
>
> Also memcpy.
Do you mean memcpy is preferred here?
>
> I think your mistake is in your definition of SPLAT, as pointed out by 
> David(?) elsewhere.  Any conditional should take place at translate time.
David pointed out that SPLAT_ELEMENT should not be used, which I have
corrected in the latest patch (v2).
>
> If an exception isn't legal for halfword splat=15, then forcing 
> splat=14 (or 0, or...) would be legal, since splat=15 is undefined.
Yes. I have done the same here.
#define SPLAT(element) \
((splat > (16 - sizeof(r->element[0]))) ? 16 - sizeof(r->element[0]) : splat)

>
>
> r~
>
>
diff mbox

Patch

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 93ac9e1..0923779 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -250,6 +250,10 @@  DEF_HELPER_2(vspltisw, void, avr, i32)
 DEF_HELPER_3(vspltb, void, avr, avr, i32)
 DEF_HELPER_3(vsplth, void, avr, avr, i32)
 DEF_HELPER_3(vspltw, void, avr, avr, i32)
+DEF_HELPER_3(vinsertb, void, avr, avr, i32)
+DEF_HELPER_3(vinserth, void, avr, avr, i32)
+DEF_HELPER_3(vinsertw, void, avr, avr, i32)
+DEF_HELPER_3(vinsertd, void, avr, avr, i32)
 DEF_HELPER_2(vupkhpx, void, avr, avr)
 DEF_HELPER_2(vupklpx, void, avr, avr)
 DEF_HELPER_2(vupkhsb, void, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 552b2e0..ece5543 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1792,6 +1792,47 @@  VSPLT(w, u32)
 #undef VSPLT
 #undef SPLAT_ELEMENT
 #undef _SPLAT_MASKED
+#define SPLAT(element)                                                      \
+((splat > (16 - sizeof(r->element[0]))) ? 16 - sizeof(r->element[0]) : splat)
+#if defined(HOST_WORDS_BIGENDIAN)
+#define VINSERT(suffix, element, index)                                     \
+    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
+    {                                                                       \
+        ppc_avr_t result;                                                   \
+        uint32_t s = sizeof(b->element[0]) * index;                         \
+        int i;                                                              \
+        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
+            result.element[i] = r->element[i];                              \
+        }                                                                   \
+        for (i = 0; i < sizeof(r->element[0]); i++) {                       \
+            result.u8[SPLAT(element) + i] = b->u8[s + i];                   \
+        }                                                                   \
+        *r = result;                                                        \
+    }
+#else
+#define VINSERT(suffix, element, index)                                     \
+    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
+    {                                                                       \
+        ppc_avr_t result;                                                   \
+        uint32_t s = sizeof(b->element[0]) *                                \
+                           ((ARRAY_SIZE(r->element) - index) - 1);          \
+        int i;                                                              \
+        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
+            result.element[i] = r->element[i];                              \
+        }                                                                   \
+        for (i = 0; i < sizeof(r->element[0]); i++) {                       \
+            result.u8[(16 - SPLAT(element)) - sizeof(r->element[0]) + i] =  \
+                                                              b->u8[s + i]; \
+        }                                                                   \
+        *r = result;                                                        \
+    }
+#endif
+VINSERT(b, u8, 7)
+VINSERT(h, u16, 3)
+VINSERT(w, u32, 1)
+VINSERT(d, u64, 0)
+#undef VINSERT
+#undef SPLAT
 
 #define VSPLTI(suffix, element, splat_type)                     \
     void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
diff --git a/target-ppc/translate/vmx-impl.c b/target-ppc/translate/vmx-impl.c
index ac78caf..4940ae3 100644
--- a/target-ppc/translate/vmx-impl.c
+++ b/target-ppc/translate/vmx-impl.c
@@ -626,10 +626,20 @@  static void glue(gen_, name)(DisasContext *ctx)                         \
 GEN_VXFORM_UIMM(vspltb, 6, 8);
 GEN_VXFORM_UIMM(vsplth, 6, 9);
 GEN_VXFORM_UIMM(vspltw, 6, 10);
+GEN_VXFORM_UIMM(vinsertb, 6, 12);
+GEN_VXFORM_UIMM(vinserth, 6, 13);
+GEN_VXFORM_UIMM(vinsertw, 6, 14);
+GEN_VXFORM_UIMM(vinsertd, 6, 15);
 GEN_VXFORM_UIMM_ENV(vcfux, 5, 12);
 GEN_VXFORM_UIMM_ENV(vcfsx, 5, 13);
 GEN_VXFORM_UIMM_ENV(vctuxs, 5, 14);
 GEN_VXFORM_UIMM_ENV(vctsxs, 5, 15);
+GEN_VXFORM_DUAL(vspltisb, PPC_NONE, PPC2_ALTIVEC_207,
+                      vinsertb, PPC_NONE, PPC2_ISA300);
+GEN_VXFORM_DUAL(vspltish, PPC_NONE, PPC2_ALTIVEC_207,
+                      vinserth, PPC_NONE, PPC2_ISA300);
+GEN_VXFORM_DUAL(vspltisw, PPC_NONE, PPC2_ALTIVEC_207,
+                      vinsertw, PPC_NONE, PPC2_ISA300);
 
 static void gen_vsldoi(DisasContext *ctx)
 {
diff --git a/target-ppc/translate/vmx-ops.c b/target-ppc/translate/vmx-ops.c
index 7449396..ca69e56 100644
--- a/target-ppc/translate/vmx-ops.c
+++ b/target-ppc/translate/vmx-ops.c
@@ -41,6 +41,9 @@  GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ALTIVEC_207)
 #define GEN_VXFORM_300(name, opc2, opc3)                                \
 GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA300)
 
+#define GEN_VXFORM_300_EXT(name, opc2, opc3, inval)                     \
+GEN_HANDLER_E(name, 0x04, opc2, opc3, inval, PPC_NONE, PPC2_ISA300)
+
 #define GEN_VXFORM_DUAL(name0, name1, opc2, opc3, type0, type1) \
 GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1)
 
@@ -191,11 +194,16 @@  GEN_VXRFORM(vcmpgefp, 3, 7)
 GEN_VXRFORM_DUAL(vcmpgtfp, vcmpgtud, 3, 11, PPC_ALTIVEC, PPC_NONE)
 GEN_VXRFORM_DUAL(vcmpbfp, vcmpgtsd, 3, 15, PPC_ALTIVEC, PPC_NONE)
 
-#define GEN_VXFORM_SIMM(name, opc2, opc3)                               \
-    GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC)
-GEN_VXFORM_SIMM(vspltisb, 6, 12),
-GEN_VXFORM_SIMM(vspltish, 6, 13),
-GEN_VXFORM_SIMM(vspltisw, 6, 14),
+#define GEN_VXFORM_DUAL_INV(name0, name1, opc2, opc3, inval0, inval1, type) \
+GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, \
+                                                               PPC_NONE)
+GEN_VXFORM_DUAL_INV(vspltisb, vinsertb, 6, 12, 0x00000000, 0x100000,
+                                               PPC2_ALTIVEC_207),
+GEN_VXFORM_DUAL_INV(vspltish, vinserth, 6, 13, 0x00000000, 0x100000,
+                                               PPC2_ALTIVEC_207),
+GEN_VXFORM_DUAL_INV(vspltisw, vinsertw, 6, 14, 0x00000000, 0x100000,
+                                               PPC2_ALTIVEC_207),
+GEN_VXFORM_300_EXT(vinsertd, 6, 15, 0x100000),
 
 #define GEN_VXFORM_NOA(name, opc2, opc3)                                \
     GEN_HANDLER(name, 0x04, opc2, opc3, 0x001f0000, PPC_ALTIVEC)