diff mbox

[v5,2/5] target-ppc: add vector extract instructions

Message ID 1472711818-13514-3-git-send-email-raji@linux.vnet.ibm.com
State New
Headers show

Commit Message

Rajalakshmi Srinivasaraghavan Sept. 1, 2016, 6:36 a.m. UTC
The following vector extract instructions are added from ISA 3.0.

vextractub - Vector Extract Unsigned Byte
vextractuh - Vector Extract Unsigned Halfword
vextractuw - Vector Extract Unsigned Word
vextractd - Vector Extract Doubleword

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |    4 ++++
 target-ppc/int_helper.c             |   22 ++++++++++++++++++++++
 target-ppc/translate/vmx-impl.inc.c |   10 ++++++++++
 target-ppc/translate/vmx-ops.inc.c  |   10 +++++++---
 4 files changed, 43 insertions(+), 3 deletions(-)

Comments

Richard Henderson Sept. 1, 2016, 4:08 p.m. UTC | #1
On 08/31/2016 11:36 PM, Rajalakshmi Srinivasaraghavan wrote:
> +#if defined(HOST_WORDS_BIGENDIAN)
> +#define VEXTRACT(suffix, element)                                            \
> +    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
> +    {                                                                        \
> +        r->u64[0] = r->u64[1] = 0;                                           \
> +        memmove(&r->u8[8 - sizeof(r->element)], &b->u8[index],               \
> +               sizeof(r->element[0]));                                       \

Again, you must consider R == B.  I made this same comment wrt v2, when you 
still had a memcpy here.

This is trivial:

   (1) Use memmove to set the first sizeof(r->element[0]) bytes,
   (2) Use memset to zero the last (16 - sizeof(r->element[0])) bytes.

You have some test cases for these insns, don't you?


r~
Rajalakshmi Srinivasaraghavan Sept. 2, 2016, 9:31 a.m. UTC | #2
On 09/01/2016 09:38 PM, Richard Henderson wrote:
> On 08/31/2016 11:36 PM, Rajalakshmi Srinivasaraghavan wrote:
>> +#if defined(HOST_WORDS_BIGENDIAN)
>> +#define VEXTRACT(suffix, 
>> element)                                            \
>> +    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, 
>> uint32_t index) \
>> + { \
>> +        r->u64[0] = r->u64[1] = 
>> 0;                                           \
>> +        memmove(&r->u8[8 - sizeof(r->element)], 
>> &b->u8[index],               \
>> + sizeof(r->element[0])); \
>
> Again, you must consider R == B.  I made this same comment wrt v2, 
> when you still had a memcpy here.
>
> This is trivial:
>
>   (1) Use memmove to set first sizeof(r->element[0]) bytes,
>   (2) Use memset 0 to clean last (16 - sizeof(r->element[0]) bytes.
>
Ack. So I have to call memset twice, to clear both the leading and the trailing bytes.

/*
 * VEXTRACT(suffix, element) defines helper_vextract<suffix>(r, b, index).
 *
 * The helper copies one element (sizeof(r->element[0]) bytes) out of B and
 * places it right-justified in the first 8 bytes of R (big-endian byte
 * numbering); every other byte of R is cleared.
 *
 * R and B may be the same register.  The element is therefore moved with
 * memmove() *before* anything is cleared, and the two memset() calls only
 * touch bytes outside the freshly written element.
 *
 * u8[] is indexed in host byte order, so the little-endian variant mirrors
 * both the source offset and the destination offset.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif

> You have some test cases for these insns, don't you?
>
>
> r~
>
>
diff mbox

Patch

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index e8db233..c3502b3 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -252,6 +252,10 @@  DEF_HELPER_2(vspltisw, void, avr, i32)
 DEF_HELPER_3(vspltb, void, avr, avr, i32)
 DEF_HELPER_3(vsplth, void, avr, avr, i32)
 DEF_HELPER_3(vspltw, void, avr, avr, i32)
+DEF_HELPER_3(vextractub, void, avr, avr, i32)
+DEF_HELPER_3(vextractuh, void, avr, avr, i32)
+DEF_HELPER_3(vextractuw, void, avr, avr, i32)
+DEF_HELPER_3(vextractd, void, avr, avr, i32)
 DEF_HELPER_3(vinsertb, void, avr, avr, i32)
 DEF_HELPER_3(vinserth, void, avr, avr, i32)
 DEF_HELPER_3(vinsertw, void, avr, avr, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 4217547..84e57dc 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1828,6 +1828,40 @@  VINSERT(h, u16)
 VINSERT(w, u32)
 VINSERT(d, u64)
 #undef VINSERT
+/*
+ * vextract{ub,uh,uw,d}: copy the element of B that starts at byte INDEX
+ * (big-endian byte numbering) right-justified into the first 8 bytes of R
+ * and zero every other byte of R.
+ *
+ * R and B may be the same register, so the element is moved first and the
+ * remaining bytes are cleared afterwards; clearing R up front would destroy
+ * the source.  u8[] is indexed in host order, hence the two variants.
+ */
+#if defined(HOST_WORDS_BIGENDIAN)
+#define VEXTRACT(suffix, element)                                            \
+    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
+    {                                                                        \
+        uint32_t es = sizeof(r->element[0]);                                 \
+        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
+        memset(&r->u8[8], 0, 8);                                             \
+        memset(&r->u8[0], 0, 8 - es);                                        \
+    }
+#else
+#define VEXTRACT(suffix, element)                                            \
+    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
+    {                                                                        \
+        uint32_t es = sizeof(r->element[0]);                                 \
+        uint32_t s = (16 - index) - es;                                      \
+        memmove(&r->u8[8], &b->u8[s], es);                                   \
+        memset(&r->u8[0], 0, 8);                                             \
+        memset(&r->u8[8 + es], 0, 8 - es);                                   \
+    }
+#endif
+VEXTRACT(ub, u8)
+VEXTRACT(uh, u16)
+VEXTRACT(uw, u32)
+VEXTRACT(d, u64)
+#undef VEXTRACT
 
 #define VSPLTI(suffix, element, splat_type)                     \
     void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
index 14af89b..682b5de 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -664,6 +664,10 @@  static void glue(gen_, name)(DisasContext *ctx)                         \
 GEN_VXFORM_UIMM(vspltb, 6, 8);
 GEN_VXFORM_UIMM(vsplth, 6, 9);
 GEN_VXFORM_UIMM(vspltw, 6, 10);
+GEN_VXFORM_UIMM_SPLAT(vextractub, 6, 8, 15);
+GEN_VXFORM_UIMM_SPLAT(vextractuh, 6, 9, 14);
+GEN_VXFORM_UIMM_SPLAT(vextractuw, 6, 10, 12);
+GEN_VXFORM_UIMM_SPLAT(vextractd, 6, 11, 8);
 GEN_VXFORM_UIMM_SPLAT(vinsertb, 6, 12, 15);
 GEN_VXFORM_UIMM_SPLAT(vinserth, 6, 13, 14);
 GEN_VXFORM_UIMM_SPLAT(vinsertw, 6, 14, 12);
@@ -672,6 +676,12 @@  GEN_VXFORM_UIMM_ENV(vcfux, 5, 12);
 GEN_VXFORM_UIMM_ENV(vcfsx, 5, 13);
 GEN_VXFORM_UIMM_ENV(vctuxs, 5, 14);
 GEN_VXFORM_UIMM_ENV(vctsxs, 5, 15);
+GEN_VXFORM_DUAL(vspltb, PPC_NONE, PPC2_ALTIVEC_207,
+                      vextractub, PPC_NONE, PPC2_ISA300);
+GEN_VXFORM_DUAL(vsplth, PPC_NONE, PPC2_ALTIVEC_207,
+                      vextractuh, PPC_NONE, PPC2_ISA300);
+GEN_VXFORM_DUAL(vspltw, PPC_NONE, PPC2_ALTIVEC_207,
+                      vextractuw, PPC_NONE, PPC2_ISA300);
 GEN_VXFORM_DUAL(vspltisb, PPC_NONE, PPC2_ALTIVEC_207,
                       vinsertb, PPC_NONE, PPC2_ISA300);
 GEN_VXFORM_DUAL(vspltish, PPC_NONE, PPC2_ALTIVEC_207,
diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
index e6abeae..01d36bb 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -197,6 +197,13 @@  GEN_VXRFORM_DUAL(vcmpbfp, vcmpgtsd, 3, 15, PPC_ALTIVEC, PPC_NONE)
 #define GEN_VXFORM_DUAL_INV(name0, name1, opc2, opc3, inval0, inval1, type) \
 GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, \
                                                                PPC_NONE)
+GEN_VXFORM_DUAL_INV(vspltb, vextractub, 6, 8, 0x00000000, 0x100000,
+                                               PPC2_ALTIVEC_207),
+GEN_VXFORM_DUAL_INV(vsplth, vextractuh, 6, 9, 0x00000000, 0x100000,
+                                               PPC2_ALTIVEC_207),
+GEN_VXFORM_DUAL_INV(vspltw, vextractuw, 6, 10, 0x00000000, 0x100000,
+                                               PPC2_ALTIVEC_207),
+GEN_VXFORM_300_EXT(vextractd, 6, 11, 0x100000),
 GEN_VXFORM_DUAL_INV(vspltisb, vinsertb, 6, 12, 0x00000000, 0x100000,
                                                PPC2_ALTIVEC_207),
 GEN_VXFORM_DUAL_INV(vspltish, vinserth, 6, 13, 0x00000000, 0x100000,
@@ -226,9 +233,6 @@  GEN_VXFORM_NOA(vrfiz, 5, 9),
 
 #define GEN_VXFORM_UIMM(name, opc2, opc3)                               \
     GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC)
-GEN_VXFORM_UIMM(vspltb, 6, 8),
-GEN_VXFORM_UIMM(vsplth, 6, 9),
-GEN_VXFORM_UIMM(vspltw, 6, 10),
 GEN_VXFORM_UIMM(vcfux, 5, 12),
 GEN_VXFORM_UIMM(vcfsx, 5, 13),
 GEN_VXFORM_UIMM(vctuxs, 5, 14),