
[04/13] target-ppc: implement lxvll instruction

Message ID 1480937130-24561-5-git-send-email-nikunj@linux.vnet.ibm.com
State New

Commit Message

Nikunj A Dadhania Dec. 5, 2016, 11:25 a.m. UTC
lxvll: Load VSX Vector Left-justified with Length

Little/Big-endian Storage:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|FF|FF|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+

Loading 14 bytes to vector (8-bit elements) in BE/LE:
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
|“T”|“h”|“i”|“s”|“ ”|“i”|“s”|“ ”|“a”|“ ”|“T”|“E”|“S”|“T”|00|00|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+
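To make the left-justified semantics above concrete, here is a small, self-contained C sketch of the architectural effect (plain host C for illustration only, not the QEMU helper; load_left_justified() is an invented name): the first nb storage bytes go into the vector starting at element 0, the remaining elements read as zero, and the result is the same in big- and little-endian mode.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustration of the lxvll result: copy the first nb bytes of storage
 * into the vector left-justified (element 0 first) and zero-fill the rest. */
static void load_left_justified(uint8_t vec[16], const uint8_t *mem, unsigned nb)
{
    if (nb > 16) {
        nb = 16;            /* the length is capped at the register width */
    }
    memset(vec, 0, 16);     /* elements past the length become zero */
    memcpy(vec, mem, nb);   /* bytes are not reversed in either endian mode */
}

int main(void)
{
    const unsigned char storage[] = "This is a TEST\xFF\xFF";
    uint8_t vec[16];

    load_left_justified(vec, storage, 14);
    for (int i = 0; i < 16; i++) {
        printf("%02X ", vec[i]);   /* 54 68 69 73 ... 53 54 00 00 */
    }
    printf("\n");
    return 0;
}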

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  1 +
 target-ppc/mem_helper.c             | 25 +++++++++++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c |  1 +
 target-ppc/translate/vsx-ops.inc.c  |  1 +
 4 files changed, 28 insertions(+)

Comments

Richard Henderson Dec. 5, 2016, 5:52 p.m. UTC | #1
On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
> +void helper_lxvll(CPUPPCState *env, target_ulong addr,
> +                  target_ulong xt_num, target_ulong rb)
> +{
> +    ppc_vsr_t xt;
> +
> +    getVSR(xt_num, &xt, env);
> +    if (unlikely((rb & 0xFF) == 0)) {
> +        xt.s128 = int128_make128(0, 0);

Nit: int128_zero.

> +    } else {
> +        target_ulong end = ((rb & 0xFF) * 8) - 1;
> +        if (msr_le) {
> +            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
> +            addr = addr_add(env, addr, 8);
> +            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
> +            xt.s128 = int128_and(xt.s128, mask_u128(127 - end, 127));

The ISA document says that this is a sequence of byte operations.  Which means
that END < 127 will not access bytes outside of the length.  Which means that
your code will trigger SIGSEGV near page boundaries when real hardware won't.

I also don't see how this does the right thing for little-endian.


r~
Richard Henderson Dec. 5, 2016, 5:59 p.m. UTC | #2
On 12/05/2016 09:52 AM, Richard Henderson wrote:
> On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
>> +void helper_lxvll(CPUPPCState *env, target_ulong addr,
>> +                  target_ulong xt_num, target_ulong rb)
>> +{
>> +    ppc_vsr_t xt;
>> +
>> +    getVSR(xt_num, &xt, env);
>> +    if (unlikely((rb & 0xFF) == 0)) {
>> +        xt.s128 = int128_make128(0, 0);
> 
> Nit: int128_zero.
> 
>> +    } else {
>> +        target_ulong end = ((rb & 0xFF) * 8) - 1;
>> +        if (msr_le) {
>> +            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +            addr = addr_add(env, addr, 8);
>> +            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +            xt.s128 = int128_and(xt.s128, mask_u128(127 - end, 127));
> 
> The ISA document says that this is a sequence of byte operations.  Which means
> that END < 127 will not access bytes outside of the length.  Which means that
> your code will trigger SIGSEGV near page boundaries when real hardware won't.
> 
> I also don't see how this does the right thing for little-endian.

Oh, and one more thing:

Do you need to perform the permission check on all NB bytes before writing any
of them?  I suspect that real hardware does, otherwise the instruction might
not be restartable.

Are there any atomicity guarantees made by real hardware?  If so, you may need
to implement this differently.  If not, a comment to that effect would be helpful.


r~
Nikunj A Dadhania Dec. 6, 2016, 5:45 a.m. UTC | #3
Richard Henderson <rth@twiddle.net> writes:

> On 12/05/2016 03:25 AM, Nikunj A Dadhania wrote:
>> +void helper_lxvll(CPUPPCState *env, target_ulong addr,
>> +                  target_ulong xt_num, target_ulong rb)
>> +{
>> +    ppc_vsr_t xt;
>> +
>> +    getVSR(xt_num, &xt, env);
>> +    if (unlikely((rb & 0xFF) == 0)) {
>> +        xt.s128 = int128_make128(0, 0);
>
> Nit: int128_zero.

Sure.

>> +    } else {
>> +        target_ulong end = ((rb & 0xFF) * 8) - 1;
>> +        if (msr_le) {
>> +            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +            addr = addr_add(env, addr, 8);
>> +            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
>> +            xt.s128 = int128_and(xt.s128, mask_u128(127 - end, 127));
>
> The ISA document says that this is a sequence of byte operations.  Which means
> that END < 127 will not access bytes outside of the length.  Which means that
> your code will trigger SIGSEGV near page boundaries when real hardware
> won't.

In that case, I can use cpu_ldub_data_ra().

> I also don't see how this does the right thing for little-endian.

It needs to be in big-endian order - two things:
1) LO/HI swapped
2) No byte swapping

AFAIU, for the example given in the ISA, I see the right output in my test.

Regards
Nikunj
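
For illustration, a rough sketch of the byte-wise approach discussed above (not the final patch: it keeps the patch's rb & 0xFF length extraction, assumes the left-justified result is identical in both endian modes as the ISA example in the commit message suggests, and reuses only accessors already present in mem_helper.c):

void helper_lxvll(CPUPPCState *env, target_ulong addr,
                  target_ulong xt_num, target_ulong rb)
{
    ppc_vsr_t xt;
    int nb = rb & 0xFF;        /* length extraction as in the patch below */
    uint64_t hi = 0, lo = 0;
    int i;

    if (nb > 16) {
        nb = 16;               /* at most one full vector register */
    }
    /* Byte-by-byte loads: only the first nb bytes are accessed, so a length
     * that stops short of a page boundary cannot fault beyond it.  Element 0
     * receives the byte at the lowest address, with no byte reversal. */
    for (i = 0; i < nb; i++) {
        uint64_t byte = cpu_ldub_data_ra(env, addr, GETPC());
        if (i < 8) {
            hi |= byte << (56 - i * 8);
        } else {
            lo |= byte << (56 - (i - 8) * 8);
        }
        addr = addr_add(env, addr, 1);
    }
    /* Elements past the length stay zero.  The register is committed only
     * here, so a fault inside the loop leaves the target VSR unmodified. */
    xt.u64[HI_IDX] = hi;
    xt.u64[LO_IDX] = lo;
    putVSR(xt_num, &xt, env);
}

Whether lxvll also needs atomicity guarantees or an up-front permission check across all NB bytes, as raised in comment #2, would still need to be confirmed against the ISA and noted in the final version.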

Patch

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index d9ccafd..67c8b71 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -318,6 +318,7 @@  DEF_HELPER_3(stvebx, void, env, avr, tl)
 DEF_HELPER_3(stvehx, void, env, avr, tl)
 DEF_HELPER_3(stvewx, void, env, avr, tl)
 DEF_HELPER_4(lxvl, void, env, tl, tl, tl)
+DEF_HELPER_4(lxvll, void, env, tl, tl, tl)
 DEF_HELPER_4(vsumsws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum4sbs, void, env, avr, avr, avr)
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 0a8ff54..c5826bc 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -309,6 +309,31 @@  void helper_lxvl(CPUPPCState *env, target_ulong addr,
     putVSR(xt_num, &xt, env);
 }
 
+void helper_lxvll(CPUPPCState *env, target_ulong addr,
+                  target_ulong xt_num, target_ulong rb)
+{
+    ppc_vsr_t xt;
+
+    getVSR(xt_num, &xt, env);
+    if (unlikely((rb & 0xFF) == 0)) {
+        xt.s128 = int128_make128(0, 0);
+    } else {
+        target_ulong end = ((rb & 0xFF) * 8) - 1;
+        if (msr_le) {
+            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+            addr = addr_add(env, addr, 8);
+            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+            xt.s128 = int128_and(xt.s128, mask_u128(127 - end, 127));
+        } else {
+            xt.u64[HI_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+            addr = addr_add(env, addr, 8);
+            xt.u64[LO_IDX] = cpu_ldq_data_ra(env, addr, GETPC());
+            xt.s128 = int128_and(xt.s128, mask_u128(0, end));
+        }
+    }
+    putVSR(xt_num, &xt, env);
+}
+
 #undef HI_IDX
 #undef LO_IDX
 
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index e53f91e..40f584e 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -266,6 +266,7 @@  static void gen_##name(DisasContext *ctx)                       \
 }
 
 VSX_VECTOR_LOAD_STORE_LENGTH(lxvl)
+VSX_VECTOR_LOAD_STORE_LENGTH(lxvll)
 
 #define VSX_LOAD_SCALAR_DS(name, operation)                       \
 static void gen_##name(DisasContext *ctx)                         \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 3383cdd..7751a7b 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -11,6 +11,7 @@  GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE,  PPC2_ISA300),
 GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(lxvx, 0x1F, 0x0C, 0x08, 0x00000040, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(lxvl, 0x1F, 0x0D, 0x08, 0, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(lxvll, 0x1F, 0x0D, 0x09, 0, PPC_NONE, PPC2_ISA300),
 
 GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300),