diff mbox

Adding support for LPD and LPDG instructions

Message ID 20170227112219.29313-2-ebischoff@suse.com
State New
Headers show

Commit Message

Eric Bischoff Feb. 27, 2017, 11:22 a.m. UTC
From: Eric Bischoff <ebischoff@nerim.net>

LPD = LOAD PAIR DISJOINT
---
 target/s390x/insn-data.def |  4 +++-
 target/s390x/translate.c   | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

Comments

Richard Henderson Feb. 27, 2017, 7:10 p.m. UTC | #1
On 02/27/2017 10:22 PM, Eric Bischoff wrote:
> From: Eric Bischoff <ebischoff@nerim.net>
>
> LPD = LOAD PAIR DISJOINT
> ---
>  target/s390x/insn-data.def |  4 +++-
>  target/s390x/translate.c   | 21 +++++++++++++++++++++
>  2 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index 075ff59..e427988 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -504,7 +504,9 @@
>      C(0xb9e2, LOCGR,   RRF_c, LOC, r1, r2, r1, 0, loc, 0)
>      C(0xebf2, LOC,     RSY_b, LOC, r1, m2_32u, new, r1_32, loc, 0)
>      C(0xebe2, LOCG,    RSY_b, LOC, r1, m2_64, r1, 0, loc, 0)
> -/* LOAD PAIR DISJOINT TODO */
> +/* LOAD PAIR DISJOINT */
> +    C(0xc804, LPD,     SSF,   ILA, m1_32s, m2_32s, 0, r3_P32, movx, zero)
> +    C(0xc805, LPDG,    SSF,   ILA, m1_64, m2_64, 0, r3_P64, movx, zero)

The thing is, in order to be able to say that the two loads were interlocked, 
which is what you're doing with CC=0, we need to provide some atomicity.

In general, this is going to require that you check parallel_cpus, and if true, 
signal cpu_loop_exit_atomic.

As a special case, it would be possible to check for two loads that happen to 
be sequential and perform them as an atomic read.  Whether that happens often 
enough to be worthwhile I don't know.


r~
Eric Bischoff Feb. 28, 2017, 8:58 a.m. UTC | #2
Le mardi 28 février 2017, 06:10:45 CET Richard Henderson a écrit :
> On 02/27/2017 10:22 PM, Eric Bischoff wrote:
> > From: Eric Bischoff <ebischoff@nerim.net>
> > 
> > LPD = LOAD PAIR DISJOINT
> > ---
> > 
> >  target/s390x/insn-data.def |  4 +++-
> >  target/s390x/translate.c   | 21 +++++++++++++++++++++
> >  2 files changed, 24 insertions(+), 1 deletion(-)
> > 
> > diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> > index 075ff59..e427988 100644
> > --- a/target/s390x/insn-data.def
> > +++ b/target/s390x/insn-data.def
> > @@ -504,7 +504,9 @@
> > 
> >      C(0xb9e2, LOCGR,   RRF_c, LOC, r1, r2, r1, 0, loc, 0)
> >      C(0xebf2, LOC,     RSY_b, LOC, r1, m2_32u, new, r1_32, loc, 0)
> >      C(0xebe2, LOCG,    RSY_b, LOC, r1, m2_64, r1, 0, loc, 0)
> > 
> > -/* LOAD PAIR DISJOINT TODO */
> > +/* LOAD PAIR DISJOINT */
> > +    C(0xc804, LPD,     SSF,   ILA, m1_32s, m2_32s, 0, r3_P32, movx, zero)
> > +    C(0xc805, LPDG,    SSF,   ILA, m1_64, m2_64, 0, r3_P64, movx, zero)
> 
> > The thing is, in order to be able to say that the two loads were
> interlocked, which is what you're doing with CC=0, we need to provide some
> atomicity.
> 
> In general, this is going to require that you check parallel_cpus, and if
> true, signal cpu_loop_exit_atomic.
> 
> As a special case, it would be possible to check for two loads that happen
> to be sequential and perform them as an atomic read.  Whether that happens
> often enough to be worthwhile I don't know.

Understood now.

I'm working on a v3 patch based on the code kindly sent in private mail by 
Richard.
diff mbox

Patch

diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 075ff59..e427988 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -504,7 +504,9 @@ 
     C(0xb9e2, LOCGR,   RRF_c, LOC, r1, r2, r1, 0, loc, 0)
     C(0xebf2, LOC,     RSY_b, LOC, r1, m2_32u, new, r1_32, loc, 0)
     C(0xebe2, LOCG,    RSY_b, LOC, r1, m2_64, r1, 0, loc, 0)
-/* LOAD PAIR DISJOINT TODO */
+/* LOAD PAIR DISJOINT */
+    C(0xc804, LPD,     SSF,   ILA, m1_32s, m2_32s, 0, r3_P32, movx, zero)
+    C(0xc805, LPDG,    SSF,   ILA, m1_64, m2_64, 0, r3_P64, movx, zero)
 /* LOAD POSITIVE */
     C(0x1000, LPR,     RR_a,  Z,   0, r2_32s, new, r1_32, abs, abs32)
     C(0xb900, LPGR,    RRE,   Z,   0, r2, r1, 0, abs, abs64)
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 01c6217..a363efb 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -4158,6 +4158,11 @@  static ExitStatus op_zero2(DisasContext *s, DisasOps *o)
    the original inputs), update the various cc data structures in order to
    be able to compute the new condition code.  */
 
+static void cout_zero(DisasContext *s, DisasOps *o)
+{
+    gen_op_movi_cc(s, 0);
+}
+
 static void cout_abs32(DisasContext *s, DisasOps *o)
 {
     gen_op_update1_cc_i64(s, CC_OP_ABS_32, o->out);
@@ -4420,6 +4425,22 @@  static void wout_r1_D32(DisasContext *s, DisasFields *f, DisasOps *o)
 }
 #define SPEC_wout_r1_D32 SPEC_r1_even
 
+static void wout_r3_P32(DisasContext *s, DisasFields *f, DisasOps *o)
+{
+    int r3 = get_field(f, r3);
+    store_reg32_i64(r3, o->out);
+    store_reg32_i64(r3 + 1, o->out2);
+}
+#define SPEC_wout_r3_P32 SPEC_r3_even
+
+static void wout_r3_P64(DisasContext *s, DisasFields *f, DisasOps *o)
+{
+    int r3 = get_field(f, r3);
+    store_reg(r3, o->out);
+    store_reg(r3 + 1, o->out2);
+}
+#define SPEC_wout_r3_P64 SPEC_r3_even
+
 static void wout_e1(DisasContext *s, DisasFields *f, DisasOps *o)
 {
     store_freg32_i64(get_field(f, r1), o->out);