Message ID | 20200401225253.30745-1-j@jannau.net |
---|---|
State | New |
Headers | show |
Series | [1/1] target/i386: fix phadd* with identical destination and source register | expand |
On 4/1/20 3:52 PM, Janne Grunau wrote: > Detected by asm test suite failures in dav1d > (https://code.videolan.org/videolan/dav1d). Can be reproduced by > `qemu-x86_64 -cpu core2duo ./tests/checkasm --test=mc_8bpc 1659890620`. > > Signed-off-by: Janne Grunau <j@jannau.net> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> r~
On 02/04/20 00:52, Janne Grunau wrote: > Detected by asm test suite failures in dav1d > (https://code.videolan.org/videolan/dav1d). Can be reproduced by > `qemu-x86_64 -cpu core2duo ./tests/checkasm --test=mc_8bpc 1659890620`. > > Signed-off-by: Janne Grunau <j@jannau.net> > --- > target/i386/ops_sse.h | 53 +++++++++++++++++++++++++++---------------- > 1 file changed, 33 insertions(+), 20 deletions(-) > > diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h > index ec1ec745d0..2f41511aef 100644 > --- a/target/i386/ops_sse.h > +++ b/target/i386/ops_sse.h > @@ -1435,34 +1435,47 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) > > void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) > { > - d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); > - d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); > - XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); > - XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); > - d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); > - d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); > - XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); > - XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); > + > + Reg r; > + > + r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); > + r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); > + XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); > + XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); > + r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); > + r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); > + XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); > + XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); > + > + *d = r; > } > > void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) > { > - d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); > - XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); > - d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); > - XMM_ONLY(d->L(3) = 
(int32_t)s->L(2) + (int32_t)s->L(3)); > + Reg r; > + > + r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); > + XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); > + r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); > + XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); > + > + *d = r; > } > > void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) > { > - d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); > - d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); > - XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); > - XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); > - d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); > - d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); > - XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); > - XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); > + Reg r; > + > + r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); > + r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); > + XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); > + XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); > + r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); > + r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); > + XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); > + XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); > + > + *d = r; > } > > void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) > Queued, thanks. Paolo
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index ec1ec745d0..2f41511aef 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -1435,34 +1435,47 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); - d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); - XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); - XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); - d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); - d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); - XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); - XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); + + Reg r; + + r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); + r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); + XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); + XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); + r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); + r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); + XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); + XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); + + *d = r; } void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); - XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); - d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); - XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); + Reg r; + + r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); + XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); + r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); + XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); + + *d = r; } void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); - d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); - 
XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); - XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); - d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); - d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); - XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); - XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); + Reg r; + + r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); + r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); + XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); + XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); + r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); + r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); + XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); + XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); + + *d = r; } void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
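The phadd* helpers wrote each result lane directly into the destination register and only afterwards read the lanes of the source register. When the destination and source are the same register, the source lanes had therefore already been overwritten by the time they were read, producing wrong results in the upper half. Compute the result in a temporary register and copy it to the destination at the end.

Detected by asm test suite failures in dav1d
(https://code.videolan.org/videolan/dav1d). Can be reproduced by
`qemu-x86_64 -cpu core2duo ./tests/checkasm --test=mc_8bpc 1659890620`.

Signed-off-by: Janne Grunau <j@jannau.net>
---
 target/i386/ops_sse.h | 53 +++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 20 deletions(-)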