diff mbox series

[1/1] target/i386: fix phadd* with identical destination and source register

Message ID 20200401225253.30745-1-j@jannau.net
State New
Headers show
Series [1/1] target/i386: fix phadd* with identical destination and source register | expand

Commit Message

Janne Grunau April 1, 2020, 10:52 p.m. UTC
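The phadd* helpers wrote each result element directly into the destination
register and only afterwards read the source register. When destination
and source are the same register, the source elements had already been
overwritten by the time they were read, producing wrong results. Compute
the result in a temporary register and copy it to the destination at the
end.

Detected by asm test suite failures in dav1d
(https://code.videolan.org/videolan/dav1d). Can be reproduced by
`qemu-x86_64 -cpu core2duo ./tests/checkasm --test=mc_8bpc 1659890620`.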

Signed-off-by: Janne Grunau <j@jannau.net>
---
 target/i386/ops_sse.h | 53 +++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 20 deletions(-)

Comments

Richard Henderson April 2, 2020, 5:23 p.m. UTC | #1
On 4/1/20 3:52 PM, Janne Grunau wrote:
> Detected by asm test suite failures in dav1d
> (https://code.videolan.org/videolan/dav1d). Can be reproduced by
> `qemu-x86_64 -cpu core2duo ./tests/checkasm --test=mc_8bpc 1659890620`.
> 
> Signed-off-by: Janne Grunau <j@jannau.net>

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
Paolo Bonzini May 21, 2020, 4:04 p.m. UTC | #2
On 02/04/20 00:52, Janne Grunau wrote:
> Detected by asm test suite failures in dav1d
> (https://code.videolan.org/videolan/dav1d). Can be reproduced by
> `qemu-x86_64 -cpu core2duo ./tests/checkasm --test=mc_8bpc 1659890620`.
> 
> Signed-off-by: Janne Grunau <j@jannau.net>
> ---
>  target/i386/ops_sse.h | 53 +++++++++++++++++++++++++++----------------
>  1 file changed, 33 insertions(+), 20 deletions(-)
> 
> diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
> index ec1ec745d0..2f41511aef 100644
> --- a/target/i386/ops_sse.h
> +++ b/target/i386/ops_sse.h
> @@ -1435,34 +1435,47 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
>  
>  void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
>  {
> -    d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
> -    d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
> -    XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
> -    XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
> -    d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
> -    d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
> -    XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
> -    XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
> +
> +    Reg r;
> +
> +    r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
> +    r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
> +    XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
> +    XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
> +    r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
> +    r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
> +    XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
> +    XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
> +
> +    *d = r;
>  }
>  
>  void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
>  {
> -    d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
> -    XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
> -    d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
> -    XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
> +    Reg r;
> +
> +    r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
> +    XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
> +    r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
> +    XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
> +
> +    *d = r;
>  }
>  
>  void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
>  {
> -    d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
> -    d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
> -    XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
> -    XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
> -    d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
> -    d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
> -    XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
> -    XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
> +    Reg r;
> +
> +    r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
> +    r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
> +    XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
> +    XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
> +    r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
> +    r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
> +    XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
> +    XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
> +
> +    *d = r;
>  }
>  
>  void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
> 

Queued, thanks.

Paolo
diff mbox series

Patch

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index ec1ec745d0..2f41511aef 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -1435,34 +1435,47 @@  void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 
 void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-    d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
-    d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
-    XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
-    XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
-    d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
-    d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
-    XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
-    XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
+
+    Reg r;
+
+    r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
+    r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
+    XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
+    XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
+    r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
+    r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
+    XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
+    XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
+
+    *d = r;
 }
 
 void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-    d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
-    XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
-    d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
-    XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
+    Reg r;
+
+    r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
+    XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
+    r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
+    XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
+
+    *d = r;
 }
 
 void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-    d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
-    d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
-    XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
-    XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
-    d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
-    d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
-    XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
-    XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
+    Reg r;
+
+    r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
+    r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
+    XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
+    XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
+    r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
+    r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
+    XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
+    XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
+
+    *d = r;
 }
 
 void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)