diff mbox

[ARM] Fix PR target/48252

Message ID OF8E0EC0C3.51E64C85-ONC2257883.00282C16-C2257883.0029331D@il.ibm.com
State New
Headers show

Commit Message

Ira Rosen May 1, 2011, 7:30 a.m. UTC
Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> wrote on 07/04/2011
03:16:44 PM:

>
> On 07/04/11 08:42, Ira Rosen wrote:
> > Hi,
> >
> > This patch makes both outputs of neon_vzip/vuzp/vtrn_internal
> > explicitly dependent on both inputs, preventing incorrect
> > optimization:
> > for
> > (a,b)<- vzip (c,d)
> > and
> > (e,f)<- vzip (g,d)
> > CSE decides that b==f, since b and f depend only on d.
> >
> > Tested on arm-linux-gnueabi. OK for trunk?
>
> This is OK for trunk.
>
> > OK for 4.6 after testing?
>
> I have no objections to this going into 4.5 and 4.6 since it corrects
> the implementation of the neon intrinsics but please check with the
> release managers.

OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi?

Thanks,
Ira

4.5 and 4.6 ChangeLog:

	Backport from mainline:
	2011-04-18  Ulrich Weigand  <ulrich.weigand@linaro.org>
                  Ira Rosen  <ira.rosen@linaro.org>

	PR target/48252
	* config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
	to match neon_vzip/vuzp/vtrn_internal.
	* config/arm/neon.md (neon_vtrn<mode>_internal): Make both
	outputs explicitly dependent on both inputs.
	(neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.

testsuite/Changelog:

	Backport from mainline:
	2011-04-18  Ulrich Weigand  <ulrich.weigand@linaro.org>
                  Ira Rosen  <ira.rosen@linaro.org>

	PR target/48252
	* gcc.target/arm/pr48252.c: New test.


4.5 patch:



>
> cheers
> Ramana
>
> >
> > Thanks,
> > Ira
> >
> > ChangeLog:
> >
> > 2011-04-07  Ulrich Weigand<ulrich.weigand@linaro.org>
> >                    Ira Rosen<ira.rosen@linaro.org>
> >
> >       PR target/48252
> >       * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
> >       to match neon_vzip/vuzp/vtrn_internal.
> >       * config/arm/neon.md (neon_vtrn<mode>_internal): Make both
> >       outputs explicitly dependent on both inputs.
> >       (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
> >
> > testsuite/Changelog:
> >
> >       PR target/48252
> >       * gcc.target/arm/pr48252.c: New test.
>

Comments

Ramana Radhakrishnan May 3, 2011, 2:56 p.m. UTC | #1
>> I have no objections to this going into 4.5 and 4.6 since it corrects
>> the implementation of the neon intrinsics but please check with the
>> release managers.
>
> OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi?


Ok. Please allow 24 hours for an RM (cc'd) to comment since this is 
technically not a regression but is a long term bug that needs fixing.

cheers
Ramana

>
> Thanks,
> Ira
>
> 4.5 and 4.6 ChangeLog:
>
> 	Backport from mainline:
> 	2011-04-18  Ulrich Weigand<ulrich.weigand@linaro.org>
>                    Ira Rosen<ira.rosen@linaro.org>
>
> 	PR target/48252
> 	* config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
> 	to match neon_vzip/vuzp/vtrn_internal.
> 	* config/arm/neon.md (neon_vtrn<mode>_internal): Make both
> 	outputs explicitly dependent on both inputs.
> 	(neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
>
> testsuite/Changelog:
>
> 	Backport from mainline:
> 	2011-04-18  Ulrich Weigand<ulrich.weigand@linaro.org>
>                    Ira Rosen<ira.rosen@linaro.org>
>
> 	PR target/48252
> 	* gcc.target/arm/pr48252.c: New test.
>
>
> 4.5 patch:
>
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c    (revision 172714)
> +++ config/arm/arm.c    (working copy)
> @@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
>     rtx tmp1 = gen_reg_rtx (mode);
>     rtx tmp2 = gen_reg_rtx (mode);
>
> -  emit_insn (intfn (tmp1, op1, tmp2, op2));
> +  emit_insn (intfn (tmp1, op1, op2, tmp2));
>
>     emit_move_insn (mem, tmp1);
>     mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 172714)
> +++ config/arm/neon.md  (working copy)
> @@ -3895,13 +3895,14 @@
>
>   (define_insn "neon_vtrn<mode>_internal"
>     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> -                    UNSPEC_VTRN1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VTRN2))]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> +                     UNSPEC_VTRN1))
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +         (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VTRN2))]
>     "TARGET_NEON"
> -  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>     [(set (attr "neon_type")
>         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                       (const_string "neon_bp_simple")
> @@ -3921,13 +3922,14 @@
>
>   (define_insn "neon_vzip<mode>_internal"
>     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> -                    UNSPEC_VZIP1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VZIP2))]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> +                     UNSPEC_VZIP1))
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VZIP2))]
>     "TARGET_NEON"
> -  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>     [(set (attr "neon_type")
>         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                       (const_string "neon_bp_simple")
> @@ -3947,13 +3949,14 @@
>
>   (define_insn "neon_vuzp<mode>_internal"
>     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
>                        UNSPEC_VUZP1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VUZP2))]
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VUZP2))]
>     "TARGET_NEON"
> -  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>     [(set (attr "neon_type")
>         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                       (const_string "neon_bp_simple")
> Index: testsuite/gcc.target/arm/pr48252.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr48252.c  (revision 0)
> +++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
> @@ -0,0 +1,32 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "arm_neon.h"
> +#include <stdlib.h>
> +
> +int main(void)
> +{
> +    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
> +    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
> +    uint8x8x2_t vd1, vd2;
> +    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
> +    int i;
> +
> +    vd1 = vzip_u8(v1, vdup_n_u8(0));
> +    vd2 = vzip_u8(v2, vdup_n_u8(0));
> +
> +    vst1_u8(d1.buf, vd1.val[0]);
> +    vst1_u8(d2.buf, vd1.val[1]);
> +    vst1_u8(d3.buf, vd2.val[0]);
> +    vst1_u8(d4.buf, vd2.val[1]);
> +
> +    for (i = 0; i < 8; i++)
> +      if ((i % 2 == 0 && d4.buf[i] != 2)
> +          || (i % 2 == 1 && d4.buf[i] != 0))
> +         abort ();
> +
> +    return 0;
> +}
> +
>
>
> 4.6 patch:
>
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c    (revision 172810)
> +++ config/arm/arm.c    (working copy)
> @@ -19564,7 +19564,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
>     rtx tmp1 = gen_reg_rtx (mode);
>     rtx tmp2 = gen_reg_rtx (mode);
>
> -  emit_insn (intfn (tmp1, op1, tmp2, op2));
> +  emit_insn (intfn (tmp1, op1, op2, tmp2));
>
>     emit_move_insn (mem, tmp1);
>     mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 172810)
> +++ config/arm/neon.md  (working copy)
> @@ -4079,13 +4079,14 @@
>
>   (define_insn "neon_vtrn<mode>_internal"
>     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> -                    UNSPEC_VTRN1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VTRN2))]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> +                     UNSPEC_VTRN1))
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +         (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VTRN2))]
>     "TARGET_NEON"
> -  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>     [(set (attr "neon_type")
>         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                       (const_string "neon_bp_simple")
> @@ -4105,13 +4106,14 @@
>
>   (define_insn "neon_vzip<mode>_internal"
>     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> -                    UNSPEC_VZIP1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VZIP2))]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> +                     UNSPEC_VZIP1))
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VZIP2))]
>     "TARGET_NEON"
> -  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>     [(set (attr "neon_type")
>         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                       (const_string "neon_bp_simple")
> @@ -4131,13 +4133,14 @@
>
>   (define_insn "neon_vuzp<mode>_internal"
>     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
>                        UNSPEC_VUZP1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VUZP2))]
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VUZP2))]
>     "TARGET_NEON"
> -  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>     [(set (attr "neon_type")
>         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                       (const_string "neon_bp_simple")
> Index: testsuite/gcc.target/arm/pr48252.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr48252.c  (revision 0)
> +++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
> @@ -0,0 +1,32 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "arm_neon.h"
> +#include <stdlib.h>
> +
> +int main(void)
> +{
> +    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
> +    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
> +    uint8x8x2_t vd1, vd2;
> +    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
> +    int i;
> +
> +    vd1 = vzip_u8(v1, vdup_n_u8(0));
> +    vd2 = vzip_u8(v2, vdup_n_u8(0));
> +
> +    vst1_u8(d1.buf, vd1.val[0]);
> +    vst1_u8(d2.buf, vd1.val[1]);
> +    vst1_u8(d3.buf, vd2.val[0]);
> +    vst1_u8(d4.buf, vd2.val[1]);
> +
> +    for (i = 0; i < 8; i++)
> +      if ((i % 2 == 0 && d4.buf[i] != 2)
> +          || (i % 2 == 1 && d4.buf[i] != 0))
> +         abort ();
> +
> +    return 0;
> +}
> +
>
>
>>
>> cheers
>> Ramana
>>
>>>
>>> Thanks,
>>> Ira
>>>
>>> ChangeLog:
>>>
>>> 2011-04-07  Ulrich Weigand<ulrich.weigand@linaro.org>
>>>                     Ira Rosen<ira.rosen@linaro.org>
>>>
>>>        PR target/48252
>>>        * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
>>>        to match neon_vzip/vuzp/vtrn_internal.
>>>        * config/arm/neon.md (neon_vtrn<mode>_internal): Make both
>>>        outputs explicitly dependent on both inputs.
>>>        (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
>>>
>>> testsuite/Changelog:
>>>
>>>        PR target/48252
>>>        * gcc.target/arm/pr48252.c: New test.
>>
>
Richard Biener May 3, 2011, 2:57 p.m. UTC | #2
On Tue, 3 May 2011, Ramana Radhakrishnan wrote:

> 
> > > I have no objections to this going into 4.5 and 4.6 since it corrects
> > > the implementation of the neon intrinsics but please check with the
> > > release managers.
> > 
> > OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi?
> 
> 
> Ok. Please allow 24 hours for an RM (cc'd) to comment since this is
> technically not a regression but is a long term bug that needs fixing.

As it's purely target local we defer to the target maintainers.

Richard.

> cheers
> Ramana
> 
> > 
> > Thanks,
> > Ira
> > 
> > 4.5 and 4.6 ChangeLog:
> > 
> > 	Backport from mainline:
> > 	2011-04-18  Ulrich Weigand<ulrich.weigand@linaro.org>
> >                    Ira Rosen<ira.rosen@linaro.org>
> > 
> > 	PR target/48252
> > 	* config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
> > 	to match neon_vzip/vuzp/vtrn_internal.
> > 	* config/arm/neon.md (neon_vtrn<mode>_internal): Make both
> > 	outputs explicitly dependent on both inputs.
> > 	(neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
> > 
> > testsuite/Changelog:
> > 
> > 	Backport from mainline:
> > 	2011-04-18  Ulrich Weigand<ulrich.weigand@linaro.org>
> >                    Ira Rosen<ira.rosen@linaro.org>
> > 
> > 	PR target/48252
> > 	* gcc.target/arm/pr48252.c: New test.
> > 
> > 
> > 4.5 patch:
> > 
> > Index: config/arm/arm.c
> > ===================================================================
> > --- config/arm/arm.c    (revision 172714)
> > +++ config/arm/arm.c    (working copy)
> > @@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
> >     rtx tmp1 = gen_reg_rtx (mode);
> >     rtx tmp2 = gen_reg_rtx (mode);
> > 
> > -  emit_insn (intfn (tmp1, op1, tmp2, op2));
> > +  emit_insn (intfn (tmp1, op1, op2, tmp2));
> > 
> >     emit_move_insn (mem, tmp1);
> >     mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
> > Index: config/arm/neon.md
> > ===================================================================
> > --- config/arm/neon.md  (revision 172714)
> > +++ config/arm/neon.md  (working copy)
> > @@ -3895,13 +3895,14 @@
> > 
> >   (define_insn "neon_vtrn<mode>_internal"
> >     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> > -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> > -                    UNSPEC_VTRN1))
> > -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> > -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> > -                    UNSPEC_VTRN2))]
> > +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> > +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> > +                     UNSPEC_VTRN1))
> > +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> > +         (unspec:VDQW [(match_dup 1) (match_dup 2)]
> > +                     UNSPEC_VTRN2))]
> >     "TARGET_NEON"
> > -  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> > +  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
> >     [(set (attr "neon_type")
> >         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
> >                       (const_string "neon_bp_simple")
> > @@ -3921,13 +3922,14 @@
> > 
> >   (define_insn "neon_vzip<mode>_internal"
> >     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> > -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> > -                    UNSPEC_VZIP1))
> > -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> > -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> > -                    UNSPEC_VZIP2))]
> > +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> > +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> > +                     UNSPEC_VZIP1))
> > +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> > +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> > +                     UNSPEC_VZIP2))]
> >     "TARGET_NEON"
> > -  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> > +  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
> >     [(set (attr "neon_type")
> >         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
> >                       (const_string "neon_bp_simple")
> > @@ -3947,13 +3949,14 @@
> > 
> >   (define_insn "neon_vuzp<mode>_internal"
> >     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> > -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> > +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> > +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> >                        UNSPEC_VUZP1))
> > -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> > -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> > -                    UNSPEC_VUZP2))]
> > +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> > +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> > +                     UNSPEC_VUZP2))]
> >     "TARGET_NEON"
> > -  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> > +  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
> >     [(set (attr "neon_type")
> >         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
> >                       (const_string "neon_bp_simple")
> > Index: testsuite/gcc.target/arm/pr48252.c
> > ===================================================================
> > --- testsuite/gcc.target/arm/pr48252.c  (revision 0)
> > +++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
> > @@ -0,0 +1,32 @@
> > +/* { dg-do run } */
> > +/* { dg-require-effective-target arm_neon_hw } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-add-options arm_neon } */
> > +
> > +#include "arm_neon.h"
> > +#include <stdlib.h>
> > +
> > +int main(void)
> > +{
> > +    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
> > +    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
> > +    uint8x8x2_t vd1, vd2;
> > +    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
> > +    int i;
> > +
> > +    vd1 = vzip_u8(v1, vdup_n_u8(0));
> > +    vd2 = vzip_u8(v2, vdup_n_u8(0));
> > +
> > +    vst1_u8(d1.buf, vd1.val[0]);
> > +    vst1_u8(d2.buf, vd1.val[1]);
> > +    vst1_u8(d3.buf, vd2.val[0]);
> > +    vst1_u8(d4.buf, vd2.val[1]);
> > +
> > +    for (i = 0; i < 8; i++)
> > +      if ((i % 2 == 0 && d4.buf[i] != 2)
> > +          || (i % 2 == 1 && d4.buf[i] != 0))
> > +         abort ();
> > +
> > +    return 0;
> > +}
> > +
> > 
> > 
> > 4.6 patch:
> > 
> > Index: config/arm/arm.c
> > ===================================================================
> > --- config/arm/arm.c    (revision 172810)
> > +++ config/arm/arm.c    (working copy)
> > @@ -19564,7 +19564,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
> >     rtx tmp1 = gen_reg_rtx (mode);
> >     rtx tmp2 = gen_reg_rtx (mode);
> > 
> > -  emit_insn (intfn (tmp1, op1, tmp2, op2));
> > +  emit_insn (intfn (tmp1, op1, op2, tmp2));
> > 
> >     emit_move_insn (mem, tmp1);
> >     mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
> > Index: config/arm/neon.md
> > ===================================================================
> > --- config/arm/neon.md  (revision 172810)
> > +++ config/arm/neon.md  (working copy)
> > @@ -4079,13 +4079,14 @@
> > 
> >   (define_insn "neon_vtrn<mode>_internal"
> >     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> > -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> > -                    UNSPEC_VTRN1))
> > -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> > -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> > -                    UNSPEC_VTRN2))]
> > +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> > +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> > +                     UNSPEC_VTRN1))
> > +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> > +         (unspec:VDQW [(match_dup 1) (match_dup 2)]
> > +                     UNSPEC_VTRN2))]
> >     "TARGET_NEON"
> > -  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> > +  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
> >     [(set (attr "neon_type")
> >         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
> >                       (const_string "neon_bp_simple")
> > @@ -4105,13 +4106,14 @@
> > 
> >   (define_insn "neon_vzip<mode>_internal"
> >     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> > -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> > -                    UNSPEC_VZIP1))
> > -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> > -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> > -                    UNSPEC_VZIP2))]
> > +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> > +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> > +                     UNSPEC_VZIP1))
> > +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> > +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> > +                     UNSPEC_VZIP2))]
> >     "TARGET_NEON"
> > -  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> > +  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
> >     [(set (attr "neon_type")
> >         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
> >                       (const_string "neon_bp_simple")
> > @@ -4131,13 +4133,14 @@
> > 
> >   (define_insn "neon_vuzp<mode>_internal"
> >     [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> > -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> > +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> > +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> >                        UNSPEC_VUZP1))
> > -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> > -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> > -                    UNSPEC_VUZP2))]
> > +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> > +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> > +                     UNSPEC_VUZP2))]
> >     "TARGET_NEON"
> > -  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> > +  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
> >     [(set (attr "neon_type")
> >         (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
> >                       (const_string "neon_bp_simple")
> > Index: testsuite/gcc.target/arm/pr48252.c
> > ===================================================================
> > --- testsuite/gcc.target/arm/pr48252.c  (revision 0)
> > +++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
> > @@ -0,0 +1,32 @@
> > +/* { dg-do run } */
> > +/* { dg-require-effective-target arm_neon_hw } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-add-options arm_neon } */
> > +
> > +#include "arm_neon.h"
> > +#include <stdlib.h>
> > +
> > +int main(void)
> > +{
> > +    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
> > +    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
> > +    uint8x8x2_t vd1, vd2;
> > +    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
> > +    int i;
> > +
> > +    vd1 = vzip_u8(v1, vdup_n_u8(0));
> > +    vd2 = vzip_u8(v2, vdup_n_u8(0));
> > +
> > +    vst1_u8(d1.buf, vd1.val[0]);
> > +    vst1_u8(d2.buf, vd1.val[1]);
> > +    vst1_u8(d3.buf, vd2.val[0]);
> > +    vst1_u8(d4.buf, vd2.val[1]);
> > +
> > +    for (i = 0; i < 8; i++)
> > +      if ((i % 2 == 0 && d4.buf[i] != 2)
> > +          || (i % 2 == 1 && d4.buf[i] != 0))
> > +         abort ();
> > +
> > +    return 0;
> > +}
> > +
> > 
> > 
> > > 
> > > cheers
> > > Ramana
> > > 
> > > > 
> > > > Thanks,
> > > > Ira
> > > > 
> > > > ChangeLog:
> > > > 
> > > > 2011-04-07  Ulrich Weigand<ulrich.weigand@linaro.org>
> > > >                     Ira Rosen<ira.rosen@linaro.org>
> > > > 
> > > >        PR target/48252
> > > >        * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
> > > >        to match neon_vzip/vuzp/vtrn_internal.
> > > >        * config/arm/neon.md (neon_vtrn<mode>_internal): Make both
> > > >        outputs explicitly dependent on both inputs.
> > > >        (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
> > > > 
> > > > testsuite/Changelog:
> > > > 
> > > >        PR target/48252
> > > >        * gcc.target/arm/pr48252.c: New test.
> > > 
> > 
> 
>
Richard Earnshaw May 6, 2011, 9:56 a.m. UTC | #3
On Sun, 2011-05-01 at 10:30 +0300, Ira Rosen wrote:
> 
> Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> wrote on 07/04/2011
> 03:16:44 PM:
> 
> >
> > On 07/04/11 08:42, Ira Rosen wrote:
> > > Hi,
> > >
> > > This patch makes both outputs of neon_vzip/vuzp/vtrn_internal
> > > explicitly dependent on both inputs, preventing incorrect
> > > optimization:
> > > for
> > > (a,b)<- vzip (c,d)
> > > and
> > > (e,f)<- vzip (g,d)
> > > CSE decides that b==f, since b and f depend only on d.
> > >
> > > Tested on arm-linux-gnueabi. OK for trunk?
> >
> > This is OK for trunk.
> >
> > > OK for 4.6 after testing?
> >

I don't understand how it has happened, but the 4.6 patch that has been
committed is corrupt (the patch submitted here looks OK).

Please remember that it is essential to test release branches before
commits are made.

R.

> > I have no objections to this going into 4.5 and 4.6 since it corrects
> > the implementation of the neon intrinsics but please check with the
> > release managers.
> 
> OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi?
> 
> Thanks,
> Ira
> 
> 4.5 and 4.6 ChangeLog:
> 
> 	Backport from mainline:
> 	2011-04-18  Ulrich Weigand  <ulrich.weigand@linaro.org>
>                   Ira Rosen  <ira.rosen@linaro.org>
> 
> 	PR target/48252
> 	* config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
> 	to match neon_vzip/vuzp/vtrn_internal.
> 	* config/arm/neon.md (neon_vtrn<mode>_internal): Make both
> 	outputs explicitly dependent on both inputs.
> 	(neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
> 
> testsuite/Changelog:
> 
> 	Backport from mainline:
> 	2011-04-18  Ulrich Weigand  <ulrich.weigand@linaro.org>
>                   Ira Rosen  <ira.rosen@linaro.org>
> 
> 	PR target/48252
> 	* gcc.target/arm/pr48252.c: New test.
> 
> 
> 4.5 patch:
> 
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c    (revision 172714)
> +++ config/arm/arm.c    (working copy)
> @@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
>    rtx tmp1 = gen_reg_rtx (mode);
>    rtx tmp2 = gen_reg_rtx (mode);
> 
> -  emit_insn (intfn (tmp1, op1, tmp2, op2));
> +  emit_insn (intfn (tmp1, op1, op2, tmp2));
> 
>    emit_move_insn (mem, tmp1);
>    mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 172714)
> +++ config/arm/neon.md  (working copy)
> @@ -3895,13 +3895,14 @@
> 
>  (define_insn "neon_vtrn<mode>_internal"
>    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> -                    UNSPEC_VTRN1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VTRN2))]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> +                     UNSPEC_VTRN1))
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +         (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VTRN2))]
>    "TARGET_NEON"
> -  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>    [(set (attr "neon_type")
>        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                      (const_string "neon_bp_simple")
> @@ -3921,13 +3922,14 @@
> 
>  (define_insn "neon_vzip<mode>_internal"
>    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> -                    UNSPEC_VZIP1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VZIP2))]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> +                     UNSPEC_VZIP1))
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VZIP2))]
>    "TARGET_NEON"
> -  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>    [(set (attr "neon_type")
>        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                      (const_string "neon_bp_simple")
> @@ -3947,13 +3949,14 @@
> 
>  (define_insn "neon_vuzp<mode>_internal"
>    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
>                       UNSPEC_VUZP1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VUZP2))]
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VUZP2))]
>    "TARGET_NEON"
> -  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>    [(set (attr "neon_type")
>        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                      (const_string "neon_bp_simple")
> Index: testsuite/gcc.target/arm/pr48252.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr48252.c  (revision 0)
> +++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
> @@ -0,0 +1,32 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "arm_neon.h"
> +#include <stdlib.h>
> +
> +int main(void)
> +{
> +    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
> +    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
> +    uint8x8x2_t vd1, vd2;
> +    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
> +    int i;
> +
> +    vd1 = vzip_u8(v1, vdup_n_u8(0));
> +    vd2 = vzip_u8(v2, vdup_n_u8(0));
> +
> +    vst1_u8(d1.buf, vd1.val[0]);
> +    vst1_u8(d2.buf, vd1.val[1]);
> +    vst1_u8(d3.buf, vd2.val[0]);
> +    vst1_u8(d4.buf, vd2.val[1]);
> +
> +    for (i = 0; i < 8; i++)
> +      if ((i % 2 == 0 && d4.buf[i] != 2)
> +          || (i % 2 == 1 && d4.buf[i] != 0))
> +         abort ();
> +
> +    return 0;
> +}
> +
> 
> 
> 4.6 patch:
> 
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c    (revision 172810)
> +++ config/arm/arm.c    (working copy)
> @@ -19564,7 +19564,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
>    rtx tmp1 = gen_reg_rtx (mode);
>    rtx tmp2 = gen_reg_rtx (mode);
> 
> -  emit_insn (intfn (tmp1, op1, tmp2, op2));
> +  emit_insn (intfn (tmp1, op1, op2, tmp2));
> 
>    emit_move_insn (mem, tmp1);
>    mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 172810)
> +++ config/arm/neon.md  (working copy)
> @@ -4079,13 +4079,14 @@
> 
>  (define_insn "neon_vtrn<mode>_internal"
>    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> -                    UNSPEC_VTRN1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VTRN2))]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> +                     UNSPEC_VTRN1))
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +         (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VTRN2))]
>    "TARGET_NEON"
> -  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>    [(set (attr "neon_type")
>        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                      (const_string "neon_bp_simple")
> @@ -4105,13 +4106,14 @@
> 
>  (define_insn "neon_vzip<mode>_internal"
>    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> -                    UNSPEC_VZIP1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VZIP2))]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
> +                     UNSPEC_VZIP1))
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VZIP2))]
>    "TARGET_NEON"
> -  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>    [(set (attr "neon_type")
>        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                      (const_string "neon_bp_simple")
> @@ -4131,13 +4133,14 @@
> 
>  (define_insn "neon_vuzp<mode>_internal"
>    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
> -       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
> +        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
> +                      (match_operand:VDQW 2 "s_register_operand" "w")]
>                       UNSPEC_VUZP1))
> -   (set (match_operand:VDQW 2 "s_register_operand" "=w")
> -        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
> -                    UNSPEC_VUZP2))]
> +   (set (match_operand:VDQW 3 "s_register_operand" "=2")
> +        (unspec:VDQW [(match_dup 1) (match_dup 2)]
> +                     UNSPEC_VUZP2))]
>    "TARGET_NEON"
> -  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
> +  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
>    [(set (attr "neon_type")
>        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
>                      (const_string "neon_bp_simple")
> Index: testsuite/gcc.target/arm/pr48252.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr48252.c  (revision 0)
> +++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
> @@ -0,0 +1,32 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O2" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "arm_neon.h"
> +#include <stdlib.h>
> +
> +int main(void)
> +{
> +    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
> +    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
> +    uint8x8x2_t vd1, vd2;
> +    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
> +    int i;
> +
> +    vd1 = vzip_u8(v1, vdup_n_u8(0));
> +    vd2 = vzip_u8(v2, vdup_n_u8(0));
> +
> +    vst1_u8(d1.buf, vd1.val[0]);
> +    vst1_u8(d2.buf, vd1.val[1]);
> +    vst1_u8(d3.buf, vd2.val[0]);
> +    vst1_u8(d4.buf, vd2.val[1]);
> +
> +    for (i = 0; i < 8; i++)
> +      if ((i % 2 == 0 && d4.buf[i] != 2)
> +          || (i % 2 == 1 && d4.buf[i] != 0))
> +         abort ();
> +
> +    return 0;
> +}
> +
> 
> 
> >
> > cheers
> > Ramana
> >
> > >
> > > Thanks,
> > > Ira
> > >
> > > ChangeLog:
> > >
> > > 2011-04-07  Ulrich Weigand  <ulrich.weigand@linaro.org>
> > >                    Ira Rosen  <ira.rosen@linaro.org>
> > >
> > >       PR target/48252
> > >       * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
> > >       to match neon_vzip/vuzp/vtrn_internal.
> > >       * config/arm/neon.md (neon_vtrn<mode>_internal): Make both
> > >       outputs explicitly dependent on both inputs.
> > >       (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
> > >
> > > testsuite/ChangeLog:
> > >
> > >       PR target/48252
> > >       * gcc.target/arm/pr48252.c: New test.
> >
>
diff mbox

Patch

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 172714)
+++ config/arm/arm.c    (working copy)
@@ -18237,7 +18237,7 @@  neon_emit_pair_result_insn (enum machine_mode mode
   rtx tmp1 = gen_reg_rtx (mode);
   rtx tmp2 = gen_reg_rtx (mode);

-  emit_insn (intfn (tmp1, op1, tmp2, op2));
+  emit_insn (intfn (tmp1, op1, op2, tmp2));

   emit_move_insn (mem, tmp1);
   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md  (revision 172714)
+++ config/arm/neon.md  (working copy)
@@ -3895,13 +3895,14 @@ 

 (define_insn "neon_vtrn<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VTRN2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VTRN1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+         (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VTRN2))]
   "TARGET_NEON"
-  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -3921,13 +3922,14 @@ 

 (define_insn "neon_vzip<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VZIP2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VZIP1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VZIP2))]
   "TARGET_NEON"
-  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -3947,13 +3949,14 @@ 

 (define_insn "neon_vuzp<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
                      UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VUZP2))]
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VUZP2))]
   "TARGET_NEON"
-  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
Index: testsuite/gcc.target/arm/pr48252.c
===================================================================
--- testsuite/gcc.target/arm/pr48252.c  (revision 0)
+++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
@@ -0,0 +1,32 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main(void)
+{
+    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
+    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
+    uint8x8x2_t vd1, vd2;
+    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
+    int i;
+
+    vd1 = vzip_u8(v1, vdup_n_u8(0));
+    vd2 = vzip_u8(v2, vdup_n_u8(0));
+
+    vst1_u8(d1.buf, vd1.val[0]);
+    vst1_u8(d2.buf, vd1.val[1]);
+    vst1_u8(d3.buf, vd2.val[0]);
+    vst1_u8(d4.buf, vd2.val[1]);
+
+    for (i = 0; i < 8; i++)
+      if ((i % 2 == 0 && d4.buf[i] != 2)
+          || (i % 2 == 1 && d4.buf[i] != 0))
+         abort ();
+
+    return 0;
+}
+


4.6 patch:

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 172810)
+++ config/arm/arm.c    (working copy)
@@ -19564,7 +19564,7 @@  neon_emit_pair_result_insn (enum machine_mode mode
   rtx tmp1 = gen_reg_rtx (mode);
   rtx tmp2 = gen_reg_rtx (mode);

-  emit_insn (intfn (tmp1, op1, tmp2, op2));
+  emit_insn (intfn (tmp1, op1, op2, tmp2));

   emit_move_insn (mem, tmp1);
   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md  (revision 172810)
+++ config/arm/neon.md  (working copy)
@@ -4079,13 +4079,14 @@ 

 (define_insn "neon_vtrn<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VTRN2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VTRN1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+         (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VTRN2))]
   "TARGET_NEON"
-  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -4105,13 +4106,14 @@ 

 (define_insn "neon_vzip<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VZIP2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VZIP1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VZIP2))]
   "TARGET_NEON"
-  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -4131,13 +4133,14 @@ 

 (define_insn "neon_vuzp<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
                      UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VUZP2))]
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VUZP2))]
   "TARGET_NEON"
-  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
Index: testsuite/gcc.target/arm/pr48252.c
===================================================================
--- testsuite/gcc.target/arm/pr48252.c  (revision 0)
+++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
@@ -0,0 +1,32 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main(void)
+{
+    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
+    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
+    uint8x8x2_t vd1, vd2;
+    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
+    int i;
+
+    vd1 = vzip_u8(v1, vdup_n_u8(0));
+    vd2 = vzip_u8(v2, vdup_n_u8(0));
+
+    vst1_u8(d1.buf, vd1.val[0]);
+    vst1_u8(d2.buf, vd1.val[1]);
+    vst1_u8(d3.buf, vd2.val[0]);
+    vst1_u8(d4.buf, vd2.val[1]);
+
+    for (i = 0; i < 8; i++)
+      if ((i % 2 == 0 && d4.buf[i] != 2)
+          || (i % 2 == 1 && d4.buf[i] != 0))
+         abort ();
+
+    return 0;
+}
+