diff mbox

[ARM] Fix neon-reload-class.c testcase

Message ID 55715776.30707@linaro.org
State New
Headers show

Commit Message

Kugan Vivekanandarajah June 5, 2015, 8:01 a.m. UTC
Hi All,

After my patch for cprop was committed
(https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=224048),
neon-reload-class.c started failing in ARM mode. This is more or less
expected behaviour, so I looked at the original patch related to
neon-reload-class.c, which is:
https://gcc.gnu.org/ml/gcc-patches/2011-04/msg01968.html

As I understand it, the test was added to make sure that these constants
are not emitted into the constant pool. From that point of view, it looks
like we should remove the check for movw/movt.

I also checked the asm generated before the constant split for arm
(https://gcc.gnu.org/viewcvs?rev=223235&root=gcc&view=rev) just to be
sure that this is not due to the earlier patch. I did this because
the constant generation has changed compared to what was shown in
https://gcc.gnu.org/ml/gcc-patches/2011-04/msg01968.html.


I am also attaching the three versions of the neon-reload-class.s.

1. neon-reload-class_before_arm_split.s - before r223235
2. neon-reload-class_before_cprop.s - before r224048
3. neon-reload-class_new.s - after r224048

Is this OK for trunk?

Thanks,
Kugan

gcc/testsuite/ChangeLog:

2015-06-05  Kugan Vivekanandarajah  <kuganv@linaro.org>

	* gcc.target/arm/neon-reload-class.c: Remove movw and movt.

@ ----------------------------------------------------------------------
@ Compiler-generated ARM-mode (armv7-a + NEON) assembly for
@ neon-reload-class.c.  The .ident at the end reads 20150602.
@ NOTE(review): this is the newest .ident date of the three listings and
@ the only listing that uses movt, so it is presumably the attachment
@ "neon-reload-class_new.s" - confirm.
@
@ C prototype (from the testcase patch hunk header):
@   _op_blend_p_caa_dp(unsigned *s, unsigned* e, unsigned *d, unsigned c)
@ Register use on entry - presumably AAPCS order, TODO confirm:
@   r0 = s, r1 = e, r2 = d, r3 = c
@ Saved/restored: r4-r7 and lr are pushed; every post-push return pops
@ them with the saved lr going straight into pc.
@ Both 32-bit masks are built with mov + movt immediates; nothing is
@ loaded from a literal pool (there is no .word in this listing).
@ ----------------------------------------------------------------------
.arch armv7-a
	.fpu neon
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
	.eabi_attribute 18, 4
	.arm
	.syntax divided
	.file	"neon-reload-class.c"
	.text
	.align	2
	.global	_op_blend_p_caa_dp
	.type	_op_blend_p_caa_dp, %function
_op_blend_p_caa_dp:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	cmp	r2, r1
	bxcs	lr	@ r2 >= r1 (unsigned): nothing to do, return before touching the stack
	stmfd	sp!, {r4, r5, r6, r7, lr}
	add	r6, r1, #3
	add	lr, r2, #4	@ lr = r2 + 4: address of the next output word
	add	r4, r2, #16
	sub	r6, r6, lr	@ r6 = r1 - r2 - 1 (byte distance minus one)
	add	r7, r0, #16
	mov	r5, r4	@ r5 = r2 + 16
	@ Overlap test: carry ends up set when r2 >= r0 + 16, or otherwise
	@ when r0 >= r2 + 16, i.e. the first 16 bytes of input and output
	@ do not overlap.
	cmp	r2, r7
	cmpcc	r0, r5
	mov	ip, r6, lsr #2	@ ip = word count - 1
	mov	r4, r7	@ overwritten by the movcs/movcc pair below
	add	r5, ip, #1	@ r5 = number of words to process
	movcs	r4, #1
	movcc	r4, #0
	cmp	r5, #4
	movls	r4, #0	@ 4 words or fewer: force the scalar path
	andhi	r4, r4, #1
	cmp	r4, #0
	beq	.L3	@ overlap or short run -> scalar loop at .L3/.L10
	sub	ip, ip, #3
	cmp	r6, #11	@ flags from this compare are consumed by the bls below
	mov	ip, ip, lsr #2
	add	ip, ip, #1	@ ip = number of 4-word vector iterations
	mov	r7, ip, asl #2	@ r7 = words covered by the vector loop
	bls	.L4	@ r6 <= 11 (unsigned): skip the vector loop
	vdup.32	q10, r3	@ broadcast r3 to all four 32-bit lanes
	mov	r6, r0	@ r6 = vector read pointer
	mov	r4, r2	@ r4 = vector write pointer
	mov	lr, #0	@ lr = vector iteration counter
	@ Vector loop: four words per iteration.  The cmp below sets the flags
	@ tested by the closing bhi; no instruction in between uses an
	@ S-suffixed (flag-setting) form.
.L5:
	vld1.32	{q8}, [r6]
	add	lr, lr, #1
	cmp	ip, lr
	add	r6, r6, #16
	vmov	q9, q8  @ v4si
	vshr.u32	q8, q8, #8
	vand.i16	q9, #255	@ per 16-bit lane keep the low byte (0x00ff00ff per word)
	vand.i16	q8, #255
	vmul.i32	q9, q9, q10
	vmul.i32	q8, q8, q10
	vshr.u32	q9, q9, #8
	vand.i16	q9, #255
	vand.i16	q8, #65280	@ per 16-bit lane keep the high byte (0xff00ff00 per word)
	vadd.i32	q8, q8, q9
	vst1.32	{q8}, [r4]
	add	r4, r4, #16
	bhi	.L5
	cmp	r5, r7	@ were all words handled by the vector loop?
	mov	r7, r7, asl #2	@ words -> bytes
	add	r0, r0, r7
	add	r2, r2, r7
	ldmeqfd	sp!, {r4, r5, r6, r7, pc}	@ yes: restore and return
	add	lr, r2, #4	@ re-establish lr = next output address for the tail
	@ Scalar tail after (or instead of) the vector loop.
.L4:
	mov	r5, #255
	mov	r6, #65280
	sub	r0, r0, #4	@ bias r0 so the pre-indexed load below starts at the current word
	movt	r5, 255	@ r5 = 0x00ff00ff
	movt	r6, 65280	@ r6 = 0xff00ff00
.L7:
	ldr	ip, [r0, #4]!	@ advance r0 by 4, then load the input word
	cmp	r1, lr	@ flags are consumed by ldmlsfd below
	and	r4, ip, r5	@ r4 = word & 0x00ff00ff
	and	ip, r5, ip, lsr #8	@ ip = (word >> 8) & 0x00ff00ff
	mul	r4, r3, r4
	mul	ip, r3, ip
	and	r4, r5, r4, lsr #8	@ r4 = (r4 >> 8) & 0x00ff00ff
	and	ip, ip, r6	@ ip &= 0xff00ff00
	add	ip, ip, r4
	str	ip, [r2]
	mov	r2, lr	@ step the output pointer
	ldmlsfd	sp!, {r4, r5, r6, r7, pc}	@ r1 <= lr (unsigned): done, restore and return
	add	lr, lr, #4
	b	.L7
	@ Scalar-only path, taken when the vector path was rejected above.
	@ Same instruction sequence as .L4/.L7.
.L3:
	mov	r5, #255
	mov	r6, #65280
	sub	r0, r0, #4	@ bias r0 for the pre-indexed load
	movt	r5, 255	@ r5 = 0x00ff00ff
	movt	r6, 65280	@ r6 = 0xff00ff00
.L10:
	ldr	ip, [r0, #4]!	@ advance r0 by 4, then load the input word
	cmp	r1, lr	@ flags are consumed by ldmlsfd below
	and	r4, ip, r5	@ r4 = word & 0x00ff00ff
	and	ip, r5, ip, lsr #8	@ ip = (word >> 8) & 0x00ff00ff
	mul	r4, r3, r4
	mul	ip, r3, ip
	and	r4, r5, r4, lsr #8	@ r4 = (r4 >> 8) & 0x00ff00ff
	and	ip, ip, r6	@ ip &= 0xff00ff00
	add	ip, ip, r4
	str	ip, [r2]
	mov	r2, lr	@ step the output pointer
	ldmlsfd	sp!, {r4, r5, r6, r7, pc}	@ r1 <= lr (unsigned): done, restore and return
	add	lr, lr, #4
	b	.L10
	.size	_op_blend_p_caa_dp, .-_op_blend_p_caa_dp
	.ident	"GCC: (GNU) 6.0.0 20150602 (experimental)"
	.section	.note.GNU-stack,"",%progbits
@ ----------------------------------------------------------------------
@ Compiler-generated ARM-mode (armv7-a + NEON) assembly for
@ neon-reload-class.c.  The .ident at the end reads 20150528.
@ NOTE(review): by .ident date this is presumably the attachment
@ "neon-reload-class_before_cprop.s" - confirm.
@
@ C prototype (from the testcase patch hunk header):
@   _op_blend_p_caa_dp(unsigned *s, unsigned* e, unsigned *d, unsigned c)
@ Register use on entry - presumably AAPCS order, TODO confirm:
@   r0 = s, r1 = e, r2 = d, r3 = c
@ Saved/restored: r4-r7 and lr are pushed; every post-push return pops
@ them with the saved lr going straight into pc.
@ The scalar loops apply the 0x00ff00ff / 0xff00ff00 masks with pairs of
@ bic immediates; there is no movw/movt and no .word in this listing.
@ ----------------------------------------------------------------------
.arch armv7-a
	.fpu neon
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
	.eabi_attribute 18, 4
	.arm
	.syntax divided
	.file	"neon-reload-class.c"
	.text
	.align	2
	.global	_op_blend_p_caa_dp
	.type	_op_blend_p_caa_dp, %function
_op_blend_p_caa_dp:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	cmp	r2, r1
	bxcs	lr	@ r2 >= r1 (unsigned): nothing to do, return before touching the stack
	stmfd	sp!, {r4, r5, r6, r7, lr}
	add	r6, r1, #3
	add	r4, r2, #4	@ r4 = r2 + 4: address of the next output word
	add	lr, r2, #16
	sub	r6, r6, r4	@ r6 = r1 - r2 - 1 (byte distance minus one)
	add	r7, r0, #16
	mov	r5, lr	@ r5 = r2 + 16
	@ Overlap test: carry ends up set when r2 >= r0 + 16, or otherwise
	@ when r0 >= r2 + 16, i.e. the first 16 bytes of input and output
	@ do not overlap.
	cmp	r2, r7
	cmpcc	r0, r5
	mov	ip, r6, lsr #2	@ ip = word count - 1
	mov	lr, r7	@ overwritten by the movcs/movcc pair below
	add	r5, ip, #1	@ r5 = number of words to process
	movcs	lr, #1
	movcc	lr, #0
	cmp	r5, #4
	movls	lr, #0	@ 4 words or fewer: force the scalar path
	andhi	lr, lr, #1
	cmp	lr, #0
	beq	.L3	@ overlap or short run -> scalar loop at .L3/.L10
	sub	ip, ip, #3
	cmp	r6, #11	@ flags from this compare are consumed by the bls below
	mov	ip, ip, lsr #2
	add	ip, ip, #1	@ ip = number of 4-word vector iterations
	mov	r7, ip, asl #2	@ r7 = words covered by the vector loop
	bls	.L4	@ r6 <= 11 (unsigned): skip the vector loop
	vdup.32	q10, r3	@ broadcast r3 to all four 32-bit lanes
	mov	r6, r0	@ r6 = vector read pointer
	mov	r4, r2	@ r4 = vector write pointer
	mov	lr, #0	@ lr = vector iteration counter
	@ Vector loop: four words per iteration.  The cmp below sets the flags
	@ tested by the closing bhi; no instruction in between uses an
	@ S-suffixed (flag-setting) form.
.L5:
	vld1.32	{q8}, [r6]
	add	lr, lr, #1
	cmp	ip, lr
	add	r6, r6, #16
	vmov	q9, q8  @ v4si
	vshr.u32	q8, q8, #8
	vand.i16	q9, #255	@ per 16-bit lane keep the low byte (0x00ff00ff per word)
	vand.i16	q8, #255
	vmul.i32	q9, q9, q10
	vmul.i32	q8, q8, q10
	vshr.u32	q9, q9, #8
	vand.i16	q9, #255
	vand.i16	q8, #65280	@ per 16-bit lane keep the high byte (0xff00ff00 per word)
	vadd.i32	q8, q8, q9
	vst1.32	{q8}, [r4]
	add	r4, r4, #16
	bhi	.L5
	cmp	r5, r7	@ were all words handled by the vector loop?
	mov	r7, r7, asl #2	@ words -> bytes
	add	r0, r0, r7
	add	r2, r2, r7
	ldmeqfd	sp!, {r4, r5, r6, r7, pc}	@ yes: restore and return
	add	r4, r2, #4	@ re-establish r4 = next output address for the tail
	@ Scalar tail after (or instead of) the vector loop.
.L4:
	sub	r0, r0, #4	@ bias r0 so the pre-indexed load below starts at the current word
.L7:
	ldr	ip, [r0, #4]!	@ advance r0 by 4, then load the input word
	cmp	r1, r4	@ flags are consumed by ldmlsfd below
	bic	r5, ip, #-16777216	@ clear bits 31:24
	bic	r5, r5, #65280	@ clear bits 15:8 -> r5 = word & 0x00ff00ff
	mov	ip, ip, lsr #8
	bic	ip, ip, #-16777216
	mul	r5, r3, r5
	bic	ip, ip, #65280	@ ip = (word >> 8) & 0x00ff00ff
	mul	lr, r3, ip	@ lr is a scratch register here; the caller's lr is on the stack
	mov	ip, r5, lsr #8
	bic	r5, ip, #-16777216
	bic	ip, lr, #16711680	@ clear bits 23:16
	bic	r5, r5, #65280	@ r5 = (first product >> 8) & 0x00ff00ff
	bic	ip, ip, #255	@ clear bits 7:0 -> ip = second product & 0xff00ff00
	add	ip, ip, r5
	str	ip, [r2]
	mov	r2, r4	@ step the output pointer
	ldmlsfd	sp!, {r4, r5, r6, r7, pc}	@ r1 <= r4 (unsigned): done, restore and return
	add	r4, r4, #4
	b	.L7
	@ Scalar-only path, taken when the vector path was rejected above.
	@ Same instruction sequence as .L4/.L7.
.L3:
	sub	r0, r0, #4	@ bias r0 for the pre-indexed load
.L10:
	ldr	ip, [r0, #4]!	@ advance r0 by 4, then load the input word
	cmp	r1, r4	@ flags are consumed by ldmlsfd below
	bic	r5, ip, #-16777216	@ clear bits 31:24
	bic	r5, r5, #65280	@ clear bits 15:8 -> r5 = word & 0x00ff00ff
	mov	ip, ip, lsr #8
	bic	ip, ip, #-16777216
	mul	r5, r3, r5
	bic	ip, ip, #65280	@ ip = (word >> 8) & 0x00ff00ff
	mul	lr, r3, ip	@ lr is a scratch register here; the caller's lr is on the stack
	mov	ip, r5, lsr #8
	bic	r5, ip, #-16777216
	bic	ip, lr, #16711680	@ clear bits 23:16
	bic	r5, r5, #65280	@ r5 = (first product >> 8) & 0x00ff00ff
	bic	ip, ip, #255	@ clear bits 7:0 -> ip = second product & 0xff00ff00
	add	ip, ip, r5
	str	ip, [r2]
	mov	r2, r4	@ step the output pointer
	ldmlsfd	sp!, {r4, r5, r6, r7, pc}	@ r1 <= r4 (unsigned): done, restore and return
	add	r4, r4, #4
	b	.L10
	.size	_op_blend_p_caa_dp, .-_op_blend_p_caa_dp
	.ident	"GCC: (GNU) 6.0.0 20150528 (experimental)"
	.section	.note.GNU-stack,"",%progbits
@ ----------------------------------------------------------------------
@ Compiler-generated ARM-mode (armv7-a + NEON) assembly for
@ neon-reload-class.c.  The .ident at the end reads 20150520.
@ NOTE(review): by .ident date this is presumably the attachment
@ "neon-reload-class_before_arm_split.s" - confirm.
@
@ C prototype (from the testcase patch hunk header):
@   _op_blend_p_caa_dp(unsigned *s, unsigned* e, unsigned *d, unsigned c)
@ Register use on entry - presumably AAPCS order, TODO confirm:
@   r0 = s, r1 = e, r2 = d, r3 = c
@ Saved/restored: r4-r7 and lr are pushed; every post-push return pops
@ them with the saved lr going straight into pc.
@ The scalar loops apply the 0x00ff00ff / 0xff00ff00 masks with pairs of
@ bic immediates; there is no movw/movt and no .word in this listing.
@ ----------------------------------------------------------------------
.arch armv7-a
	.fpu neon
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
	.eabi_attribute 18, 4
	.arm
	.syntax divided
	.file	"neon-reload-class.c"
	.text
	.align	2
	.global	_op_blend_p_caa_dp
	.type	_op_blend_p_caa_dp, %function
_op_blend_p_caa_dp:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	cmp	r2, r1
	bxcs	lr	@ r2 >= r1 (unsigned): nothing to do, return before touching the stack
	stmfd	sp!, {r4, r5, r6, r7, lr}
	add	r6, r1, #3
	add	r4, r2, #4	@ r4 = r2 + 4: address of the next output word
	add	lr, r2, #16
	sub	r6, r6, r4	@ r6 = r1 - r2 - 1 (byte distance minus one)
	add	r7, r0, #16
	mov	r5, lr	@ r5 = r2 + 16
	@ Overlap test: carry ends up set when r2 >= r0 + 16, or otherwise
	@ when r0 >= r2 + 16, i.e. the first 16 bytes of input and output
	@ do not overlap.
	cmp	r2, r7
	cmpcc	r0, r5
	mov	ip, r6, lsr #2	@ ip = word count - 1
	mov	lr, r7	@ overwritten by the movcs/movcc pair below
	add	r5, ip, #1	@ r5 = number of words to process
	movcs	lr, #1
	movcc	lr, #0
	cmp	r5, #4
	movls	lr, #0	@ 4 words or fewer: force the scalar path
	andhi	lr, lr, #1
	cmp	lr, #0
	beq	.L3	@ overlap or short run -> scalar loop at .L3/.L10
	sub	ip, ip, #3
	cmp	r6, #11	@ flags from this compare are consumed by the bls below
	mov	ip, ip, lsr #2
	add	ip, ip, #1	@ ip = number of 4-word vector iterations
	mov	r7, ip, asl #2	@ r7 = words covered by the vector loop
	bls	.L4	@ r6 <= 11 (unsigned): skip the vector loop
	vdup.32	q10, r3	@ broadcast r3 to all four 32-bit lanes
	mov	r6, r0	@ r6 = vector read pointer
	mov	r4, r2	@ r4 = vector write pointer
	mov	lr, #0	@ lr = vector iteration counter
	@ Vector loop: four words per iteration.  The cmp below sets the flags
	@ tested by the closing bhi; no instruction in between uses an
	@ S-suffixed (flag-setting) form.
.L5:
	vld1.32	{q8}, [r6]
	add	lr, lr, #1
	cmp	ip, lr
	add	r6, r6, #16
	vmov	q9, q8  @ v4si
	vshr.u32	q8, q8, #8
	vand.i16	q9, #255	@ per 16-bit lane keep the low byte (0x00ff00ff per word)
	vand.i16	q8, #255
	vmul.i32	q9, q9, q10
	vmul.i32	q8, q8, q10
	vshr.u32	q9, q9, #8
	vand.i16	q9, #255
	vand.i16	q8, #65280	@ per 16-bit lane keep the high byte (0xff00ff00 per word)
	vadd.i32	q8, q8, q9
	vst1.32	{q8}, [r4]
	add	r4, r4, #16
	bhi	.L5
	cmp	r5, r7	@ were all words handled by the vector loop?
	mov	r7, r7, asl #2	@ words -> bytes
	add	r0, r0, r7
	add	r2, r2, r7
	ldmeqfd	sp!, {r4, r5, r6, r7, pc}	@ yes: restore and return
	add	r4, r2, #4	@ re-establish r4 = next output address for the tail
	@ Scalar tail after (or instead of) the vector loop.
.L4:
	sub	r0, r0, #4	@ bias r0 so the pre-indexed load below starts at the current word
.L7:
	ldr	ip, [r0, #4]!	@ advance r0 by 4, then load the input word
	cmp	r1, r4	@ flags are consumed by ldmlsfd below
	bic	r5, ip, #-16777216	@ clear bits 31:24
	bic	r5, r5, #65280	@ clear bits 15:8 -> r5 = word & 0x00ff00ff
	mov	ip, ip, lsr #8
	bic	ip, ip, #-16777216
	mul	r5, r3, r5
	bic	ip, ip, #65280	@ ip = (word >> 8) & 0x00ff00ff
	mul	lr, r3, ip	@ lr is a scratch register here; the caller's lr is on the stack
	mov	ip, r5, lsr #8
	bic	r5, ip, #-16777216
	bic	ip, lr, #16711680	@ clear bits 23:16
	bic	r5, r5, #65280	@ r5 = (first product >> 8) & 0x00ff00ff
	bic	ip, ip, #255	@ clear bits 7:0 -> ip = second product & 0xff00ff00
	add	ip, ip, r5
	str	ip, [r2]
	mov	r2, r4	@ step the output pointer
	ldmlsfd	sp!, {r4, r5, r6, r7, pc}	@ r1 <= r4 (unsigned): done, restore and return
	add	r4, r4, #4
	b	.L7
	@ Scalar-only path, taken when the vector path was rejected above.
	@ Same instruction sequence as .L4/.L7.
.L3:
	sub	r0, r0, #4	@ bias r0 for the pre-indexed load
.L10:
	ldr	ip, [r0, #4]!	@ advance r0 by 4, then load the input word
	cmp	r1, r4	@ flags are consumed by ldmlsfd below
	bic	r5, ip, #-16777216	@ clear bits 31:24
	bic	r5, r5, #65280	@ clear bits 15:8 -> r5 = word & 0x00ff00ff
	mov	ip, ip, lsr #8
	bic	ip, ip, #-16777216
	mul	r5, r3, r5
	bic	ip, ip, #65280	@ ip = (word >> 8) & 0x00ff00ff
	mul	lr, r3, ip	@ lr is a scratch register here; the caller's lr is on the stack
	mov	ip, r5, lsr #8
	bic	r5, ip, #-16777216
	bic	ip, lr, #16711680	@ clear bits 23:16
	bic	r5, r5, #65280	@ r5 = (first product >> 8) & 0x00ff00ff
	bic	ip, ip, #255	@ clear bits 7:0 -> ip = second product & 0xff00ff00
	add	ip, ip, r5
	str	ip, [r2]
	mov	r2, r4	@ step the output pointer
	ldmlsfd	sp!, {r4, r5, r6, r7, pc}	@ r1 <= r4 (unsigned): done, restore and return
	add	r4, r4, #4
	b	.L10
	.size	_op_blend_p_caa_dp, .-_op_blend_p_caa_dp
	.ident	"GCC: (GNU) 6.0.0 20150520 (experimental)"
	.section	.note.GNU-stack,"",%progbits

Comments

Kyrylo Tkachov June 5, 2015, 8:23 a.m. UTC | #1
Hi Kugan,

On 05/06/15 09:01, Kugan wrote:
> Hi All,
>
> After my patch for cprop is committed
> (https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=224048),
> neon-reload-class.c is failing in arm mode. But this is kind of expected
> behaviour so I looked at the original patch related to
> neon-reload-class.c, which is:
> https://gcc.gnu.org/ml/gcc-patches/2011-04/msg01968.html
>
> As I understand, it was added to make sure that these constants are not
> created as constant pool. from that point of view it looks like we
> should remove the check for movw/movt.
>
> I also checked the asm generated before the constant split for arm
> (https://gcc.gnu.org/viewcvs?rev=223235&root=gcc&view=rev) just to be
> sure that this is not due to the earlier patch. Reason for this is that
> the constant generation has changed compared to what was shown in
> https://gcc.gnu.org/ml/gcc-patches/2011-04/msg01968.html.
>
>
> I am also attaching the three versions of the neon-reload-class.s.
>
> 1. neon-reload-class_before_arm_split.s - before r223235
> 2. neon-reload-class_before_cprop.s - before r224048
> 3. neon-reload-class_new.s - after 224048
>
> Is this OK for trunk?

I see that the new code is better and it is indeed still not
loading the immediates from memory, which is what the test tests for.
So as long as scanning for \\.d?word is enough to catch the presence
of a constant pool this should be ok.

Kyrill

>
> Thanks,
> Kugan
>
> gcc/testsuite/ChangeLog:
>
> 2015-06-05  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
> 	* gcc.target/arm/neon-reload-class.c: Remove movw and movt.
>
>
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.target/arm/neon-reload-class.c b/gcc/testsuite/gcc.target/arm/neon-reload-class.c
index c63aa04..48950f7 100644
--- a/gcc/testsuite/gcc.target/arm/neon-reload-class.c
+++ b/gcc/testsuite/gcc.target/arm/neon-reload-class.c
@@ -15,4 +15,4 @@  _op_blend_p_caa_dp(unsigned *s, unsigned* e, unsigned *d, unsigned c) {
 
 /* These constants should be emitted as immediates rather than loaded from memory.  */
 
-/* { dg-final { scan-assembler-not "(\\.d?word|mov(w|t))" } } */
+/* { dg-final { scan-assembler-not "(\\.d?word)" } } */