Patchwork [SH] PR 54685 - unsigned int comparison with 0x7FFFFFFF

login
register
mail settings
Submitter Oleg Endo
Date Oct. 8, 2012, 1:53 a.m.
Message ID <1349661200.21984.29.camel@yam-132-YW-E178-FTW>
Download mbox | patch
Permalink /patch/189879/
State New
Headers show

Comments

Oleg Endo - Oct. 8, 2012, 1:53 a.m.
On Mon, 2012-10-08 at 09:45 +0900, Kaz Kojima wrote:
> Oleg Endo <oleg.endo@t-online.de> wrote:
> > The attached patch improves comparisons such as
> > 'unsigned int <= 0x7FFFFFFF' on SH.
> > As mentioned in the PR, for some reason, those comparisons do not go
> > through the cstore expander.  As a consequence the comparison doesn't
> > get the chance to be canonicalized by the target code and ends up as
> > '(~x) >> 31'.
> > I've not investigated this further and just fixed the symptoms on SH.  I
> > don't know whether it's also an issue on other targets.
> > 
> > Tested on rev 192142 with
> > make -k check RUNTESTFLAGS="--target_board=sh-sim
> > \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> > 
> > and no new failures.
> > OK?
> 
> I've run CSiBE with and without the patch for sh4-unknown-linux-gnu
> at -O2.  Only one difference in the resulted sizes: jpeg-6b/jcphuff
> increases 5336 bytes to 5340 bytes with the patch.  Could you look
> into it?

Yep, that's actually the only place in the CSiBE set where this case
occurs.  The function in question is encode_mcu_AC_refine.  The 4-byte
increase seems to be due to different register allocation and different
spill code.
I've attached the asm diff.

Cheers,
Oleg

Patch

--- CSiBE/m4-single-ml-O2-trunk/jpeg-6b/jcphuff.s
+++ CSiBE/m4-single-ml-O2/jpeg-6b/jcphuff.s
@@ -2147,7 +2147,7 @@ 
 	bt/s	.L611
 	mov.l	r2,@(24,r15)
 	bra	.L612
-	mov.l	@(44,r15),r0
+	mov.l	@(44,r15),r3
 .L611:
 	mov.l	.L565,r4
 	mov	r2,r5
@@ -2513,21 +2513,21 @@ 
 	mov	r0,r1
 	mov	r9,r0
 	and	r2,r1
-	mov.l	@(24,r15),r3
+	mov.l	@(28,r15),r3
 	mov.b	r1,@(r0,r8)
 	mov	r9,r11
-	mov.l	@(28,r15),r0
-	add	#1,r3
-	mov.l	@(36,r15),r1
+	mov.l	@(24,r15),r2
+	add	#4,r3
+	mov.l	@(36,r15),r0
 	add	#1,r11
-	mov.l	@(40,r15),r2
+	mov.l	@(40,r15),r1
+	add	#1,r2
 	add	#4,r0
-	add	#4,r1
-	mov.l	r3,@(24,r15)
-	mov.l	r0,@(28,r15)
-	cmp/ge	r3,r2
+	mov.l	r2,@(24,r15)
+	mov.l	r3,@(28,r15)
+	cmp/ge	r2,r1
 	bt/s	.L599
-	mov.l	r1,@(36,r15)
+	mov.l	r0,@(36,r15)
 	tst	r11,r11
 	bt/s	.L555
 	mov	r12,r14
@@ -2545,21 +2545,23 @@ 
 	mov.w	.L578,r3
 	cmp/hi	r3,r2
 	bf/s	.L612
-	mov.l	@(44,r15),r0
+	mov.l	@(44,r15),r3
 .L515:
 	mov.l	.L582,r2
 	jsr	@r2
 	mov	r14,r4
 .L459:
+	mov.l	@(44,r15),r3
+.L612:
 	mov.l	@(44,r15),r0
-.L612:
+	mov.l	@(24,r3),r2
 	mov.l	@(16,r14),r3
-	mov.l	@(24,r0),r2
 	mov.l	r3,@r2
 	mov.l	@(20,r14),r3
 	mov.l	r3,@(4,r2)
 	mov.w	.L580,r2
-	mov.l	@(r0,r2),r2
+	add	r0,r2
+	mov.l	@(8,r2),r2
 	tst	r2,r2
 	bt	.L544
 	add	#64,r14
@@ -2594,18 +2596,18 @@ 
 	add	#1,r2
 	mov.l	r2,@(16,r15)
 .L467:
-	mov.l	@(24,r15),r3
-	mov.l	@(28,r15),r0
-	mov.l	@(36,r15),r1
-	add	#1,r3
-	mov.l	@(40,r15),r2
+	mov.l	@(24,r15),r2
+	mov.l	@(28,r15),r3
+	mov.l	@(36,r15),r0
+	add	#1,r2
+	mov.l	@(40,r15),r1
+	add	#4,r3
 	add	#4,r0
-	add	#4,r1
-	mov.l	r3,@(24,r15)
-	mov.l	r0,@(28,r15)
-	cmp/ge	r3,r2
+	mov.l	r2,@(24,r15)
+	mov.l	r3,@(28,r15)
+	cmp/ge	r2,r1
 	bf/s	.L603
-	mov.l	r1,@(36,r15)
+	mov.l	r0,@(36,r15)
 .L599:
 	bra	.L617
 	mov.l	@(28,r15),r1
@@ -2614,8 +2616,8 @@ 
 	bf/s	.L523
 	mov	r12,r14
 .L555:
-	mov.l	@(16,r15),r3
-	cmp/pl	r3
+	mov.l	@(16,r15),r2
+	cmp/pl	r2
 	bf	.L459
 	mov.l	@(56,r14),r3
 	bra	.L625
@@ -2642,13 +2644,13 @@ 
 	add	#1,r2
 	mov.l	r2,@r3
 .L511:
-	mov.l	@(20,r15),r1
+	mov.l	@(20,r15),r0
 .L620:
-	mov	#0,r2
+	mov	#0,r1
 	mov	#0,r11
-	mov.l	r2,@(16,r15)
+	mov.l	r1,@(16,r15)
 	bra	.L467
-	mov.l	@(0,r1),r8
+	mov.l	@(0,r0),r8
 	.align 1
 .L522:
 	bra	.L619
@@ -2659,7 +2661,7 @@ 
 .L578:
 	.short	937
 .L580:
-	.short	196
+	.short	188
 .L581:
 	.short	312
 .L583:
@@ -2728,16 +2730,15 @@ 
 	tst	r3,r3
 	mov.l	r14,@(28,r12)
 	mov.l	@r1,r0
-	mov.l	@(52,r15),r2
+	mov.l	@(52,r15),r1
 	add	r0,r0
 	mov.l	r11,@(24,r12)
 	bf/s	.L511
-	mov.w	@(r0,r2),r1
-	not	r1,r1
-	mov	r14,r10
-	shll	r1
+	mov.w	@(r0,r1),r2
+	cmp/pz	r2
 	neg	r14,r3
 	movt	r1
+	mov	r14,r10
 	add	#23,r3
 	shld	r3,r1
 	add	#1,r10
@@ -2784,7 +2785,7 @@ 
 	mov	r9,r6
 .L601:
 	bra	.L620
-	mov.l	@(20,r15),r1
+	mov.l	@(20,r15),r0
 	.align 1
 .L556:
 	mov.l	.L589,r1
@@ -2812,9 +2813,9 @@ 
 	add	#-8,r10
 	.align 1
 .L558:
-	mov.l	.L589,r3
+	mov.l	.L589,r2
 	mov	r12,r4
-	jsr	@r3
+	jsr	@r2
 	mov.l	r1,@(4,r15)
 	mov.l	@(4,r15),r1
 	cmp/eq	r13,r1
@@ -2830,8 +2831,8 @@ 
 	dt	r2
 	bf/s	.L507
 	mov.l	r2,@(20,r12)
-	mov.l	.L589,r0
-	jsr	@r0
+	mov.l	.L589,r3
+	jsr	@r3
 	mov	r12,r4
 	bra	.L622
 	add	#-8,r10