Patchwork GNU Superopt patches 1/6

login
register
mail settings
Submitter Joe Seymour
Date Sept. 7, 2010, 9:51 a.m.
Message ID <201009070951.o879pxnQ021581@brsbs01.icerasemi.com>
Download mbox | patch
Permalink /patch/63977/
State New
Headers show

Comments

Joe Seymour - Sept. 7, 2010, 9:51 a.m.
Hi,

As part of a summer internship I've been working with GNU superopt at Icera.

I realise this isn't exactly the right place for this, but I haven't
been able to get in touch with the maintainer (Torbjorn Granlund) and
thought some of our enhancements may be useful to people here.

The first one makes it compile with GCC (4.4.1):

Tue Aug 31 2010  Joseph Seymour  <Seymour@IceraSemi.com>

	* superopt.h: Add undef alloca to remove warning. Fixed
	deprecated multiline asm syntax.
	* longlong.h: Fixed deprecated multiline asm syntax.
	* superopt.c: Add include <stdlib.h> to remove warning.


--  
Joe Seymour
Andrew Pinski - Sept. 7, 2010, 6:47 p.m.
On Tue, Sep 7, 2010 at 2:51 AM, Joe Seymour <seymour@icerasemi.com> wrote:
> The first one makes it compile with GCC (4.4.1):
>
> Tues Aug 31 2010  Joseph Seymour  <Seymour@IceraSemi.com>
>
>        * superopt.h: Add undef alloc a to remove warning. Fixed
>        deprecated multiline asm syntax.
>        * longlong.h: Fixed deprecated multiline asm syntax.
>        * superopt.c: Add include <stdlib.h> to remove warning.

Even though I think this is off-topic, I think you can grab a new
version of longlong.h from GMP (GCC includes our own copy too as
gcc/longlong.h for libgcc compiling).

Thanks,
Andrew Pinski

Patch

diff -Nup /home/seymour/superopt-2.5/longlong.h ./longlong.h
--- /home/seymour/superopt-2.5/longlong.h	1995-04-30 02:00:31.000000000 +0100
+++ ./longlong.h	2010-08-25 15:25:55.215649000 +0100
@@ -99,8 +99,8 @@  the Free Software Foundation, Inc., 675 
 
 #if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("add %1,%4,%5
-	addc %0,%2,%3"							\
+  __asm__ ("add %1,%4,%5\n"
+"addc %0,%2,%3"							\
 	   : "=r" ((USItype)(sh)),					\
 	    "=&r" ((USItype)(sl))					\
 	   : "%r" ((USItype)(ah)),					\
@@ -108,8 +108,8 @@  the Free Software Foundation, Inc., 675 
 	     "%r" ((USItype)(al)),					\
 	     "rI" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("sub %1,%4,%5
-	subc %0,%2,%3"							\
+  __asm__ ("sub %1,%4,%5\n"
+"subc %0,%2,%3"							\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "r" ((USItype)(ah)),					\
@@ -165,8 +165,8 @@  extern UDItype __udiv_qrnnd ();
 
 #if defined (__arm__) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("adds	%1, %4, %5
-	adc	%0, %2, %3"						\
+  __asm__ ("adds	%1, %4, %5\n"
+"adc	%0, %2, %3"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%r" ((USItype)(ah)),					\
@@ -174,8 +174,8 @@  extern UDItype __udiv_qrnnd ();
 	     "%r" ((USItype)(al)),					\
 	     "rI" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subs	%1, %4, %5
-	sbc	%0, %2, %3"						\
+  __asm__ ("subs	%1, %4, %5\n"
+"sbc	%0, %2, %3"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "r" ((USItype)(ah)),					\
@@ -183,19 +183,19 @@  extern UDItype __udiv_qrnnd ();
 	     "r" ((USItype)(al)),					\
 	     "rI" ((USItype)(bl)))
 #define umul_ppmm(xh, xl, a, b) \
-  __asm__ ("%@ Inlined umul_ppmm
-	mov	%|r0, %2, lsr #16
-	mov	%|r2, %3, lsr #16
-	bic	%|r1, %2, %|r0, lsl #16
-	bic	%|r2, %3, %|r2, lsl #16
-	mul	%1, %|r1, %|r2
-	mul	%|r2, %|r0, %|r2
-	mul	%|r1, %0, %|r1
-	mul	%0, %|r0, %0
-	adds	%|r1, %|r2, %|r1
-	addcs	%0, %0, #65536
-	adds	%1, %1, %|r1, lsl #16
-	adc	%0, %0, %|r1, lsr #16"					\
+  __asm__ ("%@ Inlined umul_ppmm\n"
+"mov	%|r0, %2, lsr #16\n"
+"mov	%|r2, %3, lsr #16\n"
+"bic	%|r1, %2, %|r0, lsl #16\n"
+"bic	%|r2, %3, %|r2, lsl #16\n"
+"mul	%1, %|r1, %|r2\n"
+"mul	%|r2, %|r0, %|r2\n"
+"mul	%|r1, %0, %|r1\n"
+"mul	%0, %|r0, %0\n"
+"adds	%|r1, %|r2, %|r1\n"
+"addcs	%0, %0, #65536\n"
+"adds	%1, %1, %|r1, lsl #16\n"
+"adc	%0, %0, %|r1, lsr #16"					\
 	   : "=&r" ((USItype)(xh)),					\
 	     "=r" ((USItype)(xl))					\
 	   : "r" ((USItype)(a)),					\
@@ -235,8 +235,8 @@  extern UDItype __udiv_qrnnd ();
 
 #if defined (__gmicro__) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("add.w %5,%1
-	addx %3,%0"							\
+  __asm__ ("add.w %5,%1\n"
+"addx %3,%0"							\
 	   : "=g" ((USItype)(sh)),					\
 	     "=&g" ((USItype)(sl))					\
 	   : "%0" ((USItype)(ah)),					\
@@ -244,8 +244,8 @@  extern UDItype __udiv_qrnnd ();
 	     "%1" ((USItype)(al)),					\
 	     "g" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("sub.w %5,%1
-	subx %3,%0"							\
+  __asm__ ("sub.w %5,%1\n"
+"subx %3,%0"							\
 	   : "=g" ((USItype)(sh)),					\
 	     "=&g" ((USItype)(sl))					\
 	   : "0" ((USItype)(ah)),					\
@@ -274,8 +274,8 @@  extern UDItype __udiv_qrnnd ();
 
 #if defined (__hppa) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("add %4,%5,%1
-	addc %2,%3,%0"							\
+  __asm__ ("add %4,%5,%1\n"
+"addc %2,%3,%0"							\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%rM" ((USItype)(ah)),					\
@@ -283,8 +283,8 @@  extern UDItype __udiv_qrnnd ();
 	     "%rM" ((USItype)(al)),					\
 	     "rM" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("sub %4,%5,%1
-	subb %2,%3,%0"							\
+  __asm__ ("sub %4,%5,%1\n"
+"subb %2,%3,%0"							\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "rM" ((USItype)(ah)),					\
@@ -322,22 +322,22 @@  extern USItype __udiv_qrnnd ();
   do {									\
     USItype __tmp;							\
     __asm__ (								\
-       "ldi		1,%0
-	extru,=		%1,15,16,%%r0		; Bits 31..16 zero?
-	extru,tr	%1,15,16,%1		; No.  Shift down, skip add.
-	ldo		16(%0),%0		; Yes.  Perform add.
-	extru,=		%1,23,8,%%r0		; Bits 15..8 zero?
-	extru,tr	%1,23,8,%1		; No.  Shift down, skip add.
-	ldo		8(%0),%0		; Yes.  Perform add.
-	extru,=		%1,27,4,%%r0		; Bits 7..4 zero?
-	extru,tr	%1,27,4,%1		; No.  Shift down, skip add.
-	ldo		4(%0),%0		; Yes.  Perform add.
-	extru,=		%1,29,2,%%r0		; Bits 3..2 zero?
-	extru,tr	%1,29,2,%1		; No.  Shift down, skip add.
-	ldo		2(%0),%0		; Yes.  Perform add.
-	extru		%1,30,1,%1		; Extract bit 1.
-	sub		%0,%1,%0		; Subtract it.
-	" : "=r" (count), "=r" (__tmp) : "1" (x));			\
+       "ldi		1,%0\n"
+"extru,=		%1,15,16,%%r0		; Bits 31..16 zero?\n"
+"extru,tr	%1,15,16,%1		; No.  Shift down, skip add.\n"
+"ldo		16(%0),%0		; Yes.  Perform add.\n"
+"extru,=		%1,23,8,%%r0		; Bits 15..8 zero?\n"
+"extru,tr	%1,23,8,%1		; No.  Shift down, skip add.\n"
+"ldo		8(%0),%0		; Yes.  Perform add.\n"
+"extru,=		%1,27,4,%%r0		; Bits 7..4 zero?\n"
+"extru,tr	%1,27,4,%1		; No.  Shift down, skip add.\n"
+"ldo		4(%0),%0		; Yes.  Perform add.\n"
+"extru,=		%1,29,2,%%r0		; Bits 3..2 zero?\n"
+"extru,tr	%1,29,2,%1		; No.  Shift down, skip add.\n"
+"ldo		2(%0),%0		; Yes.  Perform add.\n"
+"extru		%1,30,1,%1		; Extract bit 1.\n"
+"sub		%0,%1,%0		; Subtract it.\n"
+"" : "=r" (count), "=r" (__tmp) : "1" (x));			\
   } while (0)
 #endif
 
@@ -384,8 +384,7 @@  extern USItype __udiv_qrnnd ();
 
 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("addl %5,%1
-	adcl %3,%0"							\
+  __asm__ ("addl %5,%1\nadcl %3,%0"           				\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%0" ((USItype)(ah)),					\
@@ -393,8 +392,7 @@  extern USItype __udiv_qrnnd ();
 	     "%1" ((USItype)(al)),					\
 	     "g" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subl %5,%1
-	sbbl %3,%0"							\
+  __asm__ ("subl %5,%1\nsbbl %3,%0"					\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "0" ((USItype)(ah)),					\
@@ -448,8 +446,8 @@  extern USItype __udiv_qrnnd ();
 
 #if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("add%.l %5,%1
-	addx%.l %3,%0"							\
+  __asm__ ("add%.l %5,%1\n"
+"addx%.l %3,%0"							\
 	   : "=d" ((USItype)(sh)),					\
 	     "=&d" ((USItype)(sl))					\
 	   : "%0" ((USItype)(ah)),					\
@@ -457,8 +455,8 @@  extern USItype __udiv_qrnnd ();
 	     "%1" ((USItype)(al)),					\
 	     "g" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("sub%.l %5,%1
-	subx%.l %3,%0"							\
+  __asm__ ("sub%.l %5,%1\n"
+"subx%.l %3,%0"							\
 	   : "=d" ((USItype)(sh)),					\
 	     "=&d" ((USItype)(sl))					\
 	   : "0" ((USItype)(ah)),					\
@@ -495,28 +493,28 @@  extern USItype __udiv_qrnnd ();
 #else /* not mc68020 */
 #define umul_ppmmxx(xh, xl, a, b) \
   do { USItype __umul_tmp1, __umul_tmp2;				\
-	__asm__ ("| Inlined umul_ppmm
-	move%.l	%5,%3
-	move%.l	%2,%0
-	move%.w	%3,%1
-	swap	%3
-	swap	%0
-	mulu	%2,%1
-	mulu	%3,%0
-	mulu	%2,%3
-	swap	%2
-	mulu	%5,%2
-	add%.l	%3,%2
-	jcc	1f
-	add%.l	#0x10000,%0
-1:	move%.l	%2,%3
-	clr%.w	%2
-	swap	%2
-	swap	%3
-	clr%.w	%3
-	add%.l	%3,%1
-	addx%.l	%2,%0
-	| End inlined umul_ppmm"					\
+	__asm__ ("| Inlined umul_ppmm\n"
+"move%.l	%5,%3\n"
+"move%.l	%2,%0\n"
+"move%.w	%3,%1\n"
+"swap	%3\n"
+"swap	%0\n"
+"mulu	%2,%1\n"
+"mulu	%3,%0\n"
+"mulu	%2,%3\n"
+"swap	%2\n"
+"mulu	%5,%2\n"
+"add%.l	%3,%2\n"
+"jcc	1f\n"
+"add%.l	#0x10000,%0\n"
+"1:	move%.l	%2,%3\n"
+"clr%.w	%2\n"
+"swap	%2\n"
+"swap	%3\n"
+"clr%.w	%3\n"
+"add%.l	%3,%1\n"
+"addx%.l	%2,%0\n"
+"| End inlined umul_ppmm"					\
 	      : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)),		\
 	        "=d" (__umul_tmp1), "=&d" (__umul_tmp2)			\
 	      : "%2" ((USItype)(a)), "d" ((USItype)(b)));		\
@@ -528,8 +526,8 @@  extern USItype __udiv_qrnnd ();
 
 #if defined (__m88000__) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("addu.co %1,%r4,%r5
-	addu.ci %0,%r2,%r3"						\
+  __asm__ ("addu.co %1,%r4,%r5\n"
+"addu.ci %0,%r2,%r3"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%rJ" ((USItype)(ah)),					\
@@ -537,8 +535,8 @@  extern USItype __udiv_qrnnd ();
 	     "%rJ" ((USItype)(al)),					\
 	     "rJ" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subu.co %1,%r4,%r5
-	subu.ci %0,%r2,%r3"						\
+  __asm__ ("subu.co %1,%r4,%r5\n"
+"subu.ci %0,%r2,%r3"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "rJ" ((USItype)(ah)),					\
@@ -587,9 +585,9 @@  extern USItype __udiv_qrnnd ();
 
 #if defined (__mips__) && W_TYPE_SIZE == 32
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("multu %2,%3
-	mflo %0
-	mfhi %1"							\
+  __asm__ ("multu %2,%3\n"
+"mflo %0\n"
+"mfhi %1"							\
 	   : "=d" ((USItype)(w0)),					\
 	     "=d" ((USItype)(w1))					\
 	   : "d" ((USItype)(u)),					\
@@ -600,9 +598,9 @@  extern USItype __udiv_qrnnd ();
 
 #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("dmultu %2,%3
-	mflo %0
-	mfhi %1"							\
+  __asm__ ("dmultu %2,%3\n"
+"mflo %0\n"
+"mfhi %1"							\
 	   : "=d" ((UDItype)(w0)),					\
 	     "=d" ((UDItype)(w1))					\
 	   : "d" ((UDItype)(u)),					\
@@ -768,8 +766,8 @@  extern USItype __udiv_qrnnd ();
 
 #if defined (__pyr__) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("addw	%5,%1
-	addwc	%3,%0"							\
+  __asm__ ("addw	%5,%1\n"
+"addwc	%3,%0"							\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%0" ((USItype)(ah)),					\
@@ -777,8 +775,8 @@  extern USItype __udiv_qrnnd ();
 	     "%1" ((USItype)(al)),					\
 	     "g" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subw	%5,%1
-	subwb	%3,%0"							\
+  __asm__ ("subw	%5,%1\n"
+"subwb	%3,%0"							\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "0" ((USItype)(ah)),					\
@@ -801,8 +799,8 @@  extern USItype __udiv_qrnnd ();
 
 #if defined (__ibm032__) /* RT/ROMP */  && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("a %1,%5
-	ae %0,%3"							\
+  __asm__ ("a %1,%5\n"
+"ae %0,%3"							\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%0" ((USItype)(ah)),					\
@@ -810,8 +808,8 @@  extern USItype __udiv_qrnnd ();
 	     "%1" ((USItype)(al)),					\
 	     "r" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("s %1,%5
-	se %0,%3"							\
+  __asm__ ("s %1,%5\n"
+"se %0,%3"							\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "0" ((USItype)(ah)),					\
@@ -822,26 +820,26 @@  extern USItype __udiv_qrnnd ();
   do {									\
     USItype __m0 = (m0), __m1 = (m1);					\
     __asm__ (								\
-       "s	r2,r2
-	mts	r10,%2
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	m	r2,%3
-	cas	%0,r2,r0
-	mfs	r10,%1"							\
+       "s	r2,r2\n"
+"mts	r10,%2\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"m	r2,%3\n"
+"cas	%0,r2,r0\n"
+"mfs	r10,%1"							\
 	     : "=r" ((USItype)(ph)),					\
 	       "=r" ((USItype)(pl))					\
 	     : "%r" (__m0),						\
@@ -870,8 +868,8 @@  extern USItype __udiv_qrnnd ();
 
 #if defined (__sparc__) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("addcc %r4,%5,%1
-	addx %r2,%3,%0"							\
+  __asm__ ("addcc %r4,%5,%1\n"
+"addx %r2,%3,%0"							\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%rJ" ((USItype)(ah)),					\
@@ -880,8 +878,8 @@  extern USItype __udiv_qrnnd ();
 	     "rI" ((USItype)(bl))					\
 	   __CLOBBER_CC)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subcc %r4,%5,%1
-	subx %r2,%3,%0"							\
+  __asm__ ("subcc %r4,%5,%1\n"
+"subx %r2,%3,%0"							\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "rJ" ((USItype)(ah)),					\
@@ -923,45 +921,45 @@  extern USItype __udiv_qrnnd ();
 	     "r" ((USItype)(v)))
 #define UMUL_TIME 5
 #define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("! Inlined udiv_qrnnd
-	wr	%%g0,%2,%%y	! Not a delayed write for sparclite
-	tst	%%g0
-	divscc	%3,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%%g1
-	divscc	%%g1,%4,%0
-	rd	%%y,%1
-	bl,a 1f
-	add	%1,%4,%1
-1:	! End of inline udiv_qrnnd"					\
+  __asm__ ("! Inlined udiv_qrnnd\n"
+"wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"
+"tst	%%g0\n"
+"divscc	%3,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%%g1\n"
+"divscc	%%g1,%4,%0\n"
+"rd	%%y,%1\n"
+"bl,a 1f\n"
+"add	%1,%4,%1\n"
+"1:	! End of inline udiv_qrnnd"					\
 	   : "=r" ((USItype)(q)),					\
 	     "=r" ((USItype)(r))					\
 	   : "r" ((USItype)(n1)),					\
@@ -978,46 +976,46 @@  extern USItype __udiv_qrnnd ();
 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
 #ifndef umul_ppmm
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("! Inlined umul_ppmm
-	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr
-	sra	%3,31,%%g2	! Don't move this insn
-	and	%2,%%g2,%%g2	! Don't move this insn
-	andcc	%%g0,0,%%g1	! Don't move this insn
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,0,%%g1
-	add	%%g1,%%g2,%0
-	rd	%%y,%1"							\
+  __asm__ ("! Inlined umul_ppmm\n"
+"wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n"
+"sra	%3,31,%%g2	! Don't move this insn\n"
+"and	%2,%%g2,%%g2	! Don't move this insn\n"
+"andcc	%%g0,0,%%g1	! Don't move this insn\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,%3,%%g1\n"
+"mulscc	%%g1,0,%%g1\n"
+"add	%%g1,%%g2,%0\n"
+"rd	%%y,%1"							\
 	   : "=r" ((USItype)(w1)),					\
 	     "=r" ((USItype)(w0))					\
 	   : "%rI" ((USItype)(u)),					\
@@ -1040,8 +1038,8 @@  extern USItype __udiv_qrnnd ();
 
 #if defined (__vax__) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("addl2 %5,%1
-	adwc %3,%0"							\
+  __asm__ ("addl2 %5,%1\n"
+"adwc %3,%0"							\
 	   : "=g" ((USItype)(sh)),					\
 	     "=&g" ((USItype)(sl))					\
 	   : "%0" ((USItype)(ah)),					\
@@ -1049,8 +1047,8 @@  extern USItype __udiv_qrnnd ();
 	     "%1" ((USItype)(al)),					\
 	     "g" ((USItype)(bl)))
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subl2 %5,%1
-	sbwc %3,%0"							\
+  __asm__ ("subl2 %5,%1\n"
+"sbwc %3,%0"							\
 	   : "=g" ((USItype)(sh)),					\
 	     "=&g" ((USItype)(sl))					\
 	   : "0" ((USItype)(ah)),					\
diff -Nup /home/seymour/superopt-2.5/superopt.c ./superopt.c
--- /home/seymour/superopt-2.5/superopt.c	1995-06-03 00:29:35.000000000 +0100
+++ ./superopt.c	2010-08-25 15:34:44.724235000 +0100
@@ -21,6 +21,7 @@ 
 
 #include <stdio.h>
 #include <string.h>
+#include <stdlib.h>
 
 #include "superopt.h"
 
diff -Nup /home/seymour/superopt-2.5/superopt.h ./superopt.h
--- /home/seymour/superopt-2.5/superopt.h	1995-05-17 22:58:31.000000000 +0100
+++ ./superopt.h	2010-08-25 15:36:03.620851000 +0100
@@ -155,6 +155,7 @@  typedef unsigned_word word;
 #define TRUNC_CNT(cnt) ((unsigned) (cnt) % BITS_PER_WORD)
 
 #if defined(sparc) || defined(__GNUC__)
+#undef alloca
 #define alloca __builtin_alloca
 #endif
 
@@ -226,40 +227,40 @@  typedef struct
 
 #if sparc
 #define PERFORM_ADD_CIO(d, co, r1, r2, ci) \
-  asm ("subcc %%g0,%4,%%g0	! set cy if CI != 0
-	addxcc %2,%3,%0		! add R1 and R2
+  asm ("subcc %%g0,%4,%%g0	! set cy if CI != 0 \
+	addxcc %2,%3,%0		! add R1 and R2 \
 	addx %%g0,%%g0,%1	! set CO to cy"				\
        : "=r" (d), "=r" (co)						\
        : "%r" (r1), "rI" (r2), "rI" (ci)				\
        __CLOBBER_CC)
 #define PERFORM_ADD_CO(d, co, r1, r2, ci) \
-  asm ("addcc %2,%3,%0		! add R1 and R2
+  asm ("addcc %2,%3,%0		! add R1 and R2 \
 	addx %%g0,%%g0,%1	! set CO to cy"				\
        : "=r" (d), "=r" (co)						\
        : "%r" (r1), "rI" (r2)						\
        __CLOBBER_CC)
 #define PERFORM_SUB_CIO(d, co, r1, r2, ci) \
-  asm ("subcc %%g0,%4,%%g0	! set cy if CI != 0
-	subxcc %2,%3,%0		! subtract R2 from R1
+  asm ("subcc %%g0,%4,%%g0	! set cy if CI != 0 \
+	subxcc %2,%3,%0		! subtract R2 from R1 \
 	addx %%g0,%%g0,%1	! set CO to cy"				\
        : "=r" (d), "=r" (co)						\
        : "r" (r1), "rI" (r2), "rI" (ci)					\
        __CLOBBER_CC)
 #define PERFORM_SUB_CO(d, co, r1, r2, ci) \
-  asm ("subcc %2,%3,%0		! subtract R2 from R1
+  asm ("subcc %2,%3,%0		! subtract R2 from R1 \
 	addx %%g0,%%g0,%1	! set CO to cy"				\
        : "=r" (d), "=r" (co)						\
        : "r" (r1), "rI" (r2)						\
        __CLOBBER_CC)
 #define PERFORM_ADC_CIO(d, co, r1, r2, ci) \
-  asm ("subcc %4,1,%%g0		! cy = (CI == 0)
-	subxcc %2,%3,%0		! subtract R2 from R1
+  asm ("subcc %4,1,%%g0		! cy = (CI == 0) \
+	subxcc %2,%3,%0		! subtract R2 from R1 \
 	subx %%g0,-1,%1		! set CO to !cy"			\
        : "=&r" (d), "=r" (co)						\
        : "r" (r1), "rI" (r2), "rI" (ci)					\
        __CLOBBER_CC)
 #define PERFORM_ADC_CO(d, co, r1, r2, ci) \
-  asm ("subcc %2,%3,%0		! subtract R2 from R1
+  asm ("subcc %2,%3,%0		! subtract R2 from R1 \
 	subx %%g0,-1,%1		! set CO to !cy"			\
        : "=&r" (d), "=r" (co)						\
        : "r" (r1), "rI" (r2)						\
@@ -268,39 +269,39 @@  typedef struct
 
 #if m88k
 #define PERFORM_ADD_CIO(d, co, r1, r2, ci) \
-  asm ("or %0,r0,1
-	subu.co r0,%4,%0	; set cy if CI != 0
-	addu.cio %0,%2,%r3	; add R1 and R2
+  asm ("or %0,r0,1 \
+	subu.co r0,%4,%0	; set cy if CI != 0 \
+	addu.cio %0,%2,%r3	; add R1 and R2 \
 	addu.ci %1,r0,r0	; set CO to cy"				\
        : "=&r" (d), "=r" (co)						\
        : "%r" (r1), "Or" (r2), "r" (ci))
 #define PERFORM_ADD_CO(d, co, r1, r2, ci) \
-  asm ("addu.co %0,%2,%r3	; add R1 and R2
+  asm ("addu.co %0,%2,%r3	; add R1 and R2 \
 	addu.ci %1,r0,r0	; set CO to cy"				\
        : "=r" (d), "=r" (co)						\
        : "%r" (r1), "Or" (r2))
 #define PERFORM_SUB_CIO(d, co, r1, r2, ci) \
-  asm ("subu.co r0,r0,%r4	; reset cy if CI != 0
-	subu.cio %0,%2,%r3	; subtract R2 from R1
-	subu.ci %1,r0,r0	; set CO to -1+cy
+  asm ("subu.co r0,r0,%r4	; reset cy if CI != 0 \
+	subu.cio %0,%2,%r3	; subtract R2 from R1 \
+	subu.ci %1,r0,r0	; set CO to -1+cy \
 	subu %1,r0,%1		; set CO to !cy"			\
        : "=r" (d), "=r" (co)						\
        : "r" (r1), "Or" (r2), "Or" (ci))
 #define PERFORM_SUB_CO(d, co, r1, r2, ci) \
-  asm ("subu.co %0,%2,%r3	; subtract R2 from R1
-	subu.ci %1,r0,r0	; set CO to -1+cy
+  asm ("subu.co %0,%2,%r3	; subtract R2 from R1 \
+	subu.ci %1,r0,r0	; set CO to -1+cy \
 	subu %1,r0,%1		; set CO to !cy"			\
        : "=r" (d), "=r" (co)						\
        : "r" (r1), "Or" (r2))
 #define PERFORM_ADC_CIO(d, co, r1, r2, ci) \
-  asm ("or %0,r0,1
-	subu.co r0,%r4,%0	; set cy if CI != 0
-	subu.cio %0,%2,%r3	; subtract R2 from R1
+  asm ("or %0,r0,1 \
+	subu.co r0,%r4,%0	; set cy if CI != 0 \
+	subu.cio %0,%2,%r3	; subtract R2 from R1 \
 	addu.ci %1,r0,r0	; set CO to cy"				\
        : "=&r" (d), "=r" (co)						\
        : "r" (r1), "Or" (r2), "Or" (ci))
 #define PERFORM_ADC_CO(d, co, r1, r2, ci) \
-  asm ("subu.co %0,%2,%r3	; subtract R2 from R1
+  asm ("subu.co %0,%2,%r3	; subtract R2 from R1 \
 	addu.ci %1,r0,r0	; set CO to cy"				\
        : "=r" (d), "=r" (co)						\
        : "r" (r1), "Or" (r2))