Message ID | 1368220707.3378.57.camel@shinybook.infradead.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
On Fri, 10 May 2013 22:18:27 +0100 David Woodhouse <dwmw2@infradead.org> wrote: > From: David Woodhouse <David.Woodhouse@intel.com> > > Some versions of GCC apparently expect this to be provided by libgcc. Thanks Dave. We were discussing this with Alan Modra and he doesn't think the 64bit target should ever emit a call to __bswapdi2. Did you only see it on 32bit, or 64bit as well? Alan: I notice Dave is adding calls to __builtin_bswap, perhaps some versions of the 64bit compiler did emit __bswapdi2 calls for that. Anton > > Signed-off-by: David Woodhouse <David.Woodhouse@intel.com> > --- > Untested. > > diff --git a/arch/powerpc/kernel/misc_32.S > b/arch/powerpc/kernel/misc_32.S index 19e096b..f077dc2 100644 > --- a/arch/powerpc/kernel/misc_32.S > +++ b/arch/powerpc/kernel/misc_32.S > @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2) > li r3,2 > blr > > +_GLOBAL(__bswapdi2) > + rlwinm 10,4,8,0xffffffff > + rlwinm 11,3,8,0xffffffff > + rlwimi 10,4,24,0,7 > + rlwimi 11,3,24,0,7 > + rlwimi 10,4,24,16,23 > + rlwimi 11,3,24,16,23 > + mr 4,11 > + mr 3,10 > + blr > + > _GLOBAL(abs) > srawi r4,r3,31 > xor r3,r3,r4 > diff --git a/arch/powerpc/kernel/misc_64.S > b/arch/powerpc/kernel/misc_64.S index 5cfa800..3b2e6e8 100644 > --- a/arch/powerpc/kernel/misc_64.S > +++ b/arch/powerpc/kernel/misc_64.S > @@ -234,6 +234,18 @@ _GLOBAL(__flush_dcache_icache) > isync > blr > > +_GLOBAL(__bswapdi2) > + srdi 8,3,32 > + rlwinm 7,3,8,0xffffffff > + rlwimi 7,3,24,0,7 > + rlwinm 9,8,8,0xffffffff > + rlwimi 7,3,24,16,23 > + rlwimi 9,8,24,0,7 > + rlwimi 9,8,24,16,23 > + sldi 7,7,32 > + or 7,7,9 > + mr 3,7 > + blr > > #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) > /* > diff --git a/arch/powerpc/kernel/ppc_ksyms.c > b/arch/powerpc/kernel/ppc_ksyms.c index 78b8766..c296665 100644 > --- a/arch/powerpc/kernel/ppc_ksyms.c > +++ b/arch/powerpc/kernel/ppc_ksyms.c > @@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3); > int __ucmpdi2(unsigned long long, unsigned long long); > EXPORT_SYMBOL(__ucmpdi2); > 
#endif > - > +long long __bswapdi2(long long); > +EXPORT_SYMBOL(__bswapdi2); > EXPORT_SYMBOL(memcpy); > EXPORT_SYMBOL(memset); > EXPORT_SYMBOL(memmove); >
David Woodhouse <dwmw2@infradead.org> wrote: > From: David Woodhouse <David.Woodhouse@intel.com> > > Some versions of GCC apparently expect this to be provided by libgcc. > > Signed-off-by: David Woodhouse <David.Woodhouse@intel.com> > --- > Untested. > > diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S > index 19e096b..f077dc2 100644 > --- a/arch/powerpc/kernel/misc_32.S > +++ b/arch/powerpc/kernel/misc_32.S > @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2) > li r3,2 > blr > > +_GLOBAL(__bswapdi2) > + rlwinm 10,4,8,0xffffffff > + rlwinm 11,3,8,0xffffffff > + rlwimi 10,4,24,0,7 > + rlwimi 11,3,24,0,7 > + rlwimi 10,4,24,16,23 > + rlwimi 11,3,24,16,23 > + mr 4,11 > + mr 3,10 > + blr > + This doesn't work for me but the below does: _GLOBAL(__bswapdi2) rotlwi r9,r4,8 rotlwi r10,r3,8 rlwimi r9,r4,24,0,7 rlwimi r10,r3,24,0,7 rlwimi r9,r4,24,16,23 rlwimi r10,r3,24,16,23 mr r4,r10 mr r3,r9 blr stolen from GCC -O2 output of: unsigned long long __bswapdi2(unsigned long long x) { return ((x & 0x00000000000000ffULL) << 56) | ((x & 0x000000000000ff00ULL) << 40) | ((x & 0x0000000000ff0000ULL) << 24) | ((x & 0x00000000ff000000ULL) << 8) | ((x & 0x000000ff00000000ULL) >> 8) | ((x & 0x0000ff0000000000ULL) >> 24) | ((x & 0x00ff000000000000ULL) >> 40) | ((x & 0xff00000000000000ULL) >> 56); } > _GLOBAL(abs) > srawi r4,r3,31 > xor r3,r3,r4 > diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S > index 5cfa800..3b2e6e8 100644 > --- a/arch/powerpc/kernel/misc_64.S > +++ b/arch/powerpc/kernel/misc_64.S > @@ -234,6 +234,18 @@ _GLOBAL(__flush_dcache_icache) > isync > blr > > +_GLOBAL(__bswapdi2) > + srdi 8,3,32 > + rlwinm 7,3,8,0xffffffff > + rlwimi 7,3,24,0,7 > + rlwinm 9,8,8,0xffffffff > + rlwimi 7,3,24,16,23 > + rlwimi 9,8,24,0,7 > + rlwimi 9,8,24,16,23 > + sldi 7,7,32 > + or 7,7,9 > + mr 3,7 > + blr This works but we should add "r" to the register names.
I'll repost Mikey > > #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) > /* > diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c > index 78b8766..c296665 100644 > --- a/arch/powerpc/kernel/ppc_ksyms.c > +++ b/arch/powerpc/kernel/ppc_ksyms.c > @@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3); > int __ucmpdi2(unsigned long long, unsigned long long); > EXPORT_SYMBOL(__ucmpdi2); > #endif > - > +long long __bswapdi2(long long); > +EXPORT_SYMBOL(__bswapdi2); > EXPORT_SYMBOL(memcpy); > EXPORT_SYMBOL(memset); > EXPORT_SYMBOL(memmove); > > -- > dwmw2 > > _______________________________________________ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev
On Mon, May 13, 2013 at 04:48:19PM +1000, Anton Blanchard wrote: > On Fri, 10 May 2013 22:18:27 +0100 > David Woodhouse <dwmw2@infradead.org> wrote: > > > From: David Woodhouse <David.Woodhouse@intel.com> > > > > Some versions of GCC apparently expect this to be provided by libgcc. > > Thanks Dave. We were discussing this with Alan Modra and he doesn't > think the 64bit target should ever emit a call to __bswapdi2. Did you > only see it on 32bit, or 64bit as well? > > Alan: I notice Dave is adding calls to __builtin_bswap, perhaps some > versions of the 64bit compiler did emit __bswapdi2 calls for that. I did a little digging, and it looks like gcc-4.4 will emit __bswapdi2 calls. Support in rs6000.md appeared 2009-06-25.
On Mon, May 13, 2013 at 05:09:59PM +1000, Michael Neuling wrote: > David Woodhouse <dwmw2@infradead.org> wrote: > > > From: David Woodhouse <David.Woodhouse@intel.com> > > > > Some versions of GCC apparently expect this to be provided by libgcc. > > > > Signed-off-by: David Woodhouse <David.Woodhouse@intel.com> > > --- > > Untested. > > > > diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S > > index 19e096b..f077dc2 100644 > > --- a/arch/powerpc/kernel/misc_32.S > > +++ b/arch/powerpc/kernel/misc_32.S > > @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2) > > li r3,2 > > blr > > > > +_GLOBAL(__bswapdi2) > > + rlwinm 10,4,8,0xffffffff > > + rlwinm 11,3,8,0xffffffff > > + rlwimi 10,4,24,0,7 > > + rlwimi 11,3,24,0,7 > > + rlwimi 10,4,24,16,23 > > + rlwimi 11,3,24,16,23 > > + mr 4,11 > > + mr 3,10 > > + blr > > + > > This doesn't work for me but the below does: > > _GLOBAL(__bswapdi2) > rotlwi r9,r4,8 > rotlwi r10,r3,8 > rlwimi r9,r4,24,0,7 > rlwimi r10,r3,24,0,7 > rlwimi r9,r4,24,16,23 > rlwimi r10,r3,24,16,23 > mr r4,r10 > mr r3,r9 > blr > Actually, I'd swap the two mr instructions to never have an instruction that uses the result from the previous one. 
> stolen from GCC -O2 output of: > unsigned long long __bswapdi2(unsigned long long x) > { > return ((x & 0x00000000000000ffULL) << 56) | > ((x & 0x000000000000ff00ULL) << 40) | > ((x & 0x0000000000ff0000ULL) << 24) | > ((x & 0x00000000ff000000ULL) << 8) | > ((x & 0x000000ff00000000ULL) >> 8) | > ((x & 0x0000ff0000000000ULL) >> 24) | > ((x & 0x00ff000000000000ULL) >> 40) | > ((x & 0xff00000000000000ULL) >> 56); > } > > > _GLOBAL(abs) > > srawi r4,r3,31 > > xor r3,r3,r4 > > diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S > > index 5cfa800..3b2e6e8 100644 > > --- a/arch/powerpc/kernel/misc_64.S > > +++ b/arch/powerpc/kernel/misc_64.S > > @@ -234,6 +234,18 @@ _GLOBAL(__flush_dcache_icache) > > isync > > blr > > > > +_GLOBAL(__bswapdi2) > > + srdi 8,3,32 > > + rlwinm 7,3,8,0xffffffff > > + rlwimi 7,3,24,0,7 > > + rlwinm 9,8,8,0xffffffff > > + rlwimi 7,3,24,16,23 > > + rlwimi 9,8,24,0,7 > > + rlwimi 9,8,24,16,23 > > + sldi 7,7,32 > > + or 7,7,9 > > + mr 3,7 > > + blr > > This works but we should add "r" to the register names. > And merge the last two instructions as a single "or r3,r7,r9". Gabriel
On Mon, 2013-05-13 at 16:50 +0930, Alan Modra wrote: > I did a little digging, and it looks like gcc-4.4 will emit __bswapdi2 > calls. Support in rs6000.md appeared 2009-06-25. That's where it was seen. I don't have anything that ancient any more so it didn't show up in my testing, but Al saw it on a Debian system.
On Mon, 2013-05-13 at 17:09 +1000, Michael Neuling wrote: > > This doesn't work for me but the below does: ... > > stolen from GCC -O2 output of: > unsigned long long __bswapdi2(unsigned long long x) Hm, so was mine: [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 .file "bswapdi2.c" .section ".text" .align 2 .p2align 4,,15 .globl __bswapdi2 .type __bswapdi2, @function __bswapdi2: rlwinm 10,4,8,0xffffffff rlwinm 11,3,8,0xffffffff rlwimi 10,4,24,0,7 rlwimi 11,3,24,0,7 rlwimi 10,4,24,16,23 rlwimi 11,3,24,16,23 mr 4,11 mr 3,10 blr .size __bswapdi2,.-__bswapdi2 .ident "GCC: (GNU) 4.7.2 20121105 (Red Hat 4.7.2-2.aa.20121114svn)" On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote: > Actually, I'd swap the two mr instructions to never > have an instruction that uses the result from the > previous one. Bad GCC. No biscuit. Should we file a PR?
On Mon, 2013-05-13 at 11:33 +0100, David Woodhouse wrote: > > On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote: > > Actually, I'd swap the two mr instructions to never > > have an instruction that uses the result from the > > previous one. > > Bad GCC. No biscuit. > > Should we file a PR? Maybe not. If you tell it to tune for an in-order machine like Cell, it swaps them round. Although now I'm confused about which of POWER[567] were in-order: [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 | grep -B1 mr rlwimi 11,3,24,16,23 mr 4,11 mr 3,10 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=cell | grep -B1 mr rlwimi 11,3,24,16,23 mr 3,10 mr 4,11 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power5 | grep -B1 mr rlwimi 11,3,24,16,23 mr 3,10 mr 4,11 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power6 | grep -B1 mr rlwimi 11,3,24,16,23 mr 4,11 mr 3,10 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power7 | grep -B1 mr rlwimi 11,3,24,16,23 mr 4,11 mr 3,10
"Linuxppc-dev" <linuxppc-dev-bounces+joakim.tjernlund=transmode.se@lists.ozlabs.org> wrote 2013/05/13 12:38:13: > > On Mon, 2013-05-13 at 11:33 +0100, David Woodhouse wrote: > > > > On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote: > > > Actually, I'd swap the two mr instructions to never > > > have an instruction that uses the result from the > > > previous one. > > > > Bad GCC. No biscuit. > > > > Should we file a PR? > > Maybe not. If you tell it to tune for an in-order machine like Cell, it > swaps them round. Although now I'm confused about which of POWER[567] > were in-order: > > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 4,11 > mr 3,10 > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=cell | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 3,10 > mr 4,11 > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power5 | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 3,10 > mr 4,11 > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power6 | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 4,11 > mr 3,10 > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power7 | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 4,11 > mr 3,10 A bit rusty on the ppc asm but can you not remove the mr completely: rlwimi 10,4,24,16,23 rlwimi 11,3,24,16,23 mr 4,11 mr 3,10 to rlwimi 4,4,24,16,23 rlwimi 3,3,24,16,23 Jocke
"Linuxppc-dev" <linuxppc-dev-bounces+joakim.tjernlund=transmode.se@lists.ozlabs.org> wrote on 2013/05/13 12:51:59: > > "Linuxppc-dev" > <linuxppc-dev-bounces+joakim.tjernlund=transmode.se@lists.ozlabs.org> > wrote 2013/05/13 12:38:13: > > > > On Mon, 2013-05-13 at 11:33 +0100, David Woodhouse wrote: > > > > > > On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote: > > > > Actually, I'd swap the two mr instructions to never > > > > have an instruction that uses the result from the > > > > previous one. > > > > > > Bad GCC. No biscuit. > > > > > > Should we file a PR? > > > > Maybe not. If you tell it to tune for an in-order machine like Cell, it > > swaps them round. Although now I'm confused about which of POWER[567] > > were in-order: > > > > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 | grep > -B1 mr > > rlwimi 11,3,24,16,23 > > mr 4,11 > > mr 3,10 > > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 > -mtune=cell | grep -B1 mr > > rlwimi 11,3,24,16,23 > > mr 3,10 > > mr 4,11 > > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 > -mtune=power5 | grep -B1 mr > > rlwimi 11,3,24,16,23 > > mr 3,10 > > mr 4,11 > > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 > -mtune=power6 | grep -B1 mr > > rlwimi 11,3,24,16,23 > > mr 4,11 > > mr 3,10 > > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 > -mtune=power7 | grep -B1 mr > > rlwimi 11,3,24,16,23 > > mr 4,11 > > mr 3,10 > > A bit rusty on the ppc asm but can you not remove the mr completely: > rlwimi 10,4,24,16,23 > rlwimi 11,3,24,16,23 > mr 4,11 > mr 3,10 > to > rlwimi 4,4,24,16,23 > rlwimi 3,3,24,16,23 Oops, that got twisted. Forget my comment.
On Mon, May 13, 2013 at 11:38:13AM +0100, David Woodhouse wrote: > On Mon, 2013-05-13 at 11:33 +0100, David Woodhouse wrote: > > > > On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote: > > > Actually, I'd swap the two mr instructions to never > > > have an instruction that uses the result from the > > > previous one. > > > > Bad GCC. No biscuit. > > > > Should we file a PR? > > Maybe not. If you tell it to tune for an in-order machine like Cell, it > swaps them round. Although now I'm confused about which of POWER[567] > were in-order: It was Power6 IIRC. On this kind of fine point, don't rely too much on what GCC produces. > > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 4,11 > mr 3,10 > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=cell | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 3,10 > mr 4,11 > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power5 | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 3,10 > mr 4,11 > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power6 | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 4,11 > mr 3,10 > [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power7 | grep -B1 mr > rlwimi 11,3,24,16,23 > mr 4,11 > mr 3,10 I don't know of any processor in which putting the mr 3,10 first can cause stalls, so even a generic tuning should put it first. Gabriel
>> I did a little digging, and it looks like gcc-4.4 will emit >> __bswapdi2 >> calls. Support in rs6000.md appeared 2009-06-25. > > That's where it was seen. I don't have anything that ancient any > more so > it didn't show up in my testing, but Al saw it on a Debian system. It should never happen on 32-bit -- it is broken into two bswapsi's -- although, old compiler, who knows. Lack of testing makes some people nervous though ;-) Segher
> rlwinm 10,4,8,0xffffffff > rlwinm 11,3,8,0xffffffff > rlwimi 10,4,24,0,7 > rlwimi 11,3,24,0,7 > rlwimi 10,4,24,16,23 > rlwimi 11,3,24,16,23 > mr 4,11 > mr 3,10 >> Actually, I'd swap the two mr instructions to never >> have an instruction that uses the result from the >> previous one. > > Bad GCC. No biscuit. > > Should we file a PR? This is scheduled just fine. Every pair of instructions here can execute together (on most CPUs, if not all); all instructions after it are dependent on previous instructions. There also is no issue (group) restriction that makes this scheduling suboptimal afaics. Segher
Hi Mikey, On Mon, 13 May 2013 17:09:59 +1000 Michael Neuling <mikey@neuling.org> wrote: > > This doesn't work for me but the below does: > > _GLOBAL(__bswapdi2) > rotlwi r9,r4,8 > rotlwi r10,r3,8 > rlwimi r9,r4,24,0,7 > rlwimi r10,r3,24,0,7 > rlwimi r9,r4,24,16,23 > rlwimi r10,r3,24,16,23 > mr r4,r10 > mr r3,r9 > blr > > stolen from GCC -O2 output of: > unsigned long long __bswapdi2(unsigned long long x) > { > return ((x & 0x00000000000000ffULL) << 56) | > ((x & 0x000000000000ff00ULL) << 40) | > ((x & 0x0000000000ff0000ULL) << 24) | > ((x & 0x00000000ff000000ULL) << 8) | > ((x & 0x000000ff00000000ULL) >> 8) | > ((x & 0x0000ff0000000000ULL) >> 24) | > ((x & 0x00ff000000000000ULL) >> 40) | > ((x & 0xff00000000000000ULL) >> 56); > } So, if we are just stealing the output of gcc, why not just use the C version (at least for 32 bit)?
On 14 May 2013 11:09, "Stephen Rothwell" <sfr@canb.auug.org.au> wrote: > > Hi Mikey, > > On Mon, 13 May 2013 17:09:59 +1000 Michael Neuling <mikey@neuling.org> wrote: > > > > This doesn't work for me but the below does: > > > > _GLOBAL(__bswapdi2) > > rotlwi r9,r4,8 > > rotlwi r10,r3,8 > > rlwimi r9,r4,24,0,7 > > rlwimi r10,r3,24,0,7 > > rlwimi r9,r4,24,16,23 > > rlwimi r10,r3,24,16,23 > > mr r4,r10 > > mr r3,r9 > > blr > > > > stolen from GCC -O2 output of: > > unsigned long long __bswapdi2(unsigned long long x) > > { > > return ((x & 0x00000000000000ffULL) << 56) | > > ((x & 0x000000000000ff00ULL) << 40) | > > ((x & 0x0000000000ff0000ULL) << 24) | > > ((x & 0x00000000ff000000ULL) << 8) | > > ((x & 0x000000ff00000000ULL) >> 8) | > > ((x & 0x0000ff0000000000ULL) >> 24) | > > ((x & 0x00ff000000000000ULL) >> 40) | > > ((x & 0xff00000000000000ULL) >> 56); > > } > > So, if we are just stealing the output of gcc, why not just use the C > version (at least for 32 bit)? Woodhouse: can we just do this? Mikey
On Tue, 2013-05-14 at 11:25 +1000, Michael Neuling wrote: > > > So, if we are just stealing the output of gcc, why not just use the C > > version (at least for 32 bit)? > > Woodhouse: can we just do this? Sure, if you don't mind GCC optimising the contents of your C function by turning it into a call to libgcc's __bswapdi2() :) OK, you might be able to do some archaeology and determine that the only compiler that emits calls to __bswapdi2() is GCC 4.4, and furthermore that the same compiler *doesn't* have the wit to notice that the contents of the function are a 64-bit byteswap, so it's never going to happen. But I don't like that approach. I'd feel I have to sacrifice a goat *anyway*, and I don't have a spare goat. Although now I come to explicitly explain why I did it that way... it occurs to me that the libgcc version is just written in C, and the compiler evidently trusts itself not to optimise that into a recursive call. Is there a compiler switch which guarantees that, which we could use without other unwanted side-effects?
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 19e096b..f077dc2 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2) li r3,2 blr +_GLOBAL(__bswapdi2) + rlwinm 10,4,8,0xffffffff + rlwinm 11,3,8,0xffffffff + rlwimi 10,4,24,0,7 + rlwimi 11,3,24,0,7 + rlwimi 10,4,24,16,23 + rlwimi 11,3,24,16,23 + mr 4,11 + mr 3,10 + blr + _GLOBAL(abs) srawi r4,r3,31 xor r3,r3,r4 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 5cfa800..3b2e6e8 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -234,6 +234,18 @@ _GLOBAL(__flush_dcache_icache) isync blr +_GLOBAL(__bswapdi2) + srdi 8,3,32 + rlwinm 7,3,8,0xffffffff + rlwimi 7,3,24,0,7 + rlwinm 9,8,8,0xffffffff + rlwimi 7,3,24,16,23 + rlwimi 9,8,24,0,7 + rlwimi 9,8,24,16,23 + sldi 7,7,32 + or 7,7,9 + mr 3,7 + blr #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) /* diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 78b8766..c296665 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3); int __ucmpdi2(unsigned long long, unsigned long long); EXPORT_SYMBOL(__ucmpdi2); #endif - +long long __bswapdi2(long long); +EXPORT_SYMBOL(__bswapdi2); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove);