diff mbox

[RFC,03/19] powerpc: gamecube: bootwrapper bits

Message ID 1258927311-4340-4-git-send-email-albert_herranz@yahoo.es (mailing list archive)
State Changes Requested
Headers show

Commit Message

Albert Herranz Nov. 22, 2009, 10:01 p.m. UTC
Add support for the Nintendo GameCube video game console to the powerpc
bootwrapper.

dtbImage.gamecube is a wrapped image that contains a flat device tree,
an entry point compatible with SDload, and an optional initrd.

Signed-off-by: Albert Herranz <albert_herranz@yahoo.es>
---
 arch/powerpc/boot/Makefile   |    4 ++-
 arch/powerpc/boot/gamecube.c |   78 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 1 deletions(-)
 create mode 100644 arch/powerpc/boot/gamecube.c

Comments

Grant Likely Nov. 22, 2009, 11:11 p.m. UTC | #1
On Sun, Nov 22, 2009 at 3:01 PM, Albert Herranz <albert_herranz@yahoo.es> wrote:
> Add support for the Nintendo GameCube video game console to the powerpc
> bootwrapper.
>
> dtbImage.gamecube is a wrapped image that contains a flat device tree,
> an entry point compatible with SDload, and an optional initrd.
>
> Signed-off-by: Albert Herranz <albert_herranz@yahoo.es>

Acked-by: Grant Likely <grant.likely@secretlab.ca>

> ---
>  arch/powerpc/boot/Makefile   |    4 ++-
>  arch/powerpc/boot/gamecube.c |   78 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 81 insertions(+), 1 deletions(-)
>  create mode 100644 arch/powerpc/boot/gamecube.c
>
> diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
> index 44bce21..b2f06b0 100644
> --- a/arch/powerpc/boot/Makefile
> +++ b/arch/powerpc/boot/Makefile
> @@ -76,7 +76,8 @@ src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c
>                cuboot-katmai.c cuboot-rainier.c redboot-8xx.c ep8248e.c \
>                cuboot-warp.c cuboot-85xx-cpm2.c cuboot-yosemite.c simpleboot.c \
>                virtex405-head.S virtex.c redboot-83xx.c cuboot-sam440ep.c \
> -               cuboot-acadia.c cuboot-amigaone.c cuboot-kilauea.c
> +               cuboot-acadia.c cuboot-amigaone.c cuboot-kilauea.c \
> +               gamecube.c
>  src-boot := $(src-wlib) $(src-plat) empty.c
>
>  src-boot := $(addprefix $(obj)/, $(src-boot))
> @@ -254,6 +255,7 @@ image-$(CONFIG_KSI8560)                     += cuImage.ksi8560
>  image-$(CONFIG_STORCENTER)             += cuImage.storcenter
>  image-$(CONFIG_MPC7448HPC2)            += cuImage.mpc7448hpc2
>  image-$(CONFIG_PPC_C2K)                        += cuImage.c2k
> +image-$(CONFIG_GAMECUBE)               += dtbImage.gamecube
>
>  # Board port in arch/powerpc/platform/amigaone/Kconfig
>  image-$(CONFIG_AMIGAONE)               += cuImage.amigaone
> diff --git a/arch/powerpc/boot/gamecube.c b/arch/powerpc/boot/gamecube.c
> new file mode 100644
> index 0000000..0d6c517
> --- /dev/null
> +++ b/arch/powerpc/boot/gamecube.c
> @@ -0,0 +1,78 @@
> +/*
> + * arch/powerpc/boot/gamecube.c
> + *
> + * Nintendo GameCube bootwrapper support
> + * Copyright (C) 2004-2009 The GameCube Linux Team
> + * Copyright (C) 2008,2009 Albert Herranz
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + */
> +
> +#include <stddef.h>
> +#include "stdio.h"
> +#include "types.h"
> +#include "io.h"
> +#include "ops.h"
> +
> +#include "ugecon.h"
> +
> +BSS_STACK(8192);
> +
> +/*
> + * We enter with the cache enabled, the MMU enabled and some known legacy
> + * memory mappings active. xBAT3 is unused.
> + *
> + * We leave the MMU enabled, but we switch to an identity mapped memory
> + * scheme as expected by the start code.
> + *
> + */
> +asm ("\n\
> +.text\n\
> +.globl _zimage_start\n\
> +_zimage_start:\n\
> +\n\
> +       isync\n\
> +       /* IBAT3,DBAT3 for first 16Mbytes */\n\
> +       li      8, 0x01ff       /* 16MB */\n\
> +       li      9, 0x0002       /* rw */\n\
> +       mtspr   0x216, 8        /* IBAT3U */\n\
> +       mtspr   0x217, 9        /* IBAT3L */\n\
> +       mtspr   0x21e, 8        /* DBAT3U */\n\
> +       mtspr   0x21f, 9        /* DBAT3L */\n\
> +\n\
> +       sync\n\
> +       isync\n\
> +\n\
> +       li      3, 0\n\
> +       li      4, 0\n\
> +       li      5, 0\n\
> +\n\
> +       bcl-    20,4*cr7+so,1f\n\
> +1:\n\
> +       mflr    8\n\
> +       clrlwi  8, 8, 3\n\
> +       addi    8, 8, 2f - 1b\n\
> +       mtlr    8\n\
> +       blr\n\
> +2:\n\
> +       b _zimage_start_lib\n\
> +");
> +
> +/*
> + *
> + */
> +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
> +{
> +       u32 heapsize = 16*1024*1024 - (u32)_end;
> +
> +       simple_alloc_init(_end, heapsize, 32, 64);
> +       fdt_init(_dtb_start);
> +
> +       if (!ug_grab_io_base() && ug_is_adapter_present())
> +               console_ops.write = ug_console_write;
> +}
> +
> --
> 1.6.3.3
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
Segher Boessenkool Nov. 24, 2009, 12:08 a.m. UTC | #2
Hi Albert,

> +asm ("\n\


A file scope asm?!  Please don't.

> + * We enter with the cache enabled, the MMU enabled and some known  
> legacy
> + * memory mappings active. xBAT3 is unused

It would be good if you could depend as little as possible on these  
things;
that makes writing another bootloader a lot easier.

> +	/* IBAT3,DBAT3 for first 16Mbytes */\n\
> +	li	8, 0x01ff	/* 16MB */\n\
> +	li      9, 0x0002	/* rw */\n\
> +	mtspr   0x216, 8	/* IBAT3U */\n\
> +	mtspr   0x217, 9	/* IBAT3L */\n\
> +	mtspr   0x21e, 8	/* DBAT3U */\n\
> +	mtspr   0x21f, 9	/* DBAT3L */\n\

WIMG=0000, are you sure?  Not M=1?

> +	bcl-    20,4*cr7+so,1f\n\

Just write  bcl 20,31,1f .


Segher
Albert Herranz Nov. 24, 2009, 5:38 p.m. UTC | #3
Segher Boessenkool wrote:
> Hi Albert,
> 
>> +asm ("\n\
> 
> 
> A file scope asm?!  Please don't.
> 

So what's your proposal then? Placing it within a fake func?
That asm snippet is the entry point. I followed the way prpmc2800.c deals with that: providing a custom version of the (weak) _zimage_start.

>> + * We enter with the cache enabled, the MMU enabled and some known
>> legacy
>> + * memory mappings active. xBAT3 is unused
> 
> It would be good if you could depend as little as possible on these things;
> that makes writing another bootloader a lot easier.
> 

Ok. I'll take an approach similar to the one used in the Wii bootwrapper.

>> +    /* IBAT3,DBAT3 for first 16Mbytes */\n\
>> +    li    8, 0x01ff    /* 16MB */\n\
>> +    li      9, 0x0002    /* rw */\n\
>> +    mtspr   0x216, 8    /* IBAT3U */\n\
>> +    mtspr   0x217, 9    /* IBAT3L */\n\
>> +    mtspr   0x21e, 8    /* DBAT3U */\n\
>> +    mtspr   0x21f, 9    /* DBAT3L */\n\
> 
> WIMG=0000, are you sure?  Not M=1?
> 

To be honest, I don't recall the details now.
But it was tested in the very early days, the result was not the expected one and, in the end, manual cache coherency management was still needed.

So everything is designed and working assuming M=0.
This can be re-checked again later if needed.

>> +    bcl-    20,4*cr7+so,1f\n\
> 
> Just write  bcl 20,31,1f .

Ok, I used two variants for this and I know which one you like now ;).

> 
> 
> Segher
> 
> 

Thanks,
Albert
Segher Boessenkool Nov. 24, 2009, 9 p.m. UTC | #4
>>> +asm ("\n\
>>
>>
>> A file scope asm?!  Please don't.
>>
> So what's your proposal then? Placing it within a fake func?
> That asm snippet is the entry point. I took as an example how  
> prpmc2800.c deals with that, providing an own version of the (weak)  
> _zImage_start.

Use an assembler source file.  You'll get much nicer syntax as well
(none of that \n stuff).

>>> +    /* IBAT3,DBAT3 for first 16Mbytes */\n\
>>> +    li    8, 0x01ff    /* 16MB */\n\
>>> +    li      9, 0x0002    /* rw */\n\
>>> +    mtspr   0x216, 8    /* IBAT3U */\n\
>>> +    mtspr   0x217, 9    /* IBAT3L */\n\
>>> +    mtspr   0x21e, 8    /* DBAT3U */\n\
>>> +    mtspr   0x21f, 9    /* DBAT3L */\n\
>>
>> WIMG=0000, are you sure?  Not M=1?
>
> To be honest, I don't recall the details now.
> But it was tested in the very early days, the result was not the  
> expected one and, in the end, manual cache coherency management was  
> still needed.

Sure, the memory controllers don't do coherency.  I'm slightly worried
about two things:
1) Will the generic code use M=0 as well?  Is it a problem if it  
doesn't?
2) Do lwarx etc. work with M=0?

And a question: does M=0 actually give better performance (lower bus
utilisation, and maybe saves a few cycles)?


Segher
Albert Herranz Nov. 24, 2009, 11:45 p.m. UTC | #5
Segher Boessenkool wrote:
>> So what's your proposal then? Placing it within a fake func?
>> That asm snippet is the entry point. I took as an example how
>> prpmc2800.c deals with that, providing an own version of the (weak)
>> _zImage_start.
> 
> Use an assembler source file.  You'll get much nicer syntax as well
> (none of that \n stuff).
> 

I found it cleaner to embed the entry point code in the .c file and avoid touching the wrapper script.
But I'm fine with that if it is the way to go (and in the end I had to touch the wrapper anyway, to increase the link address...).

>>> WIMG=0000, are you sure?  Not M=1?
>>
>> To be honest, I don't recall the details now.
>> But it was tested in the very early days, the result was not the
>> expected one and, in the end, manual cache coherency management was
>> still needed.
> 
> Sure, the memory controllers don't do coherency.  I'm slightly worried
> about two things:
> 1) Will the generic code use M=0 as well?  Is it a problem if it doesn't?
> 2) Do lwarx. etc. work in M=0?
> 
> And a question: does M=0 actually give better performance (lower bus
> utilisation, and maybe saves a few cycles)?
> 

I think that the generic code uses M=0 _except_ for SMP and some platforms (see comment in cputable.h).
And yes, the generic code works with these processors :)

M=0 should have a lower bus utilization, yes.
Also M=0 is a requirement if you use some Gekko/Broadway features like the locked (half-)cache.

Thanks,
Albert
Segher Boessenkool Nov. 25, 2009, 4:53 p.m. UTC | #6
>>>> WIMG=0000, are you sure?  Not M=1?
>>>
>>> To be honest, I don't recall the details now.
>>> But it was tested in the very early days, the result was not the
>>> expected one and, in the end, manual cache coherency management was
>>> still needed.
>>
>> Sure, the memory controllers don't do coherency.  I'm slightly worried
>> about two things:
>> 1) Will the generic code use M=0 as well?  Is it a problem if it
>> doesn't?
>> 2) Do lwarx. etc. work in M=0?
>>
>> And a question: does M=0 actually give better performance (lower bus
>> utilisation, and maybe saves a few cycles)?
>
> I think that the generic code uses M=0 _except_ for SMP and some platforms
> (see comment in cputable.h).
> And yes, the generic code works with these processors :)

I meant: if it doesn't, does that cause conflicts?  But I did some
reading up and M=0 is indeed the way to go, so all is fine :-)

> M=0 should have a lower bus utilization, yes.
> Also M=0 is a requirement if you use some Gekko/Broadway features like the
> locked (half-)cache.

Well we don't use that, but point taken.


Segher
Benjamin Herrenschmidt Nov. 26, 2009, 4:35 a.m. UTC | #7
On Tue, 2009-11-24 at 18:38 +0100, Albert Herranz wrote:
> Segher Boessenkool wrote:
> > Hi Albert,
> > 
> >> +asm ("\n\
> > 
> > 
> > A file scope asm?!  Please don't.
> > 
> 
> So what's your proposal then? Placing it within a fake func?

Just do a .S file :-)

> That asm snippet is the entry point. I took as an example how prpmc2800.c
> deals with that, providing an own version of the (weak) _zImage_start.

Right but I agree with Segher here, it would be nicer as a .S file.

> >> + * We enter with the cache enabled, the MMU enabled and some known
> >> legacy
> >> + * memory mappings active. xBAT3 is unused
> > 
> > It would be good if you could depend as little as possible on these things;
> > that makes writing another bootloader a lot easier.
> > 
> 
> Ok. I'll do a similar approach as done on the wii bootwrapper.
> 
> >> +    /* IBAT3,DBAT3 for first 16Mbytes */\n\
> >> +    li    8, 0x01ff    /* 16MB */\n\
> >> +    li      9, 0x0002    /* rw */\n\
> >> +    mtspr   0x216, 8    /* IBAT3U */\n\
> >> +    mtspr   0x217, 9    /* IBAT3L */\n\
> >> +    mtspr   0x21e, 8    /* DBAT3U */\n\
> >> +    mtspr   0x21f, 9    /* DBAT3L */\n\
> > 
> > WIMG=0000, are you sure?  Not M=1?
> > 
> 
> To be honest, I don't recall the details now.
> But it was tested in the very early days, the result was not the expected one and,
> in the end, manual cache coherency management was still needed.

Ouch. I wouldn't be surprised if those guys don't do cache coherency
in the bridge anyways.

> So everything is designed and working assuming M=0.
> This can be re-checked again later if needed.

Agreed.

> >> +    bcl-    20,4*cr7+so,1f\n\
> > 
> > Just write  bcl 20,31,1f .
> 
> Ok, I used two variants for this and I know which one you like now ;).

Cheers,
Ben.

> > 
> > 
> > Segher
> > 
> > 
> 
> Thanks,
> Albert
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
Benjamin Herrenschmidt Nov. 26, 2009, 4:36 a.m. UTC | #8
On Tue, 2009-11-24 at 22:00 +0100, Segher Boessenkool wrote:
> 
> Sure, the memory controllers don't do coherency.  I'm slightly worried
> about two things:
> 1) Will the generic code use M=0 as well?  Is it a problem if it  
> doesn't?

We can make it not do it.

> 2) Do lwarx. etc. work in M=0? 

They should hopefully... as long as you don't rely on the reservation
blowing as a result of a DMA write.

Cheers,
Ben
Gabriel Paubert Nov. 26, 2009, 8:17 a.m. UTC | #9
On Thu, Nov 26, 2009 at 03:36:56PM +1100, Benjamin Herrenschmidt wrote:
> On Tue, 2009-11-24 at 22:00 +0100, Segher Boessenkool wrote:
> > 
> > Sure, the memory controllers don't do coherency.  I'm slightly worried
> > about two things:
> > 1) Will the generic code use M=0 as well?  Is it a problem if it  
> > doesn't?
> 
> We can make it not do it.
> 
> > 2) Do lwarx. etc. work in M=0? 
> 
> They should hopefully... as long as you don't rely on the reservation
> blowing as a result of a DMA write.

Hmm, this really depends on whether the DMA transfers generate bus cycles
that require coherency or not. Not the other way around. M=1 only forces
bus cycles to be snooped by other processors (asserting the GBL signal
on 603/604/750 busses).

The host bridge is free to systematically snoop processor accesses (to make 
sure that data queued in the bridge and not yet written to memory is seen
in the coherent memory domain even if, for example, interrupts propagate 
so fast that DMA target addresses are accessed before it is written to RAM).

On memory coherent systems, the host bridge has to assert the GBL signal,
to force data to be written to memory (for most DMA accesses), or to  
invalidate caches (for full line writes from devices).

	Gabriel
Benjamin Herrenschmidt Nov. 26, 2009, 8:46 a.m. UTC | #10
On Thu, 2009-11-26 at 09:17 +0100, Gabriel Paubert wrote:
> 
> > They should hopefully... as long as you don't rely on the reservation
> > blowing as a result of a DMA write.
> 
> Hmm, this really depends on whether the DMA transfers generate bus cycles
> that require coherency or not. Not the other way around. M=1 only forces
> bus cycles to be snooped by other processors (asserting the GBL signal
> on 603/604/750 busses).

You are absolutely right. Which makes it even more likely that
lwarx/stwcx. won't care unless the L2 cache plays tricks.

> The host bridge is free to systematically snoop processor accesses (to make 
> sure that data queued in the bridge and not yet written to memory is seen
> in the coherent memory domain even if, for example, interrupts propagate 
> so fast that DMA target addresses are accessed before it is written to RAM).
> 
> On memory coherent systems, the host bridge has to assert the GBL signal,
> to force data to be written to memory (for most DMA accesses), or to  
> invalidate caches (for full line writes from devices). 

Cheers,
Ben.
Segher Boessenkool Nov. 26, 2009, 10:23 p.m. UTC | #11
>> So what's your proposal then? Placing it within a fake func?
>
> Just do a .S file :-)

Yeah.  You might be able to do one that handles both GC and Wii,
maybe it's easier/clearer to keep them separate though.

> Ouch. I wouldn't be surprised if those guys don't do cache coherency
> in the bridge anyways.

That is correct; we haven't found any way to enable it, at least.
To be fair, it's actually better for performance to _not_ do RAM
coherency when used as a game system.


Segher
Segher Boessenkool Nov. 26, 2009, 11:06 p.m. UTC | #12
>>> Sure, the memory controllers don't do coherency.  I'm slightly  
>>> worried
>>> about two things:
>>> 1) Will the generic code use M=0 as well?  Is it a problem if it
>>> doesn't?
>>
>> We can make it not do it.
>>
>>> 2) Do lwarx. etc. work in M=0?
>>
>> They should hopefully... as long as you don't rely on the reservation
>> blowing as a result of a DMA write.
>
> Hmm, this really depends on whether the DMA transfers generate bus  
> cycles
> that require coherency or not.

They do not; device DMA never goes to the 6xx bus with this bridge.

> Not the other way around. M=1 only forces
> bus cycles to be snooped by other processors (asserting the GBL signal
> on 603/604/750 busses).

Right, it enables sending probes, not receiving them.  On this CPU  
anyway.
The architecture specification is quite silent on this all.


Segher
diff mbox

Patch

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 44bce21..b2f06b0 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -76,7 +76,8 @@  src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c
 		cuboot-katmai.c cuboot-rainier.c redboot-8xx.c ep8248e.c \
 		cuboot-warp.c cuboot-85xx-cpm2.c cuboot-yosemite.c simpleboot.c \
 		virtex405-head.S virtex.c redboot-83xx.c cuboot-sam440ep.c \
-		cuboot-acadia.c cuboot-amigaone.c cuboot-kilauea.c
+		cuboot-acadia.c cuboot-amigaone.c cuboot-kilauea.c \
+		gamecube.c
 src-boot := $(src-wlib) $(src-plat) empty.c
 
 src-boot := $(addprefix $(obj)/, $(src-boot))
@@ -254,6 +255,7 @@  image-$(CONFIG_KSI8560)			+= cuImage.ksi8560
 image-$(CONFIG_STORCENTER)		+= cuImage.storcenter
 image-$(CONFIG_MPC7448HPC2)		+= cuImage.mpc7448hpc2
 image-$(CONFIG_PPC_C2K)			+= cuImage.c2k
+image-$(CONFIG_GAMECUBE)		+= dtbImage.gamecube
 
 # Board port in arch/powerpc/platform/amigaone/Kconfig
 image-$(CONFIG_AMIGAONE)		+= cuImage.amigaone
diff --git a/arch/powerpc/boot/gamecube.c b/arch/powerpc/boot/gamecube.c
new file mode 100644
index 0000000..0d6c517
--- /dev/null
+++ b/arch/powerpc/boot/gamecube.c
@@ -0,0 +1,78 @@ 
+/*
+ * arch/powerpc/boot/gamecube.c
+ *
+ * Nintendo GameCube bootwrapper support
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stddef.h>
+#include "stdio.h"
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+
+#include "ugecon.h"
+
+BSS_STACK(8192);
+
+/*
+ * We enter with the cache enabled, the MMU enabled and some known legacy
+ * memory mappings active. xBAT3 is unused.
+ *
+ * We leave the MMU enabled, but we switch to an identity mapped memory
+ * scheme as expected by the start code.
+ *
+ */
+asm ("\n\
+.text\n\
+.globl _zimage_start\n\
+_zimage_start:\n\
+\n\
+	isync\n\
+	/* IBAT3,DBAT3 for first 16Mbytes */\n\
+	li	8, 0x01ff	/* 16MB */\n\
+	li      9, 0x0002	/* rw */\n\
+	mtspr   0x216, 8	/* IBAT3U */\n\
+	mtspr   0x217, 9	/* IBAT3L */\n\
+	mtspr   0x21e, 8	/* DBAT3U */\n\
+	mtspr   0x21f, 9	/* DBAT3L */\n\
+\n\
+	sync\n\
+	isync\n\
+\n\
+	li	3, 0\n\
+	li	4, 0\n\
+	li	5, 0\n\
+\n\
+	bcl-    20,4*cr7+so,1f\n\
+1:\n\
+	mflr    8\n\
+	clrlwi  8, 8, 3\n\
+	addi    8, 8, 2f - 1b\n\
+	mtlr    8\n\
+	blr\n\
+2:\n\
+	b _zimage_start_lib\n\
+");
+
+/*
+ *
+ */
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
+{
+	u32 heapsize = 16*1024*1024 - (u32)_end;
+
+	simple_alloc_init(_end, heapsize, 32, 64);
+	fdt_init(_dtb_start);
+
+	if (!ug_grab_io_base() && ug_is_adapter_present())
+		console_ops.write = ug_console_write;
+}
+