Patchwork [U-Boot,RFC,v2] ARM: Avoid compiler optimization for usages of readb, writeb and friends.

login
register
mail settings
Submitter Alexander Holler
Date Dec. 18, 2010, 10:27 p.m.
Message ID <1292711230-3234-1-git-send-email-holler@ahsoftware.de>
Download mbox | patch
Permalink /patch/76100/
State Superseded
Headers show

Comments

Alexander Holler - Dec. 18, 2010, 10:27 p.m.
gcc 4.5.1 seems to ignore (at least some) volatile definitions,
avoid that as done in the kernel.

Reading C99 6.7.3 8 and the comment 114) there, I think it is a bug of that
gcc version to ignore the volatile type qualifier used e.g. in __arch_getl().
Anyway, using a definition as in the kernel headers avoids such optimizations when
gcc 4.5.1 is used.

Maybe the headers as used in the current linux-kernel should be used,
but to avoid large changes, I've just added a small change to the current headers.

Signed-off-by: Alexander Holler <holler@ahsoftware.de>
---
 arch/arm/include/asm/io.h |   20 ++++++++++++++------
 1 files changed, 14 insertions(+), 6 deletions(-)
Dirk Behme - Dec. 19, 2010, 7:51 a.m.
On 18.12.2010 23:27, Alexander Holler wrote:
> gcc 4.5.1 seems to ignore (at least some) volatile definitions,
> avoid that as done in the kernel.
>
> Reading C99 6.7.3 8 and the comment 114) there, I think it is a bug of that
> gcc version to ignore the volatile type qualifier used e.g. in __arch_getl().
> Anyway, using a definition as in the kernel headers avoids such optimizations when
> gcc 4.5.1 is used.
>
> Maybe the headers as used in the current linux-kernel should be used,
> but to avoid large changes, I've just added a small change to the current headers.
>
> Signed-off-by: Alexander Holler<holler@ahsoftware.de>
> ---
>   arch/arm/include/asm/io.h |   20 ++++++++++++++------
>   1 files changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> index ff1518e..5364b78 100644
> --- a/arch/arm/include/asm/io.h
> +++ b/arch/arm/include/asm/io.h
> @@ -125,13 +125,21 @@ extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
>   #define __raw_readw(a)			__arch_getw(a)
>   #define __raw_readl(a)			__arch_getl(a)
>
> -#define writeb(v,a)			__arch_putb(v,a)
> -#define writew(v,a)			__arch_putw(v,a)
> -#define writel(v,a)			__arch_putl(v,a)
> +/*
> + * TODO: The kernel offers some more advanced versions of barriers, it might
> + * have some advantages to use them instead of the simple one here.
> + */
> +#define dmb()				__asm__ __volatile__ ("" : : : "memory")
> +#define __iormb()			dmb()
> +#define __iowmb()			dmb()
> +
> +#define writeb(v,c)			({ __iowmb(); __arch_putb(v,c); })
> +#define writew(v,c)			({ __iowmb(); __arch_putw(v,c); })
> +#define writel(v,c)			({ __iowmb(); __arch_putl(v,c); })
>
> -#define readb(a)			__arch_getb(a)
> -#define readw(a)			__arch_getw(a)
> -#define readl(a)			__arch_getl(a)
> +#define readb(c)			({ u8  __v = __arch_getb(c); __iormb(); __v; })
> +#define readw(c)			({ u16 __v = __arch_getw(c); __iormb(); __v; })
> +#define readl(c)			({ u32 __v = __arch_getl(c); __iormb(); __v; })

Using the test code below [1] and then looking at the disassembly from 
the two tool chains gcc version 4.3.3 (Sourcery G++ Lite 2009q1-203) 
versus gcc version 4.5.1 (Sourcery G++ Lite 2010.09-50): Yes, without 
the additional dmb() the gcc 4.5.1 just creates

00000000 <main>:
    0:	e3a00000 	mov	r0, #0
    4:	e12fff1e 	bx	lr

while with the additional dmb() it creates

00000000 <main>:
    0:	e59f300c 	ldr	r3, [pc, #12]	; 14 <main+0x14>
    4:	e5932028 	ldr	r2, [r3, #40]	; 0x28
    8:	e5930028 	ldr	r0, [r3, #40]	; 0x28
    c:	e0620000 	rsb	r0, r2, r0
   10:	e12fff1e 	bx	lr
   14:	48318000

what looks correct. And 4.3.3 does the same code for both readl() 
versions. So:

Acked-by: Dirk Behme <dirk.behme@googlemail.com>

Thanks

Dirk

[1]

arm-none-linux-gnueabi-gcc -Wall -O2 -c foo.c -o foo.o
arm-none-linux-gnueabi-objdump -D foo.o > foo.dis

-- foo.c --
struct gptimer {
	unsigned int tidr;	/* 0x00 r */
	unsigned char res[0xc];
	unsigned int tiocp_cfg;	/* 0x10 rw */
	unsigned int tistat;	/* 0x14 r */
	unsigned int tisr;	/* 0x18 rw */
	unsigned int tier;	/* 0x1c rw */
	unsigned int twer;	/* 0x20 rw */
	unsigned int tclr;	/* 0x24 rw */
	unsigned int tcrr;	/* 0x28 rw */
	unsigned int tldr;	/* 0x2c rw */
	unsigned int ttgr;	/* 0x30 rw */
	unsigned int twpc;	/* 0x34 r*/
	unsigned int tmar;	/* 0x38 rw*/
	unsigned int tcar1;	/* 0x3c r */
	unsigned int tcicr;	/* 0x40 rw */
	unsigned int tcar2;	/* 0x44 r */
};


#define dmb()		  __asm__ __volatile__ ("" : : : "memory")
#define __iormb()	  dmb()

#define __arch_getl(a)	  (*(volatile unsigned int *)(a))
#define readl(a)	  __arch_getl(a)
//#define readl(c)	  ({ unsigned int __v = __arch_getl(c); __iormb(); 
__v; })

int main(void) {

   struct gptimer *gpt1_base = (struct gptimer *)0x48318000;
   unsigned int cdiff, cstart, cend;

   cstart = readl(&gpt1_base->tcrr);

   cend = readl(&gpt1_base->tcrr);

   cdiff = cend - cstart;

   return cdiff;

}
-- foo.c --
Alexander Holler - Dec. 19, 2010, 10:22 a.m.
Hello,

On 19.12.2010 08:51, Dirk Behme wrote:
> On 18.12.2010 23:27, Alexander Holler wrote:
>> gcc 4.5.1 seems to ignore (at least some) volatile definitions,
>> avoid that as done in the kernel.
>>

> Acked-by: Dirk Behme <dirk.behme@googlemail.com>

Thanks for the ack, but I have to say, that those barriers are having 
side effects here. Reading NAND now fails on my BeagleBoard. Regardless 
if I use gcc 4.3.5 or gcc 4.5.1, after applying that patch the wrong ID 
of the NAND is read. In nand_get_flash_type() 
(drivers/mtd/nand/nand_base.c) without that patch I will get the following:

*maf_id: 44, dev_id: 186

with the patch the following is read:

*maf_id: 128, dev_id: 85

Which just is wrong.

I haven't looked further up to now, maybe thats just a side effect of 
some wrong clock settings because of different timings through those 
barrieres or whatever.

Regards,

Alexander
Albert ARIBAUD - Dec. 19, 2010, 11:28 a.m.
Le 19/12/2010 11:22, Alexander Holler a écrit :
> Hello,
>
> On 19.12.2010 08:51, Dirk Behme wrote:
>> On 18.12.2010 23:27, Alexander Holler wrote:
>>> gcc 4.5.1 seems to ignore (at least some) volatile definitions,
>>> avoid that as done in the kernel.
>>>
>
>> Acked-by: Dirk Behme<dirk.behme@googlemail.com>
>
> Thanks for the ack, but I have to say, that those barriers are having
> side effects here. Reading NAND now fails on my BeagleBoard. Regardless
> if I use gcc 4.3.5 or gcc 4.5.1, after applying that patch the wrong ID
> of the NAND is read. In nand_get_flash_type()
> (drivers/mtd/nand/nand_base.c) without that patch I will get the following:
>
> *maf_id: 44, dev_id: 186
>
> with the patch the following is read:
>
> *maf_id: 128, dev_id: 85
>
> Which just is wrong.
>
> I haven't looked further up to now, maybe thats just a side effect of
> some wrong clock settings because of different timings through those
> barrieres or whatever.

IIUC, the patch is adding barriers to every HW register write and read, 
which makes the compiler stop reordering these, and keep them ordered as 
in the source code. Are you sure that the order as laid out in the 
source is the right one? Maybe you were just luck so far that the 
reordering masked a bug.

Amicalement,
Alexander Holler - Dec. 19, 2010, 4:34 p.m.
Hello,

Am 19.12.2010 12:28, schrieb Albert ARIBAUD:
> Le 19/12/2010 11:22, Alexander Holler a écrit :
>> Hello,
>>
>> On 19.12.2010 08:51, Dirk Behme wrote:
>>> On 18.12.2010 23:27, Alexander Holler wrote:
>>>> gcc 4.5.1 seems to ignore (at least some) volatile definitions,
>>>> avoid that as done in the kernel.
>>>>
>>
>>> Acked-by: Dirk Behme<dirk.behme@googlemail.com>
>>
>> Thanks for the ack, but I have to say, that those barriers are having
>> side effects here. Reading NAND now fails on my BeagleBoard. Regardless
>> if I use gcc 4.3.5 or gcc 4.5.1, after applying that patch the wrong ID
>> of the NAND is read. In nand_get_flash_type()
>> (drivers/mtd/nand/nand_base.c) without that patch I will get the following:
>>
>> *maf_id: 44, dev_id: 186
>>
>> with the patch the following is read:
>>
>> *maf_id: 128, dev_id: 85
>>
>> Which just is wrong.
>>
>> I haven't looked further up to now, maybe thats just a side effect of
>> some wrong clock settings because of different timings through those
>> barrieres or whatever.
>
> IIUC, the patch is adding barriers to every HW register write and read,
> which makes the compiler stop reordering these, and keep them ordered as
> in the source code. Are you sure that the order as laid out in the
> source is the right one? Maybe you were just luck so far that the
> reordering masked a bug.

I don't know much about all the stuff the omap3-drivers are doing. E.g. 
I've already wondered why it's necessary to measure a clock frequency in 
the code which got (wrongly) optimized without that patch. I would think 
such isn't necessary because I would assume the drivers are actually 
there to set the clock settings, and not to measure them. ;)

Regards,

Alexander
John Rigby - Dec. 19, 2010, 6:45 p.m.
On Sun, Dec 19, 2010 at 3:22 AM, Alexander Holler <holler@ahsoftware.de> wrote:
> side effects here. Reading NAND now fails on my BeagleBoard. Regardless
> if I use gcc 4.3.5 or gcc 4.5.1, after applying that patch the wrong ID
> of the NAND is read. In nand_get_flash_type()
> (drivers/mtd/nand/nand_base.c) without that patch I will get the following:
>
> *maf_id: 44, dev_id: 186
>
> with the patch the following is read:
>
> *maf_id: 128, dev_id: 85
The nand_get_flash_type routine reads these id's twice and compares
them.  Do your see an error message like this?

second ID read did not match

>
> Which just is wrong.
>
> I haven't looked further up to now, maybe thats just a side effect of
> some wrong clock settings because of different timings through those
> barrieres or whatever.
>
> Regards,
>
> Alexander
> _______________________________________________
> U-Boot mailing list
> U-Boot@lists.denx.de
> http://lists.denx.de/mailman/listinfo/u-boot
>
Alexander Holler - Dec. 19, 2010, 7:59 p.m.
Am 19.12.2010 19:45, schrieb John Rigby:
> On Sun, Dec 19, 2010 at 3:22 AM, Alexander Holler<holler@ahsoftware.de>  wrote:
>> side effects here. Reading NAND now fails on my BeagleBoard. Regardless
>> if I use gcc 4.3.5 or gcc 4.5.1, after applying that patch the wrong ID
>> of the NAND is read. In nand_get_flash_type()
>> (drivers/mtd/nand/nand_base.c) without that patch I will get the following:
>>
>> *maf_id: 44, dev_id: 186
>>
>> with the patch the following is read:
>>
>> *maf_id: 128, dev_id: 85
> The nand_get_flash_type routine reads these id's twice and compares
> them.  Do your see an error message like this?
>
> second ID read did not match
>

No, the output is

without memory barrier in read*/write*:
--------------------------------
U-Boot 2010.12-rc3-00013-g71aab09 (Dec 19 2010 - 01:19:38)

OMAP3530-GP ES3.1, CPU-OPP2, L3-165MHz, Max CPU Clock 720 mHz
OMAP3 Beagle board + LPDDR/NAND
I2C:   ready
DRAM:  256 MiB
NAND:  256 MiB
MMC:   OMAP SD/MMC: 0
In:    serial
Out:   serial
Err:   serial
Beagle Rev C4
timed out in wait_for_pin: I2C_STAT=0
No EEPROM on expansion board
Die ID #062a000400000000040365fa16019019
Hit any key to stop autoboot:  0
OMAP3 beagleboard.org # nand info

Device 0: nand0, sector size 128 KiB
--------------------------------

with memory barrier in read*/write*:
--------------------------------
U-Boot 2010.12-rc3-00014-gde0126f (Dec 19 2010 - 01:25:11)

OMAP3530-GP ES3.1, CPU-OPP2, L3-165MHz, Max CPU Clock 720 mHz
OMAP3 Beagle board + LPDDR/NAND
I2C:   ready
DRAM:  256 MiB
NAND:  32 MiB
MMC:   OMAP SD/MMC: 0
NAND read from offset 260000 failed -74
*** Warning - readenv() failed, using default environment

In:    serial
Out:   serial
Err:   serial
Beagle Rev C4
timed out in wait_for_pin: I2C_STAT=0
No EEPROM on expansion board
Die ID #062a000400000000040365fa16019019
Hit any key to stop autoboot:  0
OMAP3 beagleboard.org # nand info

Device 0: nand0, sector size 16 KiB
--------------------------------

And with the memory barrier the kernel gets started but hangs afterwards 
(at least it looks so). I still haven't searched further and can only 
offer speculations like clocks are screwed up, memory setup is broken or 
such. Finding the reason and not just the impact will need some more 
time on my side.

Regards,

Alexander
John Rigby - Dec. 20, 2010, 12:39 a.m.
On Sun, Dec 19, 2010 at 12:59 PM, Alexander Holler <holler@ahsoftware.de> wrote:
...
> No EEPROM on expansion board
> Die ID #062a000400000000040365fa16019019
> Hit any key to stop autoboot:  0
> OMAP3 beagleboard.org # nand info
>
> Device 0: nand0, sector size 16 KiB
> --------------------------------
>
I get the same output  without your change.  My gcc is linaro 4.4.5.
I'll do some bisecting and try to find out what is going on.
Alexander Holler - Dec. 20, 2010, 12:56 a.m.
Am 20.12.2010 01:39, schrieb John Rigby:
> On Sun, Dec 19, 2010 at 12:59 PM, Alexander Holler<holler@ahsoftware.de>  wrote:
> ...
>> No EEPROM on expansion board
>> Die ID #062a000400000000040365fa16019019
>> Hit any key to stop autoboot:  0
>> OMAP3 beagleboard.org # nand info
>>
>> Device 0: nand0, sector size 16 KiB
>> --------------------------------
>>
> I get the same output  without your change.  My gcc is linaro 4.4.5.
> I'll do some bisecting and try to find out what is going on.

Bisecting won't help you here. Not if the problem was always there 
(which is what I assume).

Regards,

Alexander
John Rigby - Dec. 20, 2010, 4:18 a.m.
On Sun, Dec 19, 2010 at 5:56 PM, Alexander Holler <holler@ahsoftware.de> wrote:
> Am 20.12.2010 01:39, schrieb John Rigby:
>>
>> On Sun, Dec 19, 2010 at 12:59 PM, Alexander Holler<holler@ahsoftware.de>
>>  wrote:
>> ...
>>>
>>> No EEPROM on expansion board
>>> Die ID #062a000400000000040365fa16019019
>>> Hit any key to stop autoboot:  0
>>> OMAP3 beagleboard.org # nand info
>>>
>>> Device 0: nand0, sector size 16 KiB
>>> --------------------------------
>>>
>> I get the same output  without your change.  My gcc is linaro 4.4.5.
>> I'll do some bisecting and try to find out what is going on.
>
> Bisecting won't help you here. Not if the problem was always there (which is
> what I assume
Sorry, I was confused about my results.

If I replace include <asm/io.h> in drivers/mtd/nand/omap_gpmc.c with a
copy of the original called orig_io.h:
#include "orig_io.h"

Nand starts working again.  So the problem seems to be isolated to this file.
>
> Regards,
>
> Alexander
>

Patch

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index ff1518e..5364b78 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -125,13 +125,21 @@  extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
 #define __raw_readw(a)			__arch_getw(a)
 #define __raw_readl(a)			__arch_getl(a)
 
-#define writeb(v,a)			__arch_putb(v,a)
-#define writew(v,a)			__arch_putw(v,a)
-#define writel(v,a)			__arch_putl(v,a)
+/*
+ * TODO: The kernel offers some more advanced versions of barriers, it might
+ * have some advantages to use them instead of the simple one here.
+ */
+#define dmb()				__asm__ __volatile__ ("" : : : "memory")
+#define __iormb()			dmb()
+#define __iowmb()			dmb()
+
+#define writeb(v,c)			({ __iowmb(); __arch_putb(v,c); })
+#define writew(v,c)			({ __iowmb(); __arch_putw(v,c); })
+#define writel(v,c)			({ __iowmb(); __arch_putl(v,c); })
 
-#define readb(a)			__arch_getb(a)
-#define readw(a)			__arch_getw(a)
-#define readl(a)			__arch_getl(a)
+#define readb(c)			({ u8  __v = __arch_getb(c); __iormb(); __v; })
+#define readw(c)			({ u16 __v = __arch_getw(c); __iormb(); __v; })
+#define readl(c)			({ u32 __v = __arch_getl(c); __iormb(); __v; })
 
 /*
  * The compiler seems to be incapable of optimising constants