diff mbox

vexpress-a9 aborts when booting decompress code from a modified Linux kernel

Message ID CALCv0x2u9rS8kO1NuWatYpuz2muf4oQMm5_fTKy_wkHmX034=Q@mail.gmail.com
State New
Headers show

Commit Message

Ilya Lipnitskiy Oct. 16, 2015, 5:21 p.m. UTC
Hi All,

I am running into a curious issue with QEMU ARM, maybe a Linux/QEMU
ARM expert could help before I file a bug report. Is this a QEMU
problem or is there a fundamental problem with my kernel change? QEMU
builds before SHA 6ec1588e handle this kernel change just fine...

I'm seeing a very early CPU abort if I add L1 cache invalidation logic
in early Linux decompress code in arch/arm/boot/compressed/head.S.
Essentially, if I add a copy of v7_invalidate_l1 from
arch/arm/mm/cache-v7.S to __armv7_mmu_cache_on in head.S QEMU aborts
somewhere inside the new code. Please see the head.S patch below:

Comments

Ilya Lipnitskiy Oct. 16, 2015, 5:57 p.m. UTC | #1
On Fri, Oct 16, 2015 at 10:37 AM, Peter Maydell
<peter.maydell@linaro.org> wrote:
> It would be helpful if you said what the abort actually was
> (ie what instruction do we abort on, what are the fault status/
> fault address registers if applicable, etc).
I should have been more specific. Running trunk qemu on the patched
zImage with qemu-system-arm -M vexpress-a9 -kernel
linux/arch/arm/boot/zImage -serial stdio -dtb
linux/arch/arm/boot/dts/vexpress-v2p-ca9.dtb -s -S then attaching gdb
generates the following information. I haven't nailed the actual
instruction causing the abort yet. Is it obvious from the registers?

(gdb) target remote :1234
Remote debugging using :1234
0x60000000 in ?? ()
(gdb) c
Continuing.
^C
Program received signal SIGINT, Interrupt.
__vectors_start () at arch/arm/kernel/entry-armv.S:1240
1240        W(b)    vector_pabt
(gdb) info registers
r0             0xc5507d    12931197
r1             0xfffffffd    -3
r2             0x600    1536
r3             0xffffc000    -16384
r4             0x0    0
r5             0x0    0
r6             0x0    0
r7             0x8e0    2272
r8             0x64000000    1677721600
r9             0xfffc0000    -262144
r10            0xffc0000    268173312
r11            0x100103    1048835
r12            0x600100c8    1610678472
sp             0x0    0x0 <__vectors_start>
lr             0x10    16
pc             0xc    0xc <__vectors_start+12>
cpsr           0x1d7    471
(gdb)

Did it abort at 0x600100c8?

(gdb) disassemble 0x60010000,+1024
Dump of assembler code from 0x60010000 to 0x60010400:
   0x60010000:    nop            ; (mov r0, r0)
   0x60010004:    nop            ; (mov r0, r0)
   0x60010008:    nop            ; (mov r0, r0)
   0x6001000c:    nop            ; (mov r0, r0)
   0x60010010:    nop            ; (mov r0, r0)
   0x60010014:    nop            ; (mov r0, r0)
   0x60010018:    nop            ; (mov r0, r0)
   0x6001001c:    nop            ; (mov r0, r0)
   0x60010020:    b    0x60010034
   0x60010024:    cmneq    pc, r8, lsl r8    ; <UNPREDICTABLE>
   0x60010028:    andeq    r0, r0, r0
   0x6001002c:    eorseq    r4, r4, r8, asr pc
   0x60010030:    streq    r0, [r3], #-513    ; 0xfffffdff
   0x60010034:    mrs    r9, CPSR
   0x60010038:    bl    0x60013560
   0x6001003c:    mov    r7, r1
   0x60010040:    mov    r8, r2
   0x60010044:    mrs    r2, CPSR
   0x60010048:    tst    r2, #3
   0x6001004c:    bne    0x60010058
   0x60010050:    mov    r0, #23
   0x60010054:    svc    0x00123456
   0x60010058:    mrs    r0, CPSR
   0x6001005c:    eor    r0, r0, #26
   0x60010060:    tst    r0, #31
   0x60010064:    bic    r0, r0, #31
   0x60010068:    orr    r0, r0, #211    ; 0xd3
   0x6001006c:    bne    0x60010084
   0x60010070:    orr    r0, r0, #256    ; 0x100
   0x60010074:    add    lr, pc, #12
   0x60010078:    msr    SPSR_fsxc, r0
   0x6001007c:    msr    ELR_hyp, lr
   0x60010080:    eret
   0x60010084:    msr    CPSR_c, r0
   0x60010088:    msr    SPSR_fsxc, r9
   0x6001008c:    andeq    r0, r0, r0
   0x60010090:    andeq    r0, r0, r0
   0x60010094:    andeq    r0, r0, r0
   0x60010098:    andeq    r0, r0, r0
   0x6001009c:    andeq    r0, r0, r0
   0x600100a0:    mov    r4, pc
   0x600100a4:    and    r4, r4, #-134217728    ; 0xf8000000
   0x600100a8:    add    r4, r4, #32768    ; 0x8000
   0x600100ac:    mov    r0, pc
   0x600100b0:    cmp    r0, r4
   0x600100b4:    ldrcc    r0, [pc, #428]    ; 0x60010268
   0x600100b8:    addcc    r0, r0, pc
   0x600100bc:    cmpcc    r4, r0
   0x600100c0:    orrcc    r4, r4, #1
   0x600100c4:    blcs    0x60010280
   0x600100c8:    add    r0, pc, #376    ; 0x178
   0x600100cc:    ldm    r0, {r1, r2, r3, r6, r10, r11, r12}
   0x600100d0:    ldr    sp, [r0, #28]



>
> (I assume you mean "we send an abort to the guest", not "QEMU's
> C code calls abort()"; if the latter, please provide a backtrace.)
>
Right, this is way before the kernel is even decompressed. QEMU sends
a CPU abort to the guest; QEMU itself does not crash.
Ilya Lipnitskiy Oct. 16, 2015, 6:45 p.m. UTC | #2
On Fri, Oct 16, 2015 at 11:09 AM, Peter Maydell
<peter.maydell@linaro.org> wrote:
> You might find it helpful to turn on QEMU's debug logging
> (see the -d and -D options). Chances are that some insn
> in your new code is faulting (probably UNDEFing).
Thanks for the suggestion! Here is what I got:

----------------
IN:
0x60010958:  f57ff04f      dsb    sy
0x6001095c:  f57ff06f      isb    sy
0x60010960:  e1a0f00e      mov    pc, lr

----------------
IN:
0x60010400:  e89d401f      ldm    sp, {r0, r1, r2, r3, r4, lr}
-------------->The end of my custom code
0x60010404:  ee10bf91      mrc    15, 0, fp, cr0, cr1, {4}
0x60010408:  e31b000f      tst    fp, #15    ; 0xf
0x6001040c:  13a0600e      movne    r6, #14    ; 0xe
0x60010410:  1bffffc1      blne    0x6001031c   ----------> (__setup_mmu)

...

----------------
IN:
0x60010414:  e3a00000      mov    r0, #0    ; 0x0
0x60010418:  ee070f9a      mcr    15, 0, r0, cr7, cr10, {4}
0x6001041c:  e31b000f      tst    fp, #15    ; 0xf
0x60010420:  1e080f17      mcrne    15, 0, r0, cr8, cr7, {0}

----------------
IN:
0x60010424:  ee110f10      mrc    15, 0, r0, cr1, cr0, {0}
0x60010428:  e3c00201      bic    r0, r0, #268435456    ; 0x10000000
0x6001042c:  e3800a05      orr    r0, r0, #20480    ; 0x5000
0x60010430:  e380003c      orr    r0, r0, #60    ; 0x3c
0x60010434:  e3c00002      bic    r0, r0, #2    ; 0x2
0x60010438:  e3800501      orr    r0, r0, #4194304    ; 0x400000
0x6001043c:  1e126f50      mrcne    15, 0, r6, cr2, cr0, {2}
0x60010440:  13800001      orrne    r0, r0, #1    ; 0x1
0x60010444:  13e01002      mvnne    r1, #2    ; 0x2
0x60010448:  e3c66102      bic    r6, r6, #-2147483648    ; 0x80000000
0x6001044c:  e3c66003      bic    r6, r6, #3    ; 0x3
0x60010450:  1e023f10      mcrne    15, 0, r3, cr2, cr0, {0}

----------------
IN:
0x60010454:  1e031f10      mcrne    15, 0, r1, cr3, cr0, {0}

----------------
IN:
0x60010458:  1e026f50      mcrne    15, 0, r6, cr2, cr0, {2}

----------------
IN:
0x6001045c:  ee070f95      mcr    15, 0, r0, cr7, cr5, {4}
0x60010460:  ee010f10      mcr    15, 0, r0, cr1, cr0, {0}

Taking exception 3 [Prefetch Abort]
...with IFSR 0x5 IFAR 0x60010464
Taking exception 3 [Prefetch Abort]
...with IFSR 0x5 IFAR 0xc  -----------> Loops forever



Disassembling head.o (Base is 0x600100A0):
...

00000338 <__armv7_mmu_cache_on>:
 338:    e1a0c00e     mov    ip, lr
 33c:    ee100f10     mrc    15, 0, r0, cr0, cr0, {0}
 340:    e59f157c     ldr    r1, [pc, #1404]    ; 8c4 <v7_invalidate_l1+0x64>
 344:    e59f257c     ldr    r2, [pc, #1404]    ; 8c8 <v7_invalidate_l1+0x68>
 348:    e0100002     ands    r0, r0, r2
 34c:    e0300001     eors    r0, r0, r1
 350:    0a000000     beq    358 <v7_invalidate>
 354:    1a000002     bne    364 <l1_self_invalidated>

00000358 <v7_invalidate>:
 358:    e88d401f     stm    sp, {r0, r1, r2, r3, r4, lr}
 35c:    ebfffffe     bl    860 <v7_invalidate_l1>
 360:    e89d401f     ldm    sp, {r0, r1, r2, r3, r4, lr}  ----------> The end of my custom code

00000364 <l1_self_invalidated>:
 364:    ee10bf91     mrc    15, 0, fp, cr0, cr1, {4}
 368:    e31b000f     tst    fp, #15
 36c:    13a0600e     movne    r6, #14
 370:    1bffffc1     blne    27c <__setup_mmu>
 374:    e3a00000     mov    r0, #0
 378:    ee070f9a     mcr    15, 0, r0, cr7, cr10, {4}
 37c:    e31b000f     tst    fp, #15
 380:    1e080f17     mcrne    15, 0, r0, cr8, cr7, {0}
 384:    ee110f10     mrc    15, 0, r0, cr1, cr0, {0}
 388:    e3c00201     bic    r0, r0, #268435456    ; 0x10000000
 38c:    e3800a05     orr    r0, r0, #20480    ; 0x5000
 390:    e380003c     orr    r0, r0, #60    ; 0x3c
 394:    e3c00002     bic    r0, r0, #2
 398:    e3800501     orr    r0, r0, #4194304    ; 0x400000
 39c:    1e126f50     mrcne    15, 0, r6, cr2, cr0, {2}
 3a0:    13800001     orrne    r0, r0, #1
 3a4:    13e01002     mvnne    r1, #2
 3a8:    e3c66102     bic    r6, r6, #-2147483648    ; 0x80000000
 3ac:    e3c66003     bic    r6, r6, #3
 3b0:    1e023f10     mcrne    15, 0, r3, cr2, cr0, {0}
 3b4:    1e031f10     mcrne    15, 0, r1, cr3, cr0, {0}
 3b8:    1e026f50     mcrne    15, 0, r6, cr2, cr0, {2}
 3bc:    ee070f95     mcr    15, 0, r0, cr7, cr5, {4}
 3c0:    ee010f10     mcr    15, 0, r0, cr1, cr0, {0}   ---------> Prefetch abort (0x60010464)
 3c4:    ee110f10     mrc    15, 0, r0, cr1, cr0, {0}
 3c8:    e3a00000     mov    r0, #0
 3cc:    ee070f95     mcr    15, 0, r0, cr7, cr5, {4}
 3d0:    e1a0f00c     mov    pc, ip
Ilya Lipnitskiy Oct. 16, 2015, 8:35 p.m. UTC | #3
On Fri, Oct 16, 2015 at 11:09 AM, Peter Maydell
<peter.maydell@linaro.org> wrote:

> You might find it helpful to turn on QEMU's debug logging
> (see the -d and -D options). Chances are that some insn
> in your new code is faulting (probably UNDEFing).
I think I have figured it out; it looks like my code was doing two bad things:
1. Trying to save registers to a nonexistent stack ($sp == 0 when
stmia is called)
2. Clobbering r4 that is used globally throughout head.S

Removing the stmia/ldmia, and using r7 instead of r4 appears to have
resolved the issue.

It is a mystery how this code has been working on actual hardware, and
why reverting SHA 6ec1588e097 somehow made QEMU work. Any clues why?

Thanks for all the help!

Ilya
Peter Maydell Oct. 16, 2015, 8:41 p.m. UTC | #4
On 16 October 2015 at 21:35, Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com> wrote:
> On Fri, Oct 16, 2015 at 11:09 AM, Peter Maydell
> <peter.maydell@linaro.org> wrote:
>
>> You might find it helpful to turn on QEMU's debug logging
>> (see the -d and -D options). Chances are that some insn
>> in your new code is faulting (probably UNDEFing).
> I think have figured it out, looks like my code was doing two bad things:
> 1. Trying to save registers to a nonexistent stack ($sp == 0 when
> stmia is called)
> 2. Clobbering r4 that is used globally throughout head.S
>
> Removing the stmia/ldmia, and using r7 instead of r4 appears to have
> resolved the issue.
>
> It is a mystery how this code has been working on actual hardware, and
> why reverting SHA 6ec1588e097 somehow made QEMU work. Any clues why?

I have a feeling that if you take an exception in early boot and there
is RAM at address 0 then you'll effectively just reenter the kernel
again from its lowest address (since there's nothing in the vector
tables yet), which means that if you're lucky and the second time
around things work you might not notice. If there's no RAM at 0
then you go into an infinite series of exceptions.

Alternatively, the trashing of some important register might have
not been noticed if that register's value should have been 0
anyway (eg if it was related to the base address of RAM).

You can probably figure out what happened by looking at the debug
tracing of execution in the situation where there is RAM at 0
in QEMU.

thanks
-- PMM
diff mbox

Patch

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 06e983f..943541f 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -753,6 +753,29 @@  __armv4_mmu_cache_on:

 __armv7_mmu_cache_on:
                mov     r12, lr
+@ r0: Primary Part Number of MIDR[15:4], read from HW
+@ r1: Primary Part Number of Cortex A9
+@ r2: Mask for Primary Part Number
+@ L1 dcache invalidation is needed for Cortex A9
+@ L1 dcache invalidation is not needed for Cortex A7 as HW does d-cache invalidation during power ON
+
+                mrc 15, 0, r0, c0, c0, 0        @ Read Main ID [MIDR]
+                @ldr r0, =0xc090                @ Added for testing purpose
+                ldr r1, =0x0000c090             @ Cortex-A9 MPCore (ARMv7)
+                ldr r2, =0x0000fff0            @ Mask for Primary Part Number [15:4]
+                ands    r0, r0, r2              @ get the Primary Part Number
+                eors    r0, r1                 @ Test whether the core is A9
+               beq    v7_invalidate            @ Allow A9 to perform L1 invalidate
+               bne    l1_self_invalidated      @ Only A9 requires L1 cache invalidate
+@ Invalidate L1
+v7_invalidate:
+                stmia  sp, {r0-r4, lr}
+               bl     v7_invalidate_l1         @ Allow A9 to perform L1 invalidate
+                ldmia  sp, {r0-r4, lr}
+
+@ Skip Invalidation, already invalidated in the HW
+l1_self_invalidated:
+
 #ifdef CONFIG_MMU
                mrc     p15, 0, r11, c0, c1, 4  @ read ID_MMFR0
                tst     r11, #0xf               @ VMSA
@@ -1253,6 +1276,43 @@  __armv3_mpu_cache_flush:
                mcr     p15, 0, r1, c7, c0, 0   @ invalidate whole cache v3
                mov     pc, lr

+
+ENTRY(v7_invalidate_l1)
+       mov     r0, #0
+       mcr     p15, 2, r0, c0, c0, 0
+       mrc     p15, 1, r0, c0, c0, 0
+
+       ldr     r1, =0x7fff
+       and     r2, r1, r0, lsr #13
+
+       ldr     r1, =0x3ff
+
+       and     r3, r1, r0, lsr #3      @ NumWays - 1
+       add     r2, r2, #1              @ NumSets
+
+       and     r0, r0, #0x7
+       add     r0, r0, #4      @ SetShift
+
+       clz     r1, r3          @ WayShift
+       add     r4, r3, #1      @ NumWays
+1:     sub     r2, r2, #1      @ NumSets--
+       mov     r3, r4          @ Temp = NumWays
+2:     subs    r3, r3, #1      @ Temp--
+       mov     r5, r3, lsl r1
+       mov     r6, r2, lsl r0
+       orr     r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+       mcr     p15, 0, r5, c7, c6, 2
+       bgt     2b
+       cmp     r2, #0
+       bgt     1b
+       dsb
+       isb
+       mov     pc, lr
+ENDPROC(v7_invalidate_l1)
+
+/* end of d-cache invalidation
+*/
+
 /*
  * Various debugging routines for printing hex characters and
  * memory, which again must be relocatable.



The modified kernel boots fine if I revert SHA 6ec1588e that aliases
NOR flash at address 0 for vexpress-a9:

commit 6ec1588e09770ac7e9c60194faff6101111fc7f0
Author: Peter Maydell <>
Date:   Wed Jul 2 15:07:50 2014 +0100

    hw/arm/vexpress: Alias NOR flash at 0 for vexpress-a9

    Make the vexpress-a9 board alias the first NOR flash region at
    address zero, like vexpress-a15. This makes "-bios" actually usable
    on this board.

diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index 3d83e6c..a88732c 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -84,6 +84,7 @@  enum {
 };

 static hwaddr motherboard_legacy_map[] = {
+    [VE_NORFLASHALIAS] = 0,
     /* CS7: 0x10000000 .. 0x10020000 */
     [VE_SYSREGS] = 0x10000000,
     [VE_SP810] = 0x10001000,
@@ -114,7 +115,6 @@  static hwaddr motherboard_legacy_map[] = {
     [VE_VIDEORAM] = 0x4c000000,
     [VE_ETHERNET] = 0x4e000000,
     [VE_USB] = 0x4f000000,
-    [VE_NORFLASHALIAS] = -1, /* not present */
 };