Message ID | cover.1465502767.git.geoff@infradead.org |
---|---|
State | New |
Headers | show |
Geoff, On Thu, Jun 09, 2016 at 08:08:44PM +0000, Geoff Levand wrote: > From: AKASHI Takahiro <takahiro.akashi@linaro.org> > > Primary kernel calls machine_crash_shutdown() to shut down non-boot cpus > and save registers' status in per-cpu ELF notes before starting crash > dump kernel. See kernel_kexec(). > Even if not all secondary cpus have shut down, we do kdump anyway. > > As we don't have to make non-boot(crashed) cpus offline (to preserve > correct status of cpus at crash dump) before shutting down, this patch > also adds a variant of smp_send_stop(). > > Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org> > --- > arch/arm64/include/asm/hardirq.h | 2 +- > arch/arm64/include/asm/kexec.h | 41 +++++++++++++++++++++++++- > arch/arm64/include/asm/smp.h | 4 +++ > arch/arm64/kernel/machine_kexec.c | 56 +++++++++++++++++++++++++++++++++-- > arch/arm64/kernel/smp.c | 61 +++++++++++++++++++++++++++++++++++++++ > 5 files changed, 159 insertions(+), 5 deletions(-) > > diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h > index 8740297..1473fc2 100644 > --- a/arch/arm64/include/asm/hardirq.h > +++ b/arch/arm64/include/asm/hardirq.h > @@ -20,7 +20,7 @@ > #include <linux/threads.h> > #include <asm/irq.h> > > -#define NR_IPI 6 > +#define NR_IPI 7 > > typedef struct { > unsigned int __softirq_pending; > diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h > index 04744dc..600b960 100644 > --- a/arch/arm64/include/asm/kexec.h > +++ b/arch/arm64/include/asm/kexec.h > @@ -40,7 +40,46 @@ > static inline void crash_setup_regs(struct pt_regs *newregs, > struct pt_regs *oldregs) > { > - /* Empty routine needed to avoid build errors. 
*/ > + if (oldregs) { > + memcpy(newregs, oldregs, sizeof(*newregs)); > + } else { > + u64 tmp1, tmp2; > + > + __asm__ __volatile__ ( > + "stp x0, x1, [%2, #16 * 0]\n" > + "stp x2, x3, [%2, #16 * 1]\n" > + "stp x4, x5, [%2, #16 * 2]\n" > + "stp x6, x7, [%2, #16 * 3]\n" > + "stp x8, x9, [%2, #16 * 4]\n" > + "stp x10, x11, [%2, #16 * 5]\n" > + "stp x12, x13, [%2, #16 * 6]\n" > + "stp x14, x15, [%2, #16 * 7]\n" > + "stp x16, x17, [%2, #16 * 8]\n" > + "stp x18, x19, [%2, #16 * 9]\n" > + "stp x20, x21, [%2, #16 * 10]\n" > + "stp x22, x23, [%2, #16 * 11]\n" > + "stp x24, x25, [%2, #16 * 12]\n" > + "stp x26, x27, [%2, #16 * 13]\n" > + "stp x28, x29, [%2, #16 * 14]\n" > + "mov %0, sp\n" > + "stp x30, %0, [%2, #16 * 15]\n" > + > + "/* faked current PSTATE */\n" > + "mrs %0, CurrentEL\n" > + "mrs %1, DAIF\n" > + "orr %0, %0, %1\n" > + "mrs %1, NZCV\n" > + "orr %0, %0, %1\n" > + > + /* pc */ > + "adr %1, 1f\n" > + "1:" > + "stp %1, %0, [%2, #16 * 16]\n" > + : "=r" (tmp1), "=r" (tmp2), "+r" (newregs) > + : > + : "memory" > + ); > + } > } > > #endif /* __ASSEMBLY__ */ > diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h > index 38712f4..979154a 100644 > --- a/arch/arm64/include/asm/smp.h > +++ b/arch/arm64/include/asm/smp.h > @@ -134,6 +134,10 @@ static inline void cpu_panic_kernel(void) > */ > bool cpus_are_stuck_in_kernel(void); > > +#ifdef CONFIG_KEXEC_CORE > +extern void smp_send_crash_stop(void); > +#endif > + > #endif /* ifndef __ASSEMBLY__ */ > > #endif /* ifndef __ASM_SMP_H */ > diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c > index 0a8b04d..93588b2 100644 > --- a/arch/arm64/kernel/machine_kexec.c > +++ b/arch/arm64/kernel/machine_kexec.c > @@ -10,6 +10,9 @@ > */ > > #include <linux/highmem.h> > +#include <linux/interrupt.h> > +#include <linux/irq.h> > +#include <linux/kernel.h> > #include <linux/kexec.h> > #include <linux/libfdt_env.h> > #include <linux/of_fdt.h> > @@ -28,6 +31,7 @@ > extern const unsigned char 
arm64_relocate_new_kernel[]; > extern const unsigned long arm64_relocate_new_kernel_size; > > +bool in_crash_kexec; > static unsigned long kimage_start; > > /** > @@ -179,7 +183,8 @@ void machine_kexec(struct kimage *kimage) > /* > * New cpus may have become stuck_in_kernel after we loaded the image. > */ > - BUG_ON(cpus_are_stuck_in_kernel() && (num_online_cpus() > 1)); > + BUG_ON(cpus_are_stuck_in_kernel() && (num_online_cpus() > 1) && > + !WARN_ON(in_crash_kexec)); > > reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); > reboot_code_buffer = kmap(kimage->control_code_page); > @@ -236,13 +241,58 @@ void machine_kexec(struct kimage *kimage) > * relocation is complete. > */ > > - cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, > + cpu_soft_restart(in_crash_kexec, reboot_code_buffer_phys, kimage->head, => !in_crash_kexec Please let me recognize any changes you made clearly if you modified my part in the patchset before posting it. Thanks, -Takahiro AKASHI > kimage_start, 0); > > BUG(); /* Should never get here. */ > } > > +static void machine_kexec_mask_interrupts(void) > +{ > + unsigned int i; > + struct irq_desc *desc; > + > + for_each_irq_desc(i, desc) { > + struct irq_chip *chip; > + int ret; > + > + chip = irq_desc_get_chip(desc); > + if (!chip) > + continue; > + > + /* > + * First try to remove the active state. If this > + * fails, try to EOI the interrupt. > + */ > + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); > + > + if (ret && irqd_irq_inprogress(&desc->irq_data) && > + chip->irq_eoi) > + chip->irq_eoi(&desc->irq_data); > + > + if (chip->irq_mask) > + chip->irq_mask(&desc->irq_data); > + > + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) > + chip->irq_disable(&desc->irq_data); > + } > +} > + > +/** > + * machine_crash_shutdown - shutdown non-crashing cpus and save registers > + */ > void machine_crash_shutdown(struct pt_regs *regs) > { > - /* Empty routine needed to avoid build errors. 
*/ > + local_irq_disable(); > + > + in_crash_kexec = true; > + > + /* shutdown non-crashing cpus */ > + smp_send_crash_stop(); > + > + /* for crashing cpu */ > + crash_save_cpu(regs, smp_processor_id()); > + machine_kexec_mask_interrupts(); > + > + pr_info("Starting crashdump kernel...\n"); > } > diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c > index d0bcd55..f6461ef 100644 > --- a/arch/arm64/kernel/smp.c > +++ b/arch/arm64/kernel/smp.c > @@ -37,6 +37,7 @@ > #include <linux/completion.h> > #include <linux/of.h> > #include <linux/irq_work.h> > +#include <linux/kexec.h> > > #include <asm/alternative.h> > #include <asm/atomic.h> > @@ -71,6 +72,7 @@ enum ipi_msg_type { > IPI_RESCHEDULE, > IPI_CALL_FUNC, > IPI_CPU_STOP, > + IPI_CPU_CRASH_STOP, > IPI_TIMER, > IPI_IRQ_WORK, > IPI_WAKEUP > @@ -726,6 +728,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { > S(IPI_RESCHEDULE, "Rescheduling interrupts"), > S(IPI_CALL_FUNC, "Function call interrupts"), > S(IPI_CPU_STOP, "CPU stop interrupts"), > + S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"), > S(IPI_TIMER, "Timer broadcast interrupts"), > S(IPI_IRQ_WORK, "IRQ work interrupts"), > S(IPI_WAKEUP, "CPU wake-up interrupts"), > @@ -800,6 +803,28 @@ static void ipi_cpu_stop(unsigned int cpu) > cpu_relax(); > } > > +#ifdef CONFIG_KEXEC_CORE > +static atomic_t waiting_for_crash_ipi; > + > +static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) > +{ > + crash_save_cpu(regs, cpu); > + > + atomic_dec(&waiting_for_crash_ipi); > + > + local_irq_disable(); > + > +#ifdef CONFIG_HOTPLUG_CPU > + if (cpu_ops[cpu]->cpu_die) > + cpu_ops[cpu]->cpu_die(cpu); > +#endif > + > + /* just in case */ > + while (1) > + wfi(); > +} > +#endif > + > /* > * Main handler for inter-processor interrupts > */ > @@ -830,6 +855,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) > irq_exit(); > break; > > +#ifdef CONFIG_KEXEC_CORE > + case IPI_CPU_CRASH_STOP: > + irq_enter(); > + 
ipi_cpu_crash_stop(cpu, regs); > + > + unreachable(); > +#endif > + > #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST > case IPI_TIMER: > irq_enter(); > @@ -902,6 +935,34 @@ void smp_send_stop(void) > cpumask_pr_args(cpu_online_mask)); > } > > +#ifdef CONFIG_KEXEC_CORE > +void smp_send_crash_stop(void) > +{ > + cpumask_t mask; > + unsigned long timeout; > + > + if (num_online_cpus() == 1) > + return; > + > + cpumask_copy(&mask, cpu_online_mask); > + cpumask_clear_cpu(smp_processor_id(), &mask); > + > + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); > + > + pr_crit("SMP: stopping secondary CPUs\n"); > + smp_cross_call(&mask, IPI_CPU_CRASH_STOP); > + > + /* Wait up to one second for other CPUs to stop */ > + timeout = USEC_PER_SEC; > + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) > + udelay(1); > + > + if (atomic_read(&waiting_for_crash_ipi) > 0) > + pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", > + cpumask_pr_args(cpu_online_mask)); > +} > +#endif > + > /* > * not supported here > */ > -- > 2.5.0 > >
On Tue, 2016-06-14 at 16:13 +0900, AKASHI Takahiro wrote: > On Thu, Jun 09, 2016 at 08:08:44PM +0000, Geoff Levand wrote: > > > > -> > > > cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, > > +> > > > cpu_soft_restart(in_crash_kexec, reboot_code_buffer_phys, kimage->head, > > => !in_crash_kexec > > Please let me recognize any changes you made clearly if you modified my part > in the patchset before posting it. I pushed out a new branch kexec-v18.1 with this fix. Thanks for checking! -Geoff
Hi Geoff, Looks good, I have a few observations and questions below. On 09/06/16 21:08, Geoff Levand wrote: > Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the > arm64 architecture that add support for the kexec re-boot mechanism > (CONFIG_KEXEC) on arm64 platforms. > > Signed-off-by: Geoff Levand <geoff@infradead.org> > diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c > new file mode 100644 > index 0000000..05f7c21 > --- /dev/null > +++ b/arch/arm64/kernel/machine_kexec.c > @@ -0,0 +1,185 @@ > +/* > + * kexec for arm64 > + * > + * Copyright (C) Linaro. > + * Copyright (C) Huawei Futurewei Technologies. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + */ > + > +#include <linux/highmem.h> We don't have/need highmem on arm64. The kmap()/kunmap() calls just obscure what is going on. > +#include <linux/kexec.h> > +#include <linux/of_fdt.h> What do you need of_fdt.h for? I guess this should be in patch 4. > +#include <linux/slab.h> The control page was already allocated, I can't see anything else being allocated... What do you need slab.h for? > +#include <linux/smp.h> > +#include <linux/uaccess.h> User space access? I guess this should be in patch 4. > + > +#include <asm/cacheflush.h> > +#include <asm/cpu_ops.h> > +#include <asm/mmu_context.h> > +#include <asm/system_misc.h> I can't see anything in system_misc.h that you are using in here. > + > +#include "cpu-reset.h" > + > +/* Global variables for the arm64_relocate_new_kernel routine. */ > +extern const unsigned char arm64_relocate_new_kernel[]; > +extern const unsigned long arm64_relocate_new_kernel_size; > + > +static unsigned long kimage_start; > + > +void machine_kexec_cleanup(struct kimage *kimage) > +{ > + /* Empty routine needed to avoid build errors. 
*/ > +} > + > +/** > + * machine_kexec_prepare - Prepare for a kexec reboot. > + * > + * Called from the core kexec code when a kernel image is loaded. > + * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus > + * are stuck in the kernel. This avoids a panic once we hit machine_kexec(). > + */ > +int machine_kexec_prepare(struct kimage *kimage) > +{ > + kimage_start = kimage->start; > + > + if (kimage->type != KEXEC_TYPE_CRASH) { > + if (cpus_are_stuck_in_kernel()) { > + pr_err("Can't kexec: failed CPUs are stuck in the kernel.\n"); > + return -EBUSY; > + } > + > + if (num_online_cpus() > 1) { > +#ifdef CONFIG_HOTPLUG_CPU > + /* any_cpu as we don't mind being preempted */ > + int any_cpu = raw_smp_processor_id(); > + > + if (cpu_ops[any_cpu]->cpu_die) > + return 0; > +#endif /* CONFIG_HOTPLUG_CPU */ > + > + pr_err("Can't kexec: no mechanism to offline secondary CPUs.\n"); > + return -EBUSY; > + } > + } > + > + return 0; > +} > + > +/** > + * kexec_list_flush - Helper to flush the kimage list to PoC. > + */ > +static void kexec_list_flush(struct kimage *kimage) > +{ > + kimage_entry_t *entry; > + unsigned int flag; > + > + for (entry = &kimage->head, flag = 0; flag != IND_DONE; entry++) { > + void *addr = kmap(phys_to_page(*entry & PAGE_MASK)); > + > + flag = *entry & IND_FLAGS; > + > + switch (flag) { > + case IND_INDIRECTION: > + entry = (kimage_entry_t *)addr - 1; This '-1' is so that entry points before the first entry of the new table, and is un-done by entry++ next time round the loop... If I'm right, could you add a comment to that effect? It took me a little while to work out! kexec_core.c has a snazzy macro: for_each_kimage_entry(), its a shame its not in a header file. This loop does the same but with two variables instead of three. These IN_INDIRECTION pages only appear at the end of a list, this list-walking looks correct. 
> + __flush_dcache_area(addr, PAGE_SIZE); So if we find an indirection pointer, we switch entry to the new page, and clean it to the PoC, because later we walk this list with the MMU off. But what cleans the very first page? > + break; > + case IND_DESTINATION: > + break; > + case IND_SOURCE: > + __flush_dcache_area(addr, PAGE_SIZE); > + break; > + case IND_DONE: > + break; > + default: > + BUG(); Unless you think its less readable, you could group the clauses together: > case IND_INDIRECTION: > entry = (kimage_entry_t *)addr - 1; > case IND_SOURCE: > __flush_dcache_area(addr, PAGE_SIZE); > case IND_DESTINATION: > case IND_DONE: > break; > + } > + kunmap(addr); > + } > +} > + > +/** > + * kexec_segment_flush - Helper to flush the kimage segments to PoC. > + */ > +static void kexec_segment_flush(const struct kimage *kimage) > +{ > + unsigned long i; > + > + pr_debug("%s:\n", __func__); > + > + for (i = 0; i < kimage->nr_segments; i++) { > + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", > + i, > + kimage->segment[i].mem, > + kimage->segment[i].mem + kimage->segment[i].memsz, > + kimage->segment[i].memsz, > + kimage->segment[i].memsz / PAGE_SIZE); > + > + __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), > + kimage->segment[i].memsz); > + } > +} > + > +/** > + * machine_kexec - Do the kexec reboot. > + * > + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC. > + */ > +void machine_kexec(struct kimage *kimage) > +{ > + phys_addr_t reboot_code_buffer_phys; > + void *reboot_code_buffer; > + > + /* > + * New cpus may have become stuck_in_kernel after we loaded the image. > + */ > + BUG_ON(cpus_are_stuck_in_kernel() && (num_online_cpus() > 1)); > + > + reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); > + reboot_code_buffer = kmap(kimage->control_code_page); > + > + /* > + * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use > + * after the kernel is shut down. 
> + */ > + memcpy(reboot_code_buffer, arm64_relocate_new_kernel, > + arm64_relocate_new_kernel_size); > + > + /* Flush the reboot_code_buffer in preparation for its execution. */ > + __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); > + flush_icache_range((uintptr_t)reboot_code_buffer, > + arm64_relocate_new_kernel_size); > + > + /* Flush the kimage list. */ > + kexec_list_flush(kimage); > + > + /* Flush the new image if already in place. */ > + if (kimage->head & IND_DONE) > + kexec_segment_flush(kimage); > + > + pr_info("Bye!\n"); > + > + /* Disable all DAIF exceptions. */ > + asm volatile ("msr daifset, #0xf" : : : "memory"); > + > + /* > + * cpu_soft_restart will shutdown the MMU, disable data caches, then > + * transfer control to the reboot_code_buffer which contains a copy of > + * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel > + * uses physical addressing to relocate the new image to its final > + * position and transfers control to the image entry point when the > + * relocation is complete. > + */ > + > + cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, > + kimage_start, 0); > + > + BUG(); /* Should never get here. */ > +} > + > +void machine_crash_shutdown(struct pt_regs *regs) > +{ > + /* Empty routine needed to avoid build errors. */ > +} > diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S > new file mode 100644 > index 0000000..e380db3 > --- /dev/null > +++ b/arch/arm64/kernel/relocate_kernel.S > @@ -0,0 +1,131 @@ > +/* > + * kexec for arm64 > + * > + * Copyright (C) Linaro. > + * Copyright (C) Huawei Futurewei Technologies. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. 
> + */ > + > +#include <linux/kexec.h> > + > +#include <asm/assembler.h> > +#include <asm/kexec.h> > +#include <asm/page.h> > +#include <asm/sysreg.h> > + > +/* > + * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. > + * > + * The memory that the old kernel occupies may be overwritten when coping the > + * new image to its final location. To assure that the > + * arm64_relocate_new_kernel routine which does that copy is not overwritten, > + * all code and data needed by arm64_relocate_new_kernel must be between the > + * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The > + * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec > + * control_code_page, a special page which has been set up to be preserved > + * during the copy operation. > + */ > +.globl arm64_relocate_new_kernel > +arm64_relocate_new_kernel: All the other asm functions use ENTRY(), which would do the .globl and alignment for you. (You would need a ENDPROC(arm64_relocate_new_kernel) too.) > + > + /* Setup the list loop variables. */ > + mov x18, x1 /* x18 = kimage_start */ > + mov x17, x0 /* x17 = kimage_head */ > + dcache_line_size x16, x0 /* x16 = dcache line size */ > + mov x15, xzr /* x15 = segment start */ What uses this 'segment start'? > + mov x14, xzr /* x14 = entry ptr */ > + mov x13, xzr /* x13 = copy dest */ > + > + /* Clear the sctlr_el2 flags. */ > + mrs x0, CurrentEL > + cmp x0, #CurrentEL_EL2 > + b.ne 1f > + mrs x0, sctlr_el2 > + ldr x1, =SCTLR_ELx_FLAGS > + bic x0, x0, x1 > + msr sctlr_el2, x0 > + isb > +1: > + > + /* Check if the new image needs relocation. */ > + cbz x17, .Ldone Does this happen? Do we ever come across an empty slot in the tables? kimage_terminate() adds the IND_DONE entry, so we should never see an empty slot. kexec_list_flush() would BUG() on this too, and we call that unconditionally on the way in here. 
> + tbnz x17, IND_DONE_BIT, .Ldone > + > +.Lloop: > + and x12, x17, PAGE_MASK /* x12 = addr */ > + > + /* Test the entry flags. */ > +.Ltest_source: > + tbz x17, IND_SOURCE_BIT, .Ltest_indirection > + > + /* Invalidate dest page to PoC. */ > + mov x0, x13 > + add x20, x0, #PAGE_SIZE > + sub x1, x16, #1 > + bic x0, x0, x1 > +2: dc ivac, x0 This relies on an IND_DESTINATION being found first for x13 to be set to something other than 0. I guess if kexec-core hands us a broken list, all bets are off! > + add x0, x0, x16 > + cmp x0, x20 > + b.lo 2b > + dsb sy > + > + mov x20, x13 > + mov x21, x12 > + copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7 > + > + /* dest += PAGE_SIZE */ > + add x13, x13, PAGE_SIZE > + b .Lnext > + > +.Ltest_indirection: > + tbz x17, IND_INDIRECTION_BIT, .Ltest_destination > + > + /* ptr = addr */ > + mov x14, x12 > + b .Lnext > + > +.Ltest_destination: > + tbz x17, IND_DESTINATION_BIT, .Lnext > + > + mov x15, x12 What uses this 'segment start'? > + > + /* dest = addr */ > + mov x13, x12 > + > +.Lnext: > + /* entry = *ptr++ */ > + ldr x17, [x14], #8 > + > + /* while (!(entry & DONE)) */ > + tbz x17, IND_DONE_BIT, .Lloop > + > +.Ldone: /* wait for writes from copy_page to finish */ > + dsb nsh > + ic iallu > + dsb nsh > + isb > + > + /* Start new image. */ > + mov x0, xzr > + mov x1, xzr > + mov x2, xzr > + mov x3, xzr > + br x18 > + > +.ltorg > + > +.align 3 /* To keep the 64-bit values below naturally aligned. */ > + > +.Lcopy_end: > +.org KEXEC_CONTROL_PAGE_SIZE Why do we need to pad up to KEXEC_CONTROL_PAGE_SIZE? In machine_kexec() we only copy arm64_relocate_new_kernel_size bytes, so it shouldn't matter what is here. As far as I can see we don't even access it. > + > +/* > + * arm64_relocate_new_kernel_size - Number of bytes to copy to the > + * control_code_page. > + */ > +.globl arm64_relocate_new_kernel_size > +arm64_relocate_new_kernel_size: > + .quad .Lcopy_end - arm64_relocate_new_kernel Thanks, James
Hi Geoff, On 09/06/16 21:08, Geoff Levand wrote: > To aid in debugging kexec problems or when adding new functionality to kexec add > a new routine kexec_image_info() and several inline pr_debug statements. > > Signed-off-by: Geoff Levand <geoff@infradead.org> > --- > arch/arm64/kernel/machine_kexec.c | 63 +++++++++++++++++++++++++++++++++++++++ > 1 file changed, 63 insertions(+) > > diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c > index 05f7c21..0a8b04d 100644 > --- a/arch/arm64/kernel/machine_kexec.c > +++ b/arch/arm64/kernel/machine_kexec.c > @@ -11,6 +11,7 @@ > > #include <linux/highmem.h> > #include <linux/kexec.h> > +#include <linux/libfdt_env.h> > #include <linux/of_fdt.h> > #include <linux/slab.h> > #include <linux/smp.h> > @@ -29,6 +30,47 @@ extern const unsigned long arm64_relocate_new_kernel_size; > > static unsigned long kimage_start; > > +/** > + * kexec_is_dtb - Helper routine to check the device tree header signature. > + */ > +static bool kexec_is_dtb(const void *dtb) > +{ > + __be32 magic; > + > + if (get_user(magic, (__be32 *)dtb)) > + return false; > + You pass this function 'kimage->segment[i].buf', this looks like the user space memory that contained the dtb when kexec-tools first ran to load the image. This will work when you call it from machine_kexec_prepare(), but by the time we get to machine_kexec() that process is long gone, and this pointer should be considered junk. I don't think its possible to find this information from machine_kexec(), as the DTB will be split up into page size chunks and scattered through memory. > + return fdt32_to_cpu(magic) == OF_DT_HEADER; > +} > + > +/** > + * kexec_image_info - For debugging output. 
> + */ > +#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i) > +static void _kexec_image_info(const char *func, int line, > + const struct kimage *kimage) > +{ > + unsigned long i; > + > + pr_debug("%s:%d:\n", func, line); > + pr_debug(" kexec kimage info:\n"); > + pr_debug(" type: %d\n", kimage->type); > + pr_debug(" start: %lx\n", kimage->start); > + pr_debug(" head: %lx\n", kimage->head); > + pr_debug(" nr_segments: %lu\n", kimage->nr_segments); > + > + for (i = 0; i < kimage->nr_segments; i++) { > + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages%s\n", > + i, > + kimage->segment[i].mem, > + kimage->segment[i].mem + kimage->segment[i].memsz, > + kimage->segment[i].memsz, > + kimage->segment[i].memsz / PAGE_SIZE, > + (kexec_is_dtb(kimage->segment[i].buf) ? > + ", dtb segment" : "")); > + } > +} > + > void machine_kexec_cleanup(struct kimage *kimage) > { > /* Empty routine needed to avoid build errors. */ > @@ -65,6 +107,8 @@ int machine_kexec_prepare(struct kimage *kimage) > } > } > > + kexec_image_info(kimage); > + You want to put this at the start of machine_kexec_prepare(), otherwise we may return from: > #ifdef CONFIG_HOTPLUG_CPU > /* any_cpu as we don't mind being preempted */ > int any_cpu = raw_smp_processor_id(); > > if (cpu_ops[any_cpu]->cpu_die) > return 0; > #endif /* CONFIG_HOTPLUG_CPU */ This maybe-return-an-error block needs to be the last thing in the function. I'm not sure if the debug output is actually useful this early: kexec-tools prints out exactly the same information shortly after this function returns. 
> return 0; > } > > @@ -140,6 +184,25 @@ void machine_kexec(struct kimage *kimage) > reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); > reboot_code_buffer = kmap(kimage->control_code_page); > > + kexec_image_info(kimage); > + > + pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__, > + kimage->control_code_page); > + pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__, > + &reboot_code_buffer_phys); > + pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__, > + reboot_code_buffer); > + pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__, > + arm64_relocate_new_kernel); > + pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n", > + __func__, __LINE__, arm64_relocate_new_kernel_size, > + arm64_relocate_new_kernel_size); > + > + pr_debug("%s:%d: kimage_head: %lx\n", __func__, __LINE__, > + kimage->head); > + pr_debug("%s:%d: kimage_start: %lx\n", __func__, __LINE__, > + kimage_start); > + > /* > * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use > * after the kernel is shut down. > Thanks, James
On Wed, 2016-06-15 at 18:10 +0100, James Morse wrote: > On 09/06/16 21:08, Geoff Levand wrote: > > +++ b/arch/arm64/kernel/machine_kexec.c > > @@ -0,0 +1,185 @@ > > +/* > > + * kexec for arm64 > > + * > > + * Copyright (C) Linaro. > > + * Copyright (C) Huawei Futurewei Technologies. > > + * > > + * This program is free software; you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License version 2 as > > + * published by the Free Software Foundation. > > + */ > > + > > +#include > > We don't have/need highmem on arm64. The kmap()/kunmap() calls just obscure what > is going on. > > > > +#include > > +#include > > What do you need of_fdt.h for? I guess this should be in patch 4. > > > > +#include > > The control page was already allocated, I can't see anything else being > allocated... What do you need slab.h for? > > > > +#include > > +#include > > User space access? I guess this should be in patch 4. > > > > + > > +#include > > +#include > > +#include > > +#include > > I can't see anything in system_misc.h that you are using in here. I cleaned up all these includes. > > + * kexec_list_flush - Helper to flush the kimage list to PoC. > > + */ > > +static void kexec_list_flush(struct kimage *kimage) > > +{ > > +> > > > kimage_entry_t *entry; > > +> > > > unsigned int flag; > > + > > +> > > > for (entry = &kimage->head, flag = 0; flag != IND_DONE; entry++) { > > +> > > > > > void *addr = kmap(phys_to_page(*entry & PAGE_MASK)); > > + > > +> > > > > > flag = *entry & IND_FLAGS; > > + > > +> > > > > > switch (flag) { > > +> > > > > > case IND_INDIRECTION: > > +> > > > > > > > entry = (kimage_entry_t *)addr - 1; > > This '-1' is so that entry points before the first entry of the new table, > and is un-done by entry++ next time round the loop... > If I'm right, could you add a comment to that effect? It took me a little while > to work out! I added a comment. 
> kexec_core.c has a snazzy macro: for_each_kimage_entry(), its a shame its not in > a header file. > This loop does the same but with two variables instead of three. These > IN_INDIRECTION pages only appear at the end of a list, this list-walking looks > correct. > > > > +> > > > > > > > __flush_dcache_area(addr, PAGE_SIZE); > > So if we find an indirection pointer, we switch entry to the new page, and clean > it to the PoC, because later we walk this list with the MMU off. > > But what cleans the very first page? I don't think this routine was doing the quite the right thing. The arm64_relocate_new_kernel routine uses the list (the entry's), and the second stage kernel buffers (the IND_SOURCE's). Those two things are what should be flushed here. > > +> > > > > > > > break; > > +> > > > > > case IND_DESTINATION: > > +> > > > > > > > break; > > +> > > > > > case IND_SOURCE: > > +> > > > > > > > __flush_dcache_area(addr, PAGE_SIZE); > > +> > > > > > > > break; > > +> > > > > > case IND_DONE: > > +> > > > > > > > break; > > +> > > > > > default: > > +> > > > > > > > BUG(); > > Unless you think its less readable, you could group the clauses together: Takahiro found a bug when CONFIG_SPARSEMEM_VMEMMAP=n, and this code has now been reworked. > > > > > > case IND_INDIRECTION: > > > > > > > > entry = (kimage_entry_t *)addr - 1; > > > > > > case IND_SOURCE: > > > > > > > > __flush_dcache_area(addr, PAGE_SIZE); > > > > > > case IND_DESTINATION: > > > > > > case IND_DONE: > > > > > > > > break; > diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S > > new file mode 100644 > > index 0000000..e380db3 > > --- /dev/null > > +++ b/arch/arm64/kernel/relocate_kernel.S > > @@ -0,0 +1,131 @@ > > +.globl arm64_relocate_new_kernel > > +arm64_relocate_new_kernel: > > All the other asm functions use ENTRY(), which would do the .globl and alignment > for you. (You would need a ENDPROC(arm64_relocate_new_kernel) too.) Sure. 
> > + > > +> > > > /* Setup the list loop variables. */ > > +> > > > mov> > > > x18, x1> > > > > > > > > > /* x18 = kimage_start */ > > +> > > > mov> > > > x17, x0> > > > > > > > > > /* x17 = kimage_head */ > > +> > > > dcache_line_size x16, x0> > > > > > /* x16 = dcache line size */ > > +> > > > mov> > > > x15, xzr> > > > > > > > /* x15 = segment start */ > > What uses this 'segment start'? That is left over from when we booted without purgatory (as the arm arch does). > > +> > > > mov> > > > x14, xzr> > > > > > > > /* x14 = entry ptr */ > > +> > > > mov> > > > x13, xzr> > > > > > > > /* x13 = copy dest */ > > + > > +> > > > /* Clear the sctlr_el2 flags. */ > > +> > > > mrs> > > > x0, CurrentEL > > +> > > > cmp> > > > x0, #CurrentEL_EL2 > > +> > > > b.ne> > > > 1f > > +> > > > mrs> > > > x0, sctlr_el2 > > +> > > > ldr> > > > x1, =SCTLR_ELx_FLAGS > > +> > > > bic> > > > x0, x0, x1 > > +> > > > msr> > > > sctlr_el2, x0 > > +> > > > isb > > +1: > > + > > +> > > > /* Check if the new image needs relocation. */ > > +> > > > cbz> > > > x17, .Ldone > > Does this happen? Do we ever come across an empty slot in the tables? > > kimage_terminate() adds the IND_DONE entry, so we should never see an empty > slot. kexec_list_flush() would BUG() on this too, and we call that > unconditionally on the way in here. I put that in just in case, but never checked if it would ever actually happen. I can take it out. > > +> > > > tbnz> > > > x17, IND_DONE_BIT, .Ldone > > + > > +.Lloop: > > +> > > > and> > > > x12, x17, PAGE_MASK> > > > > > /* x12 = addr */ > > + > > +> > > > /* Test the entry flags. */ > > +.Ltest_source: > > +> > > > tbz> > > > x17, IND_SOURCE_BIT, .Ltest_indirection > > + > > +> > > > /* Invalidate dest page to PoC. */ > > +> > > > mov x0, x13 > > +> > > > add x20, x0, #PAGE_SIZE > > +> > > > sub x1, x16, #1 > > +> > > > bic x0, x0, x1 > > +2:> > > > dc ivac, x0 > > This relies on an IND_DESTINATION being found first for x13 to be set to > something other than 0. 
I guess if kexec-core hands us a broken list, all bets > are off! Yes, assumed to be IND_DESTINATION. > > + > > +.Ldone: > > /* wait for writes from copy_page to finish */ Added. > > + dsb nsh > > +> > > > ic> > > > iallu > > +> > > > dsb> > > > nsh > > +> > > > isb > > + > > +> > > > /* Start new image. */ > > +> > > > mov> > > > x0, xzr > > +> > > > mov> > > > x1, xzr > > +> > > > mov> > > > x2, xzr > > +> > > > mov> > > > x3, xzr > > +> > > > br> > > > x18 > > + > > +.ltorg > > + > > +.align 3> > > > /* To keep the 64-bit values below naturally aligned. */ > > + > > +.Lcopy_end: > > +.org> > > > KEXEC_CONTROL_PAGE_SIZE > > Why do we need to pad up to KEXEC_CONTROL_PAGE_SIZE? > In machine_kexec() we only copy arm64_relocate_new_kernel_size bytes, so it > shouldn't matter what is here. As far as I can see we don't even access it. This is to check if arm64_relocate_new_kernel gets too big. The assembler should give an error if the location counter is set backwards. -Geoff
On Wed, 2016-06-15 at 18:14 +0100, James Morse wrote: > On 09/06/16 21:08, Geoff Levand wrote: > > To aid in debugging kexec problems or when adding new functionality to kexec add > > a new routine kexec_image_info() and several inline pr_debug statements. > > diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c > > +/** > > + * kexec_is_dtb - Helper routine to check the device tree header signature. > > + */ > > +static bool kexec_is_dtb(const void *dtb) > > +{ > > +> > > > __be32 magic; > > + > > +> > > > if (get_user(magic, (__be32 *)dtb)) > > +> > > > > > return false; > > + > > You pass this function 'kimage->segment[i].buf', this looks like the user space > memory that contained the dtb when kexec-tools first ran to load the image. > > This will work when you call it from machine_kexec_prepare(), but by the time we > get to machine_kexec() that process is long gone, and this pointer should be > considered junk. > > I don't think its possible to find this information from machine_kexec(), as the > DTB will be split up into page size chunks and scattered through memory. That's correct. I don't think it's that important to print the dtb segment, so I'll just remove this kexec_is_dtb() routine. > > + > > void machine_kexec_cleanup(struct kimage *kimage) > > { > > > > > > /* Empty routine needed to avoid build errors. */ > > @@ -65,6 +107,8 @@ int machine_kexec_prepare(struct kimage *kimage) > > > > > > > > } > > > > > > } > > > > +> > > > kexec_image_info(kimage); > > + > > You want to put this at the start of machine_kexec_prepare(), otherwise we may > return from: > > #ifdef CONFIG_HOTPLUG_CPU > > > > /* any_cpu as we don't mind being preempted */ > > > > int any_cpu = raw_smp_processor_id(); > > > > > > if (cpu_ops[any_cpu]->cpu_die) > > > > > > return 0; > > #endif /* CONFIG_HOTPLUG_CPU */ > > This maybe-return-an-error block needs to be the last thing in the function. OK. 
> I'm not sure if the debug output is actually useful this early: kexec-tools > prints out exactly the same information shortly after this function returns. I thought it good to do it here; kexec-tools is not the only user of kexec — see https://github.com/antonblanchard/kexec-lite. Thanks for checking. -Geoff