diff mbox

[1/8] kvm tools: Add initial SPAPR PPC64 architecture support

Message ID 4EDD94A4.2080003@ozlabs.org
State New, archived
Headers show

Commit Message

Matt Evans Dec. 6, 2011, 4:05 a.m. UTC
This patch adds a new arch directory, powerpc, basic file structure, register
setup and where necessary stubs out arch-specific functions (e.g. interrupts,
runloop exits) that later patches will provide.  The target is an
SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
RTAS required to boot SPAPR pSeries kernels.

Memory is mapped from hugetlbfs (as that is currently required by upstream PPC64
HV-mode KVM).  The mapping of a contiguous RMA region is yet to be implemented;
this is only necessary on processors that don't support VRMA, e.g. <= P6.  Work
is therefore needed to get this going on pre-P7 CPUs.

Processor state is set up as a guest kernel would expect (both primary and
secondaries), and SMP is fully supported.

Finally, support is added for simply loading flat binary kernels (plus initrd).
(bzImages are not used on PPC, and this series does not add zImage support or an
ELF loader.)  The intention is to later support loading firmware such as SLOF.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile                           |   10 +
 tools/kvm/kvm.c                              |    3 +
 tools/kvm/powerpc/include/kvm/barrier.h      |    6 +
 tools/kvm/powerpc/include/kvm/kvm-arch.h     |   70 ++++++++
 tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h |   46 +++++
 tools/kvm/powerpc/ioport.c                   |   18 ++
 tools/kvm/powerpc/irq.c                      |   40 +++++
 tools/kvm/powerpc/kvm-cpu.c                  |  232 ++++++++++++++++++++++++++
 tools/kvm/powerpc/kvm.c                      |  231 +++++++++++++++++++++++++
 9 files changed, 656 insertions(+), 0 deletions(-)
 create mode 100644 tools/kvm/powerpc/include/kvm/barrier.h
 create mode 100644 tools/kvm/powerpc/include/kvm/kvm-arch.h
 create mode 100644 tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
 create mode 100644 tools/kvm/powerpc/ioport.c
 create mode 100644 tools/kvm/powerpc/irq.c
 create mode 100644 tools/kvm/powerpc/kvm-cpu.c
 create mode 100644 tools/kvm/powerpc/kvm.c

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Scott Wood Dec. 6, 2011, 6:03 p.m. UTC | #1
On 12/05/2011 10:05 PM, Matt Evans wrote:
> This patch adds a new arch directory, powerpc, basic file structure, register
> setup and where necessary stubs out arch-specific functions (e.g. interrupts,
> runloop exits) that later patches will provide.  The target is an
> SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
> 'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
> RTAS required to boot SPAPR pSeries kernels.

You just sent out 28 patches removing "everything is x86"
dependencies -- may I suggest that the PPC code be structured so that
there isn't an "everything on PPC (or even PPC64) is SPAPR" assumption,
even if SPAPR is initially the only sub-arch present?

E.g. this is SPAPR-specific despite being in generically-named
tools/kvm/powerpc/kvm-cpu.c:

> +static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
> +{
> +	struct kvm_regs *r = &vcpu->regs;
> +
> +	if (vcpu->cpu_id == 0) {
> +		r->pc = KERNEL_START_ADDR;
> +		r->gpr[3] = vcpu->kvm->fdt_gra;
> +		r->gpr[5] = 0;
> +	} else {
> +		r->pc = KERNEL_SECONDARY_START_ADDR;
> +		r->gpr[3] = vcpu->cpu_id;
> +	}
> +	r->msr = 0x8000000000001000UL; /* 64bit, non-HV, ME */
> +
> +	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
> +		die_perror("KVM_SET_REGS failed");
> +}

> diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
> new file mode 100644
> index 0000000..722d01c
> --- /dev/null
> +++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
> @@ -0,0 +1,70 @@
> +/*
> + * PPC64 architecture-specific definitions
> + *
> + * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation.
> + */
> +
> +#ifndef KVM__KVM_ARCH_H
> +#define KVM__KVM_ARCH_H
[snip]
> +void ioport__setup_arch(void)
[snip]
> +int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)

I'm seeing a lot of double-underscores -- is this common style in KVM
tool?  It's reserved for use by the compiler and system library.  It's
common in the kernel (though not used like this for namespace
prefixes), but there's no system library involved there.

> diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
> new file mode 100644
> index 0000000..79422ff
> --- /dev/null
> +++ b/tools/kvm/powerpc/kvm-cpu.c
[snip]
> +#define MSR_SF		(1UL<<63)
> +#define MSR_HV		(1UL<<60)
> +#define MSR_VEC		(1UL<<25)
> +#define MSR_VSX		(1UL<<23)
> +#define MSR_POW		(1UL<<18)
> +#define MSR_EE		(1UL<<15)
> +#define MSR_PR		(1UL<<14)
> +#define MSR_FP		(1UL<<13)
> +#define MSR_ME		(1UL<<12)
> +#define MSR_FE0		(1UL<<11)
> +#define MSR_SE		(1UL<<10)
> +#define MSR_BE		(1UL<<9)
> +#define MSR_FE1		(1UL<<8)
> +#define MSR_IR		(1UL<<5)
> +#define MSR_DR		(1UL<<4)
> +#define MSR_PMM		(1UL<<2)
> +#define MSR_RI		(1UL<<1)
> +#define MSR_LE		(1UL<<0)

Shouldn't these go in a header?

> +#define HUGETLBFS_MAGIC       0x958458f6

#include <linux/magic.h>

?

-Scott

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pekka Enberg Dec. 6, 2011, 6:33 p.m. UTC | #2
On Tue, Dec 6, 2011 at 8:03 PM, Scott Wood <scottwood@freescale.com> wrote:
> I'm seeing a lot of double-underscores -- is this common style in KVM
> tool?  It's reserved for use by the compiler and system library.  It's
> common in the kernel (though not used like this for namespace
> prefixes), but there's no system library involved there.

Yes, they are KVM tool coding style which we took from perf. Double
underscore _prefixes_ are reserved in userspace but there's no reason
we can't use them in identifiers like we do.

                         Pekka
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood Dec. 6, 2011, 6:54 p.m. UTC | #3
On 12/06/2011 12:33 PM, Pekka Enberg wrote:
> On Tue, Dec 6, 2011 at 8:03 PM, Scott Wood <scottwood@freescale.com> wrote:
>> I'm seeing a lot of double-underscores -- is this common style in KVM
>> tool?  It's reserved for use by the compiler and system library.  It's
>> common in the kernel (though not used like this for namespace
>> prefixes), but there's no system library involved there.
> 
> Yes, they are KVM tool coding style which we took from perf. Double
> underscore _prefixes_ are reserved in userspace but there's no reason
> we can't use them in identifiers like we do.

OK, it looks like it's just C++ that also reserves non-leading double
underscores -- sorry about that.

-Scott

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matt Evans Dec. 7, 2011, 7:35 a.m. UTC | #4
Hi Scott,

On 07/12/11 05:03, Scott Wood wrote:
> On 12/05/2011 10:05 PM, Matt Evans wrote:
>> This patch adds a new arch directory, powerpc, basic file structure, register
>> setup and where necessary stubs out arch-specific functions (e.g. interrupts,
>> runloop exits) that later patches will provide.  The target is an
>> SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
>> 'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
>> RTAS required to boot SPAPR pSeries kernels.
> 
> You just sent out 28 patches removing "everything is x86"
> dependencies -- may I suggest that the PPC code be structured so that
> there isn't an "everything on PPC (or even PPC64) is SPAPR" assumption,
> even if SPAPR is initially the only sub-arch present?

I had anticipated this comment (though not the "28 patches" remark, easy now).
It is a fair comment, but you hit the nail on the head with your other mail
(regarding configuring in i8042, presumably to emulate crappy dev boards)
asking whether kvmtool has a config system.  It does not.

Since we currently lack any kind of build-time configuration (or any fancy
run-time -M <machine> a la QEMU) it's a bit hard to cater for multiple
platforms.  I'm aware that the PPC patches are painfully PPC64-with-SPAPR and I
don't present them as perfect, but I really think we need to attack the
configuration stuff before bifurcating.  Is this something you'd like to see to?

(Your comments on the #defines and magic accepted & fixed, thank you.)


Cheers,


Matt



> 
> E.g. this is SPAPR-specific despite being in generically-named
> tools/kvm/powerpc/kvm-cpu.c:
> 
>> +static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
>> +{
>> +	struct kvm_regs *r = &vcpu->regs;
>> +
>> +	if (vcpu->cpu_id == 0) {
>> +		r->pc = KERNEL_START_ADDR;
>> +		r->gpr[3] = vcpu->kvm->fdt_gra;
>> +		r->gpr[5] = 0;
>> +	} else {
>> +		r->pc = KERNEL_SECONDARY_START_ADDR;
>> +		r->gpr[3] = vcpu->cpu_id;
>> +	}
>> +	r->msr = 0x8000000000001000UL; /* 64bit, non-HV, ME */
>> +
>> +	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
>> +		die_perror("KVM_SET_REGS failed");
>> +}
> 
>> diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
>> new file mode 100644
>> index 0000000..722d01c
>> --- /dev/null
>> +++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
>> @@ -0,0 +1,70 @@
>> +/*
>> + * PPC64 architecture-specific definitions
>> + *
>> + * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published
>> + * by the Free Software Foundation.
>> + */
>> +
>> +#ifndef KVM__KVM_ARCH_H
>> +#define KVM__KVM_ARCH_H
> [snip]
>> +void ioport__setup_arch(void)
> [snip]
>> +int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
> 
> I'm seeing a lot of double-underscores -- is this common style in KVM
> tool?  It's reserved for use by the compiler and system library.  It's
> common in the kernel (though not used like this for namespace
> prefixes), but there's no system library involved there.
> 
>> diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
>> new file mode 100644
>> index 0000000..79422ff
>> --- /dev/null
>> +++ b/tools/kvm/powerpc/kvm-cpu.c
> [snip]
>> +#define MSR_SF		(1UL<<63)
>> +#define MSR_HV		(1UL<<60)
>> +#define MSR_VEC		(1UL<<25)
>> +#define MSR_VSX		(1UL<<23)
>> +#define MSR_POW		(1UL<<18)
>> +#define MSR_EE		(1UL<<15)
>> +#define MSR_PR		(1UL<<14)
>> +#define MSR_FP		(1UL<<13)
>> +#define MSR_ME		(1UL<<12)
>> +#define MSR_FE0		(1UL<<11)
>> +#define MSR_SE		(1UL<<10)
>> +#define MSR_BE		(1UL<<9)
>> +#define MSR_FE1		(1UL<<8)
>> +#define MSR_IR		(1UL<<5)
>> +#define MSR_DR		(1UL<<4)
>> +#define MSR_PMM		(1UL<<2)
>> +#define MSR_RI		(1UL<<1)
>> +#define MSR_LE		(1UL<<0)
> 
> Shouldn't these go in a header?
> 
>> +#define HUGETLBFS_MAGIC       0x958458f6
> 
> #include <linux/magic.h>
> 
> ?
> 
> -Scott

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood Dec. 7, 2011, 6:31 p.m. UTC | #5
On 12/07/2011 01:35 AM, Matt Evans wrote:
> Hi Scott,
> 
> On 07/12/11 05:03, Scott Wood wrote:
>> On 12/05/2011 10:05 PM, Matt Evans wrote:
>>> This patch adds a new arch directory, powerpc, basic file structure, register
>>> setup and where necessary stubs out arch-specific functions (e.g. interrupts,
>>> runloop exits) that later patches will provide.  The target is an
>>> SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
>>> 'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
>>> RTAS required to boot SPAPR pSeries kernels.
>>
>> You just sent out 28 patches removing "everything is x86"
>> dependencies -- may I suggest that the PPC code be structured so that
>> there isn't an "everything on PPC (or even PPC64) is SPAPR" assumption,
>> even if SPAPR is initially the only sub-arch present?
> 
> I had anticipated this comment (though not the "28 patches" remark, easy now).

I was just using that to illustrate how it's easier to handle earlier
than later -- no offense intended. :-)

> It is a fair comment, but you hit the nail on the head with your other mail
> (regarding configuring in i8042, presumably to emulate crappy dev boards)
> asking whether kvmtool has a config system.  It does not.
> 
> Since we currently lack any kind of build-time configuration (or any fancy
> run-time -M <machine> a la QEMU) it's a bit hard to cater for multiple
> platforms.  I'm aware that the PPC patches are painfully PPC64-with-SPAPR and I
> don't present them as perfect, but I really think we need to attack the
> configuration stuff before bifurcating.  Is this something you'd like to see to?

Just putting all SPAPR stuff in SPAPR-named files (or at least
SPAPR-named functions), and likewise for book3s stuff, etc. would be an
improvement.  I see that you did this for some things, but not all.  Try
to make it obvious where the target-specific branching would take place,
even if the actual branching mechanism is currently just a FIXME comment.

-Scott

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matt Evans Dec. 8, 2011, 2:57 a.m. UTC | #6
On 08/12/11 05:31, Scott Wood wrote:
> On 12/07/2011 01:35 AM, Matt Evans wrote:
>> Hi Scott,
>>
>> On 07/12/11 05:03, Scott Wood wrote:
>>> On 12/05/2011 10:05 PM, Matt Evans wrote:
>>>> This patch adds a new arch directory, powerpc, basic file structure, register
>>>> setup and where necessary stubs out arch-specific functions (e.g. interrupts,
>>>> runloop exits) that later patches will provide.  The target is an
>>>> SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
>>>> 'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
>>>> RTAS required to boot SPAPR pSeries kernels.
>>>
>>> You just sent out 28 patches removing "everything is x86"
>>> dependencies -- may I suggest that the PPC code be structured so that
>>> there isn't an "everything on PPC (or even PPC64) is SPAPR" assumption,
>>> even if SPAPR is initially the only sub-arch present?
>>
>> I had anticipated this comment (though not the "28 patches" remark, easy now).
> 
> I was just using that to illustrate how it's easier to handle earlier
> than later -- no offense intended. :-)
> 
>> It is a fair comment, but you hit the nail on the head with your other mail
>> (regarding configuring in i8042, presumably to emulate crappy dev boards)
>> asking whether kvmtool has a config system.  It does not.
>>
>> Since we currently lack any kind of build-time configuration (or any fancy
>> run-time -M <machine> a la QEMU) it's a bit hard to cater for multiple
>> platforms.  I'm aware that the PPC patches are painfully PPC64-with-SPAPR and I
>> don't present them as perfect, but I really think we need to attack the
>> configuration stuff before bifurcating.  Is this something you'd like to see to?
> 
> Just putting all SPAPR stuff in SPAPR-named files (or at least
> SPAPR-named functions), and likewise for book3s stuff, etc. would be an
> improvement.  I see that you did this for some things, but not all.  Try
> to make it obvious where the target-specific branching would take place,
> even if the actual branching mechanism is currently just a FIXME comment.

No worries, that's a good suggestion-- I'll have a spin through the PPC stuff and
see if there's anything worth splitting, or at least point out everywhere I can find
with an appropriate comment.

Thanks,


Matt

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 57dc521..58815a2 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -121,6 +121,16 @@  ifeq ($(ARCH),x86)
 	OTHEROBJS	+= x86/bios/bios-rom.o
 	ARCH_INCLUDE := x86/include
 endif
+# POWER/ppc:  Actually only support ppc64 currently.
+# CONFIG_PPC64 is defined next to CONFIG_PPC because kvm.c guards its
+# KVM_EXIT_PAPR_HCALL exit-reason entry with "#ifdef CONFIG_PPC64".
+ifeq ($(uname_M), ppc64)
+	DEFINES += -DCONFIG_PPC -DCONFIG_PPC64
+	OBJS	+= powerpc/ioport.o
+	OBJS	+= powerpc/irq.o
+	OBJS	+= powerpc/kvm.o
+	OBJS	+= powerpc/kvm-cpu.o
+	ARCH_INCLUDE := powerpc/include
+	CFLAGS += -m64
+endif
 
 ###
 
diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index 503ceae..d716ede 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -49,6 +49,9 @@  const char *kvm_exit_reasons[] = {
 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
+#ifdef CONFIG_PPC64
+	DEFINE_KVM_EXIT_REASON(KVM_EXIT_PAPR_HCALL),
+#endif
 };
 
 extern struct kvm *kvm;
diff --git a/tools/kvm/powerpc/include/kvm/barrier.h b/tools/kvm/powerpc/include/kvm/barrier.h
new file mode 100644
index 0000000..bc7d179
--- /dev/null
+++ b/tools/kvm/powerpc/include/kvm/barrier.h
@@ -0,0 +1,6 @@ 
+#ifndef KVM__BARRIER_H
+#define KVM__BARRIER_H
+
+/* Nothing is defined here for PPC64; this header only pulls in <asm/system.h>. */
+#include <asm/system.h>
+
+#endif /* KVM__BARRIER_H */
diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
new file mode 100644
index 0000000..722d01c
--- /dev/null
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -0,0 +1,70 @@ 
+/*
+ * PPC64 architecture-specific definitions
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef KVM__KVM_ARCH_H
+#define KVM__KVM_ARCH_H
+
+#include <stdbool.h>
+#include <linux/types.h>
+#include <time.h>
+
+/* Upper bound on vcpus; this is the value kvm__max_cpus() returns. */
+#define KVM_NR_CPUS			(255)
+
+/* MMIO lives after RAM, but it'd be nice if it didn't constantly move.
+ * Choose a suitably high address, e.g. 63T...  This limits RAM size.
+ */
+#define PPC_MMIO_START			0x3F0000000000UL
+#define PPC_MMIO_SIZE			0x010000000000UL
+
+/* Guest addresses used when loading and starting a flat kernel image:
+ * vcpu 0 starts executing at KERNEL_START_ADDR, every other vcpu at
+ * KERNEL_SECONDARY_START_ADDR, and the initrd is loaded at INITRD_LOAD_ADDR.
+ */
+#define KERNEL_LOAD_ADDR        	0x0000000000000000
+#define KERNEL_START_ADDR       	0x0000000000000000
+#define KERNEL_SECONDARY_START_ADDR     0x0000000000000060
+#define INITRD_LOAD_ADDR        	0x0000000002800000
+
+/* Space set aside at the top of guest RAM: the FDT takes the last
+ * FDT_MAX_SIZE bytes and RTAS the RTAS_MAX_SIZE bytes just below it.
+ */
+#define FDT_MAX_SIZE            	0x10000
+#define RTAS_MAX_SIZE           	0x10000
+
+/* NOTE(review): presumably the guest timebase frequency in Hz; it is not
+ * used anywhere in this patch -- confirm against the later patches.
+ */
+#define TIMEBASE_FREQ           	512000000ULL
+
+#define KVM_MMIO_START			PPC_MMIO_START
+
+/* This is the address that pci_get_io_space_block() starts allocating
+ * from.  Note that this is a PCI bus address.
+ */
+#define KVM_PCI_MMIO_AREA		0x1000000
+
+/* Per-VM state for the PPC64 port. */
+struct kvm {
+	int			sys_fd;		/* For system ioctls(), i.e. /dev/kvm */
+	int			vm_fd;		/* For VM ioctls() */
+	timer_t			timerid;	/* Posix timer for interrupts */
+
+	int			nrcpus;		/* Number of cpus to run */
+
+	u32			mem_slots;	/* for KVM_SET_USER_MEMORY_REGION */
+
+	u64			ram_size;	/* Guest RAM size in bytes */
+	void			*ram_start;	/* Host mapping of guest RAM (set by kvm__map_ram) */
+
+	bool			nmi_disabled;
+
+	bool			single_step;
+
+	const char		*vmlinux;
+	struct disk_image       **disks;
+	int                     nr_disks;
+	/* Guest-side locations and sizes of the firmware/boot blobs.
+	 * NOTE(review): "gra" presumably stands for guest real address.
+	 * fdt_gra and rtas_gra are computed in kvm__arch_init(); initrd_gra
+	 * and initrd_size are filled in by load_flat_binary().
+	 */
+	unsigned long		rtas_gra;
+	unsigned long		rtas_size;
+	unsigned long		fdt_gra;
+	unsigned long		initrd_gra;
+	unsigned long		initrd_size;
+	const char		*name;
+};
+
+#endif /* KVM__KVM_ARCH_H */
diff --git a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
new file mode 100644
index 0000000..dbabc57
--- /dev/null
+++ b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
@@ -0,0 +1,46 @@ 
+/*
+ * PPC64 cpu-specific definitions
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef KVM__KVM_CPU_ARCH_H
+#define KVM__KVM_CPU_ARCH_H
+
+/* Architecture-specific kvm_cpu definitions. */
+
+#include <linux/kvm.h>	/* for struct kvm_regs */
+
+#include <pthread.h>
+
+struct kvm;
+
+/* Per-vcpu state for the PPC64 port; allocated zeroed by kvm_cpu__new(). */
+struct kvm_cpu {
+	pthread_t		thread;		/* VCPU thread */
+
+	unsigned long		cpu_id;		/* 0 is the primary vcpu */
+
+	struct kvm		*kvm;		/* parent KVM */
+	int			vcpu_fd;	/* For VCPU ioctls() */
+	struct kvm_run		*kvm_run;	/* mmap()ed from vcpu_fd in kvm_cpu__init() */
+
+	struct kvm_regs		regs;
+	struct kvm_sregs	sregs;
+	struct kvm_fpu		fpu;
+
+	u8			is_running;
+	u8			paused;
+
+	/* Although PPC KVM doesn't yet support coalesced MMIO, generic code
+	 * needs this in our kvm_cpu:
+	 */
+	struct kvm_coalesced_mmio_ring  *ring;
+};
+
+void kvm_cpu__irq(struct kvm_cpu *vcpu, int pin, int level);
+
+#endif /* KVM__KVM_CPU_ARCH_H */
diff --git a/tools/kvm/powerpc/ioport.c b/tools/kvm/powerpc/ioport.c
new file mode 100644
index 0000000..a8e4dc3
--- /dev/null
+++ b/tools/kvm/powerpc/ioport.c
@@ -0,0 +1,18 @@ 
+/*
+ * PPC64 ioport platform setup.  There isn't any! :-)
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/ioport.h"
+
+#include <stdlib.h>
+
+/* Arch hook for ioport setup; intentionally a no-op on PPC. */
+void ioport__setup_arch(void)
+{
+	/* PPC has no legacy ioports to set up */
+}
diff --git a/tools/kvm/powerpc/irq.c b/tools/kvm/powerpc/irq.c
new file mode 100644
index 0000000..46aa64f
--- /dev/null
+++ b/tools/kvm/powerpc/irq.c
@@ -0,0 +1,40 @@ 
+/*
+ * PPC64 IRQ routines
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/irq.h"
+#include "kvm/kvm.h"
+#include "kvm/util.h"
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/kvm.h>
+#include <sys/ioctl.h>
+
+#include <stddef.h>
+#include <stdlib.h>
+
+/*
+ * Stub: does not allocate anything yet.  It only traces the request on
+ * stderr (the values currently stored behind @num, @pin and @line are
+ * printed, not modified) and returns 0.
+ */
+int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
+{
+	/* dev is unsigned, hence %u; the u8 values promote to int. */
+	fprintf(stderr, "irq__register_device(%u, [%d], [%d], [%d])\n",
+		dev, *num, *pin, *line);
+	return 0;
+}
+
+/* Stub: only traces that it was called; @kvm is not used yet. */
+void irq__init(struct kvm *kvm)
+{
+	/* Never pass a non-literal as the format string; also end the line. */
+	fprintf(stderr, "%s\n", __func__);
+}
+
+/* Not implemented: calls die() with this function's name as the message. */
+int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
+{
+	/* die() takes a format string, so pass the name as an argument. */
+	die("%s", __func__);
+	return 0;
+}
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
new file mode 100644
index 0000000..79422ff
--- /dev/null
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -0,0 +1,232 @@ 
+/*
+ * PPC64 processor support
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/kvm-cpu.h"
+
+#include "kvm/symbol.h"
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+
+/* File descriptor the register/code dumps below are written to. */
+static int debug_fd;
+
+/* Bit masks for the MSR value held in kvm_regs.msr.  kvm_cpu__setup_regs()
+ * starts vcpus with SF and ME set ("64bit, non-HV, ME") and
+ * kvm_cpu__show_registers() decodes a subset of these by name.
+ */
+#define MSR_SF		(1UL<<63)
+#define MSR_HV		(1UL<<60)
+#define MSR_VEC		(1UL<<25)
+#define MSR_VSX		(1UL<<23)
+#define MSR_POW		(1UL<<18)
+#define MSR_EE		(1UL<<15)
+#define MSR_PR		(1UL<<14)
+#define MSR_FP		(1UL<<13)
+#define MSR_ME		(1UL<<12)
+#define MSR_FE0		(1UL<<11)
+#define MSR_SE		(1UL<<10)
+#define MSR_BE		(1UL<<9)
+#define MSR_FE1		(1UL<<8)
+#define MSR_IR		(1UL<<5)
+#define MSR_DR		(1UL<<4)
+#define MSR_PMM		(1UL<<2)
+#define MSR_RI		(1UL<<1)
+#define MSR_LE		(1UL<<0)
+
+
+/* Select the file descriptor that the kvm_cpu__show_*() dumps write to. */
+void kvm_cpu__set_debug_fd(int fd)
+{
+	debug_fd = fd;
+}
+
+/* Return the descriptor last stored by kvm_cpu__set_debug_fd(). */
+int kvm_cpu__get_debug_fd(void)
+{
+	return debug_fd;
+}
+
+/* Allocate a zero-filled vcpu descriptor tied to @kvm; NULL if out of memory. */
+static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
+{
+	struct kvm_cpu *cpu = calloc(1, sizeof(*cpu));
+
+	if (cpu)
+		cpu->kvm = kvm;
+
+	return cpu;
+}
+
+/* Free the vcpu descriptor.  Only the structure itself is released here;
+ * vcpu_fd and the kvm_run mapping are not touched.
+ */
+void kvm_cpu__delete(struct kvm_cpu *vcpu)
+{
+	free(vcpu);
+}
+
+/*
+ * Create vcpu @cpu_id of @kvm: allocate its descriptor, create the vcpu
+ * with KVM_CREATE_VCPU, map its shared kvm_run area and enable the
+ * KVM_CAP_PPC_PAPR capability on it.
+ *
+ * Returns the new vcpu, or NULL if the descriptor cannot be allocated.
+ * Every ioctl/mmap failure is fatal.
+ */
+struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
+{
+	struct kvm_cpu *vcpu;
+	int mmap_size;
+	struct kvm_enable_cap papr_cap = { .cap = KVM_CAP_PPC_PAPR };
+
+	vcpu		= kvm_cpu__new(kvm);
+	if (!vcpu)
+		return NULL;
+
+	vcpu->cpu_id	= cpu_id;
+
+	vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
+	if (vcpu->vcpu_fd < 0)
+		die_perror("KVM_CREATE_VCPU ioctl");
+
+	mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
+	if (mmap_size < 0)
+		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
+
+	vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
+	if (vcpu->kvm_run == MAP_FAILED)
+		die("unable to mmap vcpu fd");
+
+	/* The machine being modelled is SPAPR, so a vcpu on which the PAPR
+	 * capability could not be enabled is useless: fail loudly instead of
+	 * carrying on as the unchecked call used to.
+	 */
+	if (ioctl(vcpu->vcpu_fd, KVM_ENABLE_CAP, &papr_cap) < 0)
+		die_perror("KVM_ENABLE_CAP(KVM_CAP_PPC_PAPR) ioctl");
+
+	/* We start all CPUs, directing non-primary threads into the kernel's
+	 * secondary start point.  When we come to support SLOF, we will start
+	 * only one and SLOF will RTAS call us to ask for others to be
+	 * started.
+	 */
+	vcpu->is_running = true;
+
+	return vcpu;
+}
+
+/* FPU part of the vcpu reset; a deliberate no-op, @vcpu is left untouched. */
+static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
+{
+	/* Don't have to do anything, there's no expected FPU state. */
+}
+
+/*
+ * Program the boot-time register state and push it to KVM.
+ *
+ * vcpu 0 enters at KERNEL_START_ADDR with the FDT address in r3 and r5
+ * cleared; every other vcpu enters at KERNEL_SECONDARY_START_ADDR with its
+ * cpu_id in r3.  A failing KVM_SET_REGS is fatal.
+ */
+static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
+{
+	struct kvm_regs *regs = &vcpu->regs;
+	bool primary = (vcpu->cpu_id == 0);
+
+	/* 64bit, non-HV, ME */
+	regs->msr = MSR_SF | MSR_ME;
+
+	if (!primary) {
+		regs->pc = KERNEL_SECONDARY_START_ADDR;
+		regs->gpr[3] = vcpu->cpu_id;
+	} else {
+		regs->pc = KERNEL_START_ADDR;
+		regs->gpr[3] = vcpu->kvm->fdt_gra;
+		regs->gpr[5] = 0;
+	}
+
+	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, regs) < 0)
+		die_perror("KVM_SET_REGS failed");
+}
+
+/* sregs part of the vcpu reset; a deliberate no-op, @vcpu is left untouched. */
+static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
+{
+	/* There's actually no sregs setup required on PPC64/SPAPR. */
+}
+
+/**
+ * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
+ *
+ * Runs the regs, sregs and fpu setup helpers in that order; on this
+ * architecture only the regs helper actually changes anything.
+ */
+void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
+{
+	kvm_cpu__setup_regs(vcpu);
+	kvm_cpu__setup_sregs(vcpu);
+	kvm_cpu__setup_fpu(vcpu);
+}
+
+/* kvm_cpu__irq - set KVM's IRQ flag on this vcpu
+ *
+ * Stub: the body is empty, so @pin and @level are currently ignored.
+ */
+void kvm_cpu__irq(struct kvm_cpu *vcpu, int pin, int level)
+{
+}
+
+/*
+ * Arch-specific exit handling.  Returns true if the exit reason was handled
+ * here.  The switch has no cases besides the default yet, so every exit
+ * reason currently yields false.
+ */
+bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
+{
+	bool ret = true;
+	struct kvm_run *run = vcpu->kvm_run;
+	switch(run->exit_reason) {
+	default:
+		ret = false;
+	}
+	return ret;
+}
+
+/* Expands to the name of MSR bit @b plus a trailing space if that bit is set
+ * in @m, and to the empty string otherwise.
+ */
+#define CONDSTR_BIT(m, b) (((m) & MSR_##b) ? #b" " : "")
+
+/*
+ * Fetch the vcpu's regs and sregs from KVM and dump them to debug_fd:
+ * NIP/MSR (with a subset of the MSR bits decoded by name), CTR/LR/CR,
+ * SRR0/1, XER, SPRG0-7, the 32 GPRs and the first 32 SLB entries.
+ * A failing ioctl is fatal.
+ */
+void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
+{
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	int r;
+
+	/* die_perror() keeps errno in the message, like the other ioctl
+	 * failure paths in this file.
+	 */
+	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0)
+		die_perror("KVM_GET_REGS failed");
+	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
+		die_perror("KVM_GET_SREGS failed");
+
+	dprintf(debug_fd, "\n Registers:\n");
+	dprintf(debug_fd, " NIP:   %016lx  MSR:   %016lx ( %s%s%s%s%s%s%s%s%s%s%s%s)\n",
+		regs.pc, regs.msr,
+		CONDSTR_BIT(regs.msr, SF),
+		CONDSTR_BIT(regs.msr, HV), /* ! */
+		CONDSTR_BIT(regs.msr, VEC),
+		CONDSTR_BIT(regs.msr, VSX),
+		CONDSTR_BIT(regs.msr, EE),
+		CONDSTR_BIT(regs.msr, PR),
+		CONDSTR_BIT(regs.msr, FP),
+		CONDSTR_BIT(regs.msr, ME),
+		CONDSTR_BIT(regs.msr, IR),
+		CONDSTR_BIT(regs.msr, DR),
+		CONDSTR_BIT(regs.msr, RI),
+		CONDSTR_BIT(regs.msr, LE));
+	dprintf(debug_fd, " CTR:   %016lx  LR:    %016lx  CR:   %08lx\n",
+		regs.ctr, regs.lr, regs.cr);
+	dprintf(debug_fd, " SRR0:  %016lx  SRR1:  %016lx  XER:  %016lx\n",
+		regs.srr0, regs.srr1, regs.xer);
+	dprintf(debug_fd, " SPRG0: %016lx  SPRG1: %016lx\n", regs.sprg0, regs.sprg1);
+	dprintf(debug_fd, " SPRG2: %016lx  SPRG3: %016lx\n", regs.sprg2, regs.sprg3);
+	dprintf(debug_fd, " SPRG4: %016lx  SPRG5: %016lx\n", regs.sprg4, regs.sprg5);
+	dprintf(debug_fd, " SPRG6: %016lx  SPRG7: %016lx\n", regs.sprg6, regs.sprg7);
+	dprintf(debug_fd, " GPRs:\n ");
+	for (r = 0; r < 32; r++) {
+		dprintf(debug_fd, "%016lx  ", regs.gpr[r]);
+		/* Four registers per output line. */
+		if ((r & 3) == 3)
+			dprintf(debug_fd, "\n ");
+	}
+	dprintf(debug_fd, "\n");
+
+	for (r = 0; r < 32; r++) {
+		dprintf(debug_fd, " SLB%02d  %016lx %016lx\n", r,
+			sregs.u.s.ppc64.slb[r].slbe, sregs.u.s.ppc64.slb[r].slbv);
+	}
+	dprintf(debug_fd, "----------\n");
+}
+
+/*
+ * Refresh vcpu->regs from KVM and dump 32 bytes of guest memory at the
+ * address held in r1 to debug_fd under a "Stack" heading.  No code is
+ * dumped yet.  A failing KVM_GET_REGS is fatal.
+ */
+void kvm_cpu__show_code(struct kvm_cpu *vcpu)
+{
+	/* die_perror() keeps errno in the message, like the other ioctl
+	 * failure paths in this file.
+	 */
+	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
+		die_perror("KVM_GET_REGS failed");
+
+	/* To do: dump some code...! */
+
+	dprintf(debug_fd, "\n Stack:\n");
+	dprintf(debug_fd,   " ------\n");
+	/* Only works in real mode: */
+	kvm__dump_mem(vcpu->kvm, vcpu->regs.gpr[1], 32);
+}
+
+/* Placeholder for a page-table dump; currently prints nothing. */
+void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
+{
+	/* Does nothing yet */
+}
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
new file mode 100644
index 0000000..036bfc0
--- /dev/null
+++ b/tools/kvm/powerpc/kvm.c
@@ -0,0 +1,231 @@ 
+/*
+ * PPC64 (SPAPR) platform support
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/kvm.h"
+#include "kvm/util.h"
+
+#include <linux/kvm.h>
+
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <asm/unistd.h>
+#include <errno.h>
+
+#include <libfdt.h>
+
+/* hugetlbfs mount used for guest RAM when the caller supplies no path. */
+#define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
+
+/* Copy of the kernel command line, saved by load_flat_binary(). */
+static char kern_cmdline[2048];
+
+/* NOTE(review): holds a single all-zero entry, presumably the list
+ * terminator, i.e. no arch-specific KVM extensions are required -- confirm
+ * against the generic code that walks this table.
+ */
+struct kvm_ext kvm_req_ext[] = {
+	{ 0, 0 }
+};
+
+/* No host CPU probing is done on PPC64: this unconditionally reports true. */
+bool kvm__arch_cpu_supports_vm(void)
+{
+	return true;
+}
+
+/* Returns a hard-coded 4; nothing is queried from KVM or @kvm (see FIXME). */
+int kvm__recommended_cpus(struct kvm *kvm)
+{
+	/* FIXME, this is obviously fictional; does KVM/POWER have a 'get
+	 * reasonable number of CPUs' IOCTL like x86?  If not, perhaps suggest
+	 * number of present cores. */
+	return 4;
+}
+
+/* Returns the compile-time limit KVM_NR_CPUS; @kvm is not consulted. */
+int kvm__max_cpus(struct kvm *kvm)
+{
+	return KVM_NR_CPUS;
+}
+
+/*
+ * Hand the whole of guest RAM to kvm__register_mem() as a single region
+ * that starts at guest physical address 0 and is backed by the host mapping
+ * at kvm->ram_start.  Dies if RAM would reach PPC_MMIO_START.
+ */
+void kvm__init_ram(struct kvm *kvm)
+{
+	u64	phys_start, phys_size;
+	void	*host_mem;
+
+	phys_start = 0;
+	phys_size  = kvm->ram_size;
+	host_mem   = kvm->ram_start;
+
+	/* We put MMIO at PPC_MMIO_START, high up.  Make sure that
+	 * this doesn't crash into the end of RAM -- practically, this is
+	 * so high (63TB!) that this won't happen.
+	 */
+	if (phys_size >= PPC_MMIO_START)
+		/* u64 is printed through an explicit unsigned long long. */
+		die("Too much memory (%llu, what a nice problem): overlaps MMIO!\n",
+		    (unsigned long long)phys_size);
+
+	kvm__register_mem(kvm, phys_start, phys_size, host_mem);
+}
+
+/* f_type value that statfs() must report for the backing filesystem. */
+#define HUGETLBFS_MAGIC       0x958458f6
+
+/*
+ * Back guest RAM with a temporary, immediately unlinked file on hugetlbfs.
+ *
+ * @kvm:  VM whose ram_size is already set; ram_start is filled in here.
+ * @path: directory on a hugetlbfs mount, or NULL to use HUGETLBFS_PATH.
+ *
+ * Every failure is fatal.
+ */
+static void kvm__map_ram(struct kvm *kvm, const char *path)
+{
+	char mpath[PATH_MAX];
+	int fd;
+	int r;
+	struct statfs sfs;
+	const char *htlbfs_path = path;
+
+	if (!path) {
+		htlbfs_path = HUGETLBFS_PATH;
+		pr_info("Using default %s for memory", htlbfs_path);
+	}
+
+	do {
+		/* QEMU seems to work around this returning EINTR...  Let's do
+		 * that too. */
+		r = statfs(htlbfs_path, &sfs);
+	} while (r && errno == EINTR);
+
+	if (r)
+		die("Can't stat %s\n", htlbfs_path);
+
+	if (sfs.f_type != HUGETLBFS_MAGIC) {
+		die("%s is not hugetlbfs!\n", htlbfs_path);
+	}
+
+	/* A truncated template would make mkstemp() operate on the wrong name. */
+	r = snprintf(mpath, sizeof(mpath), "%s/kvmtoolXXXXXX", htlbfs_path);
+	if (r < 0 || (size_t)r >= sizeof(mpath))
+		die("hugetlbfs path %s is too long\n", htlbfs_path);
+
+	if (sfs.f_bsize == 0 || (unsigned long)sfs.f_bsize > kvm->ram_size) {
+		die("Can't use hugetlbfs pagesize %ld for mem size %llu\n",
+		    (long)sfs.f_bsize, (unsigned long long)kvm->ram_size);
+	}
+	fd = mkstemp(mpath);
+
+	if (fd < 0)
+		die("Can't open %s for hugetlbfs map\n", mpath);
+
+	/* The name is only needed to create the file; the fd keeps it alive. */
+	unlink(mpath);
+
+	/* Without a successful resize the mapping below would not be backed. */
+	if (ftruncate(fd, kvm->ram_size) < 0)
+		die_perror("ftruncate of hugetlbfs backing file");
+
+	/* What other flags?  Is it required that we prealloc? */
+	kvm->ram_start = mmap(NULL, kvm->ram_size, PROT_RW, MAP_PRIVATE, fd, 0);
+
+	if (kvm->ram_start == MAP_FAILED)
+		die("Couldn't map %llu bytes for RAM (%d)\n",
+		    (unsigned long long)kvm->ram_size, errno);
+
+	/* The mapping stays valid after close(); don't leak the descriptor. */
+	close(fd);
+}
+
+/* Arch hook for adjusting the kernel command line; a no-op here, so
+ * @cmdline is left unmodified and @video is ignored.
+ */
+void kvm__arch_set_cmdline(char *cmdline, bool video)
+{
+	/* We don't need anything unusual in here. */
+}
+
+/*
+ * Architecture-specific KVM init: record the RAM size, map guest RAM from
+ * hugetlbfs, place the FDT and RTAS areas at the top of RAM, and refuse to
+ * continue when KVM_CAP_PPC_RMA reports 2 (contiguous RMA needed).
+ * @kvm_dev and @name are not used here.
+ */
+void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name)
+{
+	kvm->ram_size = ram_size;
+	kvm__map_ram(kvm, hugetlbfs_path);
+
+	/* FDT goes at top of memory, RTAS just below */
+	kvm->fdt_gra  = kvm->ram_size - FDT_MAX_SIZE;
+	kvm->rtas_gra = kvm->fdt_gra - RTAS_MAX_SIZE;
+
+	madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE);
+
+	if (ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RMA) == 2)
+		die("Need contiguous RMA allocation on this hardware, which is not yet supported.");
+}
+
+/* Stub: only traces the requested @irq/@level on stderr; @kvm is unused and
+ * nothing is delivered to the guest.
+ */
+void kvm__irq_line(struct kvm *kvm, int irq, int level)
+{
+	fprintf(stderr, "irq_line(%d, %d)\n", irq, level);
+}
+
+/* Pulse @irq: set the line to 1 and then straight back to 0. */
+void kvm__irq_trigger(struct kvm *kvm, int irq)
+{
+	kvm__irq_line(kvm, irq, 1);
+	kvm__irq_line(kvm, irq, 0);
+}
+
+/*
+ * Copy the remainder of @fd into host memory starting at @dst without ever
+ * writing at or beyond @limit.  Returns the first unwritten byte; a result
+ * equal to (or above) @limit means the space ran out before end-of-file was
+ * seen.  A read error is fatal.
+ */
+static char *load_image(int fd, char *dst, char *limit)
+{
+	while (dst < limit) {
+		size_t chunk = limit - dst;
+		ssize_t nr;
+
+		if (chunk > 65536)
+			chunk = 65536;
+
+		nr = read(fd, dst, chunk);
+		if (nr < 0) {
+			if (errno == EINTR)
+				continue;
+			die_perror("read");
+		}
+		if (nr == 0)
+			break;
+
+		dst += nr;
+	}
+
+	return dst;
+}
+
+/*
+ * Load a flat binary kernel to KERNEL_LOAD_ADDR and, if @fd_initrd is not
+ * -1, an initrd to INITRD_LOAD_ADDR, then save @kernel_cmdline.
+ *
+ * kvm->initrd_gra/initrd_size are set when an initrd is loaded; otherwise
+ * initrd_size is set to 0.  Returns true; every failure is fatal.
+ */
+int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline)
+{
+	char *ram_end = (char *)kvm->ram_start + kvm->ram_size;
+	char *k_start;
+	char *i_start;
+	char *p;
+
+	if (lseek(fd_kernel, 0, SEEK_SET) < 0)
+		die_perror("lseek");
+
+	k_start = guest_flat_to_host(kvm, KERNEL_LOAD_ADDR);
+
+	/* Bounded by the end of guest RAM so an oversized image cannot be
+	 * written past the mapping.
+	 */
+	p = load_image(fd_kernel, k_start, ram_end);
+	if (p >= ram_end)
+		die("Kernel too big to contain in guest RAM.\n");
+
+	pr_info("Loaded kernel to 0x%x (%ld bytes)", KERNEL_LOAD_ADDR, (long)(p - k_start));
+
+	if (fd_initrd != -1) {
+		if (lseek(fd_initrd, 0, SEEK_SET) < 0)
+			die_perror("lseek");
+
+		if (p - k_start > INITRD_LOAD_ADDR)
+			die("Kernel overlaps initrd!");
+
+		/* The initrd always goes to the fixed INITRD_LOAD_ADDR. */
+		i_start = guest_flat_to_host(kvm, INITRD_LOAD_ADDR);
+
+		p = load_image(fd_initrd, i_start, ram_end);
+		if (p >= ram_end)
+			die("initrd too big to contain in guest RAM.\n");
+
+		pr_info("Loaded initrd to 0x%x (%ld bytes)", INITRD_LOAD_ADDR, (long)(p - i_start));
+		kvm->initrd_gra = INITRD_LOAD_ADDR;
+		kvm->initrd_size = p - i_start;
+	} else {
+		kvm->initrd_size = 0;
+	}
+
+	/* Leave room for, and always write, the terminating NUL. */
+	strncpy(kern_cmdline, kernel_cmdline, sizeof(kern_cmdline) - 1);
+	kern_cmdline[sizeof(kern_cmdline) - 1] = '\0';
+
+	return true;
+}
+
+/* Always returns false without touching any argument: bzImages are not
+ * supported on this architecture.
+ */
+bool load_bzimage(struct kvm *kvm, int fd_kernel,
+		  int fd_initrd, const char *kernel_cmdline, u16 vidmode)
+{
+	/* We don't support bzImages. */
+	return false;
+}
+
+/* Placeholder for building the guest's FDT; the body is still empty. */
+static void setup_fdt(struct kvm *kvm)
+{
+
+}
+
+/**
+ * kvm__arch_setup_firmware
+ *
+ * Only calls setup_fdt() for now; the RTAS and SLOF steps are placeholder
+ * comments with no code behind them.
+ */
+void kvm__arch_setup_firmware(struct kvm *kvm)
+{
+	/* Load RTAS */
+
+	/* Load SLOF */
+
+	/* Init FDT */
+	setup_fdt(kvm);
+}