Patchwork [V2,1/6] kvm tools: Generate SPAPR PPC64 guest device tree

login
register
mail settings
Submitter Matt Evans
Date Dec. 13, 2011, 7:10 a.m.
Message ID <1323760250-13237-2-git-send-email-matt@ozlabs.org>
Download mbox | patch
Permalink /patch/131025/
State New
Headers show

Comments

Matt Evans - Dec. 13, 2011, 7:10 a.m.
The generated DT is the bare minimum structure required for SPAPR (on which
subsequent patches for VIO, XICS, PCI etc. will build); root node, cpus, memory.

Some aspects are currently hardwired for simplicity, for example advertised
page sizes, HPT size, SLB size, VMX/DFP, etc.  Future support of a variety
of POWER CPUs should acquire this info from the host and encode appropriately.

This requires a 64-bit libfdt.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile                       |    3 +-
 tools/kvm/powerpc/include/kvm/kvm-arch.h |   10 ++
 tools/kvm/powerpc/kvm.c                  |  141 ++++++++++++++++++++++++++++++
 3 files changed, 153 insertions(+), 1 deletions(-)
David Gibson - Dec. 14, 2011, 2:25 a.m.
On Tue, Dec 13, 2011 at 06:10:45PM +1100, Matt Evans wrote:
> The generated DT is the bare minimum structure required for SPAPR (on which
> subsequent patches for VIO, XICS, PCI etc. will build); root node, cpus, memory.
> 
> Some aspects are currently hardwired for simplicity, for example advertised
> page sizes, HPT size, SLB size, VMX/DFP, etc.  Future support of a variety
> of POWER CPUs should acquire this info from the host and encode appropriately.
> 
> This requires a 64-bit libfdt.

There's already a copy of libfdt embedded in the kernel tree
(scripts/dtc/libfdt), which you should be able to use to build one of
these as you go.

> 
> Signed-off-by: Matt Evans <matt@ozlabs.org>
> ---
>  tools/kvm/Makefile                       |    3 +-
>  tools/kvm/powerpc/include/kvm/kvm-arch.h |   10 ++
>  tools/kvm/powerpc/kvm.c                  |  141 ++++++++++++++++++++++++++++++
>  3 files changed, 153 insertions(+), 1 deletions(-)
> 
> diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
> index 5bb3f08..4ee4805 100644
> --- a/tools/kvm/Makefile
> +++ b/tools/kvm/Makefile
> @@ -132,7 +132,8 @@ ifeq ($(uname_M), ppc64)
>  	OBJS	+= powerpc/kvm.o
>  	OBJS	+= powerpc/kvm-cpu.o
>  	ARCH_INCLUDE := powerpc/include
> -	CFLAGS += -m64
> +	CFLAGS 	+= -m64
> +	LIBS 	+= -lfdt
>  endif
>  
>  ###
> diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
> index da61774..33a3827 100644
> --- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
> +++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
> @@ -69,4 +69,14 @@ struct kvm {
>  	const char		*name;
>  };
>  
> +/* Helper for the various bits of code that generate FDT nodes */
> +#define _FDT(exp)							\
> +	do {								\
> +		int ret = (exp);					\
> +		if (ret < 0) {						\
> +			die("Error creating device tree: %s: %s\n",	\
> +			    #exp, fdt_strerror(ret));			\
> +		}							\
> +	} while (0)
> +
>  #endif /* KVM__KVM_ARCH_H */
> diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
> index f838a8f..95ed1cc 100644
> --- a/tools/kvm/powerpc/kvm.c
> +++ b/tools/kvm/powerpc/kvm.c
> @@ -3,6 +3,9 @@
>   *
>   * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
>   *
> + * Portions of FDT setup borrowed from QEMU, copyright 2010 David Gibson, IBM
> + * Corporation.
> + *
>   * This program is free software; you can redistribute it and/or modify it
>   * under the terms of the GNU General Public License version 2 as published
>   * by the Free Software Foundation.
> @@ -29,6 +32,8 @@
>  #include <linux/byteorder.h>
>  #include <libfdt.h>
>  
> +#define HPT_ORDER 24
> +
>  #define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
>  
>  static char kern_cmdline[2048];
> @@ -168,9 +173,145 @@ bool load_bzimage(struct kvm *kvm, int fd_kernel,
>  	return false;
>  }
>  
> +#define SMT_THREADS 4
> +
> +static uint32_t mfpvr(void)
> +{
> +	uint32_t r;
> +	asm volatile ("mfpvr %0" : "=r"(r));
> +	return r;
> +}
> +
>  static void setup_fdt(struct kvm *kvm)
>  {
> +	uint64_t 	mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
> +	int 		smp_cpus = kvm->nrcpus;
> +	char 		hypertas_prop_kvm[] = "hcall-pft\0hcall-term\0"
> +		"hcall-dabr\0hcall-interrupt\0hcall-tce\0hcall-vio\0"
> +		"hcall-splpar\0hcall-bulk";
> +	int 		i, j;
> +	char 		cpu_name[30];
> +	u8		staging_fdt[FDT_MAX_SIZE];
> +	uint32_t      	pvr = mfpvr();
> +
> +	/* Generate an appropriate DT at kvm->fdt_gra */
> +	void *fdt_dest = guest_flat_to_host(kvm, kvm->fdt_gra);
> +	void *fdt = staging_fdt;
> +
> +	_FDT(fdt_create(fdt, FDT_MAX_SIZE));
> +	_FDT(fdt_finish_reservemap(fdt));
> +
> +	_FDT(fdt_begin_node(fdt, ""));
> +
> +	_FDT(fdt_property_string(fdt, "device_type", "chrp"));
> +	_FDT(fdt_property_string(fdt, "model", "IBM pSeries (kvmtool)"));
> +	_FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
> +	_FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
> +
> +	/* /chosen */
> +	_FDT(fdt_begin_node(fdt, "chosen"));
> +	/* cmdline */
> +	_FDT(fdt_property_string(fdt, "bootargs", kern_cmdline));
> +	/* Initrd */
> +	if (kvm->initrd_size != 0) {
> +		uint32_t ird_st_prop = cpu_to_be32(kvm->initrd_gra);
> +		uint32_t ird_end_prop = cpu_to_be32(kvm->initrd_gra +
> +						    kvm->initrd_size);
> +		_FDT(fdt_property(fdt, "linux,initrd-start",
> +				   &ird_st_prop, sizeof(ird_st_prop)));
> +		_FDT(fdt_property(fdt, "linux,initrd-end",
> +				   &ird_end_prop, sizeof(ird_end_prop)));
> +	}
> +	_FDT(fdt_end_node(fdt));
> +
> +	/*
> +	 * Memory: We don't alloc. a separate RMA yet.  If we ever need to
> +	 * (CAP_PPC_RMA == 2) then have one memory node for 0->RMAsize, and
> +	 * another RMAsize->endOfMem.
> +	 */
> +	_FDT(fdt_begin_node(fdt, "memory@0"));
> +	_FDT(fdt_property_string(fdt, "device_type", "memory"));
> +	_FDT(fdt_property(fdt, "reg", mem_reg_property,
> +			  sizeof(mem_reg_property)));
> +	_FDT(fdt_end_node(fdt));
> +
> +	/* CPUs */
> +	_FDT(fdt_begin_node(fdt, "cpus"));
> +	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
> +	_FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
> +
> +	for (i = 0; i < smp_cpus; i += SMT_THREADS) {
> +		/*
> +		 * These page and segment sizes are a basic minimum set.
> +		 * Really, we should be fancier and work out what the host
> +		 * supports then encode this here.
> +		 */
> +		int32_t page_sizes_prop[] = {0xc, 0x0, 0x1, 0xc, 0x0,
> +					     0x18, 0x100, 0x1, 0x18, 0x0};
> +		int32_t seg_sizes_prop[] = {0x1c, 0x28, 0xffffffff, 0xffffffff};


So here and in the vmx and dfp settings below, you appear to assume
POWER7, but I haven't spotted any code to check that assumption.

> +		int32_t pft_size_prop[] = { 0, HPT_ORDER };
> +		uint32_t servers_prop[SMT_THREADS];
> +		uint32_t gservers_prop[SMT_THREADS * 2];
> +		int threads = (smp_cpus - i) >= SMT_THREADS ? SMT_THREADS :
> +			smp_cpus - i;
> +
> +		sprintf(cpu_name, "PowerPC,POWER7@%d", i);
> +		_FDT(fdt_begin_node(fdt, cpu_name));
> +		_FDT(fdt_property_string(fdt, "name", "PowerPC,POWER7"));
> +		_FDT(fdt_property_string(fdt, "device_type", "cpu"));
> +
> +		_FDT(fdt_property_cell(fdt, "reg", i));
> +		_FDT(fdt_property_cell(fdt, "cpu-version", pvr));
> +		_FDT(fdt_property_cell(fdt, "dcache-block-size", 0x00000080));
> +		_FDT(fdt_property_cell(fdt, "icache-block-size", 0x00000080));
> +		_FDT(fdt_property_cell(fdt, "timebase-frequency", 512000000));
> +		_FDT(fdt_property_cell(fdt, "clock-frequency", 0xddbab200));
> +
> +		/* SLB size is hardwired as we currently assume POWERn */
> +		_FDT(fdt_property_cell(fdt, "ibm,slb-size", 32));
> +		/*
> +		 * HPT size is also hardwired; KVM currently fixes it at 16MB
> +		 * but the moment that changes we'll need to read it out of the
> +		 * kernel.
> +		 */
> +		_FDT(fdt_property(fdt, "ibm,pft-size", pft_size_prop,
> +				  sizeof(pft_size_prop)));
> +
> +		_FDT(fdt_property_string(fdt, "status", "okay"));
> +		_FDT(fdt_property(fdt, "64-bit", NULL, 0));
> +		/* A server for each thread in this core */
> +		for (j = 0; j < SMT_THREADS; j++) {
> +			servers_prop[j] = cpu_to_be32(i+j);
> +			/*
> +			 * Hack borrowed from QEMU, direct the group queues back
> +			 * to cpu 0:
> +			 */
> +			gservers_prop[j*2] = cpu_to_be32(i+j);
> +			gservers_prop[j*2 + 1] = 0;
> +		}
> +		_FDT(fdt_property(fdt, "ibm,ppc-interrupt-server#s",
> +				   servers_prop, threads * sizeof(uint32_t)));
> +		_FDT(fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
> +				  gservers_prop,
> +				  threads * 2 * sizeof(uint32_t)));
> +		_FDT(fdt_property(fdt, "ibm,segment-page-sizes",
> +				  page_sizes_prop, sizeof(page_sizes_prop)));
> +		_FDT(fdt_property(fdt, "ibm,processor-segment-sizes",
> +				  seg_sizes_prop, sizeof(seg_sizes_prop)));
> +		/* And VMX / DFP */
> +		_FDT(fdt_property_cell(fdt, "ibm,vmx", 0x2));
> +		_FDT(fdt_property_cell(fdt, "ibm,dfp", 0x1));
> +		_FDT(fdt_end_node(fdt));
> +	}
> +	_FDT(fdt_end_node(fdt));
> +
> +	/* Finalise: */
> +	_FDT(fdt_end_node(fdt)); /* Root node */
> +	_FDT(fdt_finish(fdt));
>  
> +	_FDT(fdt_open_into(fdt, fdt_dest, FDT_MAX_SIZE));
> +	_FDT(fdt_add_mem_rsv(fdt_dest, kvm->rtas_gra, kvm->rtas_size));
> +	_FDT(fdt_pack(fdt_dest));
>  }
>  
>  /**

Patch

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 5bb3f08..4ee4805 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -132,7 +132,8 @@  ifeq ($(uname_M), ppc64)
 	OBJS	+= powerpc/kvm.o
 	OBJS	+= powerpc/kvm-cpu.o
 	ARCH_INCLUDE := powerpc/include
-	CFLAGS += -m64
+	CFLAGS 	+= -m64
+	LIBS 	+= -lfdt
 endif
 
 ###
diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
index da61774..33a3827 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -69,4 +69,14 @@  struct kvm {
 	const char		*name;
 };
 
+/* Helper for the various bits of code that generate FDT nodes */
+#define _FDT(exp)							\
+	do {								\
+		int ret = (exp);					\
+		if (ret < 0) {						\
+			die("Error creating device tree: %s: %s\n",	\
+			    #exp, fdt_strerror(ret));			\
+		}							\
+	} while (0)
+
 #endif /* KVM__KVM_ARCH_H */
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index f838a8f..95ed1cc 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -3,6 +3,9 @@ 
  *
  * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
  *
+ * Portions of FDT setup borrowed from QEMU, copyright 2010 David Gibson, IBM
+ * Corporation.
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
@@ -29,6 +32,8 @@ 
 #include <linux/byteorder.h>
 #include <libfdt.h>
 
+#define HPT_ORDER 24
+
 #define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
 
 static char kern_cmdline[2048];
@@ -168,9 +173,145 @@  bool load_bzimage(struct kvm *kvm, int fd_kernel,
 	return false;
 }
 
+#define SMT_THREADS 4
+
+static uint32_t mfpvr(void)
+{
+	uint32_t r;
+	asm volatile ("mfpvr %0" : "=r"(r));
+	return r;
+}
+
 static void setup_fdt(struct kvm *kvm)
 {
+	uint64_t 	mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
+	int 		smp_cpus = kvm->nrcpus;
+	char 		hypertas_prop_kvm[] = "hcall-pft\0hcall-term\0"
+		"hcall-dabr\0hcall-interrupt\0hcall-tce\0hcall-vio\0"
+		"hcall-splpar\0hcall-bulk";
+	int 		i, j;
+	char 		cpu_name[30];
+	u8		staging_fdt[FDT_MAX_SIZE];
+	uint32_t      	pvr = mfpvr();
+
+	/* Generate an appropriate DT at kvm->fdt_gra */
+	void *fdt_dest = guest_flat_to_host(kvm, kvm->fdt_gra);
+	void *fdt = staging_fdt;
+
+	_FDT(fdt_create(fdt, FDT_MAX_SIZE));
+	_FDT(fdt_finish_reservemap(fdt));
+
+	_FDT(fdt_begin_node(fdt, ""));
+
+	_FDT(fdt_property_string(fdt, "device_type", "chrp"));
+	_FDT(fdt_property_string(fdt, "model", "IBM pSeries (kvmtool)"));
+	_FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
+	_FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
+
+	/* /chosen */
+	_FDT(fdt_begin_node(fdt, "chosen"));
+	/* cmdline */
+	_FDT(fdt_property_string(fdt, "bootargs", kern_cmdline));
+	/* Initrd */
+	if (kvm->initrd_size != 0) {
+		uint32_t ird_st_prop = cpu_to_be32(kvm->initrd_gra);
+		uint32_t ird_end_prop = cpu_to_be32(kvm->initrd_gra +
+						    kvm->initrd_size);
+		_FDT(fdt_property(fdt, "linux,initrd-start",
+				   &ird_st_prop, sizeof(ird_st_prop)));
+		_FDT(fdt_property(fdt, "linux,initrd-end",
+				   &ird_end_prop, sizeof(ird_end_prop)));
+	}
+	_FDT(fdt_end_node(fdt));
+
+	/*
+	 * Memory: We don't alloc. a separate RMA yet.  If we ever need to
+	 * (CAP_PPC_RMA == 2) then have one memory node for 0->RMAsize, and
+	 * another RMAsize->endOfMem.
+	 */
+	_FDT(fdt_begin_node(fdt, "memory@0"));
+	_FDT(fdt_property_string(fdt, "device_type", "memory"));
+	_FDT(fdt_property(fdt, "reg", mem_reg_property,
+			  sizeof(mem_reg_property)));
+	_FDT(fdt_end_node(fdt));
+
+	/* CPUs */
+	_FDT(fdt_begin_node(fdt, "cpus"));
+	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
+	_FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
+
+	for (i = 0; i < smp_cpus; i += SMT_THREADS) {
+		/*
+		 * These page and segment sizes are a basic minimum set.
+		 * Really, we should be fancier and work out what the host
+		 * supports then encode this here.
+		 */
+		int32_t page_sizes_prop[] = {0xc, 0x0, 0x1, 0xc, 0x0,
+					     0x18, 0x100, 0x1, 0x18, 0x0};
+		int32_t seg_sizes_prop[] = {0x1c, 0x28, 0xffffffff, 0xffffffff};
+		int32_t pft_size_prop[] = { 0, HPT_ORDER };
+		uint32_t servers_prop[SMT_THREADS];
+		uint32_t gservers_prop[SMT_THREADS * 2];
+		int threads = (smp_cpus - i) >= SMT_THREADS ? SMT_THREADS :
+			smp_cpus - i;
+
+		sprintf(cpu_name, "PowerPC,POWER7@%d", i);
+		_FDT(fdt_begin_node(fdt, cpu_name));
+		_FDT(fdt_property_string(fdt, "name", "PowerPC,POWER7"));
+		_FDT(fdt_property_string(fdt, "device_type", "cpu"));
+
+		_FDT(fdt_property_cell(fdt, "reg", i));
+		_FDT(fdt_property_cell(fdt, "cpu-version", pvr));
+		_FDT(fdt_property_cell(fdt, "dcache-block-size", 0x00000080));
+		_FDT(fdt_property_cell(fdt, "icache-block-size", 0x00000080));
+		_FDT(fdt_property_cell(fdt, "timebase-frequency", 512000000));
+		_FDT(fdt_property_cell(fdt, "clock-frequency", 0xddbab200));
+
+		/* SLB size is hardwired as we currently assume POWERn */
+		_FDT(fdt_property_cell(fdt, "ibm,slb-size", 32));
+		/*
+		 * HPT size is also hardwired; KVM currently fixes it at 16MB
+		 * but the moment that changes we'll need to read it out of the
+		 * kernel.
+		 */
+		_FDT(fdt_property(fdt, "ibm,pft-size", pft_size_prop,
+				  sizeof(pft_size_prop)));
+
+		_FDT(fdt_property_string(fdt, "status", "okay"));
+		_FDT(fdt_property(fdt, "64-bit", NULL, 0));
+		/* A server for each thread in this core */
+		for (j = 0; j < SMT_THREADS; j++) {
+			servers_prop[j] = cpu_to_be32(i+j);
+			/*
+			 * Hack borrowed from QEMU, direct the group queues back
+			 * to cpu 0:
+			 */
+			gservers_prop[j*2] = cpu_to_be32(i+j);
+			gservers_prop[j*2 + 1] = 0;
+		}
+		_FDT(fdt_property(fdt, "ibm,ppc-interrupt-server#s",
+				   servers_prop, threads * sizeof(uint32_t)));
+		_FDT(fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
+				  gservers_prop,
+				  threads * 2 * sizeof(uint32_t)));
+		_FDT(fdt_property(fdt, "ibm,segment-page-sizes",
+				  page_sizes_prop, sizeof(page_sizes_prop)));
+		_FDT(fdt_property(fdt, "ibm,processor-segment-sizes",
+				  seg_sizes_prop, sizeof(seg_sizes_prop)));
+		/* And VMX / DFP */
+		_FDT(fdt_property_cell(fdt, "ibm,vmx", 0x2));
+		_FDT(fdt_property_cell(fdt, "ibm,dfp", 0x1));
+		_FDT(fdt_end_node(fdt));
+	}
+	_FDT(fdt_end_node(fdt));
+
+	/* Finalise: */
+	_FDT(fdt_end_node(fdt)); /* Root node */
+	_FDT(fdt_finish(fdt));
 
+	_FDT(fdt_open_into(fdt, fdt_dest, FDT_MAX_SIZE));
+	_FDT(fdt_add_mem_rsv(fdt_dest, kvm->rtas_gra, kvm->rtas_size));
+	_FDT(fdt_pack(fdt_dest));
 }
 
 /**