@@ -89,6 +89,7 @@ qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num)
}
static void *spapr_create_fdt_skel(const char *cpu_model,
+ target_phys_addr_t rma_size,
target_phys_addr_t initrd_base,
target_phys_addr_t initrd_size,
const char *boot_device,
@@ -97,7 +98,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
{
void *fdt;
CPUState *env;
- uint64_t mem_reg_property[] = { 0, cpu_to_be64(ram_size) };
+ uint64_t mem_reg_property_rma[] = { 0, cpu_to_be64(rma_size) };
+ uint64_t mem_reg_property_nonrma[] = { cpu_to_be64(rma_size),
+ cpu_to_be64(ram_size - rma_size) };
uint32_t start_prop = cpu_to_be32(initrd_base);
uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
@@ -143,15 +146,25 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
_FDT((fdt_end_node(fdt)));
- /* memory node */
+ /* memory node(s) */
_FDT((fdt_begin_node(fdt, "memory@0")));
_FDT((fdt_property_string(fdt, "device_type", "memory")));
- _FDT((fdt_property(fdt, "reg",
- mem_reg_property, sizeof(mem_reg_property))));
-
+ _FDT((fdt_property(fdt, "reg", mem_reg_property_rma,
+ sizeof(mem_reg_property_rma))));
_FDT((fdt_end_node(fdt)));
+ if (ram_size > rma_size) {
+ char mem_name[32];
+
+ sprintf(mem_name, "memory@%" PRIx64, (uint64_t)rma_size);
+ _FDT((fdt_begin_node(fdt, mem_name)));
+ _FDT((fdt_property_string(fdt, "device_type", "memory")));
+ _FDT((fdt_property(fdt, "reg", mem_reg_property_nonrma,
+ sizeof(mem_reg_property_nonrma))));
+ _FDT((fdt_end_node(fdt)));
+ }
+
/* cpus */
_FDT((fdt_begin_node(fdt, "cpus")));
@@ -341,6 +354,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
{
CPUState *env;
int i;
+ target_phys_addr_t rma_alloc_size, rma_size;
ram_addr_t ram_offset;
uint32_t initrd_base;
long kernel_size, initrd_size, fw_size;
@@ -350,10 +364,23 @@ static void ppc_spapr_init(ram_addr_t ram_size,
spapr = g_malloc(sizeof(*spapr));
cpu_ppc_hypercall = emulate_spapr_hypercall;
+ /* Allocate RMA if necessary */
+ rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma");
+
+ if (rma_alloc_size == -1) {
+ hw_error("qemu: Unable to create RMA\n");
+ exit(1);
+ }
+ if (rma_alloc_size && (rma_alloc_size < ram_size)) {
+ rma_size = rma_alloc_size;
+ } else {
+ rma_size = ram_size;
+ }
+
/* We place the device tree just below either the top of RAM, or
* 2GB, so that it can be processed with 32-bit code if
* necessary */
- spapr->fdt_addr = MIN(ram_size, 0x80000000) - FDT_MAX_SIZE;
+ spapr->fdt_addr = MIN(rma_size, 0x80000000) - FDT_MAX_SIZE;
spapr->rtas_addr = spapr->fdt_addr - RTAS_MAX_SIZE;
/* init CPUs */
@@ -378,8 +405,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
/* allocate RAM */
spapr->ram_limit = ram_size;
- ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", spapr->ram_limit);
- cpu_register_physical_memory(0, ram_size, ram_offset);
+ if (spapr->ram_limit > rma_alloc_size) {
+ ram_addr_t nonrma_base = rma_alloc_size;
+ ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
+
+ ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", nonrma_size);
+ cpu_register_physical_memory(nonrma_base, nonrma_size, ram_offset);
+ }
/* allocate hash page table. For now we always make this 16mb,
* later we should probably make it scale to the size of guest
@@ -503,7 +535,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
}
/* Prepare the device tree */
- spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
+ spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
initrd_base, initrd_size,
boot_device, kernel_cmdline,
pteg_shift + 7);
@@ -55,6 +55,9 @@ static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt = 0;
+#ifdef KVM_CAP_PPC_RMA
+static int cap_ppc_rma = 0;
+#endif
/* XXX We have a race condition where we actually have a level triggered
* interrupt, but the infrastructure can't expose that yet, so the guest
@@ -81,6 +84,9 @@ int kvm_arch_init(KVMState *s)
#ifdef KVM_CAP_PPC_SMT
cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
#endif
+#ifdef KVM_CAP_PPC_RMA
+ cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
+#endif
if (!cap_interrupt_level) {
fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@@ -741,6 +747,51 @@ int kvmppc_smt_threads(void)
return cap_ppc_smt ? cap_ppc_smt : 1;
}
+off_t kvmppc_alloc_rma(const char *name)
+{
+#ifndef KVM_CAP_PPC_RMA
+ return 0;
+#else
+ void *rma;
+ ram_addr_t rma_offset;
+ off_t size;
+ int fd;
+ struct kvm_allocate_rma ret;
+
+ /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
+ * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
+ * not necessary on this hardware
+ * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
+ *
+ * FIXME: We should allow the user to force contiguous RMA
+ * allocation in the cap_ppc_rma==1 case.
+ */
+ if (cap_ppc_rma < 2) {
+ return 0;
+ }
+
+ fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
+ if (fd < 0) {
+ fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ size = MIN(ret.rma_size, 256ul << 20);
+
+ rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (rma == MAP_FAILED) {
+ fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
+ return -1;
+ };
+
+ rma_offset = qemu_ram_alloc_from_ptr(NULL, name, size, rma);
+ cpu_register_physical_memory(0, size, rma_offset);
+
+ return size;
+#endif
+}
+
bool kvm_arch_stop_on_emulation_error(CPUState *env)
{
return true;
@@ -19,6 +19,7 @@ int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len);
int kvmppc_set_interrupt(CPUState *env, int irq, int level);
void kvmppc_set_papr(CPUState *env);
int kvmppc_smt_threads(void);
+off_t kvmppc_alloc_rma(const char *name);
#else
@@ -51,6 +52,11 @@ static inline int kvmppc_smt_threads(void)
return 1;
}
+static inline off_t kvmppc_alloc_rma(const char *name)
+{
+ return 0;
+}
+
#endif
#ifndef CONFIG_KVM