From patchwork Fri Aug 17 08:38:44 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Benjamin Herrenschmidt X-Patchwork-Id: 178167 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id E2C8C2C009B for ; Fri, 17 Aug 2012 18:43:57 +1000 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755583Ab2HQIn4 (ORCPT ); Fri, 17 Aug 2012 04:43:56 -0400 Received: from gate.crashing.org ([63.228.1.57]:50891 "EHLO gate.crashing.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755552Ab2HQInw (ORCPT ); Fri, 17 Aug 2012 04:43:52 -0400 Received: from [127.0.0.1] (localhost.localdomain [127.0.0.1]) by gate.crashing.org (8.14.1/8.13.8) with ESMTP id q7H8cigM024655; Fri, 17 Aug 2012 03:38:45 -0500 Message-ID: <1345192724.11751.72.camel@pasglop> Subject: [PATCH 4/7] powerpc/kvm/xics: Add kernel emulation for the XICS interrupt controller From: Benjamin Herrenschmidt To: kvm-ppc@vger.kernel.org Cc: Alexander Graf , kvm@vger.kernel.org Date: Fri, 17 Aug 2012 18:38:44 +1000 X-Mailer: Evolution 3.2.3-0ubuntu6 Mime-Version: 1.0 Sender: kvm-ppc-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm-ppc@vger.kernel.org This is an initial variant of the in-kernel XICS emulation for both HV and PR KVM running in PAPR mode. This is based on an initial implementation by Michael Ellerman reworked by myself. It supports up to 4095 "BUID" (blocks of interrupts) of up to 4096 interrupts each. 
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/kvm.h | 39 ++ arch/powerpc/include/asm/kvm_host.h | 8 + arch/powerpc/include/asm/kvm_ppc.h | 30 +- arch/powerpc/kvm/Makefile | 1 + arch/powerpc/kvm/book3s.c | 3 +- arch/powerpc/kvm/book3s_hv.c | 20 + arch/powerpc/kvm/book3s_pr.c | 13 + arch/powerpc/kvm/book3s_pr_papr.c | 19 +- arch/powerpc/kvm/book3s_rtas.c | 51 +- arch/powerpc/kvm/book3s_xics.c | 882 +++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/booke.c | 3 +- arch/powerpc/kvm/powerpc.c | 28 +- include/linux/kvm.h | 9 +- 13 files changed, 1090 insertions(+), 16 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_xics.c -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 3dc91df..f653424 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -296,6 +296,45 @@ struct kvm_rtas_token_args { __u64 token; /* Use a token of 0 to undefine a mapping */ }; +/* for KVM_CAP_SPAPR_XICS */ +#define __KVM_HAVE_IRQCHIP_ARGS +struct kvm_irqchip_args { +#define KVM_IRQCHIP_TYPE_ICP 0 /* XICS: ICP (presentation controller) */ +#define KVM_IRQCHIP_TYPE_ICS 1 /* XICS: ICS (source controller) */ + __u32 type; + union { + /* XICS ICP arguments. This needs to be called once before + * creating any VCPU to initialize the main kernel XICS data + * structures. + */ + struct { +#define KVM_ICP_FLAG_NOREALMODE 0x00000001 /* Disable real mode ICP */ + __u32 flags; + } icp; + + /* XICS ICS arguments. You can call this for every BUID you + * want to make available. + * + * The BUID is 12 bits, the interrupt number within a BUID + * is up to 12 bits as well. The resulting interrupt numbers + * exposed to the guest are BUID || IRQ which is 24 bit + * + * BUID cannot be 0. 
+ */ + struct { + __u32 flags; + __u16 buid; + __u16 nr_irqs; + } ics; + }; +}; + +struct kvm_spapr_xics_xive { + __u32 irq; + __u32 server; + __u32 priority; +}; + struct kvm_book3e_206_tlb_entry { __u32 mas8; __u32 mas1; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d9c3f63..ccbf3dc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -187,6 +187,10 @@ struct kvmppc_linear_info { int type; }; +/* XICS components, defined in book3s_xics.c */ +struct kvmppc_xics; +struct kvmppc_icp; + /* * The reverse mapping array has one entry for each HPTE, * which stores the guest's view of the second word of the HPTE @@ -251,6 +255,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + struct kvmppc_xics *xics; #endif }; @@ -532,6 +537,9 @@ struct kvm_vcpu_arch { u64 stolen_logged; struct kvmppc_vpa slb_shadow; #endif +#ifdef CONFIG_PPC_BOOK3S_64 + struct kvmppc_icp *icp; /* XICS presentation controller */ +#endif }; /* Values for vcpu->arch.state */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e23bfc6..ce81d91 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -102,8 +102,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); -extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); +extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int op, int *advance); @@ -127,6 +126,12 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int 
kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); +/* kvmppc_xics_ioctl() is declared further down, next to its !CONFIG_PPC_BOOK3S_64 stub */ +extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu); +extern void kvmppc_xics_free(struct kvm *kvm); + extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, @@ -152,6 +157,8 @@ extern void kvmppc_bookehv_exit(void); extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); extern void kvmppc_rtas_tokens_free(struct kvm *kvm); +extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority); +extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority); /* * Cuts out inst bits with ordering according to spec. 
@@ -213,6 +220,25 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void kvm_linear_init(void) {} + +#endif + +#ifdef CONFIG_PPC_BOOK3S_64 + +extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg); + +static inline int kvmppc_xics_enabled(struct kvm *kvm) +{ + return kvm->arch.xics != NULL; +} + +#else +static inline int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + return -ENOTTY; +} +static inline int kvmppc_xics_enabled(struct kvm *kvm) { return 0; } #endif int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 536f65f..ec2f8da 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -81,6 +81,7 @@ kvm-book3s_64-module-objs := \ book3s.o \ book3s_64_vio.o \ book3s_rtas.o \ + book3s_xics.o \ $(kvm-book3s_64-objs-y) kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 3f2a836..5c631e4 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, vec); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6199063..b41e586 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -465,6 +465,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) /* Send the error out to userspace via KVM_RUN */ return rc; + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu->kvm)) { + ret = kvmppc_xics_hcall(vcpu, req); + break; + } /* 
fallthrough */ default: return RESUME_HOST; } @@ -673,6 +681,13 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) kvmppc_set_pvr(vcpu, vcpu->arch.pvr); spin_lock_init(&vcpu->arch.vpa_update_lock); + /* Create the XICS */ + if (kvmppc_xics_enabled(kvm)) { + err = kvmppc_xics_create_icp(vcpu); + if (err < 0) + goto free_vcpu; + } + kvmppc_mmu_book3s_hv_init(vcpu); /* @@ -727,6 +742,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); spin_unlock(&vcpu->arch.vpa_update_lock); kvm_vcpu_uninit(vcpu); + if (kvmppc_xics_enabled(vcpu->kvm)) + kvmppc_xics_free_icp(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -1602,6 +1619,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) kvmppc_rtas_tokens_free(kvm); + if (kvmppc_xics_enabled(kvm)) + kvmppc_xics_free(kvm); + kvmppc_free_hpt(kvm); WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9f4c13f..ab9776b 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -984,6 +984,13 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err < 0) goto uninit_vcpu; + /* Create the XICS; on failure unwind like the step above */ + if (kvmppc_xics_enabled(kvm)) { + err = kvmppc_xics_create_icp(vcpu); + if (err < 0) + goto uninit_vcpu; + } + return vcpu; uninit_vcpu: @@ -1000,6 +1007,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + if (kvmppc_xics_enabled(vcpu->kvm)) + kvmppc_xics_free_icp(vcpu); free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); kvm_vcpu_uninit(vcpu); kfree(vcpu_book3s->shadow_vcpu); @@ -1199,6 +1208,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) { #ifdef CONFIG_PPC64 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); #endif return 0; @@ -1209,6 +1219,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) #ifdef CONFIG_PPC64 
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); #endif + if (kvmppc_xics_enabled(kvm)) + kvmppc_xics_free(kvm); + } static int kvmppc_book3s_init(void) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 175404a..8352cac 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -227,6 +227,15 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) +{ + long rc = kvmppc_xics_hcall(vcpu, cmd); + if (rc == H_TOO_HARD) + return EMULATE_FAIL; + kvmppc_set_gpr(vcpu, 3, rc); + return EMULATE_DONE; +} + int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { switch (cmd) { @@ -246,11 +255,17 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; return EMULATE_DONE; + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu->kvm)) + return kvmppc_h_pr_xics_hcall(vcpu, cmd); + break; case H_RTAS: if (list_empty(&vcpu->kvm->arch.rtas_tokens)) return RESUME_HOST; - rc = kvmppc_rtas_hcall(vcpu); - if (rc != 0) + if (kvmppc_rtas_hcall(vcpu) != 0) break; kvmppc_set_gpr(vcpu, 3, 0); return EMULATE_DONE; diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 8a324e8..6a6c1fe 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -18,12 +18,61 @@ #include +static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; + + if (args->nargs != 3 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + server = args->args[1]; + priority = args->args[2]; + + rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; 
+ + if (args->nargs != 1 || args->nret != 3) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + server = priority = 0; + rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); + if (rc) { + rc = -3; + goto out; + } + + args->rets[1] = server; + args->rets[2] = priority; +out: + args->rets[0] = rc; +} + struct rtas_handler { void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args); char *name; }; -static struct rtas_handler rtas_handlers[] = { }; +static struct rtas_handler rtas_handlers[] = { + { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive }, + { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive }, +}; struct rtas_token_definition { struct list_head list; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c new file mode 100644 index 0000000..5638e21 --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.c @@ -0,0 +1,882 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define MASKED 0xff + +#define XICS_DBG(fmt...) do { } while(0) + +#undef DEBUG_REALMODE + +/* + * LOCKING + * ======= + * + * Each ICP has its own lock, and there is one lock for the ICS (ie. all + * information about irq sources). + * + * The ICS lock nests inside any of the ICP locks. ie. you are allowed + * to take the ICS lock while holding an ICP lock, but not vice versa. + */ + +/* + * Interrupt numbering + * =================== + * + * The 24-bit global interrupt numbers are divided in two components, + * the BUID and the interrupt source. 
Both are 12 bits wide (see + * KVMPPC_XICS_BUID_SHIFT and KVMPPC_XICS_SRC_MASK below). + */ + +#define KVMPPC_XICS_MAX_BUID 0xfff +#define KVMPPC_XICS_IRQ_COUNT 0x1000 +#define KVMPPC_XICS_BUID_SHIFT 12 +#define KVMPPC_XICS_SRC_MASK 0xfff + +/* State for one irq in an ics */ +struct ics_irq_state { + u32 number; + u32 server; + u8 priority; + u8 saved_priority; /* currently unused */ + u8 resend; + u8 masked_pending; + u8 asserted; /* Only for LSI */ +}; + +#define ICP_RESEND_MAP_SIZE \ + ((KVMPPC_XICS_MAX_BUID + BITS_PER_LONG - 1) / BITS_PER_LONG) + +struct kvmppc_icp { + struct mutex lock; + struct kvm_vcpu *vcpu; + u32 pending_irq; /* XISR */ + u8 pending_priority; + u8 current_priority; /* CPPR */ + u8 mfrr; /* MFRR */ + bool need_resend; + unsigned long resend_map[ICP_RESEND_MAP_SIZE]; +}; + + +struct kvmppc_ics { + struct mutex lock; + u16 buid; + u16 nr_irqs; + struct ics_irq_state irq_state[]; +}; + +struct kvmppc_xics { + struct kvm *kvm; + struct dentry *dentry; + u32 max_buid; + struct kvmppc_ics *ics[KVMPPC_XICS_MAX_BUID]; /* [1...MAX_BUID] */ +}; + +static struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm, u32 nr) +{ + struct kvm_vcpu *vcpu = NULL; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (nr == vcpu->vcpu_id) + return vcpu->arch.icp; + } + return NULL; +} + +static struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, + u32 irq, u16 *source) +{ + u32 buid = irq >> KVMPPC_XICS_BUID_SHIFT; + u16 src = irq & KVMPPC_XICS_SRC_MASK; + struct kvmppc_ics *ics; + + if (buid == 0 || buid > KVMPPC_XICS_MAX_BUID) + return NULL; /* BUID 0 or beyond the ics[] table: never index with it */ + ics = xics->ics[buid - 1]; + if (!ics || src >= ics->nr_irqs) + return NULL; + if (source) + *source = src; + return ics; +} + + +/* -- ICS routines -- */ + +static void icp_deliver_irq(struct kvmppc_xics *xics, + struct kvmppc_icp *icp, + struct kvmppc_ics *ics, u16 src); + +static void __ics_reject_irq(struct kvmppc_icp *icp, + struct kvmppc_ics *ics, u16 src) +{ + struct ics_irq_state *state = &ics->irq_state[src]; + + XICS_DBG("server %d reject src %#x\n", icp->vcpu->vcpu_id, src); + + /* 
XXX check if it still level & asserted ? */ + state->resend = 1; + set_bit(ics->buid, icp->resend_map); + icp->need_resend = true; +} + +static void ics_reject_irq(struct kvmppc_xics *xics, + struct kvmppc_icp *icp, u32 irq) +{ + struct kvmppc_ics *ics; + u16 src; + + lockdep_assert_held(&icp->lock); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + pr_warning("ics_reject_irq: IRQ 0x%06x not found !\n", irq); + return; + } + + mutex_lock(&ics->lock); + __ics_reject_irq(icp, ics, src); + mutex_unlock(&ics->lock); +} + +static void ics_eoi(struct kvmppc_xics *xics, struct kvmppc_icp *icp, + u32 irq) +{ + struct ics_irq_state *state; + struct kvmppc_ics *ics; + u16 src; + + XICS_DBG("ics_eoi 0x%06x\n", irq); + + lockdep_assert_held(&icp->lock); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + pr_warning("ics_eoi: IRQ 0x%06x not found !\n", irq); + return; + } + state = &ics->irq_state[src]; + + mutex_lock(&ics->lock); + + /* If it's an LSI and still asserted we resend */ + if (state->asserted) { + state->resend = 1; + set_bit(ics->buid, icp->resend_map); + icp->need_resend = true; + } + + mutex_unlock(&ics->lock); +} + +static void ics_deliver_irq(struct kvmppc_xics *xics, + u32 irq, u32 level) +{ + struct kvmppc_icp *icp; + struct ics_irq_state *state; + struct kvmppc_ics *ics; + bool deliver = false; + u32 server; + u16 src; + + XICS_DBG("ics deliver 0x%06x (level: %d)\n", irq, level); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + pr_warning("ics_deliver_irq: IRQ 0x%06x not found !\n", irq); + return; + } + state = &ics->irq_state[src]; + + mutex_lock(&ics->lock); + + if (level == KVM_INTERRUPT_SET_LEVEL) + state->asserted = 1; + else if (level == KVM_INTERRUPT_UNSET) { + state->asserted = 0; + goto unlock; + } + + if (state->priority != MASKED) { + deliver = true; + server = state->server; + } else { + XICS_DBG("masked pending\n"); + state->masked_pending = 1; + } + +unlock: + mutex_unlock(&ics->lock); + + if 
(deliver) { + icp = kvmppc_xics_find_server(xics->kvm, server); + /* Configured server not found... XXX FALLBACK */ + if (icp) + icp_deliver_irq(xics, icp, ics, src); + } +} + +static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct kvmppc_icp *icp) +{ + u32 server = icp->vcpu->vcpu_id; + int i; + + mutex_lock(&ics->lock); + + for (i = 0; i < ics->nr_irqs; i++) { + struct ics_irq_state *state = &ics->irq_state[i]; + + if (!state->resend || state->server != server) + continue; + + XICS_DBG("resend 0x%06x prio %d\n", state->number, + state->priority); + + state->resend = 0; + if (state->priority == MASKED) + continue; + + mutex_unlock(&ics->lock); + icp_deliver_irq(xics, icp, ics, i); + mutex_lock(&ics->lock); + } + + mutex_unlock(&ics->lock); +} + +static void icp_check_resend(struct kvmppc_xics *xics, + struct kvmppc_icp *icp) +{ + u32 buid; + + for_each_set_bit(buid, icp->resend_map, xics->max_buid + 1) { + struct kvmppc_ics *ics = xics->ics[buid - 1]; + + if (!test_and_clear_bit(buid, icp->resend_map)) + continue; + if (!ics) + continue; + ics_check_resend(xics, ics, icp); + } +} + +int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_icp *icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + bool deliver; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + icp = kvmppc_xics_find_server(kvm, server); + if (!icp) + return -EINVAL; + + mutex_lock(&ics->lock); + + state->server = server; + state->priority = priority; + deliver = false; + if (state->masked_pending && state->priority != MASKED) { + state->masked_pending = 0; + deliver = true; + } + + mutex_unlock(&ics->lock); + + XICS_DBG("irq 0x%06x server %d prio %#x\n", irq, server, priority); + + if (deliver) + icp_deliver_irq(xics, icp, ics, src); + + return 0; +} + +int 
kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + mutex_lock(&ics->lock); + *server = state->server; + *priority = state->priority; + mutex_unlock(&ics->lock); + + XICS_DBG("irq 0x%06x server %d prio %#x\n", + irq, state->server, state->priority); + + return 0; +} + +/* -- ICP routines, including hcalls -- */ + +static void icp_external_interrupt(struct kvmppc_icp *icp) +{ + unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; + + lockdep_assert_held(&icp->lock); + + kvmppc_book3s_queue_irqprio(icp->vcpu, vec); + kvm_vcpu_kick(icp->vcpu); +} + +static void icp_deliver_irq(struct kvmppc_xics *xics, + struct kvmppc_icp *icp, + struct kvmppc_ics *ics, u16 src) +{ + struct ics_irq_state state_copy; + + mutex_lock(&icp->lock); + + /* Snapshot irq state */ + mutex_lock(&ics->lock); + state_copy = ics->irq_state[src]; + + if (state_copy.priority >= icp->current_priority) { + /* CPU is not interested in us (same test as icp_check_ipi) */ + __ics_reject_irq(icp, ics, src); + mutex_unlock(&ics->lock); + goto out; + } + + if (icp->pending_irq) { + /* An interrupt is pending */ + if (icp->pending_priority <= state_copy.priority) { + /* pending irq is equally or more favoured */ + __ics_reject_irq(icp, ics, src); + mutex_unlock(&ics->lock); + goto out; + } + } + mutex_unlock(&ics->lock); + + /* We are more favoured, reject pending irq */ + if (icp->pending_irq) + ics_reject_irq(xics, icp, icp->pending_irq); + + icp->pending_irq = state_copy.number; + icp->pending_priority = state_copy.priority; + + XICS_DBG("irq 0x%06x pending on %d prio %#x\n", + state_copy.number, state_copy.server, state_copy.priority); + + icp_external_interrupt(icp); + +out: + mutex_unlock(&icp->lock); +} + +static void icp_check_ipi(struct kvmppc_xics 
*xics, struct kvmppc_icp *icp) +{ + lockdep_assert_held(&icp->lock); + + if (icp->mfrr >= icp->current_priority) + return; + + XICS_DBG("cpu %d can take IPI mfrr=%#x\n", + icp->vcpu->vcpu_id, icp->mfrr); + + if (icp->pending_irq) { + /* IPI is less favoured */ + if (icp->pending_priority <= icp->mfrr) { + XICS_DBG("ODD: pending_prio=%#x pending_irq=%#x\n", + icp->pending_priority, icp->pending_irq); + return; + } + + /* IPI is more favoured, reject the other interrupt */ + ics_reject_irq(xics, icp, icp->pending_irq); + } + + icp->pending_irq = XICS_IPI; + icp->pending_priority = icp->mfrr; + icp_external_interrupt(icp); +} + +static u32 icp_accept(struct kvm_vcpu *vcpu, struct kvmppc_icp *icp) +{ + u32 xirr; + + mutex_lock(&icp->lock); + + kvmppc_core_dequeue_external(vcpu); + + /* The XIRR is the pending interrupt & current priority */ + xirr = icp->pending_irq | (icp->current_priority << 24); + + /* The pending priority becomes current */ + icp->current_priority = icp->pending_priority; + + /* Clear the pending interrupt */ + icp->pending_irq = 0; + + mutex_unlock(&icp->lock); + + return xirr; +} + +static unsigned long h_xirr(struct kvm_vcpu *vcpu) +{ + struct kvmppc_icp *icp = vcpu->arch.icp; + u32 xirr; + + xirr = icp_accept(vcpu, icp); + + XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr); + + return xirr; +} + +static int h_ipi(struct kvm_vcpu *vcpu, unsigned long server, + unsigned long mfrr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp; + + XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n", + vcpu->vcpu_id, server, mfrr); + + icp = kvmppc_xics_find_server(vcpu->kvm, server); + if (!icp) + return H_PARAMETER; + + mutex_lock(&icp->lock); + + icp->mfrr = mfrr; + icp_check_ipi(xics, icp); + + mutex_unlock(&icp->lock); + + return H_SUCCESS; +} + +static void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + u8 
old_priority; + bool check_resend = false; + + XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr); + + mutex_lock(&icp->lock); + + old_priority = icp->current_priority; + icp->current_priority = cppr; + + if (icp->pending_irq && + icp->current_priority < icp->pending_priority) { + u32 pending = icp->pending_irq; + /* Pending irq is less favoured than our new priority */ + icp->pending_irq = 0; + kvmppc_core_dequeue_external(vcpu); + ics_reject_irq(xics, icp, pending); + } + + /* Check if there is anything we can accept now */ + if (!icp->pending_irq) + icp_check_ipi(xics, icp); + if (!icp->pending_irq && icp->need_resend) { + check_resend = true; + icp->need_resend = false; + } + + mutex_unlock(&icp->lock); + + if (check_resend) + icp_check_resend(xics, icp); +} + +static void h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + bool check_resend = false; + + XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr); + + mutex_lock(&icp->lock); + + icp->current_priority = xirr >> 24; + + /* If nothing is pending since accept, check for an IPI */ + if (!icp->pending_irq) + icp_check_ipi(xics, icp); + + if (!icp->pending_irq && icp->need_resend) { + check_resend = true; + icp->need_resend = false; + } + + ics_eoi(xics, icp, xirr & 0xFFFFFF); + + mutex_unlock(&icp->lock); + + if (check_resend) + icp_check_resend(xics, icp); +} + +int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) +{ + unsigned long res; + int rc = H_SUCCESS; + + /* Check if we have an ICP */ + if (!vcpu->arch.icp || !vcpu->kvm->arch.xics) + return H_HARDWARE; + + switch (req) { + case H_XIRR: + res = h_xirr(vcpu); + kvmppc_set_gpr(vcpu, 4, res); + break; + case H_CPPR: + h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_EOI: + h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_IPI: + rc = h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + break; + } + + return rc; +} + 
+ +/* -- Initialisation code etc. -- */ + +static int xics_debug_show(struct seq_file *m, void *private) +{ + struct kvmppc_xics *xics = m->private; + struct kvm *kvm = xics->kvm; + struct kvm_vcpu *vcpu; + int buid, i; + + if (!kvm) + return 0; + + seq_printf(m, "=========\nICP state\n=========\n"); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvmppc_icp *icp = vcpu->arch.icp; + + if (!icp) + continue; + + mutex_lock(&icp->lock); + + seq_printf(m, "cpu server %#x pending %#x pending prio %#x cppr %#x " + "mfrr %#x\n", vcpu->vcpu_id, icp->pending_irq, + icp->pending_priority, icp->current_priority, + icp->mfrr); + + mutex_unlock(&icp->lock); + } + + for (buid = 1; buid <= KVMPPC_XICS_MAX_BUID; buid++) { + struct kvmppc_ics *ics = xics->ics[buid - 1]; + + if (!ics) + continue; + + seq_printf(m, "=========\nICS state for BUID 0x%x\n=========\n", buid); + + mutex_lock(&ics->lock); + + for (i = 0; i < ics->nr_irqs; i++) { + struct ics_irq_state *irq = &ics->irq_state[i]; + + seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x " + "asserted %d resend %d masked pending %d\n", + irq->number, irq->server, irq->priority, + irq->saved_priority, irq->asserted, irq->resend, + irq->masked_pending); + + } + mutex_unlock(&ics->lock); + } + return 0; +} + +static int xics_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, xics_debug_show, inode->i_private); +} + +static const struct file_operations xics_debug_fops = { + .open = xics_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void xics_debugfs_init(struct kvmppc_xics *xics) +{ + char *name; + + name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); + if (!name) { + pr_err("%s: no memory for name\n", __func__); + return; + } + + xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xics, &xics_debug_fops); + + pr_debug("%s: created %s\n", __func__, name); + kfree(name); +} + +static int kvmppc_xics_create_ics(struct 
kvmppc_xics *xics, u16 buid, u16 nr_irqs) +{ + struct kvmppc_ics *ics; + int i, size; + + + /* Create the ICS */ + size = sizeof(struct kvmppc_ics) + sizeof(struct ics_irq_state) * nr_irqs; + ics = kzalloc(size, GFP_KERNEL); + if (!ics) + return -ENOMEM; + + mutex_init(&ics->lock); + ics->buid = buid; + ics->nr_irqs = nr_irqs; + + for (i = 0; i < nr_irqs; i++) { + ics->irq_state[i].number = (buid << KVMPPC_XICS_BUID_SHIFT) | i; + ics->irq_state[i].priority = MASKED; + ics->irq_state[i].saved_priority = MASKED; + } + smp_wmb(); + xics->ics[buid - 1] = ics; + + if (buid > xics->max_buid) + xics->max_buid = buid; + + return 0; +} + +int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu) +{ + struct kvmppc_icp *icp; + + icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL); + if (!icp) + return -ENOMEM; + + mutex_init(&icp->lock); + icp->vcpu = vcpu; + icp->mfrr = MASKED; + vcpu->arch.icp = icp; + + XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id); + + return 0; +} + +void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.icp) + return; + kfree(vcpu->arch.icp); + vcpu->arch.icp = NULL; +} + +void kvmppc_xics_free(struct kvm *kvm) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + int i; + + if (!xics) + return; + + lockdep_assert_held(&kvm->lock); + + debugfs_remove(xics->dentry); + + if (xics->kvm) { + xics->kvm->arch.xics = NULL; + xics->kvm = NULL; + } + + for (i = 0; i < xics->max_buid; i++) { + if (xics->ics[i]) + kfree(xics->ics[i]); + } + kfree(xics); +} + +/* -- ioctls -- */ + +static int kvm_vm_ioctl_create_icp(struct kvm *kvm, + struct kvm_irqchip_args *args) +{ + struct kvmppc_xics *xics; + int rc = -EEXIST; + + mutex_lock(&kvm->lock); + + /* Already there ? 
*/ + if (kvm->arch.xics) + goto out; /* leave through 'out' so kvm->lock is released */ + + xics = kzalloc(sizeof(*xics), GFP_KERNEL); + if (!xics) { + rc = -ENOMEM; + goto out; + } + + xics->kvm = kvm; + kvm->arch.xics = xics; + xics_debugfs_init(xics); + rc = 0; +out: + mutex_unlock(&kvm->lock); + return rc; +} + +static int kvm_vm_ioctl_create_ics(struct kvm *kvm, + struct kvm_irqchip_args *args) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + u16 nr_irqs, buid; + int rc; + + if (!xics) + return -ENODEV; + + nr_irqs = args->ics.nr_irqs; + buid = args->ics.buid; + + /* + * BUID 0 is bogus. kvm->lock is not taken yet, so return directly. + */ + if (buid == 0) + return -EINVAL; + + /* Sanity checks */ + if (nr_irqs == 0 || nr_irqs > KVMPPC_XICS_IRQ_COUNT || + buid > KVMPPC_XICS_MAX_BUID) + return -EINVAL; + + mutex_lock(&kvm->lock); + + /* BUID already exists */ + if (xics->ics[buid - 1]) { + rc = -EEXIST; + goto out; + } + + /* Create the ICS */ + rc = kvmppc_xics_create_ics(xics, buid, nr_irqs); +out: + mutex_unlock(&kvm->lock); + return rc; +} + +static int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args) +{ + struct kvmppc_xics *xics; + + /* locking against multiple callers? 
*/ + + xics = kvm->arch.xics; + if (!xics) + return -ENODEV; + + switch (args->level) { + case KVM_INTERRUPT_SET: + case KVM_INTERRUPT_SET_LEVEL: + case KVM_INTERRUPT_UNSET: + ics_deliver_irq(xics, args->irq, args->level); + break; + default: + return -EINVAL; + } + + return 0; +} + +int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + int rc; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: { + struct kvm_irqchip_args args; + + rc = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + rc = -EINVAL; + if (args.type == KVM_IRQCHIP_TYPE_ICP) + rc = kvm_vm_ioctl_create_icp(kvm, &args); + else if (args.type == KVM_IRQCHIP_TYPE_ICS) + rc = kvm_vm_ioctl_create_ics(kvm, &args); + break; + } + + case KVM_IRQ_LINE: { + struct kvm_irq_level args; + + rc = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + rc = kvm_vm_ioctl_xics_irq(kvm, &args); + break; + } + + default: + rc = -ENOTTY; + break; + } + + return rc; +} diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5ecfd80..507c9f5 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -199,8 +199,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, prio); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4b7522f..89e3572 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -286,6 +286,7 @@ int kvm_dev_ioctl_check_extension(long ext) break; #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_PPC_GET_SMMU_INFO: + case KVM_CAP_SPAPR_XICS: r = 1; break; #endif @@ -611,7 +612,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { - kvmppc_core_dequeue_external(vcpu, irq); + kvmppc_core_dequeue_external(vcpu); return 0; } @@ -841,11 +842,6 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } - - case KVM_PPC_RTAS_DEFINE_TOKEN: - r = kvm_vm_ioctl_rtas_define_token(kvm, argp); - break; - #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 @@ -859,7 +855,27 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_RTAS_DEFINE_TOKEN: + r = kvm_vm_ioctl_rtas_define_token(kvm, argp); + break; #endif /* CONFIG_PPC_BOOK3S_64 */ + case KVM_IRQ_LINE: + if (kvmppc_xics_enabled(kvm)) + r = kvmppc_xics_ioctl(kvm, ioctl, arg); + else + r = -ENOTTY; + break; + case KVM_CREATE_IRQCHIP: { + u32 type; + + r = -EFAULT; + if (get_user(type, (u32 __user *)argp)) + break; + r = -EINVAL; + if (type == KVM_IRQCHIP_TYPE_ICP || type == KVM_IRQCHIP_TYPE_ICS) + r = kvmppc_xics_ioctl(kvm, ioctl, arg); + break; + } default: r = -ENOTTY; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 35c063a..f9a396f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -111,6 +111,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. + * On powerpc SPAPR, the ICS source number; level is a KVM_INTERRUPT_* value. 
*/ union { __u32 irq; @@ -620,6 +621,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_ALLOC_HTAB 80 #define KVM_CAP_PPC_VPA 81 #define KVM_CAP_PPC_RTAS 82 +#define KVM_CAP_SPAPR_XICS 83 #ifdef KVM_CAP_IRQ_ROUTING @@ -753,6 +755,11 @@ struct kvm_msi { __u8 pad[16]; }; +#ifndef __KVM_HAVE_IRQCHIP_ARGS +/* Allow arch code to optionally define args for KVM_CREATE_IRQCHIP */ +struct kvm_irqchip_args { }; +#endif + /* * ioctls for VM fds */ @@ -783,7 +790,7 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) /* Device model IOC */ -#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) +#define KVM_CREATE_IRQCHIP _IOW(KVMIO, 0x60, struct kvm_irqchip_args) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)