diff mbox series

[v2,07/16] KVM: PPC: Book3S HV: XIVE: add a global reset control

Message ID 20190222112840.25000-8-clg@kaod.org (mailing list archive)
State Superseded
Headers show
Series KVM: PPC: Book3S HV: add XIVE native exploitation mode | expand

Commit Message

Cédric Le Goater Feb. 22, 2019, 11:28 a.m. UTC
This control is to be used by the H_INT_RESET hcall from QEMU. Its
purpose is to clear all configuration of the sources and EQs. This is
necessary in case of a kexec (for a kdump kernel for instance) to make
sure that no remaining configuration is left from the previous boot
setup so that the new kernel can start safely from a clean state.

The queue 7 is ignored when the KVM device is configured to run in
single escalation mode. Prio 7 is used by escalations.

The XIVE VP is kept enabled as the vCPU is still active and connected
to the XIVE device.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/uapi/asm/kvm.h        |  1 +
 arch/powerpc/kvm/book3s_xive_native.c      | 82 ++++++++++++++++++++++
 Documentation/virtual/kvm/devices/xive.txt |  5 ++
 3 files changed, 88 insertions(+)

Comments

David Gibson Feb. 25, 2019, 2:43 a.m. UTC | #1
On Fri, Feb 22, 2019 at 12:28:31PM +0100, Cédric Le Goater wrote:
> This control is to be used by the H_INT_RESET hcall from QEMU. Its
> purpose is to clear all configuration of the sources and EQs. This is
> necessary in case of a kexec (for a kdump kernel for instance) to make
> sure that no remaining configuration is left from the previous boot
> setup so that the new kernel can start safely from a clean state.
> 
> The queue 7 is ignored when the KVM device is configured to run in
> single escalation mode. Prio 7 is used by escalations.
> 
> The XIVE VP is kept enabled as the vCPU is still active and connected
> to the XIVE device.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  arch/powerpc/include/uapi/asm/kvm.h        |  1 +
>  arch/powerpc/kvm/book3s_xive_native.c      | 82 ++++++++++++++++++++++
>  Documentation/virtual/kvm/devices/xive.txt |  5 ++
>  3 files changed, 88 insertions(+)
> 
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index 177e43f3edaf..7ae8cb22af7d 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -677,6 +677,7 @@ struct kvm_ppc_cpu_char {
>  
>  /* POWER9 XIVE Native Interrupt Controller */
>  #define KVM_DEV_XIVE_GRP_CTRL		1
> +#define   KVM_DEV_XIVE_RESET		1
>  #define KVM_DEV_XIVE_GRP_SOURCE		2	/* 64-bit source attributes */
>  #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG	3	/* 64-bit source attributes */
>  #define KVM_DEV_XIVE_GRP_EQ_CONFIG	4	/* 64-bit eq attributes */
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> index 34a35bcf550c..bb3e121c918a 100644
> --- a/arch/powerpc/kvm/book3s_xive_native.c
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -536,6 +536,80 @@ static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
>  	return 0;
>  }
>  
> +static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
> +{
> +	int i;
> +
> +	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
> +		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
> +
> +		if (!state->valid)
> +			continue;
> +
> +		if (state->act_priority == MASKED)

You're checking some things in state before you take the lock.  Could
this race?

> +			continue;
> +
> +		arch_spin_lock(&sb->lock);
> +		state->eisn = 0;
> +		state->act_server = 0;
> +		state->act_priority = MASKED;
> +		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
> +		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
> +		if (state->pt_number) {
> +			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
> +			xive_native_configure_irq(state->pt_number,
> +						  0, MASKED, 0);
> +		}
> +		arch_spin_unlock(&sb->lock);
> +	}
> +}
> +
> +static int kvmppc_xive_reset(struct kvmppc_xive *xive)
> +{
> +	struct kvm *kvm = xive->kvm;
> +	struct kvm_vcpu *vcpu;
> +	unsigned int i;
> +
> +	pr_devel("%s\n", __func__);
> +
> +	mutex_lock(&kvm->lock);
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +		unsigned int prio;
> +
> +		if (!xc)
> +			continue;
> +
> +		kvmppc_xive_disable_vcpu_interrupts(vcpu);
> +
> +		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
> +
> +			/* Single escalation, no queue 7 */
> +			if (prio == 7 && xive->single_escalation)
> +				break;
> +
> +			if (xc->esc_virq[prio]) {
> +				free_irq(xc->esc_virq[prio], vcpu);
> +				irq_dispose_mapping(xc->esc_virq[prio]);
> +				kfree(xc->esc_virq_names[prio]);
> +				xc->esc_virq[prio] = 0;
> +			}
> +
> +			kvmppc_xive_native_cleanup_queue(vcpu, prio);
> +		}
> +	}
> +
> +	for (i = 0; i <= xive->max_sbid; i++) {
> +		if (xive->src_blocks[i])
> +			kvmppc_xive_reset_sources(xive->src_blocks[i]);
> +	}
> +
> +	mutex_unlock(&kvm->lock);
> +
> +	return 0;
> +}
> +
>  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>  				       struct kvm_device_attr *attr)
>  {
> @@ -543,6 +617,10 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>  
>  	switch (attr->group) {
>  	case KVM_DEV_XIVE_GRP_CTRL:
> +		switch (attr->attr) {
> +		case KVM_DEV_XIVE_RESET:
> +			return kvmppc_xive_reset(xive);
> +		}
>  		break;
>  	case KVM_DEV_XIVE_GRP_SOURCE:
>  		return kvmppc_xive_native_set_source(xive, attr->attr,
> @@ -575,6 +653,10 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
>  {
>  	switch (attr->group) {
>  	case KVM_DEV_XIVE_GRP_CTRL:
> +		switch (attr->attr) {
> +		case KVM_DEV_XIVE_RESET:
> +			return 0;
> +		}
>  		break;
>  	case KVM_DEV_XIVE_GRP_SOURCE:
>  	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
> diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt
> index c0b5d9bd43fb..f1d007f485a9 100644
> --- a/Documentation/virtual/kvm/devices/xive.txt
> +++ b/Documentation/virtual/kvm/devices/xive.txt
> @@ -17,6 +17,11 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
>  
>    1. KVM_DEV_XIVE_GRP_CTRL
>    Provides global controls on the device
> +  Attributes:
> +    1.1 KVM_DEV_XIVE_RESET (write only)
> +    Resets the interrupt controller configuration for sources and event
> +    queues. To be used by kexec and kdump.
> +    Errors: none
>  
>    2. KVM_DEV_XIVE_GRP_SOURCE (write only)
>    Initializes a new source in the XIVE device and mask it.
diff mbox series

Patch

diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 177e43f3edaf..7ae8cb22af7d 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -677,6 +677,7 @@  struct kvm_ppc_cpu_char {
 
 /* POWER9 XIVE Native Interrupt Controller */
 #define KVM_DEV_XIVE_GRP_CTRL		1
+#define   KVM_DEV_XIVE_RESET		1
 #define KVM_DEV_XIVE_GRP_SOURCE		2	/* 64-bit source attributes */
 #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG	3	/* 64-bit source attributes */
 #define KVM_DEV_XIVE_GRP_EQ_CONFIG	4	/* 64-bit eq attributes */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 34a35bcf550c..bb3e121c918a 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -536,6 +536,80 @@  static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
 	return 0;
 }
 
+/*
+ * Reset every valid source of a block that is not already masked.
+ *
+ * @sb: source block whose irq_state[] entries are scanned
+ *
+ * For each such source the cached eisn/act_server are cleared,
+ * act_priority is set to MASKED, an ESB load with XIVE_ESB_SET_PQ_01 is
+ * issued and the interrupt is reconfigured as MASKED. The same ESB load
+ * and reconfiguration are applied to pt_number/pt_data when pt_number
+ * is set.
+ */
+static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
+{
+	int i;
+
+	/*
+	 * Take the block lock once around the whole scan: the valid and
+	 * act_priority tests must be made under the same lock as the
+	 * updates they guard, otherwise a concurrent change of the source
+	 * state could slip in between the test and the reset.
+	 */
+	arch_spin_lock(&sb->lock);
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
+
+		if (!state->valid)
+			continue;
+
+		/* Already masked: nothing left to undo for this source */
+		if (state->act_priority == MASKED)
+			continue;
+
+		state->eisn = 0;
+		state->act_server = 0;
+		state->act_priority = MASKED;
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+		if (state->pt_number) {
+			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
+			xive_native_configure_irq(state->pt_number,
+						  0, MASKED, 0);
+		}
+	}
+
+	arch_spin_unlock(&sb->lock);
+}
+
+/*
+ * Global reset of the XIVE native device (KVM_DEV_XIVE_RESET).
+ *
+ * @xive: the device to reset
+ *
+ * With kvm->lock held, walks every vCPU that has a xive_vcpu attached,
+ * disables its interrupts, releases its escalation interrupts and
+ * cleans up its queues, then resets the sources of every allocated
+ * source block.
+ *
+ * Always returns 0.
+ */
+static int kvmppc_xive_reset(struct kvmppc_xive *xive)
+{
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	unsigned int idx;
+	unsigned int sbid;
+
+	pr_devel("%s\n", __func__);
+
+	mutex_lock(&kvm->lock);
+
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+		unsigned int q;
+
+		/* No xive_vcpu attached to this vCPU: skip it */
+		if (!xc)
+			continue;
+
+		kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+		for (q = 0; q < KVMPPC_XIVE_Q_COUNT; q++) {
+
+			/* Queue 7 is not touched in single escalation mode */
+			if (xive->single_escalation && q == 7)
+				break;
+
+			/* Release the escalation interrupt of this queue */
+			if (xc->esc_virq[q]) {
+				free_irq(xc->esc_virq[q], vcpu);
+				irq_dispose_mapping(xc->esc_virq[q]);
+				kfree(xc->esc_virq_names[q]);
+				xc->esc_virq[q] = 0;
+			}
+
+			kvmppc_xive_native_cleanup_queue(vcpu, q);
+		}
+	}
+
+	/* Source blocks are allocated sparsely: skip the empty slots */
+	for (sbid = 0; sbid <= xive->max_sbid; sbid++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[sbid];
+
+		if (!sb)
+			continue;
+
+		kvmppc_xive_reset_sources(sb);
+	}
+
+	mutex_unlock(&kvm->lock);
+
+	return 0;
+}
+
 static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 				       struct kvm_device_attr *attr)
 {
@@ -543,6 +617,10 @@  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 
 	switch (attr->group) {
 	case KVM_DEV_XIVE_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XIVE_RESET:
+			return kvmppc_xive_reset(xive);
+		}
 		break;
 	case KVM_DEV_XIVE_GRP_SOURCE:
 		return kvmppc_xive_native_set_source(xive, attr->attr,
@@ -575,6 +653,10 @@  static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
 {
 	switch (attr->group) {
 	case KVM_DEV_XIVE_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XIVE_RESET:
+			return 0;
+		}
 		break;
 	case KVM_DEV_XIVE_GRP_SOURCE:
 	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt
index c0b5d9bd43fb..f1d007f485a9 100644
--- a/Documentation/virtual/kvm/devices/xive.txt
+++ b/Documentation/virtual/kvm/devices/xive.txt
@@ -17,6 +17,11 @@  the legacy interrupt mode, referred as XICS (POWER7/8).
 
   1. KVM_DEV_XIVE_GRP_CTRL
   Provides global controls on the device
+  Attributes:
+    1.1 KVM_DEV_XIVE_RESET (write only)
+    Resets the interrupt controller configuration for sources and event
+    queues. To be used by kexec and kdump.
+    Errors: none
 
   2. KVM_DEV_XIVE_GRP_SOURCE (write only)
   Initializes a new source in the XIVE device and mask it.