diff mbox series

[v2,12/16] KVM: PPC: Book3S HV: XIVE: add a TIMA mapping

Message ID 20190222112840.25000-13-clg@kaod.org (mailing list archive)
State Superseded
Headers show
Series KVM: PPC: Book3S HV: add XIVE native exploitation mode | expand

Commit Message

Cédric Le Goater Feb. 22, 2019, 11:28 a.m. UTC
Each thread has an associated Thread Interrupt Management context
composed of a set of registers. These registers let the thread handle
priority management and interrupt acknowledgment. The most important
are :

    - Interrupt Pending Buffer     (IPB)
    - Current Processor Priority   (CPPR)
    - Notification Source Register (NSR)

They are exposed to software in four different pages, each offering a
view with a different privilege level. The first page is for the physical
thread context and the second for the hypervisor. Only the third
(operating system) and the fourth (user level) are exposed to the guest.

A custom VM fault handler will populate the VMA with the appropriate
pages, which should only be the OS page for now.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/xive.h            |  1 +
 arch/powerpc/include/uapi/asm/kvm.h        |  2 ++
 arch/powerpc/kvm/book3s_xive_native.c      | 39 ++++++++++++++++++++++
 arch/powerpc/sysdev/xive/native.c          | 11 ++++++
 Documentation/virtual/kvm/devices/xive.txt | 23 +++++++++++++
 5 files changed, 76 insertions(+)

Comments

David Gibson Feb. 25, 2019, 3:42 a.m. UTC | #1
On Fri, Feb 22, 2019 at 12:28:36PM +0100, Cédric Le Goater wrote:
> Each thread has an associated Thread Interrupt Management context
> composed of a set of registers. These registers let the thread handle
> priority management and interrupt acknowledgment. The most important
> are :
> 
>     - Interrupt Pending Buffer     (IPB)
>     - Current Processor Priority   (CPPR)
>     - Notification Source Register (NSR)
> 
> They are exposed to software in four different pages each proposing a
> view with a different privilege. The first page is for the physical
> thread context and the second for the hypervisor. Only the third
> (operating system) and the fourth (user level) are exposed the guest.
> 
> A custom VM fault handler will populate the VMA with the appropriate
> pages, which should only be the OS page for now.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Subject to possible modification depending on whether we go with the
generic change to allow mmap() on kvm devices.

> ---
>  arch/powerpc/include/asm/xive.h            |  1 +
>  arch/powerpc/include/uapi/asm/kvm.h        |  2 ++
>  arch/powerpc/kvm/book3s_xive_native.c      | 39 ++++++++++++++++++++++
>  arch/powerpc/sysdev/xive/native.c          | 11 ++++++
>  Documentation/virtual/kvm/devices/xive.txt | 23 +++++++++++++
>  5 files changed, 76 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
> index 46891f321606..eb6d302082da 100644
> --- a/arch/powerpc/include/asm/xive.h
> +++ b/arch/powerpc/include/asm/xive.h
> @@ -23,6 +23,7 @@
>   * same offset regardless of where the code is executing
>   */
>  extern void __iomem *xive_tima;
> +extern unsigned long xive_tima_os;
>  
>  /*
>   * Offset in the TM area of our current execution level (provided by
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index 42d4ef93ec2d..be9b255e061d 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -720,4 +720,6 @@ struct kvm_ppc_xive_eq {
>  #define KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY	0x00000002
>  #define KVM_XIVE_EQ_FLAG_ESCALATE	0x00000004
>  
> +#define KVM_XIVE_TIMA_PAGE_OFFSET	0
> +
>  #endif /* __LINUX_KVM_POWERPC_H */
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> index 132bff52d70a..c6ac818a13b2 100644
> --- a/arch/powerpc/kvm/book3s_xive_native.c
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -176,6 +176,44 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
>  	return rc;
>  }
>  
> +static int xive_native_tima_fault(struct vm_fault *vmf)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +
> +	switch (vmf->pgoff - vma->vm_pgoff) {
> +	case 0: /* HW - forbid access */
> +	case 1: /* HV - forbid access */
> +		return VM_FAULT_SIGBUS;
> +	case 2: /* OS */
> +		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
> +		return VM_FAULT_NOPAGE;
> +	case 3: /* USER - TODO */
> +	default:
> +		return VM_FAULT_SIGBUS;
> +	}
> +}
> +
> +static const struct vm_operations_struct xive_native_tima_vmops = {
> +	.fault = xive_native_tima_fault,
> +};
> +
> +static int kvmppc_xive_native_mmap(struct kvm_device *dev,
> +				   struct vm_area_struct *vma)
> +{
> +	/* We only allow mappings at fixed offset for now */
> +	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
> +		if (vma_pages(vma) > 4)
> +			return -EINVAL;
> +		vma->vm_ops = &xive_native_tima_vmops;
> +	} else {
> +		return -EINVAL;
> +	}
> +
> +	vma->vm_flags |= VM_IO | VM_PFNMAP;
> +	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
> +	return 0;
> +}
> +
>  static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
>  					 u64 addr)
>  {
> @@ -1005,6 +1043,7 @@ struct kvm_device_ops kvm_xive_native_ops = {
>  	.set_attr = kvmppc_xive_native_set_attr,
>  	.get_attr = kvmppc_xive_native_get_attr,
>  	.has_attr = kvmppc_xive_native_has_attr,
> +	.mmap = kvmppc_xive_native_mmap,
>  };
>  
>  void kvmppc_xive_native_init_module(void)
> diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
> index 0c037e933e55..7782201e5fe8 100644
> --- a/arch/powerpc/sysdev/xive/native.c
> +++ b/arch/powerpc/sysdev/xive/native.c
> @@ -521,6 +521,9 @@ u32 xive_native_default_eq_shift(void)
>  }
>  EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
>  
> +unsigned long xive_tima_os;
> +EXPORT_SYMBOL_GPL(xive_tima_os);
> +
>  bool __init xive_native_init(void)
>  {
>  	struct device_node *np;
> @@ -573,6 +576,14 @@ bool __init xive_native_init(void)
>  	for_each_possible_cpu(cpu)
>  		kvmppc_set_xive_tima(cpu, r.start, tima);
>  
> +	/* Resource 2 is OS window */
> +	if (of_address_to_resource(np, 2, &r)) {
> +		pr_err("Failed to get thread mgmnt area resource\n");
> +		return false;
> +	}
> +
> +	xive_tima_os = r.start;
> +
>  	/* Grab size of provisionning pages */
>  	xive_parse_provisioning(np);
>  
> diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt
> index 1b8957c50c53..4d6b41609fd9 100644
> --- a/Documentation/virtual/kvm/devices/xive.txt
> +++ b/Documentation/virtual/kvm/devices/xive.txt
> @@ -13,6 +13,29 @@ requires a POWER9 host and the guest OS should have support for the
>  XIVE native exploitation interrupt mode. If not, it should run using
>  the legacy interrupt mode, referred as XICS (POWER7/8).
>  
> +* Device Mappings
> +
> +  The KVM device exposes different MMIO ranges of the XIVE HW which
> +  are required for interrupt management. These are exposed to the
> +  guest in VMAs populated with a custom VM fault handler.
> +
> +  1. Thread Interrupt Management Area (TIMA)
> +
> +  Each thread has an associated Thread Interrupt Management context
> +  composed of a set of registers. These registers let the thread
> +  handle priority management and interrupt acknowledgment. The most
> +  important are :
> +
> +      - Interrupt Pending Buffer     (IPB)
> +      - Current Processor Priority   (CPPR)
> +      - Notification Source Register (NSR)
> +
> +  They are exposed to software in four different pages each proposing
> +  a view with a different privilege. The first page is for the
> +  physical thread context and the second for the hypervisor. Only the
> +  third (operating system) and the fourth (user level) are exposed the
> +  guest.
> +
>  * Groups:
>  
>    1. KVM_DEV_XIVE_GRP_CTRL
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 46891f321606..eb6d302082da 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -23,6 +23,7 @@ 
  * same offset regardless of where the code is executing
  */
 extern void __iomem *xive_tima;
+extern unsigned long xive_tima_os;
 
 /*
  * Offset in the TM area of our current execution level (provided by
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 42d4ef93ec2d..be9b255e061d 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -720,4 +720,6 @@  struct kvm_ppc_xive_eq {
 #define KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY	0x00000002
 #define KVM_XIVE_EQ_FLAG_ESCALATE	0x00000004
 
+#define KVM_XIVE_TIMA_PAGE_OFFSET	0
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 132bff52d70a..c6ac818a13b2 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -176,6 +176,44 @@  int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
 	return rc;
 }
 
+/* Fault handler of the TIMA mapping: only the OS page (index 2) is backed */
+static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+
+	switch (vmf->pgoff - vma->vm_pgoff) {
+	case 2: /* OS - hand the insertion status back to the MM core */
+		return vmf_insert_pfn(vma, vmf->address,
+				      xive_tima_os >> PAGE_SHIFT);
+	case 0: /* HW - forbid access */
+	case 1: /* HV - forbid access */
+	case 3: /* USER - TODO */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+static const struct vm_operations_struct xive_native_tima_vmops = {
+	.fault = xive_native_tima_fault,	/* pages are populated on fault */
+};
+
+/* mmap handler: only a TIMA mapping of up to 4 pages is accepted */
+static int kvmppc_xive_native_mmap(struct kvm_device *dev,
+				   struct vm_area_struct *vma)
+{
+	/* We only allow mappings at fixed offset for now */
+	if (vma->vm_pgoff != KVM_XIVE_TIMA_PAGE_OFFSET)
+		return -EINVAL;
+	if (vma_pages(vma) > 4)
+		return -EINVAL;
+
+	/* Pages are inserted lazily by the fault handler as raw PFNs */
+	vma->vm_ops = &xive_native_tima_vmops;
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+	return 0;
+}
+
 static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
 					 u64 addr)
 {
@@ -1005,6 +1043,7 @@  struct kvm_device_ops kvm_xive_native_ops = {
 	.set_attr = kvmppc_xive_native_set_attr,
 	.get_attr = kvmppc_xive_native_get_attr,
 	.has_attr = kvmppc_xive_native_has_attr,
+	.mmap = kvmppc_xive_native_mmap,
 };
 
 void kvmppc_xive_native_init_module(void)
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 0c037e933e55..7782201e5fe8 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -521,6 +521,9 @@  u32 xive_native_default_eq_shift(void)
 }
 EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
 
+unsigned long xive_tima_os;
+EXPORT_SYMBOL_GPL(xive_tima_os);
+
 bool __init xive_native_init(void)
 {
 	struct device_node *np;
@@ -573,6 +576,14 @@  bool __init xive_native_init(void)
 	for_each_possible_cpu(cpu)
 		kvmppc_set_xive_tima(cpu, r.start, tima);
 
+	/* Resource 2 is OS window */
+	if (of_address_to_resource(np, 2, &r)) {
+		pr_err("Failed to get thread mgmnt area resource\n");
+		return false;
+	}
+
+	xive_tima_os = r.start;
+
 	/* Grab size of provisionning pages */
 	xive_parse_provisioning(np);
 
diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt
index 1b8957c50c53..4d6b41609fd9 100644
--- a/Documentation/virtual/kvm/devices/xive.txt
+++ b/Documentation/virtual/kvm/devices/xive.txt
@@ -13,6 +13,29 @@  requires a POWER9 host and the guest OS should have support for the
 XIVE native exploitation interrupt mode. If not, it should run using
 the legacy interrupt mode, referred as XICS (POWER7/8).
 
+* Device Mappings
+
+  The KVM device exposes different MMIO ranges of the XIVE HW which
+  are required for interrupt management. These are exposed to the
+  guest in VMAs populated with a custom VM fault handler.
+
+  1. Thread Interrupt Management Area (TIMA)
+
+  Each thread has an associated Thread Interrupt Management context
+  composed of a set of registers. These registers let the thread
+  handle priority management and interrupt acknowledgment. The most
+  important are :
+
+      - Interrupt Pending Buffer     (IPB)
+      - Current Processor Priority   (CPPR)
+      - Notification Source Register (NSR)
+
+  They are exposed to software in four different pages, each offering
+  a view with a different privilege level. The first page is for the
+  physical thread context and the second for the hypervisor. Only the
+  third (operating system) and the fourth (user level) are exposed to
+  the guest.
+
 * Groups:
 
   1. KVM_DEV_XIVE_GRP_CTRL