diff mbox

[RFC] PPC: KVM: vfio kvm device: support spapr tce

Message ID 1380889450-25550-1-git-send-email-aik@ozlabs.ru
State New, archived
Headers show

Commit Message

Alexey Kardashevskiy Oct. 4, 2013, 12:24 p.m. UTC
This is a very rough change set required for ppc64 to use this KVM device.

vfio_rm.c is a piece of code which is going to be called from the realmode (MMU off),
and I will put everything spapr-related under #ifdef CONFIG_SPAPR_TCE_IOMMU,
it is just friday and I have to run :)

This is an RFC but it works.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/kvm/Kconfig  |  1 +
 arch/powerpc/kvm/Makefile |  4 ++++
 include/linux/kvm_host.h  |  8 ++++---
 include/linux/vfio.h      |  3 +++
 include/uapi/linux/kvm.h  |  1 +
 virt/kvm/vfio.c           | 46 ++++++++++++++++++++++++++++++++++++++++
 virt/kvm/vfio_rm.c        | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 114 insertions(+), 3 deletions(-)
 create mode 100644 virt/kvm/vfio_rm.c

Comments

Alex Williamson Oct. 4, 2013, 4:05 p.m. UTC | #1
On Fri, 2013-10-04 at 22:24 +1000, Alexey Kardashevskiy wrote:
> This is a very rough change set required for ppc64 to use this KVM device.
> 
> vfio_rm.c is a piece of code which is going to be called from the realmode (MMU off),
> and I will put everything spapr-related under #ifdef CONFIG_SPAPR_TCE_IOMMU,
> it is just friday and I have to run :)
> 
> This is an RFC but it works.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  arch/powerpc/kvm/Kconfig  |  1 +
>  arch/powerpc/kvm/Makefile |  4 ++++
>  include/linux/kvm_host.h  |  8 ++++---
>  include/linux/vfio.h      |  3 +++
>  include/uapi/linux/kvm.h  |  1 +
>  virt/kvm/vfio.c           | 46 ++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/vfio_rm.c        | 54 +++++++++++++++++++++++++++++++++++++++++++++++
>  7 files changed, 114 insertions(+), 3 deletions(-)
>  create mode 100644 virt/kvm/vfio_rm.c
> 
> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
> index 61b3535..d1b7f64 100644
> --- a/arch/powerpc/kvm/Kconfig
> +++ b/arch/powerpc/kvm/Kconfig
> @@ -60,6 +60,7 @@ config KVM_BOOK3S_64
>  	select KVM_BOOK3S_64_HANDLER
>  	select KVM
>  	select SPAPR_TCE_IOMMU
> +	select KVM_VFIO
>  	---help---
>  	  Support running unmodified book3s_64 and book3s_32 guest kernels
>  	  in virtual machines on book3s_64 host processors.
> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
> index 6646c95..fc2878b 100644
> --- a/arch/powerpc/kvm/Makefile
> +++ b/arch/powerpc/kvm/Makefile
> @@ -55,6 +55,8 @@ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
>  
>  kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
>  	$(KVM)/coalesced_mmio.o \
> +	$(KVM)/vfio.o \
> +	$(KVM)/vfio_rm.o \
>  	fpu.o \
>  	book3s_paired_singles.o \
>  	book3s_pr.o \
> @@ -76,6 +78,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
>  kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
>  	book3s_hv_rm_xics.o
>  kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
> +	$(KVM)/vfio_rm.o \
>  	book3s_hv_rmhandlers.o \
>  	book3s_hv_rm_mmu.o \
>  	book3s_64_vio_hv.o \
> @@ -89,6 +92,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
>  
>  kvm-book3s_64-module-objs := \
>  	$(KVM)/kvm_main.o \
> +	$(KVM)/vfio.o \
>  	$(KVM)/eventfd.o \
>  	powerpc.o \
>  	emulate.o \
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ad2b581..43c0290 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -407,6 +407,8 @@ struct kvm {
>  #endif
>  	long tlbs_dirty;
>  	struct list_head devices;
> +
> +	struct kvm_vfio *vfio;


<cringe> can't this be on kvm->arch?

>  };
>  
>  #define kvm_err(fmt, ...) \
> @@ -677,15 +679,15 @@ void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
>  void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
>  bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
>  #else
> -static inline void kvm_arch_register_noncoherent_dma(void)
> +static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
>  {
>  }
>  
> -static inline void kvm_arch_unregister_noncoherent_dma(void)
> +static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
>  {
>  }
>  
> -static inline bool kvm_arch_has_noncoherent_dma(void)
> +static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
>  {
>  	return false;
>  }

Will fix in my series.

> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 24579a0..681e19b 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -97,4 +97,7 @@ extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
>  extern void vfio_group_put_external_user(struct vfio_group *group);
>  extern int vfio_external_user_iommu_id(struct vfio_group *group);
>  
> +extern struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm,
> +		unsigned long liobn);
> +

Wrong header file.

>  #endif /* VFIO_H */
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 7c1a349..a74ad16 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -847,6 +847,7 @@ struct kvm_device_attr {
>  #define  KVM_DEV_VFIO_GROUP			1
>  #define   KVM_DEV_VFIO_GROUP_ADD			1
>  #define   KVM_DEV_VFIO_GROUP_DEL			2
> +#define  KVM_DEV_VFIO_SPAPR_TCE_LIOBN		2
>  
>  /*
>   * ioctls for VM fds
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index 2e336a7..39dea9f 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -22,6 +22,7 @@
>  struct kvm_vfio_group {
>  	struct list_head node;
>  	struct vfio_group *vfio_group;
> +	uint64_t liobn; /* sPAPR */

Perhaps an arch pointer or at least a union.

>  };
>  
>  struct kvm_vfio {
> @@ -188,12 +189,52 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
>  	return -ENXIO;
>  }
>  
> +static int kvm_vfio_set_spapr_tce_liobn(struct kvm_device *dev,
> +		long attr, u64 arg)
> +{
> +	struct kvm_vfio *kv = dev->private;
> +	struct vfio_group *vfio_group;
> +	struct kvm_vfio_group *kvg;
> +	void __user *argp = (void __user *)arg;
> +	struct fd f;
> +	int32_t fd;
> +	uint64_t liobn = attr;
> +
> +	if (get_user(fd, (int32_t __user *)argp))
> +		return -EFAULT;
> +
> +	f = fdget(fd);
> +	if (!f.file)
> +		return -EBADF;
> +
> +	vfio_group = kvm_vfio_group_get_external_user(f.file);
> +	fdput(f);
> +
> +	list_for_each_entry(kvg, &kv->group_list, node) {
> +		if (kvg->vfio_group == vfio_group) {
> +			WARN_ON(kvg->liobn);

Users shouldn't be able to trigger WARN_ON so easily, return -EBUSY,
allow it to be unset and re-set, or just allow the overwrite.

> +			kvg->liobn = liobn;
> +			kvm_vfio_group_put_external_user(vfio_group);
> +			return 0;
> +		}
> +	}
> +
> +	kvm_vfio_group_put_external_user(vfio_group);
> +
> +	return -ENXIO;
> +}
> +
>  static int kvm_vfio_set_attr(struct kvm_device *dev,
>  			     struct kvm_device_attr *attr)
>  {
>  	switch (attr->group) {
>  	case KVM_DEV_VFIO_GROUP:
>  		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
> +#ifdef CONFIG_SPAPR_TCE_IOMMU
> +	case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
> +		return kvm_vfio_set_spapr_tce_liobn(dev, attr->attr,
> +				attr->addr);
> +#endif
>  	}
>  
>  	return -ENXIO;
> @@ -211,6 +252,10 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
>  		}
>  
>  		break;
> +#ifdef CONFIG_SPAPR_TCE_IOMMU
> +	case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
> +		return 0;
> +#endif
>  	}
>  
>  	return -ENXIO;
> @@ -250,6 +295,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
>  	mutex_init(&kv->lock);
>  
>  	dev->private = kv;
> +	dev->kvm->vfio = kv;
>  
>  	return 0;
>  }
> diff --git a/virt/kvm/vfio_rm.c b/virt/kvm/vfio_rm.c
> new file mode 100644
> index 0000000..ee9fd96
> --- /dev/null
> +++ b/virt/kvm/vfio_rm.c
> @@ -0,0 +1,54 @@
> +#include <linux/errno.h>
> +#include <linux/file.h>
> +#include <linux/kvm_host.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/vfio.h>
> +
> +struct kvm_vfio_group {
> +	struct list_head node;
> +	struct vfio_group *vfio_group;
> +	uint64_t liobn; /* sPAPR */
> +};
> +
> +struct kvm_vfio {
> +	struct list_head group_list;
> +	struct mutex lock;
> +	bool noncoherent;
> +};
> +
> +struct vfio_group {
> +	struct kref			kref;
> +	int				minor;
> +	atomic_t			container_users;
> +	struct iommu_group		*iommu_group;
> +	struct vfio_container		*container;
> +	struct list_head		device_list;
> +	struct mutex			device_lock;
> +	struct device			*dev;
> +	struct notifier_block		nb;
> +	struct list_head		vfio_next;
> +	struct list_head		container_next;
> +	atomic_t			opened;
> +};
> +
> +struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm, unsigned long liobn)
> +{
> +	struct kvm_vfio_group *kvg;
> +
> +	if (!kvm->vfio)
> +		return NULL;
> +
> +	list_for_each_entry(kvg, &kvm->vfio->group_list, node) {
> +		if (kvg->liobn == liobn)
> +			return kvg->vfio_group->iommu_group;
> +	}
> +
> +	return NULL;
> +}
> +EXPORT_SYMBOL_GPL(vfio_find_group_by_liobn);
> +
> +

You're kidding, right?  These are intentionally private data structures
that are blatantly copied so that you can extract what you want.  NACK.
The iommu_group is available off struct device, do you even need vfio or
this kvm-vfio device to get from liobn to iommu_group?  Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexey Kardashevskiy Oct. 5, 2013, 1:52 a.m. UTC | #2
On 05.10.2013 2:05, Alex Williamson wrote:
> On Fri, 2013-10-04 at 22:24 +1000, Alexey Kardashevskiy wrote:
>> This is a very rough change set required for ppc64 to use this KVM device.
>>
>> vfio_rm.c is a piece of code which is going to be called from the realmode (MMU off),
>> and I will put everything spapr-related under #ifdef CONFIG_SPAPR_TCE_IOMMU,
>> it is just friday and I have to run :)
>>
>> This is an RFC but it works.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  arch/powerpc/kvm/Kconfig  |  1 +
>>  arch/powerpc/kvm/Makefile |  4 ++++
>>  include/linux/kvm_host.h  |  8 ++++---
>>  include/linux/vfio.h      |  3 +++
>>  include/uapi/linux/kvm.h  |  1 +
>>  virt/kvm/vfio.c           | 46 ++++++++++++++++++++++++++++++++++++++++
>>  virt/kvm/vfio_rm.c        | 54 +++++++++++++++++++++++++++++++++++++++++++++++
>>  7 files changed, 114 insertions(+), 3 deletions(-)
>>  create mode 100644 virt/kvm/vfio_rm.c
>>
>> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
>> index 61b3535..d1b7f64 100644
>> --- a/arch/powerpc/kvm/Kconfig
>> +++ b/arch/powerpc/kvm/Kconfig
>> @@ -60,6 +60,7 @@ config KVM_BOOK3S_64
>>  	select KVM_BOOK3S_64_HANDLER
>>  	select KVM
>>  	select SPAPR_TCE_IOMMU
>> +	select KVM_VFIO
>>  	---help---
>>  	  Support running unmodified book3s_64 and book3s_32 guest kernels
>>  	  in virtual machines on book3s_64 host processors.
>> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
>> index 6646c95..fc2878b 100644
>> --- a/arch/powerpc/kvm/Makefile
>> +++ b/arch/powerpc/kvm/Makefile
>> @@ -55,6 +55,8 @@ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
>>  
>>  kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
>>  	$(KVM)/coalesced_mmio.o \
>> +	$(KVM)/vfio.o \
>> +	$(KVM)/vfio_rm.o \
>>  	fpu.o \
>>  	book3s_paired_singles.o \
>>  	book3s_pr.o \
>> @@ -76,6 +78,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
>>  kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
>>  	book3s_hv_rm_xics.o
>>  kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
>> +	$(KVM)/vfio_rm.o \
>>  	book3s_hv_rmhandlers.o \
>>  	book3s_hv_rm_mmu.o \
>>  	book3s_64_vio_hv.o \
>> @@ -89,6 +92,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
>>  
>>  kvm-book3s_64-module-objs := \
>>  	$(KVM)/kvm_main.o \
>> +	$(KVM)/vfio.o \
>>  	$(KVM)/eventfd.o \
>>  	powerpc.o \
>>  	emulate.o \
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index ad2b581..43c0290 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -407,6 +407,8 @@ struct kvm {
>>  #endif
>>  	long tlbs_dirty;
>>  	struct list_head devices;
>> +
>> +	struct kvm_vfio *vfio;
> 
> 
> <cringe> can't this be on kvm->arch?

It can, I just thought since it is valid for more than just one
platform, it can go here.


>>  };
>>  
>>  #define kvm_err(fmt, ...) \
>> @@ -677,15 +679,15 @@ void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
>>  void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
>>  bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
>>  #else
>> -static inline void kvm_arch_register_noncoherent_dma(void)
>> +static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
>>  {
>>  }
>>  
>> -static inline void kvm_arch_unregister_noncoherent_dma(void)
>> +static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
>>  {
>>  }
>>  
>> -static inline bool kvm_arch_has_noncoherent_dma(void)
>> +static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
>>  {
>>  	return false;
>>  }
> 
> Will fix in my series.
> 
>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>> index 24579a0..681e19b 100644
>> --- a/include/linux/vfio.h
>> +++ b/include/linux/vfio.h
>> @@ -97,4 +97,7 @@ extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
>>  extern void vfio_group_put_external_user(struct vfio_group *group);
>>  extern int vfio_external_user_iommu_id(struct vfio_group *group);
>>  
>> +extern struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm,
>> +		unsigned long liobn);
>> +
> 
> Wrong header file.
> 
>>  #endif /* VFIO_H */
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 7c1a349..a74ad16 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -847,6 +847,7 @@ struct kvm_device_attr {
>>  #define  KVM_DEV_VFIO_GROUP			1
>>  #define   KVM_DEV_VFIO_GROUP_ADD			1
>>  #define   KVM_DEV_VFIO_GROUP_DEL			2
>> +#define  KVM_DEV_VFIO_SPAPR_TCE_LIOBN		2
>>  
>>  /*
>>   * ioctls for VM fds
>> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
>> index 2e336a7..39dea9f 100644
>> --- a/virt/kvm/vfio.c
>> +++ b/virt/kvm/vfio.c
>> @@ -22,6 +22,7 @@
>>  struct kvm_vfio_group {
>>  	struct list_head node;
>>  	struct vfio_group *vfio_group;
>> +	uint64_t liobn; /* sPAPR */
> 
> Perhaps an arch pointer or at least a union.
> 
>>  };
>>  
>>  struct kvm_vfio {
>> @@ -188,12 +189,52 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
>>  	return -ENXIO;
>>  }
>>  
>> +static int kvm_vfio_set_spapr_tce_liobn(struct kvm_device *dev,
>> +		long attr, u64 arg)
>> +{
>> +	struct kvm_vfio *kv = dev->private;
>> +	struct vfio_group *vfio_group;
>> +	struct kvm_vfio_group *kvg;
>> +	void __user *argp = (void __user *)arg;
>> +	struct fd f;
>> +	int32_t fd;
>> +	uint64_t liobn = attr;
>> +
>> +	if (get_user(fd, (int32_t __user *)argp))
>> +		return -EFAULT;
>> +
>> +	f = fdget(fd);
>> +	if (!f.file)
>> +		return -EBADF;
>> +
>> +	vfio_group = kvm_vfio_group_get_external_user(f.file);
>> +	fdput(f);
>> +
>> +	list_for_each_entry(kvg, &kv->group_list, node) {
>> +		if (kvg->vfio_group == vfio_group) {
>> +			WARN_ON(kvg->liobn);
> 
> Users shouldn't be able to trigger WARN_ON so easily, return -EBUSY,
> allow it to be unset and re-set, or just allow the overwrite.
> 
>> +			kvg->liobn = liobn;
>> +			kvm_vfio_group_put_external_user(vfio_group);
>> +			return 0;
>> +		}
>> +	}
>> +
>> +	kvm_vfio_group_put_external_user(vfio_group);
>> +
>> +	return -ENXIO;
>> +}
>> +
>>  static int kvm_vfio_set_attr(struct kvm_device *dev,
>>  			     struct kvm_device_attr *attr)
>>  {
>>  	switch (attr->group) {
>>  	case KVM_DEV_VFIO_GROUP:
>>  		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
>> +#ifdef CONFIG_SPAPR_TCE_IOMMU
>> +	case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
>> +		return kvm_vfio_set_spapr_tce_liobn(dev, attr->attr,
>> +				attr->addr);
>> +#endif
>>  	}
>>  
>>  	return -ENXIO;
>> @@ -211,6 +252,10 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
>>  		}
>>  
>>  		break;
>> +#ifdef CONFIG_SPAPR_TCE_IOMMU
>> +	case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
>> +		return 0;
>> +#endif
>>  	}
>>  
>>  	return -ENXIO;
>> @@ -250,6 +295,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
>>  	mutex_init(&kv->lock);
>>  
>>  	dev->private = kv;
>> +	dev->kvm->vfio = kv;
>>  
>>  	return 0;
>>  }
>> diff --git a/virt/kvm/vfio_rm.c b/virt/kvm/vfio_rm.c
>> new file mode 100644
>> index 0000000..ee9fd96
>> --- /dev/null
>> +++ b/virt/kvm/vfio_rm.c
>> @@ -0,0 +1,54 @@
>> +#include <linux/errno.h>
>> +#include <linux/file.h>
>> +#include <linux/kvm_host.h>
>> +#include <linux/list.h>
>> +#include <linux/module.h>
>> +#include <linux/mutex.h>
>> +#include <linux/slab.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/vfio.h>
>> +
>> +struct kvm_vfio_group {
>> +	struct list_head node;
>> +	struct vfio_group *vfio_group;
>> +	uint64_t liobn; /* sPAPR */
>> +};
>> +
>> +struct kvm_vfio {
>> +	struct list_head group_list;
>> +	struct mutex lock;
>> +	bool noncoherent;
>> +};
>> +
>> +struct vfio_group {
>> +	struct kref			kref;
>> +	int				minor;
>> +	atomic_t			container_users;
>> +	struct iommu_group		*iommu_group;
>> +	struct vfio_container		*container;
>> +	struct list_head		device_list;
>> +	struct mutex			device_lock;
>> +	struct device			*dev;
>> +	struct notifier_block		nb;
>> +	struct list_head		vfio_next;
>> +	struct list_head		container_next;
>> +	atomic_t			opened;
>> +};
>> +
>> +struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm, unsigned long liobn)
>> +{
>> +	struct kvm_vfio_group *kvg;
>> +
>> +	if (!kvm->vfio)
>> +		return NULL;
>> +
>> +	list_for_each_entry(kvg, &kvm->vfio->group_list, node) {
>> +		if (kvg->liobn == liobn)
>> +			return kvg->vfio_group->iommu_group;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +EXPORT_SYMBOL_GPL(vfio_find_group_by_liobn);
>> +
>> +
> 
> You're kidding, right?  These are intentionally private data structures
> that are blatantly copied so that you can extract what you want.  NACK.
> The iommu_group is available off struct device, do you even need vfio or
> this kvm-vfio device to get from liobn to iommu_group?  Thanks,


This is an RFC. I am not saying this is what can go to upstream or
anything. I am not kidding (why everyone assumes that?), I am showing
what API I would like to have in the VFIO KVM device. I need the way to
get iommu_table (which is in a private data of iommu_group) by LIOBN and
the VFIO KVM device is the _only_ entity which will know about this
connection (LIOBN is made up by the qemu and told to the guest) and it
cannot go to the kvm.ko - and the patch like this is the best way to
show it as my english obviously sucks.
Alexey Kardashevskiy Oct. 5, 2013, 3:36 a.m. UTC | #3
On 10/05/2013 11:52 AM, Alexey Kardashevskiy wrote:
> On 05.10.2013 2:05, Alex Williamson wrote:
>> On Fri, 2013-10-04 at 22:24 +1000, Alexey Kardashevskiy wrote:
>>> This is a very rough change set required for ppc64 to use this KVM device.
>>>
>>> vfio_rm.c is a piece of code which is going to be called from the realmode (MMU off),
>>> and I will put everything spapr-related under #ifdef CONFIG_SPAPR_TCE_IOMMU,
>>> it is just friday and I have to run :)
>>>
>>> This is an RFC but it works.
>>>
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> ---
>>>  arch/powerpc/kvm/Kconfig  |  1 +
>>>  arch/powerpc/kvm/Makefile |  4 ++++
>>>  include/linux/kvm_host.h  |  8 ++++---
>>>  include/linux/vfio.h      |  3 +++
>>>  include/uapi/linux/kvm.h  |  1 +
>>>  virt/kvm/vfio.c           | 46 ++++++++++++++++++++++++++++++++++++++++
>>>  virt/kvm/vfio_rm.c        | 54 +++++++++++++++++++++++++++++++++++++++++++++++
>>>  7 files changed, 114 insertions(+), 3 deletions(-)
>>>  create mode 100644 virt/kvm/vfio_rm.c
>>>
>>> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
>>> index 61b3535..d1b7f64 100644
>>> --- a/arch/powerpc/kvm/Kconfig
>>> +++ b/arch/powerpc/kvm/Kconfig
>>> @@ -60,6 +60,7 @@ config KVM_BOOK3S_64
>>>  	select KVM_BOOK3S_64_HANDLER
>>>  	select KVM
>>>  	select SPAPR_TCE_IOMMU
>>> +	select KVM_VFIO
>>>  	---help---
>>>  	  Support running unmodified book3s_64 and book3s_32 guest kernels
>>>  	  in virtual machines on book3s_64 host processors.
>>> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
>>> index 6646c95..fc2878b 100644
>>> --- a/arch/powerpc/kvm/Makefile
>>> +++ b/arch/powerpc/kvm/Makefile
>>> @@ -55,6 +55,8 @@ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
>>>  
>>>  kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
>>>  	$(KVM)/coalesced_mmio.o \
>>> +	$(KVM)/vfio.o \
>>> +	$(KVM)/vfio_rm.o \
>>>  	fpu.o \
>>>  	book3s_paired_singles.o \
>>>  	book3s_pr.o \
>>> @@ -76,6 +78,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
>>>  kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
>>>  	book3s_hv_rm_xics.o
>>>  kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
>>> +	$(KVM)/vfio_rm.o \
>>>  	book3s_hv_rmhandlers.o \
>>>  	book3s_hv_rm_mmu.o \
>>>  	book3s_64_vio_hv.o \
>>> @@ -89,6 +92,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
>>>  
>>>  kvm-book3s_64-module-objs := \
>>>  	$(KVM)/kvm_main.o \
>>> +	$(KVM)/vfio.o \
>>>  	$(KVM)/eventfd.o \
>>>  	powerpc.o \
>>>  	emulate.o \
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index ad2b581..43c0290 100644
>>> --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -407,6 +407,8 @@ struct kvm {
>>>  #endif
>>>  	long tlbs_dirty;
>>>  	struct list_head devices;
>>> +
>>> +	struct kvm_vfio *vfio;
>>
>>
>> <cringe> can't this be on kvm->arch?
> 
> It can, I just thought since it is valid for more than just one
> platform, it can go here.
> 
> 
>>>  };
>>>  
>>>  #define kvm_err(fmt, ...) \
>>> @@ -677,15 +679,15 @@ void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
>>>  void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
>>>  bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
>>>  #else
>>> -static inline void kvm_arch_register_noncoherent_dma(void)
>>> +static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
>>>  {
>>>  }
>>>  
>>> -static inline void kvm_arch_unregister_noncoherent_dma(void)
>>> +static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
>>>  {
>>>  }
>>>  
>>> -static inline bool kvm_arch_has_noncoherent_dma(void)
>>> +static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
>>>  {
>>>  	return false;
>>>  }
>>
>> Will fix in my series.
>>
>>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>>> index 24579a0..681e19b 100644
>>> --- a/include/linux/vfio.h
>>> +++ b/include/linux/vfio.h
>>> @@ -97,4 +97,7 @@ extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
>>>  extern void vfio_group_put_external_user(struct vfio_group *group);
>>>  extern int vfio_external_user_iommu_id(struct vfio_group *group);
>>>  
>>> +extern struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm,
>>> +		unsigned long liobn);
>>> +
>>
>> Wrong header file.


btw there are two - uapi/linux/vfio.h and linux/vfio.h. The external user
API is in linux/vfio.h but my change should go to uapi/linux/vfio.h, is
that correct? Or we need a third header? Just asking, no kidding, no arguing :)


>>
>>>  #endif /* VFIO_H */
>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>> index 7c1a349..a74ad16 100644
>>> --- a/include/uapi/linux/kvm.h
>>> +++ b/include/uapi/linux/kvm.h
>>> @@ -847,6 +847,7 @@ struct kvm_device_attr {
>>>  #define  KVM_DEV_VFIO_GROUP			1
>>>  #define   KVM_DEV_VFIO_GROUP_ADD			1
>>>  #define   KVM_DEV_VFIO_GROUP_DEL			2
>>> +#define  KVM_DEV_VFIO_SPAPR_TCE_LIOBN		2
>>>  
>>>  /*
>>>   * ioctls for VM fds
>>> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
>>> index 2e336a7..39dea9f 100644
>>> --- a/virt/kvm/vfio.c
>>> +++ b/virt/kvm/vfio.c
>>> @@ -22,6 +22,7 @@
>>>  struct kvm_vfio_group {
>>>  	struct list_head node;
>>>  	struct vfio_group *vfio_group;
>>> +	uint64_t liobn; /* sPAPR */
>>
>> Perhaps an arch pointer or at least a union.
>>
>>>  };
>>>  
>>>  struct kvm_vfio {
>>> @@ -188,12 +189,52 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
>>>  	return -ENXIO;
>>>  }
>>>  
>>> +static int kvm_vfio_set_spapr_tce_liobn(struct kvm_device *dev,
>>> +		long attr, u64 arg)
>>> +{
>>> +	struct kvm_vfio *kv = dev->private;
>>> +	struct vfio_group *vfio_group;
>>> +	struct kvm_vfio_group *kvg;
>>> +	void __user *argp = (void __user *)arg;
>>> +	struct fd f;
>>> +	int32_t fd;
>>> +	uint64_t liobn = attr;
>>> +
>>> +	if (get_user(fd, (int32_t __user *)argp))
>>> +		return -EFAULT;
>>> +
>>> +	f = fdget(fd);
>>> +	if (!f.file)
>>> +		return -EBADF;
>>> +
>>> +	vfio_group = kvm_vfio_group_get_external_user(f.file);
>>> +	fdput(f);
>>> +
>>> +	list_for_each_entry(kvg, &kv->group_list, node) {
>>> +		if (kvg->vfio_group == vfio_group) {
>>> +			WARN_ON(kvg->liobn);
>>
>> Users shouldn't be able to trigger WARN_ON so easily, return -EBUSY,
>> allow it to be unset and re-set, or just allow the overwrite.
>>
>>> +			kvg->liobn = liobn;
>>> +			kvm_vfio_group_put_external_user(vfio_group);
>>> +			return 0;
>>> +		}
>>> +	}
>>> +
>>> +	kvm_vfio_group_put_external_user(vfio_group);
>>> +
>>> +	return -ENXIO;
>>> +}
>>> +
>>>  static int kvm_vfio_set_attr(struct kvm_device *dev,
>>>  			     struct kvm_device_attr *attr)
>>>  {
>>>  	switch (attr->group) {
>>>  	case KVM_DEV_VFIO_GROUP:
>>>  		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
>>> +#ifdef CONFIG_SPAPR_TCE_IOMMU
>>> +	case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
>>> +		return kvm_vfio_set_spapr_tce_liobn(dev, attr->attr,
>>> +				attr->addr);
>>> +#endif
>>>  	}
>>>  
>>>  	return -ENXIO;
>>> @@ -211,6 +252,10 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
>>>  		}
>>>  
>>>  		break;
>>> +#ifdef CONFIG_SPAPR_TCE_IOMMU
>>> +	case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
>>> +		return 0;
>>> +#endif
>>>  	}
>>>  
>>>  	return -ENXIO;
>>> @@ -250,6 +295,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
>>>  	mutex_init(&kv->lock);
>>>  
>>>  	dev->private = kv;
>>> +	dev->kvm->vfio = kv;
>>>  
>>>  	return 0;
>>>  }
>>> diff --git a/virt/kvm/vfio_rm.c b/virt/kvm/vfio_rm.c
>>> new file mode 100644
>>> index 0000000..ee9fd96
>>> --- /dev/null
>>> +++ b/virt/kvm/vfio_rm.c
>>> @@ -0,0 +1,54 @@
>>> +#include <linux/errno.h>
>>> +#include <linux/file.h>
>>> +#include <linux/kvm_host.h>
>>> +#include <linux/list.h>
>>> +#include <linux/module.h>
>>> +#include <linux/mutex.h>
>>> +#include <linux/slab.h>
>>> +#include <linux/uaccess.h>
>>> +#include <linux/vfio.h>
>>> +
>>> +struct kvm_vfio_group {
>>> +	struct list_head node;
>>> +	struct vfio_group *vfio_group;
>>> +	uint64_t liobn; /* sPAPR */
>>> +};
>>> +
>>> +struct kvm_vfio {
>>> +	struct list_head group_list;
>>> +	struct mutex lock;
>>> +	bool noncoherent;
>>> +};
>>> +
>>> +struct vfio_group {
>>> +	struct kref			kref;
>>> +	int				minor;
>>> +	atomic_t			container_users;
>>> +	struct iommu_group		*iommu_group;
>>> +	struct vfio_container		*container;
>>> +	struct list_head		device_list;
>>> +	struct mutex			device_lock;
>>> +	struct device			*dev;
>>> +	struct notifier_block		nb;
>>> +	struct list_head		vfio_next;
>>> +	struct list_head		container_next;
>>> +	atomic_t			opened;
>>> +};
>>> +
>>> +struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm, unsigned long liobn)
>>> +{
>>> +	struct kvm_vfio_group *kvg;
>>> +
>>> +	if (!kvm->vfio)
>>> +		return NULL;
>>> +
>>> +	list_for_each_entry(kvg, &kvm->vfio->group_list, node) {
>>> +		if (kvg->liobn == liobn)
>>> +			return kvg->vfio_group->iommu_group;
>>> +	}
>>> +
>>> +	return NULL;
>>> +}
>>> +EXPORT_SYMBOL_GPL(vfio_find_group_by_liobn);
>>> +
>>> +
>>
>> You're kidding, right?  These are intentionally private data structures
>> that are blatantly copied so that you can extract what you want.  NACK.
>> The iommu_group is available off struct device, do you even need vfio or
>> this kvm-vfio device to get from liobn to iommu_group?  Thanks,
> 
> 
> This is an RFC. I am not saying this is what can go to upstream or
> anything. I am not kidding (why everyone assumes that?), I am showing
> what API I would like to have in the VFIO KVM device. I need the way to
> get iommu_table (which is in a private data of iommu_group) by LIOBN and
> the VFIO KVM device is the _only_ entity which will know about this
> connection (LIOBN is made up by the qemu and told to the guest) and it
> cannot go to the kvm.ko - and the patch like this is the best way to
> show it as my english obviously sucks.

Oh. I was confused by:
drivers/vfio/vfio.c|67| struct vfio_group {
drivers/vfio/vfio_iommu_type1.c|73| struct vfio_group {

which are two completely different types (confusing).

So. I either need an additional file to compile to the kernel for mmu-off
case (such as vfio_rm.c) and share vfio_group struct via some internal
header or compile vfio.c into the kernel image always, in this case I'll
need to export kvm_vfio_ops symbol. What would you suggest?
diff mbox

Patch

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 61b3535..d1b7f64 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -60,6 +60,7 @@  config KVM_BOOK3S_64
 	select KVM_BOOK3S_64_HANDLER
 	select KVM
 	select SPAPR_TCE_IOMMU
+	select KVM_VFIO
 	---help---
 	  Support running unmodified book3s_64 and book3s_32 guest kernels
 	  in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 6646c95..fc2878b 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -55,6 +55,8 @@  kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 
 kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
 	$(KVM)/coalesced_mmio.o \
+	$(KVM)/vfio.o \
+	$(KVM)/vfio_rm.o \
 	fpu.o \
 	book3s_paired_singles.o \
 	book3s_pr.o \
@@ -76,6 +78,7 @@  kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
 	book3s_hv_rm_xics.o
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+	$(KVM)/vfio_rm.o \
 	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_64_vio_hv.o \
@@ -89,6 +92,7 @@  kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 
 kvm-book3s_64-module-objs := \
 	$(KVM)/kvm_main.o \
+	$(KVM)/vfio.o \
 	$(KVM)/eventfd.o \
 	powerpc.o \
 	emulate.o \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ad2b581..43c0290 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -407,6 +407,8 @@  struct kvm {
 #endif
 	long tlbs_dirty;
 	struct list_head devices;
+
+	struct kvm_vfio *vfio;
 };
 
 #define kvm_err(fmt, ...) \
@@ -677,15 +679,15 @@  void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
 void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
 bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
 #else
-static inline void kvm_arch_register_noncoherent_dma(void)
+static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
 {
 }
 
-static inline void kvm_arch_unregister_noncoherent_dma(void)
+static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
 {
 }
 
-static inline bool kvm_arch_has_noncoherent_dma(void)
+static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 {
 	return false;
 }
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 24579a0..681e19b 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -97,4 +97,7 @@  extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
 extern int vfio_external_user_iommu_id(struct vfio_group *group);
 
+extern struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm,
+		unsigned long liobn);
+
 #endif /* VFIO_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7c1a349..a74ad16 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -847,6 +847,7 @@  struct kvm_device_attr {
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
+#define  KVM_DEV_VFIO_SPAPR_TCE_LIOBN		2
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 2e336a7..39dea9f 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -22,6 +22,7 @@ 
 struct kvm_vfio_group {
 	struct list_head node;
 	struct vfio_group *vfio_group;
+	uint64_t liobn; /* sPAPR */
 };
 
 struct kvm_vfio {
@@ -188,12 +189,52 @@  static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 	return -ENXIO;
 }
 
+static int kvm_vfio_set_spapr_tce_liobn(struct kvm_device *dev,
+		long attr, u64 arg)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct vfio_group *vfio_group;
+	struct kvm_vfio_group *kvg;
+	void __user *argp = (void __user *)arg;
+	struct fd f;
+	int32_t fd;
+	uint64_t liobn = attr;
+
+	if (get_user(fd, (int32_t __user *)argp))
+		return -EFAULT;
+
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+
+	vfio_group = kvm_vfio_group_get_external_user(f.file);
+	fdput(f);
+
+	list_for_each_entry(kvg, &kv->group_list, node) {
+		if (kvg->vfio_group == vfio_group) {
+			WARN_ON(kvg->liobn);
+			kvg->liobn = liobn;
+			kvm_vfio_group_put_external_user(vfio_group);
+			return 0;
+		}
+	}
+
+	kvm_vfio_group_put_external_user(vfio_group);
+
+	return -ENXIO;
+}
+
 static int kvm_vfio_set_attr(struct kvm_device *dev,
 			     struct kvm_device_attr *attr)
 {
 	switch (attr->group) {
 	case KVM_DEV_VFIO_GROUP:
 		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
+		return kvm_vfio_set_spapr_tce_liobn(dev, attr->attr,
+				attr->addr);
+#endif
 	}
 
 	return -ENXIO;
@@ -211,6 +252,10 @@  static int kvm_vfio_has_attr(struct kvm_device *dev,
 		}
 
 		break;
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
+		return 0;
+#endif
 	}
 
 	return -ENXIO;
@@ -250,6 +295,7 @@  static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 	mutex_init(&kv->lock);
 
 	dev->private = kv;
+	dev->kvm->vfio = kv;
 
 	return 0;
 }
diff --git a/virt/kvm/vfio_rm.c b/virt/kvm/vfio_rm.c
new file mode 100644
index 0000000..ee9fd96
--- /dev/null
+++ b/virt/kvm/vfio_rm.c
@@ -0,0 +1,54 @@ 
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+struct kvm_vfio_group {
+	struct list_head node;
+	struct vfio_group *vfio_group;
+	uint64_t liobn; /* sPAPR */
+};
+
+struct kvm_vfio {
+	struct list_head group_list;
+	struct mutex lock;
+	bool noncoherent;
+};
+
+struct vfio_group {
+	struct kref			kref;
+	int				minor;
+	atomic_t			container_users;
+	struct iommu_group		*iommu_group;
+	struct vfio_container		*container;
+	struct list_head		device_list;
+	struct mutex			device_lock;
+	struct device			*dev;
+	struct notifier_block		nb;
+	struct list_head		vfio_next;
+	struct list_head		container_next;
+	atomic_t			opened;
+};
+
+struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm, unsigned long liobn)
+{
+	struct kvm_vfio_group *kvg;
+
+	if (!kvm->vfio)
+		return NULL;
+
+	list_for_each_entry(kvg, &kvm->vfio->group_list, node) {
+		if (kvg->liobn == liobn)
+			return kvg->vfio_group->iommu_group;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_find_group_by_liobn);
+
+