[6/7] vfio: moving some functions in common file

Message ID 1379575763-2091-7-git-send-email-Bharat.Bhushan@freescale.com
State Not Applicable

Commit Message

Bharat Bhushan Sept. 19, 2013, 7:29 a.m. UTC
Some functions defined in vfio_iommu_type1.c are common, and we want to
use them for the FSL IOMMU (PAMU) and the iommu-none driver, so some of
them have been moved to vfio_iommu_common.c.

I think we can do more of that, but we will take this step by step.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 drivers/vfio/Makefile            |    4 +-
 drivers/vfio/vfio_iommu_common.c |  235 ++++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio_iommu_common.h |   30 +++++
 drivers/vfio/vfio_iommu_type1.c  |  206 +---------------------------------
 4 files changed, 268 insertions(+), 207 deletions(-)
 create mode 100644 drivers/vfio/vfio_iommu_common.c
 create mode 100644 drivers/vfio/vfio_iommu_common.h
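
The point of the split is visible in the Makefile hunk below: each IOMMU backend now links the shared object alongside its own. A future PAMU backend would presumably do the same; for illustration only (CONFIG_VFIO_IOMMU_FSL_PAMU is a hypothetical symbol, not part of this series):

 obj-$(CONFIG_VFIO_IOMMU_FSL_PAMU) += vfio_iommu_common.o vfio_iommu_fsl_pamu.o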

Comments

Alex Williamson Sept. 25, 2013, 5:02 p.m. UTC | #1
On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> Some function defined in vfio_iommu_type1.c were common and
> we want to use these for FSL IOMMU (PAMU) and iommu-none driver.
> So some of them are moved to vfio_iommu_common.c
> 
> I think we can do more of that but we will take this step by step.
> 
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>  drivers/vfio/Makefile            |    4 +-
>  drivers/vfio/vfio_iommu_common.c |  235 ++++++++++++++++++++++++++++++++++++++
>  drivers/vfio/vfio_iommu_common.h |   30 +++++
>  drivers/vfio/vfio_iommu_type1.c  |  206 +---------------------------------
>  4 files changed, 268 insertions(+), 207 deletions(-)
>  create mode 100644 drivers/vfio/vfio_iommu_common.c
>  create mode 100644 drivers/vfio/vfio_iommu_common.h
> 
> diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
> index 72bfabc..c5792ec 100644
> --- a/drivers/vfio/Makefile
> +++ b/drivers/vfio/Makefile
> @@ -1,4 +1,4 @@
>  obj-$(CONFIG_VFIO) += vfio.o
> -obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
> -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
> +obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_common.o vfio_iommu_type1.o
> +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_common.o vfio_iommu_spapr_tce.o
>  obj-$(CONFIG_VFIO_PCI) += pci/
> diff --git a/drivers/vfio/vfio_iommu_common.c b/drivers/vfio/vfio_iommu_common.c
> new file mode 100644
> index 0000000..8bdc0ea
> --- /dev/null
> +++ b/drivers/vfio/vfio_iommu_common.c
> @@ -0,0 +1,235 @@
> +/*
> + * VFIO: Common code for vfio IOMMU support
> + *
> + * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
> + *     Author: Alex Williamson <alex.williamson@redhat.com>
> + *     Author: Bharat Bhushan <bharat.bhushan@freescale.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * Derived from original vfio:
> + * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
> + * Author: Tom Lyon, pugs@cisco.com
> + */
> +
> +#include <linux/compat.h>
> +#include <linux/device.h>
> +#include <linux/fs.h>
> +#include <linux/iommu.h>
> +#include <linux/module.h>
> +#include <linux/mm.h>
> +#include <linux/pci.h>		/* pci_bus_type */
> +#include <linux/rbtree.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/vfio.h>
> +#include <linux/workqueue.h>

Please cleanup includes on both the source and target files.  You
obviously don't need linux/pci.h here for one.
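
For reference, the common file above only seems to rely on the mm/rlimit accounting helpers, the workqueue, slab allocation, and IOMMU_WRITE, so a trimmed include list might look roughly like this (a sketch, not build-tested):

#include <linux/iommu.h>	/* IOMMU_WRITE */
#include <linux/mm.h>		/* get_user_pages_fast(), pfn_to_page() */
#include <linux/module.h>	/* module_param_named() */
#include <linux/sched.h>	/* current, rlimit(), get_task_mm() */
#include <linux/slab.h>		/* kmalloc(), kfree() */
#include <linux/workqueue.h>	/* INIT_WORK(), schedule_work() */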

> +
> +static bool disable_hugepages;
> +module_param_named(disable_hugepages,
> +		   disable_hugepages, bool, S_IRUGO | S_IWUSR);
> +MODULE_PARM_DESC(disable_hugepages,
> +		 "Disable VFIO IOMMU support for IOMMU hugepages.");
> +
> +struct vwork {
> +	struct mm_struct	*mm;
> +	long			npage;
> +	struct work_struct	work;
> +};
> +
> +/* delayed decrement/increment for locked_vm */
> +void vfio_lock_acct_bg(struct work_struct *work)
> +{
> +	struct vwork *vwork = container_of(work, struct vwork, work);
> +	struct mm_struct *mm;
> +
> +	mm = vwork->mm;
> +	down_write(&mm->mmap_sem);
> +	mm->locked_vm += vwork->npage;
> +	up_write(&mm->mmap_sem);
> +	mmput(mm);
> +	kfree(vwork);
> +}
> +
> +void vfio_lock_acct(long npage)
> +{
> +	struct vwork *vwork;
> +	struct mm_struct *mm;
> +
> +	if (!current->mm || !npage)
> +		return; /* process exited or nothing to do */
> +
> +	if (down_write_trylock(&current->mm->mmap_sem)) {
> +		current->mm->locked_vm += npage;
> +		up_write(&current->mm->mmap_sem);
> +		return;
> +	}
> +
> +	/*
> +	 * Couldn't get mmap_sem lock, so must setup to update
> +	 * mm->locked_vm later. If locked_vm were atomic, we
> +	 * wouldn't need this silliness
> +	 */
> +	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
> +	if (!vwork)
> +		return;
> +	mm = get_task_mm(current);
> +	if (!mm) {
> +		kfree(vwork);
> +		return;
> +	}
> +	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
> +	vwork->mm = mm;
> +	vwork->npage = npage;
> +	schedule_work(&vwork->work);
> +}
> +
> +/*
> + * Some mappings aren't backed by a struct page, for example an mmap'd
> + * MMIO range for our own or another device.  These use a different
> + * pfn conversion and shouldn't be tracked as locked pages.
> + */
> +bool is_invalid_reserved_pfn(unsigned long pfn)
> +{
> +	if (pfn_valid(pfn)) {
> +		bool reserved;
> +		struct page *tail = pfn_to_page(pfn);
> +		struct page *head = compound_trans_head(tail);
> +		reserved = !!(PageReserved(head));
> +		if (head != tail) {
> +			/*
> +			 * "head" is not a dangling pointer
> +			 * (compound_trans_head takes care of that)
> +			 * but the hugepage may have been split
> +			 * from under us (and we may not hold a
> +			 * reference count on the head page so it can
> +			 * be reused before we run PageReferenced), so
> +			 * we've to check PageTail before returning
> +			 * what we just read.
> +			 */
> +			smp_rmb();
> +			if (PageTail(tail))
> +				return reserved;
> +		}
> +		return PageReserved(tail);
> +	}
> +
> +	return true;
> +}
> +
> +int put_pfn(unsigned long pfn, int prot)
> +{
> +	if (!is_invalid_reserved_pfn(pfn)) {
> +		struct page *page = pfn_to_page(pfn);
> +		if (prot & IOMMU_WRITE)
> +			SetPageDirty(page);
> +		put_page(page);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
> +{
> +	struct page *page[1];
> +	struct vm_area_struct *vma;
> +	int ret = -EFAULT;
> +
> +	if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
> +		*pfn = page_to_pfn(page[0]);
> +		return 0;
> +	}
> +
> +	printk("via vma\n");
> +	down_read(&current->mm->mmap_sem);
> +
> +	vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
> +
> +	if (vma && vma->vm_flags & VM_PFNMAP) {
> +		*pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
> +		if (is_invalid_reserved_pfn(*pfn))
> +			ret = 0;
> +	}
> +
> +	up_read(&current->mm->mmap_sem);
> +
> +	return ret;
> +}
> +
> +/*
> + * Attempt to pin pages.  We really don't want to track all the pfns and
> + * the iommu can only map chunks of consecutive pfns anyway, so get the
> + * first page and all consecutive pages with the same locking.
> + */
> +long vfio_pin_pages(unsigned long vaddr, long npage,
> +			   int prot, unsigned long *pfn_base)
> +{
> +	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> +	bool lock_cap = capable(CAP_IPC_LOCK);
> +	long ret, i;
> +
> +	if (!current->mm)
> +		return -ENODEV;
> +
> +	ret = vaddr_get_pfn(vaddr, prot, pfn_base);
> +	if (ret)
> +		return ret;
> +
> +	if (is_invalid_reserved_pfn(*pfn_base))
> +		return 1;
> +
> +	if (!lock_cap && current->mm->locked_vm + 1 > limit) {
> +		put_pfn(*pfn_base, prot);
> +		pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
> +			limit << PAGE_SHIFT);
> +		return -ENOMEM;
> +	}
> +
> +	if (unlikely(disable_hugepages)) {
> +		vfio_lock_acct(1);
> +		return 1;
> +	}
> +
> +	/* Lock all the consecutive pages from pfn_base */
> +	for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
> +		unsigned long pfn = 0;
> +
> +		ret = vaddr_get_pfn(vaddr, prot, &pfn);
> +		if (ret)
> +			break;
> +
> +		if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
> +			put_pfn(pfn, prot);
> +			break;
> +		}
> +
> +		if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
> +			put_pfn(pfn, prot);
> +			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
> +				__func__, limit << PAGE_SHIFT);
> +			break;
> +		}
> +	}
> +
> +	vfio_lock_acct(i);
> +
> +	return i;
> +}
> +
> +long vfio_unpin_pages(unsigned long pfn, long npage,
> +			     int prot, bool do_accounting)
> +{
> +	unsigned long unlocked = 0;
> +	long i;
> +
> +	for (i = 0; i < npage; i++)
> +		unlocked += put_pfn(pfn++, prot);
> +
> +	if (do_accounting)
> +		vfio_lock_acct(-unlocked);
> +
> +	return unlocked;
> +}
> diff --git a/drivers/vfio/vfio_iommu_common.h b/drivers/vfio/vfio_iommu_common.h
> new file mode 100644
> index 0000000..4738391
> --- /dev/null
> +++ b/drivers/vfio/vfio_iommu_common.h
> @@ -0,0 +1,30 @@
> +/*
> + * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
> + * Copyright (C) 2013 Freescale Semiconductor, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
> + */
> +
> +#ifndef _VFIO_IOMMU_COMMON_H
> +#define _VFIO_IOMMU_COMMON_H
> +
> +void vfio_lock_acct_bg(struct work_struct *work);

Does this need to be exposed?

> +void vfio_lock_acct(long npage);
> +bool is_invalid_reserved_pfn(unsigned long pfn);
> +int put_pfn(unsigned long pfn, int prot);

Why are these exposed, they only seem to be used by functions in the new
common file.

> +long vfio_pin_pages(unsigned long vaddr, long npage, int prot,
> +		    unsigned long *pfn_base);
> +long vfio_unpin_pages(unsigned long pfn, long npage,
> +		      int prot, bool do_accounting);

Can we get by with just these two and vfio_lock_acct()?
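
If so, the header would shrink to just those entry points, along these lines (an illustrative sketch of the suggestion, not the posted patch):

#ifndef _VFIO_IOMMU_COMMON_H
#define _VFIO_IOMMU_COMMON_H

/* adjust current->mm->locked_vm by npage, deferring to a workqueue if needed */
void vfio_lock_acct(long npage);
/* pin/unpin a run of consecutive user pages for IOMMU mapping */
long vfio_pin_pages(unsigned long vaddr, long npage, int prot,
		    unsigned long *pfn_base);
long vfio_unpin_pages(unsigned long pfn, long npage,
		      int prot, bool do_accounting);

#endif /* _VFIO_IOMMU_COMMON_H */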

> +#endif
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index a9807de..e9a58fa 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -37,6 +37,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/vfio.h>
>  #include <linux/workqueue.h>
> +#include "vfio_iommu_common.h"
>  
>  #define DRIVER_VERSION  "0.2"
>  #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
> @@ -48,12 +49,6 @@ module_param_named(allow_unsafe_interrupts,
>  MODULE_PARM_DESC(allow_unsafe_interrupts,
>  		 "Enable VFIO IOMMU support for on platforms without interrupt remapping support.");
>  
> -static bool disable_hugepages;
> -module_param_named(disable_hugepages,
> -		   disable_hugepages, bool, S_IRUGO | S_IWUSR);
> -MODULE_PARM_DESC(disable_hugepages,
> -		 "Disable VFIO IOMMU support for IOMMU hugepages.");
> -
>  struct vfio_iommu {
>  	struct iommu_domain	*domain;
>  	struct mutex		lock;
> @@ -123,205 +118,6 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
>  	rb_erase(&old->node, &iommu->dma_list);
>  }
>  
> -struct vwork {
> -	struct mm_struct	*mm;
> -	long			npage;
> -	struct work_struct	work;
> -};
> -
> -/* delayed decrement/increment for locked_vm */
> -static void vfio_lock_acct_bg(struct work_struct *work)
> -{
> -	struct vwork *vwork = container_of(work, struct vwork, work);
> -	struct mm_struct *mm;
> -
> -	mm = vwork->mm;
> -	down_write(&mm->mmap_sem);
> -	mm->locked_vm += vwork->npage;
> -	up_write(&mm->mmap_sem);
> -	mmput(mm);
> -	kfree(vwork);
> -}
> -
> -static void vfio_lock_acct(long npage)
> -{
> -	struct vwork *vwork;
> -	struct mm_struct *mm;
> -
> -	if (!current->mm || !npage)
> -		return; /* process exited or nothing to do */
> -
> -	if (down_write_trylock(&current->mm->mmap_sem)) {
> -		current->mm->locked_vm += npage;
> -		up_write(&current->mm->mmap_sem);
> -		return;
> -	}
> -
> -	/*
> -	 * Couldn't get mmap_sem lock, so must setup to update
> -	 * mm->locked_vm later. If locked_vm were atomic, we
> -	 * wouldn't need this silliness
> -	 */
> -	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
> -	if (!vwork)
> -		return;
> -	mm = get_task_mm(current);
> -	if (!mm) {
> -		kfree(vwork);
> -		return;
> -	}
> -	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
> -	vwork->mm = mm;
> -	vwork->npage = npage;
> -	schedule_work(&vwork->work);
> -}
> -
> -/*
> - * Some mappings aren't backed by a struct page, for example an mmap'd
> - * MMIO range for our own or another device.  These use a different
> - * pfn conversion and shouldn't be tracked as locked pages.
> - */
> -static bool is_invalid_reserved_pfn(unsigned long pfn)
> -{
> -	if (pfn_valid(pfn)) {
> -		bool reserved;
> -		struct page *tail = pfn_to_page(pfn);
> -		struct page *head = compound_trans_head(tail);
> -		reserved = !!(PageReserved(head));
> -		if (head != tail) {
> -			/*
> -			 * "head" is not a dangling pointer
> -			 * (compound_trans_head takes care of that)
> -			 * but the hugepage may have been split
> -			 * from under us (and we may not hold a
> -			 * reference count on the head page so it can
> -			 * be reused before we run PageReferenced), so
> -			 * we've to check PageTail before returning
> -			 * what we just read.
> -			 */
> -			smp_rmb();
> -			if (PageTail(tail))
> -				return reserved;
> -		}
> -		return PageReserved(tail);
> -	}
> -
> -	return true;
> -}
> -
> -static int put_pfn(unsigned long pfn, int prot)
> -{
> -	if (!is_invalid_reserved_pfn(pfn)) {
> -		struct page *page = pfn_to_page(pfn);
> -		if (prot & IOMMU_WRITE)
> -			SetPageDirty(page);
> -		put_page(page);
> -		return 1;
> -	}
> -	return 0;
> -}
> -
> -static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
> -{
> -	struct page *page[1];
> -	struct vm_area_struct *vma;
> -	int ret = -EFAULT;
> -
> -	if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
> -		*pfn = page_to_pfn(page[0]);
> -		return 0;
> -	}
> -
> -	down_read(&current->mm->mmap_sem);
> -
> -	vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
> -
> -	if (vma && vma->vm_flags & VM_PFNMAP) {
> -		*pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
> -		if (is_invalid_reserved_pfn(*pfn))
> -			ret = 0;
> -	}
> -
> -	up_read(&current->mm->mmap_sem);
> -
> -	return ret;
> -}
> -
> -/*
> - * Attempt to pin pages.  We really don't want to track all the pfns and
> - * the iommu can only map chunks of consecutive pfns anyway, so get the
> - * first page and all consecutive pages with the same locking.
> - */
> -static long vfio_pin_pages(unsigned long vaddr, long npage,
> -			   int prot, unsigned long *pfn_base)
> -{
> -	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> -	bool lock_cap = capable(CAP_IPC_LOCK);
> -	long ret, i;
> -
> -	if (!current->mm)
> -		return -ENODEV;
> -
> -	ret = vaddr_get_pfn(vaddr, prot, pfn_base);
> -	if (ret)
> -		return ret;
> -
> -	if (is_invalid_reserved_pfn(*pfn_base))
> -		return 1;
> -
> -	if (!lock_cap && current->mm->locked_vm + 1 > limit) {
> -		put_pfn(*pfn_base, prot);
> -		pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
> -			limit << PAGE_SHIFT);
> -		return -ENOMEM;
> -	}
> -
> -	if (unlikely(disable_hugepages)) {
> -		vfio_lock_acct(1);
> -		return 1;
> -	}
> -
> -	/* Lock all the consecutive pages from pfn_base */
> -	for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
> -		unsigned long pfn = 0;
> -
> -		ret = vaddr_get_pfn(vaddr, prot, &pfn);
> -		if (ret)
> -			break;
> -
> -		if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
> -			put_pfn(pfn, prot);
> -			break;
> -		}
> -
> -		if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
> -			put_pfn(pfn, prot);
> -			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
> -				__func__, limit << PAGE_SHIFT);
> -			break;
> -		}
> -	}
> -
> -	vfio_lock_acct(i);
> -
> -	return i;
> -}
> -
> -static long vfio_unpin_pages(unsigned long pfn, long npage,
> -			     int prot, bool do_accounting)
> -{
> -	unsigned long unlocked = 0;
> -	long i;
> -
> -	for (i = 0; i < npage; i++)
> -		unlocked += put_pfn(pfn++, prot);
> -
> -	if (do_accounting)
> -		vfio_lock_acct(-unlocked);
> -
> -	return unlocked;
> -}
> -
>  static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
>  			    dma_addr_t iova, size_t *size)
>  {



Bharat Bhushan Sept. 26, 2013, 3:57 a.m. UTC | #2

> -----Original Message-----
> From: linux-pci-owner@vger.kernel.org [mailto:linux-pci-owner@vger.kernel.org]
> On Behalf Of Alex Williamson
> Sent: Wednesday, September 25, 2013 10:33 PM
> To: Bhushan Bharat-R65777
> Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org;
> linux-kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org;
> linux-pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421;
> iommu@lists.linux-foundation.org; Bhushan Bharat-R65777
> Subject: Re: [PATCH 6/7] vfio: moving some functions in common file
> 
> [...]
> 
> Please cleanup includes on both the source and target files.  You
> obviously don't need linux/pci.h here for one.

Will do.

> [...]
> 
> > +void vfio_lock_acct_bg(struct work_struct *work);
> 
> Does this need to be exposed?

No, I will make this, and the others that do not need to be exposed,
static to this file.

Thanks
-Bharat

> > +void vfio_lock_acct(long npage);
> > +bool is_invalid_reserved_pfn(unsigned long pfn);
> > +int put_pfn(unsigned long pfn, int prot);
> 
> Why are these exposed, they only seem to be used by functions in the new
> common file.
> 
> > +long vfio_pin_pages(unsigned long vaddr, long npage, int prot,
> > +		    unsigned long *pfn_base);
> > +long vfio_unpin_pages(unsigned long pfn, long npage,
> > +		      int prot, bool do_accounting);
> 
> Can we get by with just these two and vfio_lock_acct()?
> 
> [...]
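
The follow-up Bharat describes would presumably re-privatize the internal helpers, roughly like this (an illustrative sketch; the revised patch is not part of this thread):

@@ drivers/vfio/vfio_iommu_common.h @@
-void vfio_lock_acct_bg(struct work_struct *work);
 void vfio_lock_acct(long npage);
-bool is_invalid_reserved_pfn(unsigned long pfn);
-int put_pfn(unsigned long pfn, int prot);
@@ drivers/vfio/vfio_iommu_common.c @@
-void vfio_lock_acct_bg(struct work_struct *work)
+static void vfio_lock_acct_bg(struct work_struct *work)
@@ drivers/vfio/vfio_iommu_common.c @@
-bool is_invalid_reserved_pfn(unsigned long pfn)
+static bool is_invalid_reserved_pfn(unsigned long pfn)
@@ drivers/vfio/vfio_iommu_common.c @@
-int put_pfn(unsigned long pfn, int prot)
+static int put_pfn(unsigned long pfn, int prot)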

Patch

diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 72bfabc..c5792ec 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,4 +1,4 @@ 
 obj-$(CONFIG_VFIO) += vfio.o
-obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
-obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
+obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_common.o vfio_iommu_type1.o
+obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_common.o vfio_iommu_spapr_tce.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/vfio_iommu_common.c b/drivers/vfio/vfio_iommu_common.c
new file mode 100644
index 0000000..8bdc0ea
--- /dev/null
+++ b/drivers/vfio/vfio_iommu_common.c
@@ -0,0 +1,235 @@ 
+/*
+ * VFIO: Common code for vfio IOMMU support
+ *
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *     Author: Bharat Bhushan <bharat.bhushan@freescale.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Derived from original vfio:
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ * Author: Tom Lyon, pugs@cisco.com
+ */
+
+#include <linux/compat.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/pci.h>		/* pci_bus_type */
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/workqueue.h>
+
+static bool disable_hugepages;
+module_param_named(disable_hugepages,
+		   disable_hugepages, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(disable_hugepages,
+		 "Disable VFIO IOMMU support for IOMMU hugepages.");
+
+struct vwork {
+	struct mm_struct	*mm;
+	long			npage;
+	struct work_struct	work;
+};
+
+/* delayed decrement/increment for locked_vm */
+void vfio_lock_acct_bg(struct work_struct *work)
+{
+	struct vwork *vwork = container_of(work, struct vwork, work);
+	struct mm_struct *mm;
+
+	mm = vwork->mm;
+	down_write(&mm->mmap_sem);
+	mm->locked_vm += vwork->npage;
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	kfree(vwork);
+}
+
+void vfio_lock_acct(long npage)
+{
+	struct vwork *vwork;
+	struct mm_struct *mm;
+
+	if (!current->mm || !npage)
+		return; /* process exited or nothing to do */
+
+	if (down_write_trylock(&current->mm->mmap_sem)) {
+		current->mm->locked_vm += npage;
+		up_write(&current->mm->mmap_sem);
+		return;
+	}
+
+	/*
+	 * Couldn't get mmap_sem lock, so must setup to update
+	 * mm->locked_vm later. If locked_vm were atomic, we
+	 * wouldn't need this silliness
+	 */
+	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
+	if (!vwork)
+		return;
+	mm = get_task_mm(current);
+	if (!mm) {
+		kfree(vwork);
+		return;
+	}
+	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
+	vwork->mm = mm;
+	vwork->npage = npage;
+	schedule_work(&vwork->work);
+}
+
+/*
+ * Some mappings aren't backed by a struct page, for example an mmap'd
+ * MMIO range for our own or another device.  These use a different
+ * pfn conversion and shouldn't be tracked as locked pages.
+ */
+bool is_invalid_reserved_pfn(unsigned long pfn)
+{
+	if (pfn_valid(pfn)) {
+		bool reserved;
+		struct page *tail = pfn_to_page(pfn);
+		struct page *head = compound_trans_head(tail);
+		reserved = !!(PageReserved(head));
+		if (head != tail) {
+			/*
+			 * "head" is not a dangling pointer
+			 * (compound_trans_head takes care of that)
+			 * but the hugepage may have been split
+			 * from under us (and we may not hold a
+			 * reference count on the head page so it can
+			 * be reused before we run PageReferenced), so
+			 * we've to check PageTail before returning
+			 * what we just read.
+			 */
+			smp_rmb();
+			if (PageTail(tail))
+				return reserved;
+		}
+		return PageReserved(tail);
+	}
+
+	return true;
+}
+
+int put_pfn(unsigned long pfn, int prot)
+{
+	if (!is_invalid_reserved_pfn(pfn)) {
+		struct page *page = pfn_to_page(pfn);
+		if (prot & IOMMU_WRITE)
+			SetPageDirty(page);
+		put_page(page);
+		return 1;
+	}
+	return 0;
+}
+
+static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
+{
+	struct page *page[1];
+	struct vm_area_struct *vma;
+	int ret = -EFAULT;
+
+	if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
+		*pfn = page_to_pfn(page[0]);
+		return 0;
+	}
+
+	printk("via vma\n");
+	down_read(&current->mm->mmap_sem);
+
+	vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
+
+	if (vma && vma->vm_flags & VM_PFNMAP) {
+		*pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+		if (is_invalid_reserved_pfn(*pfn))
+			ret = 0;
+	}
+
+	up_read(&current->mm->mmap_sem);
+
+	return ret;
+}
+
+/*
+ * Attempt to pin pages.  We really don't want to track all the pfns and
+ * the iommu can only map chunks of consecutive pfns anyway, so get the
+ * first page and all consecutive pages with the same locking.
+ */
+long vfio_pin_pages(unsigned long vaddr, long npage,
+			   int prot, unsigned long *pfn_base)
+{
+	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	bool lock_cap = capable(CAP_IPC_LOCK);
+	long ret, i;
+
+	if (!current->mm)
+		return -ENODEV;
+
+	ret = vaddr_get_pfn(vaddr, prot, pfn_base);
+	if (ret)
+		return ret;
+
+	if (is_invalid_reserved_pfn(*pfn_base))
+		return 1;
+
+	if (!lock_cap && current->mm->locked_vm + 1 > limit) {
+		put_pfn(*pfn_base, prot);
+		pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
+			limit << PAGE_SHIFT);
+		return -ENOMEM;
+	}
+
+	if (unlikely(disable_hugepages)) {
+		vfio_lock_acct(1);
+		return 1;
+	}
+
+	/* Lock all the consecutive pages from pfn_base */
+	for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
+		unsigned long pfn = 0;
+
+		ret = vaddr_get_pfn(vaddr, prot, &pfn);
+		if (ret)
+			break;
+
+		if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
+			put_pfn(pfn, prot);
+			break;
+		}
+
+		if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
+			put_pfn(pfn, prot);
+			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
+				__func__, limit << PAGE_SHIFT);
+			break;
+		}
+	}
+
+	vfio_lock_acct(i);
+
+	return i;
+}
+
+long vfio_unpin_pages(unsigned long pfn, long npage,
+			     int prot, bool do_accounting)
+{
+	unsigned long unlocked = 0;
+	long i;
+
+	for (i = 0; i < npage; i++)
+		unlocked += put_pfn(pfn++, prot);
+
+	if (do_accounting)
+		vfio_lock_acct(-unlocked);
+
+	return unlocked;
+}
diff --git a/drivers/vfio/vfio_iommu_common.h b/drivers/vfio/vfio_iommu_common.h
new file mode 100644
index 0000000..4738391
--- /dev/null
+++ b/drivers/vfio/vfio_iommu_common.h
@@ -0,0 +1,30 @@ 
+/*
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef _VFIO_IOMMU_COMMON_H
+#define _VFIO_IOMMU_COMMON_H
+
+void vfio_lock_acct_bg(struct work_struct *work);
+void vfio_lock_acct(long npage);
+bool is_invalid_reserved_pfn(unsigned long pfn);
+int put_pfn(unsigned long pfn, int prot);
+long vfio_pin_pages(unsigned long vaddr, long npage, int prot,
+		    unsigned long *pfn_base);
+long vfio_unpin_pages(unsigned long pfn, long npage,
+		      int prot, bool do_accounting);
+#endif
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index a9807de..e9a58fa 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -37,6 +37,7 @@ 
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 #include <linux/workqueue.h>
+#include "vfio_iommu_common.h"
 
 #define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
@@ -48,12 +49,6 @@  module_param_named(allow_unsafe_interrupts,
 MODULE_PARM_DESC(allow_unsafe_interrupts,
 		 "Enable VFIO IOMMU support for on platforms without interrupt remapping support.");
 
-static bool disable_hugepages;
-module_param_named(disable_hugepages,
-		   disable_hugepages, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(disable_hugepages,
-		 "Disable VFIO IOMMU support for IOMMU hugepages.");
-
 struct vfio_iommu {
 	struct iommu_domain	*domain;
 	struct mutex		lock;
@@ -123,205 +118,6 @@  static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
 	rb_erase(&old->node, &iommu->dma_list);
 }
 
-struct vwork {
-	struct mm_struct	*mm;
-	long			npage;
-	struct work_struct	work;
-};
-
-/* delayed decrement/increment for locked_vm */
-static void vfio_lock_acct_bg(struct work_struct *work)
-{
-	struct vwork *vwork = container_of(work, struct vwork, work);
-	struct mm_struct *mm;
-
-	mm = vwork->mm;
-	down_write(&mm->mmap_sem);
-	mm->locked_vm += vwork->npage;
-	up_write(&mm->mmap_sem);
-	mmput(mm);
-	kfree(vwork);
-}
-
-static void vfio_lock_acct(long npage)
-{
-	struct vwork *vwork;
-	struct mm_struct *mm;
-
-	if (!current->mm || !npage)
-		return; /* process exited or nothing to do */
-
-	if (down_write_trylock(&current->mm->mmap_sem)) {
-		current->mm->locked_vm += npage;
-		up_write(&current->mm->mmap_sem);
-		return;
-	}
-
-	/*
-	 * Couldn't get mmap_sem lock, so must setup to update
-	 * mm->locked_vm later. If locked_vm were atomic, we
-	 * wouldn't need this silliness
-	 */
-	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
-	if (!vwork)
-		return;
-	mm = get_task_mm(current);
-	if (!mm) {
-		kfree(vwork);
-		return;
-	}
-	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
-	vwork->mm = mm;
-	vwork->npage = npage;
-	schedule_work(&vwork->work);
-}
-
-/*
- * Some mappings aren't backed by a struct page, for example an mmap'd
- * MMIO range for our own or another device.  These use a different
- * pfn conversion and shouldn't be tracked as locked pages.
- */
-static bool is_invalid_reserved_pfn(unsigned long pfn)
-{
-	if (pfn_valid(pfn)) {
-		bool reserved;
-		struct page *tail = pfn_to_page(pfn);
-		struct page *head = compound_trans_head(tail);
-		reserved = !!(PageReserved(head));
-		if (head != tail) {
-			/*
-			 * "head" is not a dangling pointer
-			 * (compound_trans_head takes care of that)
-			 * but the hugepage may have been split
-			 * from under us (and we may not hold a
-			 * reference count on the head page so it can
-			 * be reused before we run PageReferenced), so
-			 * we've to check PageTail before returning
-			 * what we just read.
-			 */
-			smp_rmb();
-			if (PageTail(tail))
-				return reserved;
-		}
-		return PageReserved(tail);
-	}
-
-	return true;
-}
-
-static int put_pfn(unsigned long pfn, int prot)
-{
-	if (!is_invalid_reserved_pfn(pfn)) {
-		struct page *page = pfn_to_page(pfn);
-		if (prot & IOMMU_WRITE)
-			SetPageDirty(page);
-		put_page(page);
-		return 1;
-	}
-	return 0;
-}
-
-static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
-{
-	struct page *page[1];
-	struct vm_area_struct *vma;
-	int ret = -EFAULT;
-
-	if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
-		*pfn = page_to_pfn(page[0]);
-		return 0;
-	}
-
-	down_read(&current->mm->mmap_sem);
-
-	vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
-
-	if (vma && vma->vm_flags & VM_PFNMAP) {
-		*pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-		if (is_invalid_reserved_pfn(*pfn))
-			ret = 0;
-	}
-
-	up_read(&current->mm->mmap_sem);
-
-	return ret;
-}
-
-/*
- * Attempt to pin pages.  We really don't want to track all the pfns and
- * the iommu can only map chunks of consecutive pfns anyway, so get the
- * first page and all consecutive pages with the same locking.
- */
-static long vfio_pin_pages(unsigned long vaddr, long npage,
-			   int prot, unsigned long *pfn_base)
-{
-	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	bool lock_cap = capable(CAP_IPC_LOCK);
-	long ret, i;
-
-	if (!current->mm)
-		return -ENODEV;
-
-	ret = vaddr_get_pfn(vaddr, prot, pfn_base);
-	if (ret)
-		return ret;
-
-	if (is_invalid_reserved_pfn(*pfn_base))
-		return 1;
-
-	if (!lock_cap && current->mm->locked_vm + 1 > limit) {
-		put_pfn(*pfn_base, prot);
-		pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
-			limit << PAGE_SHIFT);
-		return -ENOMEM;
-	}
-
-	if (unlikely(disable_hugepages)) {
-		vfio_lock_acct(1);
-		return 1;
-	}
-
-	/* Lock all the consecutive pages from pfn_base */
-	for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
-		unsigned long pfn = 0;
-
-		ret = vaddr_get_pfn(vaddr, prot, &pfn);
-		if (ret)
-			break;
-
-		if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
-			put_pfn(pfn, prot);
-			break;
-		}
-
-		if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
-			put_pfn(pfn, prot);
-			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
-				__func__, limit << PAGE_SHIFT);
-			break;
-		}
-	}
-
-	vfio_lock_acct(i);
-
-	return i;
-}
-
-static long vfio_unpin_pages(unsigned long pfn, long npage,
-			     int prot, bool do_accounting)
-{
-	unsigned long unlocked = 0;
-	long i;
-
-	for (i = 0; i < npage; i++)
-		unlocked += put_pfn(pfn++, prot);
-
-	if (do_accounting)
-		vfio_lock_acct(-unlocked);
-
-	return unlocked;
-}
-
 static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 			    dma_addr_t iova, size_t *size)
 {