diff mbox series

[RFCv2,01/36] iommu: Keep track of processes and PASIDs

Message ID 20171006133203.22803-2-jean-philippe.brucker@arm.com
State Not Applicable
Headers show
Series Process management for IOMMU + SVM for SMMUv3 | expand

Commit Message

Jean-Philippe Brucker Oct. 6, 2017, 1:31 p.m. UTC
IOMMU drivers need a way to bind Linux processes to devices. This is used
for Shared Virtual Memory (SVM), where devices support paging. In that
mode, DMA can directly target virtual addresses of a process.

Introduce boilerplate code for allocating process structures and binding
them to devices. Four operations are added to IOMMU drivers:

* process_alloc, process_free: to create an iommu_process structure and
  perform architecture-specific operations required to grab the process
  (for instance on ARM SMMU, pin down the CPU ASID). There is a single
  iommu_process structure per Linux process.

* process_attach: attach a process to a device. The IOMMU driver checks
  that the device is capable of sharing an address space with this
  process, and writes the PASID table entry to install the process page
  directory.

  Some IOMMU drivers (e.g. ARM SMMU and virtio-iommu) will have a single
  PASID table per domain, for convenience. Other can implement it
  differently but to help these drivers, process_attach and process_detach
  take a 'first' or 'last' parameter telling whether they need to
  install/remove the PASID entry or only send the required TLB
  invalidations.

* process_detach: detach a process from a device. The IOMMU driver removes
  the PASID table entry and invalidates the IOTLBs.

process_attach and process_detach operations are serialized with a
spinlock. At the moment it is global, but if we try to optimize it, the
core should at least prevent concurrent attach/detach on the same domain.
(so multi-level PASID table code can allocate tables lazily without having
to go through the io-pgtable concurrency nightmare). process_alloc can
sleep, but process_free must not (because we'll have to call it from
call_srcu.)

At the moment we use an IDR for allocating PASIDs and retrieving contexts.
We also use a single spinlock. These can be refined and optimized later (a
custom allocator will be needed for top-down PASID allocation).

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
---
 drivers/iommu/Kconfig         |  10 ++
 drivers/iommu/Makefile        |   1 +
 drivers/iommu/iommu-process.c | 225 ++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/iommu.c         |   1 +
 include/linux/iommu.h         |  24 +++++
 5 files changed, 261 insertions(+)
 create mode 100644 drivers/iommu/iommu-process.c

Comments

Sinan Kaya Oct. 20, 2017, 11:32 p.m. UTC | #1
few nits below.

> +/*
> + * Allocate a iommu_process structure for the given task.
> + *
> + * Ideally we shouldn't need the domain parameter, since iommu_process is
> + * system-wide, but we use it to retrieve the driver's allocation ops and a
> + * PASID range.
> + */
> +static struct iommu_process *
> +iommu_process_alloc(struct iommu_domain *domain, struct task_struct *task)
> +{
> +	int err;
> +	int pasid;
> +	struct iommu_process *process;
> +
> +	if (WARN_ON(!domain->ops->process_alloc || !domain->ops->process_free))
> +		return ERR_PTR(-ENODEV);
> +
> +	process = domain->ops->process_alloc(task);
> +	if (IS_ERR(process))
> +		return process;
> +	if (!process)
> +		return ERR_PTR(-ENOMEM);
> +
> +	process->pid		= get_task_pid(task, PIDTYPE_PID);
> +	process->release	= domain->ops->process_free;
> +	INIT_LIST_HEAD(&process->domains);
> +	kref_init(&process->kref);
> +
nit, I think you should place this check right after the pid assignment.

> +	if (!process->pid) {
> +		err = -EINVAL;
> +		goto err_free_process;
> +	}
> +
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&iommu_process_lock);
> +	pasid = idr_alloc_cyclic(&iommu_process_idr, process, domain->min_pasid,
> +				 domain->max_pasid + 1, GFP_ATOMIC);
> +	process->pasid = pasid;
> +	spin_unlock(&iommu_process_lock);
> +	idr_preload_end();
> +

nit, You can maybe return here if pasid is not negative.

> +	if (pasid < 0) {
> +		err = pasid;
> +		goto err_put_pid;
> +	}
> +
> +	return process;
> +
> +err_put_pid:
> +	put_pid(process->pid);
> +
> +err_free_process:
> +	domain->ops->process_free(process);
> +
> +	return ERR_PTR(err);
> +}
> +
> +static void iommu_process_release(struct kref *kref)
> +{
> +	struct iommu_process *process;
> +	void (*release)(struct iommu_process *);
> +
> +	assert_spin_locked(&iommu_process_lock);
> +
> +	process = container_of(kref, struct iommu_process, kref);

if we are concerned about things going wrong (assert above), we should
also add some pointer check here (WARN) for process and release pointers as well.

> +	release = process->release;
> +
> +	WARN_ON(!list_empty(&process->domains));
> +
> +	idr_remove(&iommu_process_idr, process->pasid);
> +	put_pid(process->pid);
> +	release(process);
> +}
> +
> +/*
> + * Returns non-zero if a reference to the process was successfully taken.
> + * Returns zero if the process is being freed and should not be used.
> + */
> +static int iommu_process_get_locked(struct iommu_process *process)
> +{
> +	assert_spin_locked(&iommu_process_lock);
> +
> +	if (process)
> +		return kref_get_unless_zero(&process->kref);
> +
> +	return 0;
> +}
> +
> +static void iommu_process_put_locked(struct iommu_process *process)
> +{
> +	assert_spin_locked(&iommu_process_lock);
> +
> +	kref_put(&process->kref, iommu_process_release);
> +}
> +
> +static int iommu_process_attach(struct iommu_domain *domain, struct device *dev,
> +				struct iommu_process *process)
> +{
> +	int err;
> +	int pasid = process->pasid;
> +	struct iommu_context *context;
> +
> +	if (WARN_ON(!domain->ops->process_attach || !domain->ops->process_detach))
> +		return -ENODEV;
> +
> +	if (pasid > domain->max_pasid || pasid < domain->min_pasid)
> +		return -ENOSPC;
> +
> +	context = kzalloc(sizeof(*context), GFP_KERNEL);
> +	if (!context)
> +		return -ENOMEM;
> +

devm_kzalloc maybe?

> +	context->process	= process;
> +	context->domain		= domain;
> +	refcount_set(&context->ref, 1);
> +
> +	spin_lock(&iommu_process_lock);
> +	err = domain->ops->process_attach(domain, dev, process, true);
> +	if (err) {
> +		kfree(context);
> +		spin_unlock(&iommu_process_lock);
> +		return err;
> +	}
> +
> +	list_add(&context->process_head, &process->domains);
> +	list_add(&context->domain_head, &domain->processes);
> +	spin_unlock(&iommu_process_lock);
> +
> +	return 0;
> +}
Yi Liu Oct. 23, 2017, 11:04 a.m. UTC | #2
Hi Jean,

> -----Original Message-----
> From: Jean-Philippe Brucker [mailto:jean-philippe.brucker@arm.com]
> Sent: Friday, October 6, 2017 9:31 PM
> To: linux-arm-kernel@lists.infradead.org; linux-pci@vger.kernel.org; linux-
> acpi@vger.kernel.org; devicetree@vger.kernel.org; iommu@lists.linux-
> foundation.org
> Cc: joro@8bytes.org; robh+dt@kernel.org; mark.rutland@arm.com;
> catalin.marinas@arm.com; will.deacon@arm.com; lorenzo.pieralisi@arm.com;
> hanjun.guo@linaro.org; sudeep.holla@arm.com; rjw@rjwysocki.net;
> lenb@kernel.org; robin.murphy@arm.com; bhelgaas@google.com;
> alex.williamson@redhat.com; tn@semihalf.com; liubo95@huawei.com;
> thunder.leizhen@huawei.com; xieyisheng1@huawei.com;
> gabriele.paoloni@huawei.com; nwatters@codeaurora.org; okaya@codeaurora.org;
> rfranz@cavium.com; dwmw2@infradead.org; jacob.jun.pan@linux.intel.com; Liu, Yi
> L <yi.l.liu@intel.com>; Raj, Ashok <ashok.raj@intel.com>; robdclark@gmail.com
> Subject: [RFCv2 PATCH 01/36] iommu: Keep track of processes and PASIDs
> 
> IOMMU drivers need a way to bind Linux processes to devices. This is used for
> Shared Virtual Memory (SVM), where devices support paging. In that mode, DMA can
> directly target virtual addresses of a process.
> 
> Introduce boilerplate code for allocating process structures and binding them to
> devices. Four operations are added to IOMMU drivers:
> 
> * process_alloc, process_free: to create an iommu_process structure and
>   perform architecture-specific operations required to grab the process
>   (for instance on ARM SMMU, pin down the CPU ASID). There is a single
>   iommu_process structure per Linux process.
> 
> * process_attach: attach a process to a device. The IOMMU driver checks
>   that the device is capable of sharing an address space with this
>   process, and writes the PASID table entry to install the process page
>   directory.
> 
>   Some IOMMU drivers (e.g. ARM SMMU and virtio-iommu) will have a single
>   PASID table per domain, for convenience. Other can implement it
>   differently but to help these drivers, process_attach and process_detach
>   take a 'first' or 'last' parameter telling whether they need to
>   install/remove the PASID entry or only send the required TLB
>   invalidations.
> 
> * process_detach: detach a process from a device. The IOMMU driver removes
>   the PASID table entry and invalidates the IOTLBs.
> 
> process_attach and process_detach operations are serialized with a spinlock. At the
> moment it is global, but if we try to optimize it, the core should at least prevent
> concurrent attach/detach on the same domain.
> (so multi-level PASID table code can allocate tables lazily without having to go
> through the io-pgtable concurrency nightmare). process_alloc can sleep, but
> process_free must not (because we'll have to call it from
> call_srcu.)
> 
> At the moment we use an IDR for allocating PASIDs and retrieving contexts.
> We also use a single spinlock. These can be refined and optimized later (a custom
> allocator will be needed for top-down PASID allocation).
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> ---
>  drivers/iommu/Kconfig         |  10 ++
>  drivers/iommu/Makefile        |   1 +
>  drivers/iommu/iommu-process.c | 225
> ++++++++++++++++++++++++++++++++++++++++++
>  drivers/iommu/iommu.c         |   1 +
>  include/linux/iommu.h         |  24 +++++
>  5 files changed, 261 insertions(+)
>  create mode 100644 drivers/iommu/iommu-process.c
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index
> f3a21343e636..1ea5c90e37be 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -74,6 +74,16 @@ config IOMMU_DMA
>  	select IOMMU_IOVA
>  	select NEED_SG_DMA_LENGTH
> 
> +config IOMMU_PROCESS
> +	bool "Process management API for the IOMMU"
> +	select IOMMU_API
> +	help
> +	  Enable process management for the IOMMU API. In systems that support
> +	  it, device drivers can bind processes to devices and share their page
> +	  tables using this API.
> +
> +	  If unsure, say N here.
> +
>  config FSL_PAMU
>  	bool "Freescale IOMMU support"
>  	depends on PCI
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index
> b910aea813a1..a2832edbfaa2 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -1,6 +1,7 @@
>  obj-$(CONFIG_IOMMU_API) += iommu.o
>  obj-$(CONFIG_IOMMU_API) += iommu-traces.o
>  obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
> +obj-$(CONFIG_IOMMU_PROCESS) += iommu-process.o
>  obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o diff --git
> a/drivers/iommu/iommu-process.c b/drivers/iommu/iommu-process.c new file
> mode 100644 index 000000000000..a7e5a1c94305
> --- /dev/null
> +++ b/drivers/iommu/iommu-process.c
> @@ -0,0 +1,225 @@
> +/*
> + * Track processes bound to devices
> + *
> + * This program is free software; you can redistribute it and/or modify
> +it
> + * under the terms of the GNU General Public License version 2 as
> +published
> + * by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
> +USA
> + *
> + * Copyright (C) 2017 ARM Ltd.
> + *
> + * Author: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>  */
> +
> +#include <linux/idr.h>
> +#include <linux/iommu.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +
> +/* Link between a domain and a process */ struct iommu_context {
> +	struct iommu_process	*process;
> +	struct iommu_domain	*domain;
> +
> +	struct list_head	process_head;
> +	struct list_head	domain_head;
> +
> +	/* Number of devices that use this context */
> +	refcount_t		ref;
> +};
> +
> +/*
> + * Because we're using an IDR, PASIDs are limited to 31 bits (the sign
> +bit is
> + * used for returning errors). In practice implementations will use at
> +most 20
> + * bits, which is the PCI limit.
> + */
> +static DEFINE_IDR(iommu_process_idr);
> +
> +/*
> + * For the moment this is an all-purpose lock. It serializes
> + * access/modifications to contexts (process-domain links),
> +access/modifications
> + * to the PASID IDR, and changes to process refcount as well.
> + */
> +static DEFINE_SPINLOCK(iommu_process_lock);
> +
> +/*
> + * Allocate a iommu_process structure for the given task.
> + *
> + * Ideally we shouldn't need the domain parameter, since iommu_process
> +is
> + * system-wide, but we use it to retrieve the driver's allocation ops
> +and a
> + * PASID range.
> + */
> +static struct iommu_process *
> +iommu_process_alloc(struct iommu_domain *domain, struct task_struct
> +*task) {
> +	int err;
> +	int pasid;
> +	struct iommu_process *process;
> +
> +	if (WARN_ON(!domain->ops->process_alloc || !domain->ops-
> >process_free))
> +		return ERR_PTR(-ENODEV);
> +
> +	process = domain->ops->process_alloc(task);
> +	if (IS_ERR(process))
> +		return process;
> +	if (!process)
> +		return ERR_PTR(-ENOMEM);
> +
> +	process->pid		= get_task_pid(task, PIDTYPE_PID);
> +	process->release	= domain->ops->process_free;
> +	INIT_LIST_HEAD(&process->domains);
> +	kref_init(&process->kref);
> +
> +	if (!process->pid) {
> +		err = -EINVAL;
> +		goto err_free_process;
> +	}
> +
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&iommu_process_lock);
> +	pasid = idr_alloc_cyclic(&iommu_process_idr, process, domain->min_pasid,
> +				 domain->max_pasid + 1, GFP_ATOMIC);
> +	process->pasid = pasid;

[Liu, Yi L] If I'm understanding well, here is managing the pasid allocation in iommu
layer instead of vendor iommu driver? Is there strong reason here? I think pasid
management may be better within vendor iommu driver as pasid management
could differ from vendor to vendor.

Regards,
Yi L
Jean-Philippe Brucker Oct. 23, 2017, 12:17 p.m. UTC | #3
On 23/10/17 12:04, Liu, Yi L wrote:
>> +	idr_preload(GFP_KERNEL);
>> +	spin_lock(&iommu_process_lock);
>> +	pasid = idr_alloc_cyclic(&iommu_process_idr, process, domain->min_pasid,
>> +				 domain->max_pasid + 1, GFP_ATOMIC);
>> +	process->pasid = pasid;
> 
> [Liu, Yi L] If I'm understanding well, here is managing the pasid allocation in iommu
> layer instead of vendor iommu driver? Is there strong reason here? I think pasid
> management may be better within vendor iommu driver as pasid management
> could differ from vendor to vendor.

But that's the thing, we're trying to abstract PASID and process
management to have it in the core, because there shouldn't be many
differences from vendor to vendor. This way we have the allocation code in
one place and vendor drivers don't have to copy paste it from other drivers.

It's just a global number within a range, so I don't think vendors will
have many different ways of designing it.

Thanks,
Jean
Ashok Raj Oct. 25, 2017, 6:05 p.m. UTC | #4
Hi Jean

On Mon, Oct 23, 2017 at 01:17:07PM +0100, Jean-Philippe Brucker wrote:
> On 23/10/17 12:04, Liu, Yi L wrote:
> >> +	idr_preload(GFP_KERNEL);
> >> +	spin_lock(&iommu_process_lock);
> >> +	pasid = idr_alloc_cyclic(&iommu_process_idr, process, domain->min_pasid,
> >> +				 domain->max_pasid + 1, GFP_ATOMIC);
> >> +	process->pasid = pasid;
> > 
> > [Liu, Yi L] If I'm understanding well, here is managing the pasid allocation in iommu
> > layer instead of vendor iommu driver? Is there strong reason here? I think pasid
> > management may be better within vendor iommu driver as pasid management
> > could differ from vendor to vendor.
> 
> But that's the thing, we're trying to abstract PASID and process
> management to have it in the core, because there shouldn't be many
> differences from vendor to vendor. This way we have the allocation code in
> one place and vendor drivers don't have to copy paste it from other drivers.

I think this makes sense for the native case and also in the vIOMMU
if the PASID tables and allocation are completely managed by the guest.

If the vIOMMU requires any co-ordination in how the PASID's are allocated
for guest devices there might need to be some control on how these are 
allocated that ultimately need to be managed by VMM/Physical IOMMU. For 
instance if the PASID space is sparse for e.g

if we make the PASID allocation as one of the ops, the IOMMU implementation
will choose the default function, or if it choose a differnt mechanism it would
have that flexibility.

Does this make sense?

Cheers,
Ashok

> 
> It's just a global number within a range, so I don't think vendors will
> have many different ways of designing it.
> 
> Thanks,
> Jean
> 
> 
> 
>
Jean-Philippe Brucker Oct. 30, 2017, 10:28 a.m. UTC | #5
On 25/10/17 19:05, Raj, Ashok wrote:
> Hi Jean
> 
> On Mon, Oct 23, 2017 at 01:17:07PM +0100, Jean-Philippe Brucker wrote:
>> On 23/10/17 12:04, Liu, Yi L wrote:
>>>> +	idr_preload(GFP_KERNEL);
>>>> +	spin_lock(&iommu_process_lock);
>>>> +	pasid = idr_alloc_cyclic(&iommu_process_idr, process, domain->min_pasid,
>>>> +				 domain->max_pasid + 1, GFP_ATOMIC);
>>>> +	process->pasid = pasid;
>>>
>>> [Liu, Yi L] If I'm understanding well, here is managing the pasid allocation in iommu
>>> layer instead of vendor iommu driver? Is there strong reason here? I think pasid
>>> management may be better within vendor iommu driver as pasid management
>>> could differ from vendor to vendor.
>>
>> But that's the thing, we're trying to abstract PASID and process
>> management to have it in the core, because there shouldn't be many
>> differences from vendor to vendor. This way we have the allocation code in
>> one place and vendor drivers don't have to copy paste it from other drivers.
> 
> I think this makes sense for the native case and also in the vIOMMU
> if the PASID tables and allocation are completely managed by the guest.
> 
> If the vIOMMU requires any co-ordination in how the PASID's are allocated
> for guest devices there might need to be some control on how these are 
> allocated that ultimately need to be managed by VMM/Physical IOMMU. For 
> instance if the PASID space is sparse for e.g

(I don't have your example)

> if we make the PASID allocation as one of the ops, the IOMMU implementation
> will choose the default function, or if it choose a differnt mechanism it would
> have that flexibility.
> 
> Does this make sense?

If the PASID space is sparse, maybe we can add a firmware or probe
mechanism to declare reserved PASIDs, like we have for reserved IOVAs,
that feeds into the core IOMMU driver. But I agree that we can always let
vendor drivers implement their own allocator if they need one in the
future. For the moment it can stay generic.

Thanks,
Jean
Jean-Philippe Brucker Nov. 2, 2017, 4:20 p.m. UTC | #6
Hi Sinan,

Sorry for the delay, thanks for reviewing this!

On 21/10/17 00:32, Sinan Kaya wrote:
> few nits below.
> 
>> +/*
>> + * Allocate a iommu_process structure for the given task.
>> + *
>> + * Ideally we shouldn't need the domain parameter, since iommu_process is
>> + * system-wide, but we use it to retrieve the driver's allocation ops and a
>> + * PASID range.
>> + */
>> +static struct iommu_process *
>> +iommu_process_alloc(struct iommu_domain *domain, struct task_struct *task)
>> +{
>> +	int err;
>> +	int pasid;
>> +	struct iommu_process *process;
>> +
>> +	if (WARN_ON(!domain->ops->process_alloc || !domain->ops->process_free))
>> +		return ERR_PTR(-ENODEV);
>> +
>> +	process = domain->ops->process_alloc(task);
>> +	if (IS_ERR(process))
>> +		return process;
>> +	if (!process)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	process->pid		= get_task_pid(task, PIDTYPE_PID);
>> +	process->release	= domain->ops->process_free;
>> +	INIT_LIST_HEAD(&process->domains);
>> +	kref_init(&process->kref);
>> +
> nit, I think you should place this check right after the pid assignment.

Sure

>> +	if (!process->pid) {
>> +		err = -EINVAL;
>> +		goto err_free_process;
>> +	}
>> +
>> +	idr_preload(GFP_KERNEL);
>> +	spin_lock(&iommu_process_lock);
>> +	pasid = idr_alloc_cyclic(&iommu_process_idr, process, domain->min_pasid,
>> +				 domain->max_pasid + 1, GFP_ATOMIC);
>> +	process->pasid = pasid;
>> +	spin_unlock(&iommu_process_lock);
>> +	idr_preload_end();
>> +
> 
> nit, You can maybe return here if pasid is not negative.

Ok

>> +	if (pasid < 0) {
>> +		err = pasid;
>> +		goto err_put_pid;
>> +	}
>> +
>> +	return process;
>> +
>> +err_put_pid:
>> +	put_pid(process->pid);
>> +
>> +err_free_process:
>> +	domain->ops->process_free(process);
>> +
>> +	return ERR_PTR(err);
>> +}
>> +
>> +static void iommu_process_release(struct kref *kref)
>> +{
>> +	struct iommu_process *process;
>> +	void (*release)(struct iommu_process *);
>> +
>> +	assert_spin_locked(&iommu_process_lock);
>> +
>> +	process = container_of(kref, struct iommu_process, kref);
> 
> if we are concerned about things going wrong (assert above), we should
> also add some pointer check here (WARN) for process and release pointers as well.

We can probably get rid of this assert, as any external users releasing
the process will have to go through iommu_process_put() which takes the
lock. process_alloc() ensures that release isn't NULL, and process should
always be valid here since we're being called from kref_put, but I should
check the value in process_put.

>> +	release = process->release;
>> +
>> +	WARN_ON(!list_empty(&process->domains));
>> +
>> +	idr_remove(&iommu_process_idr, process->pasid);
>> +	put_pid(process->pid);
>> +	release(process);
>> +}
>> +
>> +/*
>> + * Returns non-zero if a reference to the process was successfully taken.
>> + * Returns zero if the process is being freed and should not be used.
>> + */
>> +static int iommu_process_get_locked(struct iommu_process *process)
>> +{
>> +	assert_spin_locked(&iommu_process_lock);
>> +
>> +	if (process)
>> +		return kref_get_unless_zero(&process->kref);
>> +
>> +	return 0;
>> +}
>> +
>> +static void iommu_process_put_locked(struct iommu_process *process)
>> +{
>> +	assert_spin_locked(&iommu_process_lock);
>> +
>> +	kref_put(&process->kref, iommu_process_release);
>> +}
>> +
>> +static int iommu_process_attach(struct iommu_domain *domain, struct device *dev,
>> +				struct iommu_process *process)
>> +{
>> +	int err;
>> +	int pasid = process->pasid;
>> +	struct iommu_context *context;
>> +
>> +	if (WARN_ON(!domain->ops->process_attach || !domain->ops->process_detach))
>> +		return -ENODEV;
>> +
>> +	if (pasid > domain->max_pasid || pasid < domain->min_pasid)
>> +		return -ENOSPC;
>> +
>> +	context = kzalloc(sizeof(*context), GFP_KERNEL);
>> +	if (!context)
>> +		return -ENOMEM;
>> +
> 
> devm_kzalloc maybe?

I don't think we can ever leak contexts. Before the device is released, it
has to detach() from the domain, which will unbind from any process (call
unbind_dev_all()).

Thanks,
Jean
Bharat Kumar Gogada Nov. 8, 2017, 5:50 p.m. UTC | #7
Hi Jean,

+static struct iommu_process *
+iommu_process_alloc(struct iommu_domain *domain, struct task_struct 
+*task) {
+	int err;
+	int pasid;
+	struct iommu_process *process;
+
+	if (WARN_ON(!domain->ops->process_alloc || !domain->ops->process_free))
+		return ERR_PTR(-ENODEV);
+
+	process = domain->ops->process_alloc(task);
+	if (IS_ERR(process))
+		return process;
+	if (!process)
+		return ERR_PTR(-ENOMEM);
+
+	process->pid		= get_task_pid(task, PIDTYPE_PID);
+	process->release	= domain->ops->process_free;
+	INIT_LIST_HEAD(&process->domains);
+	kref_init(&process->kref);
+
+	if (!process->pid) {
+		err = -EINVAL;
+		goto err_free_process;
+	}
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&iommu_process_lock);
+	pasid = idr_alloc_cyclic(&iommu_process_idr, process, domain->min_pasid,
+				 domain->max_pasid + 1, GFP_ATOMIC);
If EP supports only one pasid; domain->min_pasid=1 and domain->max_pasid=0.
When idr_alloc_cyclic is called it invokes idr_get_free_cmn function where we have following condition. (Based on kernel 4.14-rc6)
	if (!radix_tree_tagged(root, IDR_FREE)) 
                start = max(start, maxindex + 1);
              if (start > max) 
		return ERR_PTR(-ENOSPC);
Here max is being assigned zero by the time this function is invoked, this value is based on domain->max_pasid.
This condition fails and ENOSPC is returned.
	
In this case even though hardware supports PASID, BIND flow fails.
Any reason why pasid allocation moved to idr allocations rather than bitmap allocations as in v1 patches ?

+	process->pasid = pasid;

Regards,
Bharat
Jean-philippe Brucker Nov. 9, 2017, 12:13 p.m. UTC | #8
Hi Bharat,

On 11/08/2017 05:50 PM, Bharat Kumar Gogada wrote:
> Hi Jean,
> 
> +static struct iommu_process *
> +iommu_process_alloc(struct iommu_domain *domain, struct task_struct
> +*task) {
> +       int err;
> +       int pasid;
> +       struct iommu_process *process;
> +
> +       if (WARN_ON(!domain->ops->process_alloc ||
> !domain->ops->process_free))
> +               return ERR_PTR(-ENODEV);
> +
> +       process = domain->ops->process_alloc(task);
> +       if (IS_ERR(process))
> +               return process;
> +       if (!process)
> +               return ERR_PTR(-ENOMEM);
> +
> +       process->pid            = get_task_pid(task, PIDTYPE_PID);
> +       process->release        = domain->ops->process_free;
> +       INIT_LIST_HEAD(&process->domains);
> +       kref_init(&process->kref);
> +
> +       if (!process->pid) {
> +               err = -EINVAL;
> +               goto err_free_process;
> +       }
> +
> +       idr_preload(GFP_KERNEL);
> +       spin_lock(&iommu_process_lock);
> +       pasid = idr_alloc_cyclic(&iommu_process_idr, process,
> domain->min_pasid,
> +                                domain->max_pasid + 1, GFP_ATOMIC);
> If EP supports only one pasid; domain->min_pasid=1 and domain->max_pasid=0.
> When idr_alloc_cyclic is called it invokes idr_get_free_cmn function
> where we have following condition. (Based on kernel 4.14-rc6)
>         if (!radix_tree_tagged(root, IDR_FREE))
>                 start = max(start, maxindex + 1);
>               if (start > max)
>                 return ERR_PTR(-ENOSPC);
> Here max is being assigned zero by the time this function is invoked,
> this value is based on domain->max_pasid.
> This condition fails and ENOSPC is returned.
>        
> In this case even though hardware supports PASID, BIND flow fails.

It should fail, since we're reserving PASID 0 for non-PASID transactions
with S1DSS=0b10. In addition, the SMMUv3 specification does not allow
using PASID with a single entry. See the description of S1CDMax in 5.2
Stream Table Entry:

"when this field is 0, the substreams of the STE are disabled and one CD
is available. (The minimum useful number of substreams is 2.) Any
transaction with a SubstreamID will be terminated with an abort and a
C_BAD_SUBSTREAMID event recorded."

> Any reason why pasid allocation moved to idr allocations rather than
> bitmap allocations as in v1 patches ?

Yes, idr provides a convenient way to quickly retrieve the context
associated with a PASID, when handling a fault. v1 had the allocation in a
bitmap and storing in a rb-tree. By using an idr we combine both and rely
on a well-tested infrastructure.

Note that in the future we might need to go back to handcrafting the PASID
allocation, but it will probably still be based on idr.

Thanks,
Jean
Jean-Philippe Brucker Nov. 9, 2017, 12:16 p.m. UTC | #9
Hi Bharat,

On 11/08/2017 05:50 PM, Bharat Kumar Gogada wrote:
> Hi Jean,
> 
> +static struct iommu_process *
> +iommu_process_alloc(struct iommu_domain *domain, struct task_struct
> +*task) {
> +       int err;
> +       int pasid;
> +       struct iommu_process *process;
> +
> +       if (WARN_ON(!domain->ops->process_alloc ||
> !domain->ops->process_free))
> +               return ERR_PTR(-ENODEV);
> +
> +       process = domain->ops->process_alloc(task);
> +       if (IS_ERR(process))
> +               return process;
> +       if (!process)
> +               return ERR_PTR(-ENOMEM);
> +
> +       process->pid            = get_task_pid(task, PIDTYPE_PID);
> +       process->release        = domain->ops->process_free;
> +       INIT_LIST_HEAD(&process->domains);
> +       kref_init(&process->kref);
> +
> +       if (!process->pid) {
> +               err = -EINVAL;
> +               goto err_free_process;
> +       }
> +
> +       idr_preload(GFP_KERNEL);
> +       spin_lock(&iommu_process_lock);
> +       pasid = idr_alloc_cyclic(&iommu_process_idr, process,
> domain->min_pasid,
> +                                domain->max_pasid + 1, GFP_ATOMIC);
> If EP supports only one pasid; domain->min_pasid=1 and domain->max_pasid=0.
> When idr_alloc_cyclic is called it invokes idr_get_free_cmn function
> where we have following condition. (Based on kernel 4.14-rc6)
>         if (!radix_tree_tagged(root, IDR_FREE))
>                 start = max(start, maxindex + 1);
>               if (start > max)
>                 return ERR_PTR(-ENOSPC);
> Here max is being assigned zero by the time this function is invoked,
> this value is based on domain->max_pasid.
> This condition fails and ENOSPC is returned.
>        
> In this case even though hardware supports PASID, BIND flow fails.

It should fail, since we're reserving PASID 0 for non-PASID transactions
with S1DSS=0b10. In addition, the SMMUv3 specification does not allow
using PASID with a single entry. See the description of S1CDMax in 5.2
Stream Table Entry:

"when this field is 0, the substreams of the STE are disabled and one CD
is available. (The minimum useful number of substreams is 2.) Any
transaction with a SubstreamID will be terminated with an abort and a
C_BAD_SUBSTREAMID event recorded."

> Any reason why pasid allocation moved to idr allocations rather than
> bitmap allocations as in v1 patches ?

Yes, idr provides a convenient way to quickly retrieve the context
associated with a PASID, when handling a fault. v1 had the allocation in a
bitmap and storing in a rb-tree. By using an idr we combine both and rely
on a well-tested infrastructure.

Note that in the future we might need to go back to handcrafting the PASID
allocation, but it will probably still be based on idr.

Thanks,
Jean
Bharat Kumar Gogada Nov. 13, 2017, 11:06 a.m. UTC | #10
>        
> In this case even though hardware supports PASID, BIND flow fails.

It should fail, since we're reserving PASID 0 for non-PASID transactions with S1DSS=0b10. In addition, the SMMUv3 specification does not allow using PASID with a single entry. See the description of S1CDMax in 5.2 Stream Table Entry:

"when this field is 0, the substreams of the STE are disabled and one CD is available. (The minimum useful number of substreams is 2.) Any transaction with a SubstreamID will be terminated with an abort and a C_BAD_SUBSTREAMID event recorded."

> Any reason why pasid allocation moved to idr allocations rather than 
> bitmap allocations as in v1 patches ?

Yes, idr provides a convenient way to quickly retrieve the context associated with a PASID, when handling a fault. v1 had the allocation in a bitmap and storing in a rb-tree. By using an idr we combine both and rely on a well-tested infrastructure.

Note that in the future we might need to go back to handcrafting the PASID allocation, but it will probably still be based on idr.

Thanks for the clarification, Jean.
Bob Liu Nov. 22, 2017, 3:15 a.m. UTC | #11
Hey Jean,

On 2017/10/6 21:31, Jean-Philippe Brucker wrote:
> IOMMU drivers need a way to bind Linux processes to devices. This is used
> for Shared Virtual Memory (SVM), where devices support paging. In that
> mode, DMA can directly target virtual addresses of a process.
> 
> Introduce boilerplate code for allocating process structures and binding
> them to devices. Four operations are added to IOMMU drivers:
> 
> * process_alloc, process_free: to create an iommu_process structure and
>   perform architecture-specific operations required to grab the process
>   (for instance on ARM SMMU, pin down the CPU ASID). There is a single
>   iommu_process structure per Linux process.
> 

I'm a bit confused here.
The original meaning of iommu_domain is a virtual addrspace defined by a set of io page table.
(fix me if I misunderstood).
Then what's the meaning of iommu_domain and iommu_process after introducing iommu_process?
Could you consider document these concepts? 

Thanks,
Liubo
Jean-Philippe Brucker Nov. 22, 2017, 1:04 p.m. UTC | #12
On 22/11/17 03:15, Bob Liu wrote:
> Hey Jean,
> 
> On 2017/10/6 21:31, Jean-Philippe Brucker wrote:
>> IOMMU drivers need a way to bind Linux processes to devices. This is used
>> for Shared Virtual Memory (SVM), where devices support paging. In that
>> mode, DMA can directly target virtual addresses of a process.
>>
>> Introduce boilerplate code for allocating process structures and binding
>> them to devices. Four operations are added to IOMMU drivers:
>>
>> * process_alloc, process_free: to create an iommu_process structure and
>>   perform architecture-specific operations required to grab the process
>>   (for instance on ARM SMMU, pin down the CPU ASID). There is a single
>>   iommu_process structure per Linux process.
>>
> 
> I'm a bit confused here.
> The original meaning of iommu_domain is a virtual addrspace defined by a set of io page table.
> (fix me if I misunderstood).

iommu_domain can also be seen as a logical partition of devices that share
the same address spaces (the concept comes from AMD and Intel IOMMU
domains, I believe). Without PASIDs it was a single address space, with
PASIDs it can have multiple address spaces.

> Then what's the meaning of iommu_domain and iommu_process after introducing iommu_process?
> Could you consider document these concepts? 

iommu_process is used to keep track of Linux process address spaces. I'll
rename it to io_mm in next version, to make it clear that it doesn't
represent a Linux task but an mm_struct instead. However the
implementation stays pretty much identical. A domain can be associated to
multiple io_mm, and an io_mm can be associated to multiple domains.

In the IOMMU architectures I know, PASID is implemented like this. You
have the device tables (stream tables on SMMU), pointing to PASID tables
(context descriptor tables on SMMU). In the following diagram,

                    .->+--------+
                   / 0 |        |------ io_pgtable
                  /    +--------+
                 /   1 |        |------ io_mm->mm X
    +--------+  /      +--------+
  0 |      A |-'     2 |        |-.
    +--------+         +--------+  \
  1 |        |       3 |        |   \
    +--------+         +--------+    -- io_mm->mm Y
  2 |      B |--.     PASID tables  /
    +--------+   \                 |
  3 |      B |----+--->+--------+  |
    +--------+   /   0 |        |- | -- io_pgtable
  4 |      B |--'      +--------+  |
    +--------+       1 |        |  |
  Device tables        +--------+  |
                     2 |        |--'
                       +--------+
                     3 |        |------ io_mm->priv io_pgtable
                       +--------+
                      PASID tables

* Device 0 (e.g. PCI 0000:00:00.0) is in domain A.
* Devices 2, 3 and 4 are in domain B.
* Domain A has the top set of PASID tables.
* Domain B has the bottom set of PASID tables.

* Domain A is bound to process address space X.
  -> Device 0 can access X with PASID 1.
* Both domains A and B are bound to process address space Y.
  -> Devices 0, 2, 3 and 4 can access Y with PASID 2

* PASID 0 is special on Arm SMMU (with S1DSS=0b10). It will always be
  reserved for classic DMA map/unmap. Even for hypothetical devices that
  don't support non-pasid transactions, I'd like to keep this convention.
  It should be quite useful for device drivers to have PASID 0 available
  with DMA map/unmap.

* When introducing "private" PASID address spaces (that many are asking
  for), which are backed by a set of io-pgtable and map/unmap ops, I
  suppose they would reuse the io_mm structure. In this example PASID 3 is
  associated to a private address space and not backed by an mm. Since the
  PASID space is global, PASID 3 won't be available for any other domain.

Does this clarify the current design, or is it just more confusing?

Thanks,
Jean
Bob Liu Nov. 23, 2017, 10:33 a.m. UTC | #13
On 2017/11/22 21:04, Jean-Philippe Brucker wrote:
> On 22/11/17 03:15, Bob Liu wrote:
>> Hey Jean,
>>
>> On 2017/10/6 21:31, Jean-Philippe Brucker wrote:
>>> IOMMU drivers need a way to bind Linux processes to devices. This is used
>>> for Shared Virtual Memory (SVM), where devices support paging. In that
>>> mode, DMA can directly target virtual addresses of a process.
>>>
>>> Introduce boilerplate code for allocating process structures and binding
>>> them to devices. Four operations are added to IOMMU drivers:
>>>
>>> * process_alloc, process_free: to create an iommu_process structure and
>>>   perform architecture-specific operations required to grab the process
>>>   (for instance on ARM SMMU, pin down the CPU ASID). There is a single
>>>   iommu_process structure per Linux process.
>>>
>>
>> I'm a bit confused here.
>> The original meaning of iommu_domain is a virtual addrspace defined by a set of io page table.
>> (fix me if I misunderstood).
> 
> iommu_domain can also be seen as a logical partition of devices that share
> the same address spaces (the concept comes from AMD and Intel IOMMU
> domains, I believe). Without PASIDs it was a single address space, with
> PASIDs it can have multiple address spaces.
> 
>> Then what's the meaning of iommu_domain and iommu_process after introducing iommu_process?
>> Could you consider document these concepts? 
> 
> iommu_process is used to keep track of Linux process address spaces. I'll
> rename it to io_mm in next version, to make it clear that it doesn't
> represent a Linux task but an mm_struct instead. However the
> implementation stays pretty much identical. A domain can be associated to
> multiple io_mm, and an io_mm can be associated to multiple domains.
> 
> In the IOMMU architectures I know, PASID is implemented like this. You
> have the device tables (stream tables on SMMU), pointing to PASID tables
> (context descriptor tables on SMMU). In the following diagram,
> 
>                     .->+--------+
>                    / 0 |        |------ io_pgtable
>                   /    +--------+
>                  /   1 |        |------ io_mm->mm X
>     +--------+  /      +--------+
>   0 |      A |-'     2 |        |-.
>     +--------+         +--------+  \
>   1 |        |       3 |        |   \
>     +--------+         +--------+    -- io_mm->mm Y
>   2 |      B |--.     PASID tables  /
>     +--------+   \                 |
>   3 |      B |----+--->+--------+  |
>     +--------+   /   0 |        |- | -- io_pgtable
>   4 |      B |--'      +--------+  |
>     +--------+       1 |        |  |
>   Device tables        +--------+  |
>                      2 |        |--'
>                        +--------+
>                      3 |        |------ io_mm->priv io_pgtable
>                        +--------+
>                       PASID tables
> 
> * Device 0 (e.g. PCI 0000:00:00.0) is in domain A.
> * Devices 2, 3 and 4 are in domain B.
> * Domain A has the top set of PASID tables.
> * Domain B has the bottom set of PASID tables.
> 
> * Domain A is bound to process address space X.
>   -> Device 0 can access X with PASID 1.
> * Both domains A and B are bound to process address space Y.
>   -> Devices 0, 2, 3 and 4 can access Y with PASID 2
> 
> * PASID 0 is special on Arm SMMU (with S1DSS=0b10). It will always be
>   reserved for classic DMA map/unmap. Even for hypothetical devices that
>   don't support non-pasid transactions, I'd like to keep this convention.
>   It should be quite useful for device drivers to have PASID 0 available
>   with DMA map/unmap.
> 
> * When introducing "private" PASID address spaces (that many are asking
>   for), which are backed by a set of io-pgtable and map/unmap ops, I
>   suppose they would reuse the io_mm structure. In this example PASID 3 is
>   associated to a private address space and not backed by an mm. Since the
>   PASID space is global, PASID 3 won't be available for any other domain.
> 
> Does this clarify the current design, or is it just more confusing?
> 

It's very helpful, thank you very much!

Regards,
Liubo
diff mbox series

Patch

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f3a21343e636..1ea5c90e37be 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -74,6 +74,16 @@  config IOMMU_DMA
 	select IOMMU_IOVA
 	select NEED_SG_DMA_LENGTH
 
+config IOMMU_PROCESS
+	bool "Process management API for the IOMMU"
+	select IOMMU_API
+	help
+	  Enable process management for the IOMMU API. In systems that support
+	  it, device drivers can bind processes to devices and share their page
+	  tables using this API.
+
+	  If unsure, say N here.
+
 config FSL_PAMU
 	bool "Freescale IOMMU support"
 	depends on PCI
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index b910aea813a1..a2832edbfaa2 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,7 @@ 
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
+obj-$(CONFIG_IOMMU_PROCESS) += iommu-process.o
 obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
diff --git a/drivers/iommu/iommu-process.c b/drivers/iommu/iommu-process.c
new file mode 100644
index 000000000000..a7e5a1c94305
--- /dev/null
+++ b/drivers/iommu/iommu-process.c
@@ -0,0 +1,225 @@ 
+/*
+ * Track processes bound to devices
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * Author: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+ */
+
+#include <linux/idr.h>
+#include <linux/iommu.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/* Link between a domain and a process */
+struct iommu_context {
+	struct iommu_process	*process;
+	struct iommu_domain	*domain;
+
+	struct list_head	process_head;
+	struct list_head	domain_head;
+
+	/* Number of devices that use this context */
+	refcount_t		ref;
+};
+
+/*
+ * Because we're using an IDR, PASIDs are limited to 31 bits (the sign bit is
+ * used for returning errors). In practice implementations will use at most 20
+ * bits, which is the PCI limit.
+ */
+static DEFINE_IDR(iommu_process_idr);
+
+/*
+ * For the moment this is an all-purpose lock. It serializes
+ * access/modifications to contexts (process-domain links), access/modifications
+ * to the PASID IDR, and changes to process refcount as well.
+ */
+static DEFINE_SPINLOCK(iommu_process_lock);
+
+/*
+ * Allocate a iommu_process structure for the given task.
+ *
+ * Ideally we shouldn't need the domain parameter, since iommu_process is
+ * system-wide, but we use it to retrieve the driver's allocation ops and a
+ * PASID range.
+ */
+static struct iommu_process *
+iommu_process_alloc(struct iommu_domain *domain, struct task_struct *task)
+{
+	int err;
+	int pasid;
+	struct iommu_process *process;
+
+	if (WARN_ON(!domain->ops->process_alloc || !domain->ops->process_free))
+		return ERR_PTR(-ENODEV);
+
+	process = domain->ops->process_alloc(task);
+	if (IS_ERR(process))
+		return process;
+	if (!process)
+		return ERR_PTR(-ENOMEM);
+
+	process->pid		= get_task_pid(task, PIDTYPE_PID);
+	process->release	= domain->ops->process_free;
+	INIT_LIST_HEAD(&process->domains);
+	kref_init(&process->kref);
+
+	if (!process->pid) {
+		err = -EINVAL;
+		goto err_free_process;
+	}
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&iommu_process_lock);
+	pasid = idr_alloc_cyclic(&iommu_process_idr, process, domain->min_pasid,
+				 domain->max_pasid + 1, GFP_ATOMIC);
+	process->pasid = pasid;
+	spin_unlock(&iommu_process_lock);
+	idr_preload_end();
+
+	if (pasid < 0) {
+		err = pasid;
+		goto err_put_pid;
+	}
+
+	return process;
+
+err_put_pid:
+	put_pid(process->pid);
+
+err_free_process:
+	domain->ops->process_free(process);
+
+	return ERR_PTR(err);
+}
+
+static void iommu_process_release(struct kref *kref)
+{
+	struct iommu_process *process;
+	void (*release)(struct iommu_process *);
+
+	assert_spin_locked(&iommu_process_lock);
+
+	process = container_of(kref, struct iommu_process, kref);
+	release = process->release;
+
+	WARN_ON(!list_empty(&process->domains));
+
+	idr_remove(&iommu_process_idr, process->pasid);
+	put_pid(process->pid);
+	release(process);
+}
+
+/*
+ * Returns non-zero if a reference to the process was successfully taken.
+ * Returns zero if the process is being freed and should not be used.
+ */
+static int iommu_process_get_locked(struct iommu_process *process)
+{
+	assert_spin_locked(&iommu_process_lock);
+
+	if (process)
+		return kref_get_unless_zero(&process->kref);
+
+	return 0;
+}
+
+static void iommu_process_put_locked(struct iommu_process *process)
+{
+	assert_spin_locked(&iommu_process_lock);
+
+	kref_put(&process->kref, iommu_process_release);
+}
+
+static int iommu_process_attach(struct iommu_domain *domain, struct device *dev,
+				struct iommu_process *process)
+{
+	int err;
+	int pasid = process->pasid;
+	struct iommu_context *context;
+
+	if (WARN_ON(!domain->ops->process_attach || !domain->ops->process_detach))
+		return -ENODEV;
+
+	if (pasid > domain->max_pasid || pasid < domain->min_pasid)
+		return -ENOSPC;
+
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return -ENOMEM;
+
+	context->process	= process;
+	context->domain		= domain;
+	refcount_set(&context->ref, 1);
+
+	spin_lock(&iommu_process_lock);
+	err = domain->ops->process_attach(domain, dev, process, true);
+	if (err) {
+		kfree(context);
+		spin_unlock(&iommu_process_lock);
+		return err;
+	}
+
+	list_add(&context->process_head, &process->domains);
+	list_add(&context->domain_head, &domain->processes);
+	spin_unlock(&iommu_process_lock);
+
+	return 0;
+}
+
+static void iommu_context_free(struct iommu_context *context)
+{
+	assert_spin_locked(&iommu_process_lock);
+
+	if (WARN_ON(!context->process || !context->domain))
+		return;
+
+	list_del(&context->process_head);
+	list_del(&context->domain_head);
+	iommu_process_put_locked(context->process);
+
+	kfree(context);
+}
+
+/* Attach an existing context to the device */
+static int iommu_process_attach_locked(struct iommu_context *context,
+				       struct device *dev)
+{
+	assert_spin_locked(&iommu_process_lock);
+
+	refcount_inc(&context->ref);
+	return context->domain->ops->process_attach(context->domain, dev,
+						    context->process, false);
+}
+
+/* Detach device from context and release it if necessary */
+static void iommu_process_detach_locked(struct iommu_context *context,
+					struct device *dev)
+{
+	bool last = false;
+	struct iommu_domain *domain = context->domain;
+
+	assert_spin_locked(&iommu_process_lock);
+
+	if (refcount_dec_and_test(&context->ref))
+		last = true;
+
+	domain->ops->process_detach(domain, dev, context->process, last);
+
+	if (last)
+		iommu_context_free(context);
+}
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3de5c0bcb5cc..b2b34cf7c978 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1264,6 +1264,7 @@  static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
 	domain->type = type;
 	/* Assume all sizes by default; the driver may override this later */
 	domain->pgsize_bitmap  = bus->iommu_ops->pgsize_bitmap;
+	INIT_LIST_HEAD(&domain->processes);
 
 	return domain;
 }
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 41b8c5757859..3978dc094706 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -94,6 +94,19 @@  struct iommu_domain {
 	void *handler_token;
 	struct iommu_domain_geometry geometry;
 	void *iova_cookie;
+
+	unsigned int min_pasid, max_pasid;
+	struct list_head processes;
+};
+
+struct iommu_process {
+	struct pid		*pid;
+	int			pasid;
+	struct list_head	domains;
+	struct kref		kref;
+
+	/* Release callback for this process */
+	void (*release)(struct iommu_process *process);
 };
 
 enum iommu_cap {
@@ -164,6 +177,11 @@  struct iommu_resv_region {
  * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
+ * @process_alloc: allocate iommu process
+ * @process_free: free iommu process
+ * @process_attach: attach iommu process to a domain
+ * @process_detach: detach iommu process from a domain. Remove PASID entry and
+ *                  flush associated TLB entries.
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @map_sg: map a scatter-gather list of physically contiguous memory chunks
@@ -197,6 +215,12 @@  struct iommu_ops {
 
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
 	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+	struct iommu_process *(*process_alloc)(struct task_struct *task);
+	void (*process_free)(struct iommu_process *process);
+	int (*process_attach)(struct iommu_domain *domain, struct device *dev,
+			      struct iommu_process *process, bool first);
+	void (*process_detach)(struct iommu_domain *domain, struct device *dev,
+			       struct iommu_process *process, bool last);
 	int (*map)(struct iommu_domain *domain, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot);
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,