
[v3,07/12] soc: aspeed: xdma: Add user interface

Message ID: 1576681778-18737-8-git-send-email-eajames@linux.ibm.com
State: Changes Requested, archived
Series: aspeed: Add SCU interrupt controller and XDMA engine drivers

Commit Message

Eddie James Dec. 18, 2019, 3:09 p.m. UTC
This commit adds a miscdevice to provide a user interface to the XDMA
engine. The interface provides the write operation to start DMA
operations. The DMA parameters are passed as the data to the write call.
The actual data to transfer is NOT passed through write. Note that both
directions of DMA operation are accomplished through the write command;
BMC to host and host to BMC.

The XDMA driver reserves an area of physical memory for DMA operations,
as the XDMA engine is restricted to accessing certain physical memory
areas on some platforms. This memory forms a pool from which users can
allocate pages for their usage with calls to mmap. The space allocated
by a client will be the space used in the DMA operation. For an
"upstream" (BMC to host) operation, the data in the client's area will
be transferred to the host. For a "downstream" (host to BMC) operation,
the host data will be placed in the client's memory area.

Poll is also provided in order to determine when the DMA operation is
complete for non-blocking IO.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
---
Changes since v2:
 - Rework commit message to talk about VGA memory less
 - Remove user reset functionality
 - Clean up sanity checks in aspeed_xdma_write()
 - Wait for transfer complete in the vm area close function

 drivers/soc/aspeed/aspeed-xdma.c | 205 ++++++++++++++++++++++++++++++-
 1 file changed, 203 insertions(+), 2 deletions(-)

Comments

Andrew Jeffery Dec. 19, 2019, 1:19 a.m. UTC | #1
On Thu, 19 Dec 2019, at 01:39, Eddie James wrote:
> This commit adds a miscdevice to provide a user interface to the XDMA
> engine. The interface provides the write operation to start DMA
> operations. The DMA parameters are passed as the data to the write call.
> The actual data to transfer is NOT passed through write. Note that both
> directions of DMA operation are accomplished through the write command;
> BMC to host and host to BMC.
> 
> The XDMA driver reserves an area of physical memory for DMA operations,
> as the XDMA engine is restricted to accessing certain physical memory
> areas on some platforms. This memory forms a pool from which users can
> allocate pages for their usage with calls to mmap. The space allocated
> by a client will be the space used in the DMA operation. For an
> "upstream" (BMC to host) operation, the data in the client's area will
> be transferred to the host. For a "downstream" (host to BMC) operation,
> the host data will be placed in the client's memory area.
> 
> Poll is also provided in order to determine when the DMA operation is
> complete for non-blocking IO.
> 
> Signed-off-by: Eddie James <eajames@linux.ibm.com>
> ---
> Changes since v2:
>  - Rework commit message to talk about VGA memory less
>  - Remove user reset functionality
>  - Clean up sanity checks in aspeed_xdma_write()
>  - Wait for transfer complete in the vm area close function
> 
>  drivers/soc/aspeed/aspeed-xdma.c | 205 ++++++++++++++++++++++++++++++-
>  1 file changed, 203 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/soc/aspeed/aspeed-xdma.c b/drivers/soc/aspeed/aspeed-xdma.c
> index cb94adf798b1..e844937dc925 100644
> --- a/drivers/soc/aspeed/aspeed-xdma.c
> +++ b/drivers/soc/aspeed/aspeed-xdma.c
> @@ -13,6 +13,7 @@
>  #include <linux/io.h>
>  #include <linux/jiffies.h>
>  #include <linux/mfd/syscon.h>
> +#include <linux/miscdevice.h>
>  #include <linux/module.h>
>  #include <linux/mutex.h>
>  #include <linux/of_device.h>
> @@ -201,6 +202,8 @@ struct aspeed_xdma {
>  	struct clk *clock;
>  	struct reset_control *reset;
>  
> +	/* file_lock serializes reads of current_client */
> +	struct mutex file_lock;

I wonder whether start_lock can serve this purpose.

>  	/* client_lock protects error and in_progress of the client */
>  	spinlock_t client_lock;
>  	struct aspeed_xdma_client *current_client;
> @@ -223,6 +226,8 @@ struct aspeed_xdma {
>  	void __iomem *mem_virt;
>  	dma_addr_t cmdq_phys;
>  	struct gen_pool *pool;
> +
> +	struct miscdevice misc;
>  };
>  
>  struct aspeed_xdma_client {
> @@ -522,6 +527,185 @@ static irqreturn_t aspeed_xdma_pcie_irq(int irq, void *arg)
>  	return IRQ_HANDLED;
>  }
>  
> +static ssize_t aspeed_xdma_write(struct file *file, const char __user *buf,
> +				 size_t len, loff_t *offset)
> +{
> +	int rc;
> +	struct aspeed_xdma_op op;
> +	struct aspeed_xdma_client *client = file->private_data;
> +	struct aspeed_xdma *ctx = client->ctx;
> +
> +	if (len != sizeof(op))
> +		return -EINVAL;
> +
> +	rc = copy_from_user(&op, buf, len);
> +	if (rc)
> +		return rc;
> +
> +	if (!op.len || op.len > client->size ||
> +	    op.direction > ASPEED_XDMA_DIRECTION_UPSTREAM)
> +		return -EINVAL;
> +
> +	if (file->f_flags & O_NONBLOCK) {
> +		if (!mutex_trylock(&ctx->file_lock))
> +			return -EAGAIN;
> +
> +		if (ctx->current_client) {

Should be tested under client_lock for consistency with the previous patch,
though perhaps you could use READ_ONCE()?

> +			mutex_unlock(&ctx->file_lock);
> +			return -EBUSY;
> +		}
> +	} else {
> +		mutex_lock(&ctx->file_lock);
> +
> +		rc = wait_event_interruptible(ctx->wait, !ctx->current_client);
> +		if (rc) {
> +			mutex_unlock(&ctx->file_lock);
> +			return -EINTR;
> +		}
> +	}
> +
> +	aspeed_xdma_start(ctx, &op, client->phys, client);
> +
> +	mutex_unlock(&ctx->file_lock);

Shouldn't we lift start_lock out of aspeed_xdma_start() and use that here
instead of file_lock? I think that would mean that we could remove
file_lock.

> +
> +	if (!(file->f_flags & O_NONBLOCK)) {
> +		rc = wait_event_interruptible(ctx->wait, !client->in_progress);
> +		if (rc)
> +			return -EINTR;
> +
> +		if (client->error)
> +			return -EIO;
> +	}
> +
> +	return len;
> +}
> +
> +static __poll_t aspeed_xdma_poll(struct file *file,
> +				 struct poll_table_struct *wait)
> +{
> +	__poll_t mask = 0;
> +	__poll_t req = poll_requested_events(wait);
> +	struct aspeed_xdma_client *client = file->private_data;
> +	struct aspeed_xdma *ctx = client->ctx;
> +
> +	if (req & (EPOLLIN | EPOLLRDNORM)) {
> +		if (client->in_progress)
> +			poll_wait(file, &ctx->wait, wait);
> +
> +		if (!client->in_progress) {
> +			if (client->error)
> +				mask |= EPOLLERR;
> +			else
> +				mask |= EPOLLIN | EPOLLRDNORM;
> +		}
> +	}
> +
> +	if (req & (EPOLLOUT | EPOLLWRNORM)) {
> +		if (ctx->current_client)
> +			poll_wait(file, &ctx->wait, wait);
> +
> +		if (!ctx->current_client)
> +			mask |= EPOLLOUT | EPOLLWRNORM;
> +	}
> +
> +	return mask;
> +}
> +
> +static void aspeed_xdma_vma_close(struct vm_area_struct *vma)
> +{
> +	int rc;
> +	struct aspeed_xdma_client *client = vma->vm_private_data;
> +
> +	rc = wait_event_interruptible(client->ctx->wait, !client->in_progress);
> +	if (rc)
> +		return;
> +
> +	gen_pool_free(client->ctx->pool, (unsigned long)client->virt,
> +		      client->size);
> +
> +	client->virt = NULL;
> +	client->phys = 0;
> +	client->size = 0;
> +}
> +
> +static const struct vm_operations_struct aspeed_xdma_vm_ops = {
> +	.close =	aspeed_xdma_vma_close,
> +};
> +
> +static int aspeed_xdma_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +	int rc;
> +	struct aspeed_xdma_client *client = file->private_data;
> +	struct aspeed_xdma *ctx = client->ctx;
> +
> +	/* restrict file to one mapping */
> +	if (client->size)
> +		return -EBUSY;
> +
> +	client->size = vma->vm_end - vma->vm_start;
> +	client->virt = gen_pool_dma_alloc(ctx->pool, client->size,
> +					  &client->phys);
> +	if (!client->virt) {
> +		client->phys = 0;
> +		client->size = 0;
> +		return -ENOMEM;
> +	}
> +
> +	vma->vm_pgoff = (client->phys - ctx->mem_phys) >> PAGE_SHIFT;
> +	vma->vm_ops = &aspeed_xdma_vm_ops;
> +	vma->vm_private_data = client;
> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +
> +	rc = io_remap_pfn_range(vma, vma->vm_start, client->phys >> PAGE_SHIFT,
> +				client->size, vma->vm_page_prot);
> +	if (rc) {

Probably worth a dev_warn() here so we know what happened?

> +		gen_pool_free(ctx->pool, (unsigned long)client->virt,
> +			      client->size);
> +
> +		client->virt = NULL;
> +		client->phys = 0;
> +		client->size = 0;
> +		return rc;
> +	}
> +
> +	dev_dbg(ctx->dev, "mmap: v[%08lx] to p[%08x], s[%08x]\n",
> +		vma->vm_start, (u32)client->phys, client->size);
> +
> +	return 0;
> +}
> +
> +static int aspeed_xdma_open(struct inode *inode, struct file *file)
> +{
> +	struct miscdevice *misc = file->private_data;
> +	struct aspeed_xdma *ctx = container_of(misc, struct aspeed_xdma, misc);
> +	struct aspeed_xdma_client *client = kzalloc(sizeof(*client),
> +						    GFP_KERNEL);
> +
> +	if (!client)
> +		return -ENOMEM;
> +
> +	client->ctx = ctx;
> +	file->private_data = client;
> +	return 0;
> +}
> +
> +static int aspeed_xdma_release(struct inode *inode, struct file *file)
> +{
> +	struct aspeed_xdma_client *client = file->private_data;
> +
> +	kfree(client);

I assume the vma gets torn down before release() gets invoked? I haven't
looked closely.

Andrew
Eddie James Dec. 19, 2019, 4 p.m. UTC | #2
On 12/18/19 7:19 PM, Andrew Jeffery wrote:
>
> On Thu, 19 Dec 2019, at 01:39, Eddie James wrote:
>> This commit adds a miscdevice to provide a user interface to the XDMA
>> engine. The interface provides the write operation to start DMA
>> operations. The DMA parameters are passed as the data to the write call.
>> The actual data to transfer is NOT passed through write. Note that both
>> directions of DMA operation are accomplished through the write command;
>> BMC to host and host to BMC.
>>
>> The XDMA driver reserves an area of physical memory for DMA operations,
>> as the XDMA engine is restricted to accessing certain physical memory
>> areas on some platforms. This memory forms a pool from which users can
>> allocate pages for their usage with calls to mmap. The space allocated
>> by a client will be the space used in the DMA operation. For an
>> "upstream" (BMC to host) operation, the data in the client's area will
>> be transferred to the host. For a "downstream" (host to BMC) operation,
>> the host data will be placed in the client's memory area.
>>
>> Poll is also provided in order to determine when the DMA operation is
>> complete for non-blocking IO.
>>
>> Signed-off-by: Eddie James <eajames@linux.ibm.com>
>> ---
>> Changes since v2:
>>   - Rework commit message to talk about VGA memory less
>>   - Remove user reset functionality
>>   - Clean up sanity checks in aspeed_xdma_write()
>>   - Wait for transfer complete in the vm area close function
>>
>>   drivers/soc/aspeed/aspeed-xdma.c | 205 ++++++++++++++++++++++++++++++-
>>   1 file changed, 203 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/soc/aspeed/aspeed-xdma.c b/drivers/soc/aspeed/aspeed-xdma.c
>> index cb94adf798b1..e844937dc925 100644
>> --- a/drivers/soc/aspeed/aspeed-xdma.c
>> +++ b/drivers/soc/aspeed/aspeed-xdma.c
>> @@ -13,6 +13,7 @@
>>   #include <linux/io.h>
>>   #include <linux/jiffies.h>
>>   #include <linux/mfd/syscon.h>
>> +#include <linux/miscdevice.h>
>>   #include <linux/module.h>
>>   #include <linux/mutex.h>
>>   #include <linux/of_device.h>
>> @@ -201,6 +202,8 @@ struct aspeed_xdma {
>>   	struct clk *clock;
>>   	struct reset_control *reset;
>>   
>> +	/* file_lock serializes reads of current_client */
>> +	struct mutex file_lock;
> I wonder whether start_lock can serve this purpose.
>
>>   	/* client_lock protects error and in_progress of the client */
>>   	spinlock_t client_lock;
>>   	struct aspeed_xdma_client *current_client;
>> @@ -223,6 +226,8 @@ struct aspeed_xdma {
>>   	void __iomem *mem_virt;
>>   	dma_addr_t cmdq_phys;
>>   	struct gen_pool *pool;
>> +
>> +	struct miscdevice misc;
>>   };
>>   
>>   struct aspeed_xdma_client {
>> @@ -522,6 +527,185 @@ static irqreturn_t aspeed_xdma_pcie_irq(int irq, void *arg)
>>   	return IRQ_HANDLED;
>>   }
>>   
>> +static ssize_t aspeed_xdma_write(struct file *file, const char __user *buf,
>> +				 size_t len, loff_t *offset)
>> +{
>> +	int rc;
>> +	struct aspeed_xdma_op op;
>> +	struct aspeed_xdma_client *client = file->private_data;
>> +	struct aspeed_xdma *ctx = client->ctx;
>> +
>> +	if (len != sizeof(op))
>> +		return -EINVAL;
>> +
>> +	rc = copy_from_user(&op, buf, len);
>> +	if (rc)
>> +		return rc;
>> +
>> +	if (!op.len || op.len > client->size ||
>> +	    op.direction > ASPEED_XDMA_DIRECTION_UPSTREAM)
>> +		return -EINVAL;
>> +
>> +	if (file->f_flags & O_NONBLOCK) {
>> +		if (!mutex_trylock(&ctx->file_lock))
>> +			return -EAGAIN;
>> +
>> +		if (ctx->current_client) {
> Should be tested under client_lock for consistency with the previous patch,
> though perhaps you could use READ_ONCE()?


I think READ_ONCE will work.
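
A minimal sketch of how the non-blocking check might read with READ_ONCE(), purely illustrative:

	if (file->f_flags & O_NONBLOCK) {
		if (!mutex_trylock(&ctx->file_lock))
			return -EAGAIN;

		/*
		 * READ_ONCE() marks the lockless read of current_client;
		 * the writer would pair it with WRITE_ONCE() or keep
		 * updating it under client_lock.
		 */
		if (READ_ONCE(ctx->current_client)) {
			mutex_unlock(&ctx->file_lock);
			return -EBUSY;
		}
	}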



>
>> +			mutex_unlock(&ctx->file_lock);
>> +			return -EBUSY;
>> +		}
>> +	} else {
>> +		mutex_lock(&ctx->file_lock);
>> +
>> +		rc = wait_event_interruptible(ctx->wait, !ctx->current_client);
>> +		if (rc) {
>> +			mutex_unlock(&ctx->file_lock);
>> +			return -EINTR;
>> +		}
>> +	}
>> +
>> +	aspeed_xdma_start(ctx, &op, client->phys, client);
>> +
>> +	mutex_unlock(&ctx->file_lock);
> Shouldn't we lift start_lock out of aspeed_xdma_start() and use that here
> instead of file_lock? I think that would mean that we could remove
> file_lock.


That wouldn't work with the reset though. The reset should hold 
start_lock as well, but if a client is waiting here with start_lock, 
we'd never get to the reset if the transfer doesn't complete. I think 
file_lock is necessary.


>
>> +
>> +	if (!(file->f_flags & O_NONBLOCK)) {
>> +		rc = wait_event_interruptible(ctx->wait, !client->in_progress);
>> +		if (rc)
>> +			return -EINTR;
>> +
>> +		if (client->error)
>> +			return -EIO;
>> +	}
>> +
>> +	return len;
>> +}
>> +
>> +static __poll_t aspeed_xdma_poll(struct file *file,
>> +				 struct poll_table_struct *wait)
>> +{
>> +	__poll_t mask = 0;
>> +	__poll_t req = poll_requested_events(wait);
>> +	struct aspeed_xdma_client *client = file->private_data;
>> +	struct aspeed_xdma *ctx = client->ctx;
>> +
>> +	if (req & (EPOLLIN | EPOLLRDNORM)) {
>> +		if (client->in_progress)
>> +			poll_wait(file, &ctx->wait, wait);
>> +
>> +		if (!client->in_progress) {
>> +			if (client->error)
>> +				mask |= EPOLLERR;
>> +			else
>> +				mask |= EPOLLIN | EPOLLRDNORM;
>> +		}
>> +	}
>> +
>> +	if (req & (EPOLLOUT | EPOLLWRNORM)) {
>> +		if (ctx->current_client)
>> +			poll_wait(file, &ctx->wait, wait);
>> +
>> +		if (!ctx->current_client)
>> +			mask |= EPOLLOUT | EPOLLWRNORM;
>> +	}
>> +
>> +	return mask;
>> +}
>> +
>> +static void aspeed_xdma_vma_close(struct vm_area_struct *vma)
>> +{
>> +	int rc;
>> +	struct aspeed_xdma_client *client = vma->vm_private_data;
>> +
>> +	rc = wait_event_interruptible(client->ctx->wait, !client->in_progress);
>> +	if (rc)
>> +		return;
>> +
>> +	gen_pool_free(client->ctx->pool, (unsigned long)client->virt,
>> +		      client->size);
>> +
>> +	client->virt = NULL;
>> +	client->phys = 0;
>> +	client->size = 0;
>> +}
>> +
>> +static const struct vm_operations_struct aspeed_xdma_vm_ops = {
>> +	.close =	aspeed_xdma_vma_close,
>> +};
>> +
>> +static int aspeed_xdma_mmap(struct file *file, struct vm_area_struct *vma)
>> +{
>> +	int rc;
>> +	struct aspeed_xdma_client *client = file->private_data;
>> +	struct aspeed_xdma *ctx = client->ctx;
>> +
>> +	/* restrict file to one mapping */
>> +	if (client->size)
>> +		return -EBUSY;
>> +
>> +	client->size = vma->vm_end - vma->vm_start;
>> +	client->virt = gen_pool_dma_alloc(ctx->pool, client->size,
>> +					  &client->phys);
>> +	if (!client->virt) {
>> +		client->phys = 0;
>> +		client->size = 0;
>> +		return -ENOMEM;
>> +	}
>> +
>> +	vma->vm_pgoff = (client->phys - ctx->mem_phys) >> PAGE_SHIFT;
>> +	vma->vm_ops = &aspeed_xdma_vm_ops;
>> +	vma->vm_private_data = client;
>> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>> +
>> +	rc = io_remap_pfn_range(vma, vma->vm_start, client->phys >> PAGE_SHIFT,
>> +				client->size, vma->vm_page_prot);
>> +	if (rc) {
> Probably worth a dev_warn() here so we know what happened?


Sure.


>
>> +		gen_pool_free(ctx->pool, (unsigned long)client->virt,
>> +			      client->size);
>> +
>> +		client->virt = NULL;
>> +		client->phys = 0;
>> +		client->size = 0;
>> +		return rc;
>> +	}
>> +
>> +	dev_dbg(ctx->dev, "mmap: v[%08lx] to p[%08x], s[%08x]\n",
>> +		vma->vm_start, (u32)client->phys, client->size);
>> +
>> +	return 0;
>> +}
>> +
>> +static int aspeed_xdma_open(struct inode *inode, struct file *file)
>> +{
>> +	struct miscdevice *misc = file->private_data;
>> +	struct aspeed_xdma *ctx = container_of(misc, struct aspeed_xdma, misc);
>> +	struct aspeed_xdma_client *client = kzalloc(sizeof(*client),
>> +						    GFP_KERNEL);
>> +
>> +	if (!client)
>> +		return -ENOMEM;
>> +
>> +	client->ctx = ctx;
>> +	file->private_data = client;
>> +	return 0;
>> +}
>> +
>> +static int aspeed_xdma_release(struct inode *inode, struct file *file)
>> +{
>> +	struct aspeed_xdma_client *client = file->private_data;
>> +
>> +	kfree(client);
> I assume the vma gets torn down before release() gets invoked? I haven't
> looked closely.


From what I've read, yes, the VMA has to be closed before release() can
be called.


Thanks for the review!

Eddie


>
> Andrew
Andrew Jeffery Dec. 19, 2019, 9:16 p.m. UTC | #3
On Fri, 20 Dec 2019, at 02:30, Eddie James wrote:
> 
> On 12/18/19 7:19 PM, Andrew Jeffery wrote:
> >
> > On Thu, 19 Dec 2019, at 01:39, Eddie James wrote:
> >> +			mutex_unlock(&ctx->file_lock);
> >> +			return -EBUSY;
> >> +		}
> >> +	} else {
> >> +		mutex_lock(&ctx->file_lock);
> >> +
> >> +		rc = wait_event_interruptible(ctx->wait, !ctx->current_client);
> >> +		if (rc) {
> >> +			mutex_unlock(&ctx->file_lock);
> >> +			return -EINTR;
> >> +		}
> >> +	}
> >> +
> >> +	aspeed_xdma_start(ctx, &op, client->phys, client);
> >> +
> >> +	mutex_unlock(&ctx->file_lock);
> > Shouldn't we lift start_lock out of aspeed_xdma_start() and use that here
> > instead of file_lock? I think that would mean that we could remove
> > file_lock.
> 
> 
> That wouldn't work with the reset though. The reset should hold 
> start_lock as well, but if a client is waiting here with start_lock, 
> we'd never get to the reset if the transfer doesn't complete. I think 
> file_lock is necessary.

Hmm, let me think about this some more.

Andrew

Patch

diff --git a/drivers/soc/aspeed/aspeed-xdma.c b/drivers/soc/aspeed/aspeed-xdma.c
index cb94adf798b1..e844937dc925 100644
--- a/drivers/soc/aspeed/aspeed-xdma.c
+++ b/drivers/soc/aspeed/aspeed-xdma.c
@@ -13,6 +13,7 @@ 
 #include <linux/io.h>
 #include <linux/jiffies.h>
 #include <linux/mfd/syscon.h>
+#include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of_device.h>
@@ -201,6 +202,8 @@  struct aspeed_xdma {
 	struct clk *clock;
 	struct reset_control *reset;
 
+	/* file_lock serializes reads of current_client */
+	struct mutex file_lock;
 	/* client_lock protects error and in_progress of the client */
 	spinlock_t client_lock;
 	struct aspeed_xdma_client *current_client;
@@ -223,6 +226,8 @@  struct aspeed_xdma {
 	void __iomem *mem_virt;
 	dma_addr_t cmdq_phys;
 	struct gen_pool *pool;
+
+	struct miscdevice misc;
 };
 
 struct aspeed_xdma_client {
@@ -522,6 +527,185 @@  static irqreturn_t aspeed_xdma_pcie_irq(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+static ssize_t aspeed_xdma_write(struct file *file, const char __user *buf,
+				 size_t len, loff_t *offset)
+{
+	int rc;
+	struct aspeed_xdma_op op;
+	struct aspeed_xdma_client *client = file->private_data;
+	struct aspeed_xdma *ctx = client->ctx;
+
+	if (len != sizeof(op))
+		return -EINVAL;
+
+	rc = copy_from_user(&op, buf, len);
+	if (rc)
+		return rc;
+
+	if (!op.len || op.len > client->size ||
+	    op.direction > ASPEED_XDMA_DIRECTION_UPSTREAM)
+		return -EINVAL;
+
+	if (file->f_flags & O_NONBLOCK) {
+		if (!mutex_trylock(&ctx->file_lock))
+			return -EAGAIN;
+
+		if (ctx->current_client) {
+			mutex_unlock(&ctx->file_lock);
+			return -EBUSY;
+		}
+	} else {
+		mutex_lock(&ctx->file_lock);
+
+		rc = wait_event_interruptible(ctx->wait, !ctx->current_client);
+		if (rc) {
+			mutex_unlock(&ctx->file_lock);
+			return -EINTR;
+		}
+	}
+
+	aspeed_xdma_start(ctx, &op, client->phys, client);
+
+	mutex_unlock(&ctx->file_lock);
+
+	if (!(file->f_flags & O_NONBLOCK)) {
+		rc = wait_event_interruptible(ctx->wait, !client->in_progress);
+		if (rc)
+			return -EINTR;
+
+		if (client->error)
+			return -EIO;
+	}
+
+	return len;
+}
+
+static __poll_t aspeed_xdma_poll(struct file *file,
+				 struct poll_table_struct *wait)
+{
+	__poll_t mask = 0;
+	__poll_t req = poll_requested_events(wait);
+	struct aspeed_xdma_client *client = file->private_data;
+	struct aspeed_xdma *ctx = client->ctx;
+
+	if (req & (EPOLLIN | EPOLLRDNORM)) {
+		if (client->in_progress)
+			poll_wait(file, &ctx->wait, wait);
+
+		if (!client->in_progress) {
+			if (client->error)
+				mask |= EPOLLERR;
+			else
+				mask |= EPOLLIN | EPOLLRDNORM;
+		}
+	}
+
+	if (req & (EPOLLOUT | EPOLLWRNORM)) {
+		if (ctx->current_client)
+			poll_wait(file, &ctx->wait, wait);
+
+		if (!ctx->current_client)
+			mask |= EPOLLOUT | EPOLLWRNORM;
+	}
+
+	return mask;
+}
+
+static void aspeed_xdma_vma_close(struct vm_area_struct *vma)
+{
+	int rc;
+	struct aspeed_xdma_client *client = vma->vm_private_data;
+
+	rc = wait_event_interruptible(client->ctx->wait, !client->in_progress);
+	if (rc)
+		return;
+
+	gen_pool_free(client->ctx->pool, (unsigned long)client->virt,
+		      client->size);
+
+	client->virt = NULL;
+	client->phys = 0;
+	client->size = 0;
+}
+
+static const struct vm_operations_struct aspeed_xdma_vm_ops = {
+	.close =	aspeed_xdma_vma_close,
+};
+
+static int aspeed_xdma_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int rc;
+	struct aspeed_xdma_client *client = file->private_data;
+	struct aspeed_xdma *ctx = client->ctx;
+
+	/* restrict file to one mapping */
+	if (client->size)
+		return -EBUSY;
+
+	client->size = vma->vm_end - vma->vm_start;
+	client->virt = gen_pool_dma_alloc(ctx->pool, client->size,
+					  &client->phys);
+	if (!client->virt) {
+		client->phys = 0;
+		client->size = 0;
+		return -ENOMEM;
+	}
+
+	vma->vm_pgoff = (client->phys - ctx->mem_phys) >> PAGE_SHIFT;
+	vma->vm_ops = &aspeed_xdma_vm_ops;
+	vma->vm_private_data = client;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	rc = io_remap_pfn_range(vma, vma->vm_start, client->phys >> PAGE_SHIFT,
+				client->size, vma->vm_page_prot);
+	if (rc) {
+		gen_pool_free(ctx->pool, (unsigned long)client->virt,
+			      client->size);
+
+		client->virt = NULL;
+		client->phys = 0;
+		client->size = 0;
+		return rc;
+	}
+
+	dev_dbg(ctx->dev, "mmap: v[%08lx] to p[%08x], s[%08x]\n",
+		vma->vm_start, (u32)client->phys, client->size);
+
+	return 0;
+}
+
+static int aspeed_xdma_open(struct inode *inode, struct file *file)
+{
+	struct miscdevice *misc = file->private_data;
+	struct aspeed_xdma *ctx = container_of(misc, struct aspeed_xdma, misc);
+	struct aspeed_xdma_client *client = kzalloc(sizeof(*client),
+						    GFP_KERNEL);
+
+	if (!client)
+		return -ENOMEM;
+
+	client->ctx = ctx;
+	file->private_data = client;
+	return 0;
+}
+
+static int aspeed_xdma_release(struct inode *inode, struct file *file)
+{
+	struct aspeed_xdma_client *client = file->private_data;
+
+	kfree(client);
+	return 0;
+}
+
+static const struct file_operations aspeed_xdma_fops = {
+	.owner			= THIS_MODULE,
+	.write			= aspeed_xdma_write,
+	.poll			= aspeed_xdma_poll,
+	.mmap			= aspeed_xdma_mmap,
+	.open			= aspeed_xdma_open,
+	.release		= aspeed_xdma_release,
+};
+
 static int aspeed_xdma_probe(struct platform_device *pdev)
 {
 	int rc;
@@ -543,6 +727,7 @@  static int aspeed_xdma_probe(struct platform_device *pdev)
 	ctx->chip = md;
 	ctx->dev = dev;
 	platform_set_drvdata(pdev, ctx);
+	mutex_init(&ctx->file_lock);
 	mutex_init(&ctx->start_lock);
 	INIT_WORK(&ctx->reset_work, aspeed_xdma_reset_work);
 	spin_lock_init(&ctx->client_lock);
@@ -674,6 +859,22 @@  static int aspeed_xdma_probe(struct platform_device *pdev)
 
 	aspeed_xdma_init_eng(ctx);
 
+	ctx->misc.minor = MISC_DYNAMIC_MINOR;
+	ctx->misc.fops = &aspeed_xdma_fops;
+	ctx->misc.name = "aspeed-xdma";
+	ctx->misc.parent = dev;
+	rc = misc_register(&ctx->misc);
+	if (rc) {
+		dev_err(dev, "Failed to register xdma miscdevice.\n");
+
+		gen_pool_free(ctx->pool, (unsigned long)ctx->cmdq,
+			      XDMA_CMDQ_SIZE);
+
+		reset_control_assert(ctx->reset);
+		clk_disable_unprepare(ctx->clock);
+		return rc;
+	}
+
 	/*
 	 * This interrupt could fire immediately so only request it once the
 	 * engine and driver are initialized.
@@ -695,8 +896,8 @@  static int aspeed_xdma_remove(struct platform_device *pdev)
 {
 	struct aspeed_xdma *ctx = platform_get_drvdata(pdev);
 
-	gen_pool_free(ctx->pool, (unsigned long)ctx->cmdq_virt,
-		      XDMA_CMDQ_SIZE);
+	misc_deregister(&ctx->misc);
+	gen_pool_free(ctx->pool, (unsigned long)ctx->cmdq, XDMA_CMDQ_SIZE);
 
 	reset_control_assert(ctx->reset);
 	clk_disable_unprepare(ctx->clock);