[v0,2/2] Crypto: Talitos: Support for Async_tx XOR offload

Message ID 1255588866-4133-2-git-send-email-Vishnu@freescale.com (mailing list archive)
State Superseded
Delegated to: Kumar Gala

Commit Message

Vishnu Suresh Oct. 15, 2009, 6:41 a.m. UTC
Expose the XOR functionality of Talitos so it can be used for
RAID parity calculation via the Async_tx layer.

Thanks to Surender Kumar and Lee Nipper for their help in
realising this driver.

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Dipen Dudhat <Dipen.Dudhat@freescale.com>
Signed-off-by: Maneesh Gupta <Maneesh.Gupta@freescale.com>
Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
---
 drivers/crypto/Kconfig   |    2 +
 drivers/crypto/talitos.c |  423 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/crypto/talitos.h |    2 +
 3 files changed, 426 insertions(+), 1 deletions(-)

Comments

Dan Williams Oct. 29, 2009, 10:46 p.m. UTC | #1
On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh <Vishnu@freescale.com> wrote:
[..]
> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
> index b08403d..343e578 100644
> --- a/drivers/crypto/Kconfig
> +++ b/drivers/crypto/Kconfig
> @@ -192,6 +192,8 @@ config CRYPTO_DEV_TALITOS
>        select CRYPTO_ALGAPI
>        select CRYPTO_AUTHENC
>        select HW_RANDOM
> +       select DMA_ENGINE
> +       select ASYNC_XOR

You need only select DMA_ENGINE.  ASYNC_XOR is selected by its users.
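
i.e. the hunk becomes just:

 	select CRYPTO_ALGAPI
 	select CRYPTO_AUTHENC
 	select HW_RANDOM
+	select DMA_ENGINE
 	depends on FSL_SOC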

>        depends on FSL_SOC
>        help
>          Say 'Y' here to use the Freescale Security Engine (SEC)
> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
> index c47ffe8..84819d4 100644
> --- a/drivers/crypto/talitos.c
> +++ b/drivers/crypto/talitos.c
[..]
> +static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
> +{
> +       struct talitos_xor_desc *desc, *_desc;
> +       unsigned long flags;
> +       int status;
> +
> +       spin_lock_irqsave(&xor_chan->desc_lock, flags);
> +
> +       list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
> +               status = talitos_submit(xor_chan->dev, &desc->hwdesc,
> +                                       talitos_release_xor, desc);
> +               if (status != -EINPROGRESS)
> +                       break;
> +
> +               list_del(&desc->node);
> +               list_add_tail(&desc->node, &xor_chan->in_progress_q);
> +       }
> +
> +       spin_unlock_irqrestore(&xor_chan->desc_lock, flags);
> +}

The driver uses spin_lock_bh everywhere else, so this is either a bug
or the code is being overly protective.  In any event lockdep will
rightly complain about it.  The API and its users do not submit
operations in hard-irq context.
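
A sketch of the same routine with the softirq-safe locking the rest of
the driver uses (untested; list_move_tail() is just the existing
list_del() + list_add_tail() pair):

	static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
	{
		struct talitos_xor_desc *desc, *_desc;
		int status;

		spin_lock_bh(&xor_chan->desc_lock);

		list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
			status = talitos_submit(xor_chan->dev, &desc->hwdesc,
						talitos_release_xor, desc);
			if (status != -EINPROGRESS)
				break;

			/* descriptor is now owned by the hardware queue */
			list_move_tail(&desc->node, &xor_chan->in_progress_q);
		}

		spin_unlock_bh(&xor_chan->desc_lock);
	}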

> +
> +static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
> +                               void *context, int error)
> +{
> +       struct talitos_xor_desc *desc = context;
> +       struct talitos_xor_chan *xor_chan;
> +       dma_async_tx_callback callback;
> +       void *callback_param;
> +
> +       if (unlikely(error)) {
> +               dev_err(dev, "xor operation: talitos error %d\n", error);
> +               BUG();
> +       }
> +
> +       xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
> +                               common);
> +       spin_lock_bh(&xor_chan->desc_lock);
> +       if (xor_chan->completed_cookie < desc->async_tx.cookie)
> +               xor_chan->completed_cookie = desc->async_tx.cookie;
> +
> +       callback = desc->async_tx.callback;
> +       callback_param = desc->async_tx.callback_param;
> +
> +       if (callback) {
> +               spin_unlock_bh(&xor_chan->desc_lock);
> +               callback(callback_param);
> +               spin_lock_bh(&xor_chan->desc_lock);
> +       }

You do not need to unlock to call the callback.  Users of the API
assume that they are called in bh context and that they are not
allowed to submit new operations directly from the callback (this
simplifies the descriptor cleanup routines in other drivers).
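
i.e. the completion path can hold the lock across the callback,
something like (untested sketch, relying on callbacks being bh-safe
and never resubmitting directly):

	spin_lock_bh(&xor_chan->desc_lock);

	if (xor_chan->completed_cookie < desc->async_tx.cookie)
		xor_chan->completed_cookie = desc->async_tx.cookie;

	callback = desc->async_tx.callback;
	callback_param = desc->async_tx.callback_param;
	if (callback)
		callback(callback_param);	/* bh context, no resubmission */

	list_del(&desc->node);
	list_add_tail(&desc->node, &xor_chan->free_desc);
	spin_unlock_bh(&xor_chan->desc_lock);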

> +
> +       list_del(&desc->node);
> +       list_add_tail(&desc->node, &xor_chan->free_desc);
> +       spin_unlock_bh(&xor_chan->desc_lock);
> +       if (!list_empty(&xor_chan->pending_q))
> +               talitos_process_pending(xor_chan);
> +}

It appears this routine is missing a call to dma_run_dependencies()?
This is needed if another channel is handling memory copy offload.  I
assume this is the case due to your other patch to fsldma.  See
iop_adma_run_tx_complete_actions().  If this xor resource can also
handle copy operations that would be more efficient as it eliminates
channel switching.  See the ioatdma driver and its use of
ASYNC_TX_DISABLE_CHANNEL_SWITCH.
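
dma_run_dependencies() is the generic dmaengine helper for this; a
sketch of the tail of talitos_release_xor with it added (placement is
illustrative: it must run outside desc_lock, since a dependent
operation queued on this same channel would call back into tx_submit):

	list_del(&desc->node);
	list_add_tail(&desc->node, &xor_chan->free_desc);
	spin_unlock_bh(&xor_chan->desc_lock);

	/* start any transactions that were waiting on this result */
	dma_run_dependencies(&desc->async_tx);

	if (!list_empty(&xor_chan->pending_q))
		talitos_process_pending(xor_chan);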

> +static struct dma_async_tx_descriptor * talitos_prep_dma_xor(
> +                       struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
> +                       unsigned int src_cnt, size_t len, unsigned long flags)
> +{
> +       struct talitos_xor_chan *xor_chan;
> +       struct talitos_xor_desc *new;
> +       struct talitos_desc *desc;
> +       int i, j;
> +
> +       BUG_ON(unlikely(len > TALITOS_MAX_DATA_LEN));
> +
> +       xor_chan = container_of(chan, struct talitos_xor_chan, common);
> +
> +       spin_lock_bh(&xor_chan->desc_lock);
> +       if (!list_empty(&xor_chan->free_desc)) {
> +               new = container_of(xor_chan->free_desc.next,
> +                                  struct talitos_xor_desc, node);
> +               list_del(&new->node);
> +       } else {
> +               new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
> +       }
> +       spin_unlock_bh(&xor_chan->desc_lock);
> +
> +       if (!new) {
> +               dev_err(xor_chan->common.device->dev,
> +                       "No free memory for XOR DMA descriptor\n");
> +               return NULL;
> +       }
> +       dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common);
> +
> +       INIT_LIST_HEAD(&new->node);
> +       INIT_LIST_HEAD(&new->tx_list);

You can save some overhead in the fast path by moving this
initialization to talitos_xor_alloc_descriptor.
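
i.e. something like (sketch; node/tx_list are always re-linked on use,
and tx_submit re-initializes tx_list via list_splice_tail_init, so a
one-time init in the allocator suffices):

	static struct talitos_xor_desc *talitos_xor_alloc_descriptor(
				struct talitos_xor_chan *xor_chan, gfp_t flags)
	{
		struct talitos_xor_desc *desc;

		desc = kmalloc(sizeof(*desc), flags);
		if (desc) {
			xor_chan->total_desc++;
			desc->async_tx.tx_submit = talitos_async_tx_submit;
			/* one-time init, reused across prep calls */
			INIT_LIST_HEAD(&desc->node);
			INIT_LIST_HEAD(&desc->tx_list);
		}

		return desc;
	}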

> +
> +       desc = &new->hwdesc;
> +       /* Set destination: Last pointer pair */
> +       to_talitos_ptr(&desc->ptr[6], dest);
> +       desc->ptr[6].len = cpu_to_be16(len);
> +       desc->ptr[6].j_extent = 0;
> +
> +       /* Set Sources: End loading from second-last pointer pair */
> +       for (i = 5, j = 0; (j < src_cnt) && (i > 0); i--, j++) {
> +               to_talitos_ptr(&desc->ptr[i], src[j]);
> +               desc->ptr[i].len = cpu_to_be16(len);
> +               desc->ptr[i].j_extent = 0;
> +       }
> +
> +       /*
> +        * documentation states first 0 ptr/len combo marks end of sources
> +        * yet device produces scatter boundary error unless all subsequent
> +        * sources are zeroed out
> +        */
> +       for (; i >= 0; i--) {
> +               to_talitos_ptr(&desc->ptr[i], 0);
> +               desc->ptr[i].len = 0;
> +               desc->ptr[i].j_extent = 0;
> +       }
> +
> +       desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR
> +                   | DESC_HDR_TYPE_RAID_XOR;
> +
> +       new->async_tx.parent = NULL;
> +       new->async_tx.next = NULL;

These fields are managed by the async_tx channel switch code.  No need
to manage them here.

> +       new->async_tx.cookie = 0;

This is set to -EBUSY below; it's redundant to touch it here.

> +       async_tx_ack(&new->async_tx);

This makes it impossible to attach a dependency to this operation.
Not sure this is what you want.
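
With the redundant parent/next/cookie writes and the unconditional ack
dropped, the epilogue of talitos_prep_dma_xor reduces to (sketch;
acking is left to the DMA_CTRL_ACK bit in the flags the caller passed):

	list_add_tail(&new->node, &new->tx_list);

	new->async_tx.flags = flags;
	new->async_tx.cookie = -EBUSY;

	return &new->async_tx;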

--
Dan
hank peng Nov. 3, 2009, 12:43 a.m. UTC | #2
I have tested this patch on my MPC8548 box, kernel version 2.6.30,
and there is a problem.
#mdadm -C /dev/md0 -l5 -n3 /dev/sd{a,b,c}
Recovery completes successfully, and the interrupts look normal.
# cat /proc/interrupts
           CPU0
 16:   16091057   OpenPIC   Level     mvSata
 17:          0   OpenPIC   Level     mvSata
 18:          4   OpenPIC   Level     phy_interrupt, phy_interrupt
 20:         39   OpenPIC   Level     fsldma-channel
 21:          0   OpenPIC   Level     fsldma-channel
 22:          0   OpenPIC   Level     fsldma-channel
 23:          0   OpenPIC   Level     fsldma-channel
 29:        241   OpenPIC   Level     eth0_tx
 30:    1004692   OpenPIC   Level     eth0_rx
 34:          0   OpenPIC   Level     eth0_er
 35:          0   OpenPIC   Level     eth1_tx
 36:          0   OpenPIC   Level     eth1_rx
 40:          0   OpenPIC   Level     eth1_er
 42:      73060   OpenPIC   Level     serial
 43:       9854   OpenPIC   Level     i2c-mpc, i2c-mpc
 45:   61264188   OpenPIC   Level     talitos
BAD:          0

Then I tried to create a VG, but a problem occurred.
#pvcreate /dev/md0

After I enter this command, the system hangs without printing any messages.

BTW, everything is OK without this patch.


2009/10/30 Dan Williams <dan.j.williams@intel.com>:
> [..]

Patch

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index b08403d..343e578 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -192,6 +192,8 @@  config CRYPTO_DEV_TALITOS
 	select CRYPTO_ALGAPI
 	select CRYPTO_AUTHENC
 	select HW_RANDOM
+	select DMA_ENGINE
+	select ASYNC_XOR
 	depends on FSL_SOC
 	help
 	  Say 'Y' here to use the Freescale Security Engine (SEC)
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index c47ffe8..84819d4 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1,7 +1,7 @@ 
 /*
  * talitos - Freescale Integrated Security Engine (SEC) device driver
  *
- * Copyright (c) 2008 Freescale Semiconductor, Inc.
+ * Copyright (c) 2008-2009 Freescale Semiconductor, Inc.
  *
  * Scatterlist Crypto API glue code copied from files with the following:
  * Copyright (c) 2006-2007 Herbert Xu <herbert@gondor.apana.org.au>
@@ -37,6 +37,8 @@ 
 #include <linux/io.h>
 #include <linux/spinlock.h>
 #include <linux/rtnetlink.h>
+#include <linux/dmaengine.h>
+#include <linux/raid/xor.h>
 
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
@@ -140,6 +142,9 @@  struct talitos_private {
 
 	/* hwrng device */
 	struct hwrng rng;
+
+	/* XOR Device */
+	struct dma_device dma_dev_common;
 };
 
 /* .features flag */
@@ -685,6 +690,401 @@  static void talitos_unregister_rng(struct device *dev)
 }
 
 /*
+ * async_tx interface for XOR-capable SECs
+ *
+ * Dipen Dudhat <Dipen.Dudhat@freescale.com>
+ * Maneesh Gupta <Maneesh.Gupta@freescale.com>
+ * Vishnu Suresh <Vishnu@freescale.com>
+ */
+
+/**
+ * talitos_xor_chan - context management for the async_tx channel
+ * @completed_cookie: the last completed cookie
+ * @desc_lock: lock for tx queue
+ * @total_desc: number of descriptors allocated
+ * @submit_q: queue of submitted descriptors
+ * @pending_q: queue of pending descriptors
+ * @in_progress_q: queue of descriptors in progress
+ * @free_desc: queue of unused descriptors
+ * @dev: talitos device implementing this channel
+ * @common: the corresponding xor channel in async_tx
+ */
+struct talitos_xor_chan {
+	dma_cookie_t completed_cookie;
+	spinlock_t desc_lock;
+	unsigned int total_desc;
+	struct list_head submit_q;
+	struct list_head pending_q;
+	struct list_head in_progress_q;
+	struct list_head free_desc;
+	struct device *dev;
+	struct dma_chan common;
+};
+
+/**
+ * talitos_xor_desc - software xor descriptor
+ * @async_tx: the referring async_tx descriptor
+ * @node:
+ * @hwdesc: h/w descriptor
+ */
+struct talitos_xor_desc {
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head tx_list;
+	struct list_head node;
+	struct talitos_desc hwdesc;
+};
+
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+				void *context, int error);
+
+static enum dma_status talitos_is_tx_complete(struct dma_chan *chan,
+					      dma_cookie_t cookie,
+					      dma_cookie_t *done,
+					      dma_cookie_t *used)
+{
+	struct talitos_xor_chan *xor_chan;
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	last_used = chan->cookie;
+	last_complete = xor_chan->completed_cookie;
+
+	if (done)
+		*done = last_complete;
+
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
+{
+	struct talitos_xor_desc *desc, *_desc;
+	unsigned long flags;
+	int status;
+
+	spin_lock_irqsave(&xor_chan->desc_lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+		status = talitos_submit(xor_chan->dev, &desc->hwdesc,
+					talitos_release_xor, desc);
+		if (status != -EINPROGRESS)
+			break;
+
+		list_del(&desc->node);
+		list_add_tail(&desc->node, &xor_chan->in_progress_q);
+	}
+
+	spin_unlock_irqrestore(&xor_chan->desc_lock, flags);
+}
+
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+				void *context, int error)
+{
+	struct talitos_xor_desc *desc = context;
+	struct talitos_xor_chan *xor_chan;
+	dma_async_tx_callback callback;
+	void *callback_param;
+
+	if (unlikely(error)) {
+		dev_err(dev, "xor operation: talitos error %d\n", error);
+		BUG();
+	}
+
+	xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
+				common);
+	spin_lock_bh(&xor_chan->desc_lock);
+	if (xor_chan->completed_cookie < desc->async_tx.cookie)
+		xor_chan->completed_cookie = desc->async_tx.cookie;
+
+	callback = desc->async_tx.callback;
+	callback_param = desc->async_tx.callback_param;
+
+	if (callback) {
+		spin_unlock_bh(&xor_chan->desc_lock);
+		callback(callback_param);
+		spin_lock_bh(&xor_chan->desc_lock);
+	}
+
+	list_del(&desc->node);
+	list_add_tail(&desc->node, &xor_chan->free_desc);
+	spin_unlock_bh(&xor_chan->desc_lock);
+	if (!list_empty(&xor_chan->pending_q))
+		talitos_process_pending(xor_chan);
+}
+
+/**
+ * talitos_issue_pending - move the descriptors in submit
+ * queue to pending queue and submit them for processing
+ * @chan: DMA channel
+ */
+static void talitos_issue_pending(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+	spin_lock_bh(&xor_chan->desc_lock);
+	list_splice_tail_init(&xor_chan->submit_q,
+				 &xor_chan->pending_q);
+	spin_unlock_bh(&xor_chan->desc_lock);
+	talitos_process_pending(xor_chan);
+}
+
+static dma_cookie_t talitos_async_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct talitos_xor_desc *desc;
+	struct talitos_xor_chan *xor_chan;
+	dma_cookie_t cookie;
+
+	desc = container_of(tx, struct talitos_xor_desc, async_tx);
+	xor_chan = container_of(tx->chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+
+	cookie = xor_chan->common.cookie + 1;
+	if (cookie < 0)
+		cookie = 1;
+
+	desc->async_tx.cookie = cookie;
+	xor_chan->common.cookie = desc->async_tx.cookie;
+
+	list_splice_tail_init(&desc->tx_list,
+				 &xor_chan->submit_q);
+
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	return cookie;
+}
+
+static struct talitos_xor_desc *talitos_xor_alloc_descriptor(
+				struct talitos_xor_chan *xor_chan, gfp_t flags)
+{
+	struct talitos_xor_desc *desc;
+
+	desc = kmalloc(sizeof(*desc), flags);
+	if (desc) {
+		xor_chan->total_desc++;
+		desc->async_tx.tx_submit = talitos_async_tx_submit;
+	}
+
+	return desc;
+}
+
+static void talitos_free_chan_resources(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *desc, *_desc;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+
+	list_for_each_entry_safe(desc, _desc, &xor_chan->submit_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->in_progress_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->free_desc, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	BUG_ON(unlikely(xor_chan->total_desc));	/* Some descriptor not freed? */
+
+	spin_unlock_bh(&xor_chan->desc_lock);
+}
+
+static int talitos_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *desc;
+	LIST_HEAD(tmp_list);
+	int i;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	if (!list_empty(&xor_chan->free_desc))
+		return xor_chan->total_desc;
+
+	/* 256 initial descriptors */
+	for (i = 0; i < 256; i++) {
+		desc = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
+		if (!desc) {
+			dev_err(xor_chan->common.device->dev,
+				"Only %d initial descriptors\n", i);
+			break;
+		}
+		list_add_tail(&desc->node, &tmp_list);
+	}
+
+	if (!i)
+		return -ENOMEM;
+
+	/* At least one desc is allocated */
+	spin_lock_bh(&xor_chan->desc_lock);
+	list_splice_init(&tmp_list, &xor_chan->free_desc);
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	return xor_chan->total_desc;
+}
+
+static struct dma_async_tx_descriptor * talitos_prep_dma_xor(
+			struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+			unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *new;
+	struct talitos_desc *desc;
+	int i, j;
+
+	BUG_ON(unlikely(len > TALITOS_MAX_DATA_LEN));
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+	if (!list_empty(&xor_chan->free_desc)) {
+		new = container_of(xor_chan->free_desc.next,
+				   struct talitos_xor_desc, node);
+		list_del(&new->node);
+	} else {
+		new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
+	}
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	if (!new) {
+		dev_err(xor_chan->common.device->dev,
+			"No free memory for XOR DMA descriptor\n");
+		return NULL;
+	}
+	dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common);
+
+	INIT_LIST_HEAD(&new->node);
+	INIT_LIST_HEAD(&new->tx_list);
+
+	desc = &new->hwdesc;
+	/* Set destination: Last pointer pair */
+	to_talitos_ptr(&desc->ptr[6], dest);
+	desc->ptr[6].len = cpu_to_be16(len);
+	desc->ptr[6].j_extent = 0;
+
+	/* Set Sources: End loading from second-last pointer pair */
+	for (i = 5, j = 0; (j < src_cnt) && (i > 0); i--, j++) {
+		to_talitos_ptr(&desc->ptr[i], src[j]);
+		desc->ptr[i].len = cpu_to_be16(len);
+		desc->ptr[i].j_extent = 0;
+	}
+
+	/*
+	 * documentation states first 0 ptr/len combo marks end of sources
+	 * yet device produces scatter boundary error unless all subsequent
+	 * sources are zeroed out
+	 */
+	for (; i >= 0; i--) {
+		to_talitos_ptr(&desc->ptr[i], 0);
+		desc->ptr[i].len = 0;
+		desc->ptr[i].j_extent = 0;
+	}
+
+	desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR
+		    | DESC_HDR_TYPE_RAID_XOR;
+
+	new->async_tx.parent = NULL;
+	new->async_tx.next = NULL;
+	new->async_tx.cookie = 0;
+	async_tx_ack(&new->async_tx);
+
+	list_add_tail(&new->node, &new->tx_list);
+
+	new->async_tx.flags = flags;
+	new->async_tx.cookie = -EBUSY;
+
+	return &new->async_tx;
+}
+
+static void talitos_unregister_async_xor(struct device *dev)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	struct talitos_xor_chan *xor_chan;
+	struct dma_chan *chan;
+
+	if (priv->dma_dev_common.chancnt)
+		dma_async_device_unregister(&priv->dma_dev_common);
+
+	list_for_each_entry(chan, &priv->dma_dev_common.channels, device_node) {
+		xor_chan = container_of(chan, struct talitos_xor_chan, common);
+		list_del(&chan->device_node);
+		priv->dma_dev_common.chancnt--;
+		kfree(xor_chan);
+	}
+}
+
+/**
+ * talitos_register_dma_async - Initialize the Freescale XOR ADMA device
+ * It is registered as a DMA device with the capability to perform
+ * XOR operation with the Async_tx layer.
+ * The various queues and channel resources are also allocated.
+ */
+static int talitos_register_async_tx(struct device *dev, int max_xor_srcs)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	struct dma_device *dma_dev = &priv->dma_dev_common;
+	struct talitos_xor_chan *xor_chan;
+	int err;
+
+	xor_chan = kzalloc(sizeof(struct talitos_xor_chan), GFP_KERNEL);
+	if (!xor_chan) {
+		dev_err(dev, "unable to allocate xor channel\n");
+		return -ENOMEM;
+	}
+
+	dma_dev->dev = dev;
+	dma_dev->device_alloc_chan_resources = talitos_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = talitos_free_chan_resources;
+	dma_dev->device_prep_dma_xor = talitos_prep_dma_xor;
+	dma_dev->max_xor = max_xor_srcs;
+	dma_dev->device_is_tx_complete = talitos_is_tx_complete;
+	dma_dev->device_issue_pending = talitos_issue_pending;
+	INIT_LIST_HEAD(&dma_dev->channels);
+	dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+
+	xor_chan->dev = dev;
+	xor_chan->common.device = dma_dev;
+	xor_chan->total_desc = 0;
+	INIT_LIST_HEAD(&xor_chan->submit_q);
+	INIT_LIST_HEAD(&xor_chan->pending_q);
+	INIT_LIST_HEAD(&xor_chan->in_progress_q);
+	INIT_LIST_HEAD(&xor_chan->free_desc);
+	spin_lock_init(&xor_chan->desc_lock);
+
+	list_add_tail(&xor_chan->common.device_node, &dma_dev->channels);
+	dma_dev->chancnt++;
+
+	err = dma_async_device_register(dma_dev);
+	if (err) {
+		dev_err(dev, "Unable to register XOR with Async_tx\n");
+		goto err_out;
+	}
+
+	return err;
+
+err_out:
+	talitos_unregister_async_xor(dev);
+	return err;
+}
+/*
  * crypto alg
  */
 #define TALITOS_CRA_PRIORITY		3000
@@ -1768,6 +2168,8 @@  static int talitos_remove(struct of_device *ofdev)
 	tasklet_kill(&priv->done_task);
 
 	iounmap(priv->reg);
+	if (priv->dma_dev_common.chancnt)
+		talitos_unregister_async_xor(dev);
 
 	dev_set_drvdata(dev, NULL);
 
@@ -1926,6 +2328,25 @@  static int talitos_probe(struct of_device *ofdev,
 			dev_info(dev, "hwrng\n");
 	}
 
+	/*
+	 * register with async_tx xor, if capable
+	 * SEC 2.x support up to 3 RAID sources,
+	 * SEC 3.x support up to 6
+	 */
+	if (hw_supports(dev, DESC_HDR_SEL0_AESU | DESC_HDR_TYPE_RAID_XOR)) {
+		int max_xor_srcs = 3;
+		if (of_device_is_compatible(np, "fsl,sec3.0"))
+			max_xor_srcs = 6;
+
+		err = talitos_register_async_tx(dev, max_xor_srcs);
+		if (err) {
+			dev_err(dev, "failed to register async_tx xor: %d\n",
+				err);
+			goto err_out;
+		}
+		dev_info(dev, "max_xor_srcs %d\n", max_xor_srcs);
+	}
+
 	/* register crypto algorithms the device supports */
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
 		if (hw_supports(dev, driver_algs[i].desc_hdr_template)) {
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index ff5a145..b6197bc 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -155,6 +155,7 @@ 
 /* primary execution unit mode (MODE0) and derivatives */
 #define	DESC_HDR_MODE0_ENCRYPT		cpu_to_be32(0x00100000)
 #define	DESC_HDR_MODE0_AESU_CBC		cpu_to_be32(0x00200000)
+#define	DESC_HDR_MODE0_AESU_XOR		cpu_to_be32(0x0c600000)
 #define	DESC_HDR_MODE0_DEU_CBC		cpu_to_be32(0x00400000)
 #define	DESC_HDR_MODE0_DEU_3DES		cpu_to_be32(0x00200000)
 #define	DESC_HDR_MODE0_MDEU_INIT	cpu_to_be32(0x01000000)
@@ -202,6 +203,7 @@ 
 #define DESC_HDR_TYPE_IPSEC_ESP			cpu_to_be32(1 << 3)
 #define DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU	cpu_to_be32(2 << 3)
 #define DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU	cpu_to_be32(4 << 3)
+#define DESC_HDR_TYPE_RAID_XOR			cpu_to_be32(21 << 3)
 
 /* link table extent field bits */
 #define DESC_PTR_LNKTBL_JUMP			0x80