diff mbox

[U-Boot,3/3] mmc: tegra: use bounce buffer APIs

Message ID 1352156642-7975-3-git-send-email-swarren@wwwdotorg.org
State Superseded
Delegated to: Andy Fleming
Headers show

Commit Message

Stephen Warren Nov. 5, 2012, 11:04 p.m. UTC
From: Stephen Warren <swarren@nvidia.com>

Tegra's MMC driver does DMA, and hence needs cache-aligned buffers. In
some cases (e.g. user load commands) this cannot be guaranteed by callers
of the MMC APIs. To solve this, modify the Tegra MMC driver to use the
new bounce_buffer_*() APIs.

Note: Ideally, all U-Boot code will always provide address- and size-
aligned buffers, so a bounce buffer will only ever be needed for user-
supplied buffers (e.g. load commands). Ensuring this removes the need
for performance-sucking bounce buffer memory allocations and memcpy()s.
The one known exception at present is the SCR buffer in sd_change_freq(),
which is only 8 bytes long. Solving this requires enhancing struct
mmc_data to know the difference between buffer size and transferred data
size, or forcing all callers of mmc_send_cmd() to have allocated buffers
using ALLOC_CACHE_ALIGN_BUFFER(), which will true in this case, is not
enforced in any way at present, and so cannot be assumed by the core MMC
code.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 drivers/mmc/tegra_mmc.c             |   81 +++++++++++++++++++++++------------
 include/configs/tegra-common-post.h |    4 ++
 2 files changed, 58 insertions(+), 27 deletions(-)

Comments

Simon Glass Nov. 6, 2012, midnight UTC | #1
Hi Stephen,

On Mon, Nov 5, 2012 at 3:04 PM, Stephen Warren <swarren@wwwdotorg.org> wrote:
> From: Stephen Warren <swarren@nvidia.com>
>
> Tegra's MMC driver does DMA, and hence needs cache-aligned buffers. In
> some cases (e.g. user load commands) this cannot be guaranteed by callers
> of the MMC APIs. To solve this, modify the Tegra MMC driver to use the
> new bounce_buffer_*() APIs.
>
> Note: Ideally, all U-Boot code will always provide address- and size-
> aligned buffers, so a bounce buffer will only ever be needed for user-
> supplied buffers (e.g. load commands). Ensuring this removes the need
> for performance-sucking bounce buffer memory allocations and memcpy()s.
> The one known exception at present is the SCR buffer in sd_change_freq(),
> which is only 8 bytes long. Solving this requires enhancing struct
> mmc_data to know the difference between buffer size and transferred data
> size, or forcing all callers of mmc_send_cmd() to have allocated buffers
> using ALLOC_CACHE_ALIGN_BUFFER(), which will true in this case, is not
> enforced in any way at present, and so cannot be assumed by the core MMC
> code.
>
> Signed-off-by: Stephen Warren <swarren@nvidia.com>
> ---
>  drivers/mmc/tegra_mmc.c             |   81 +++++++++++++++++++++++------------
>  include/configs/tegra-common-post.h |    4 ++
>  2 files changed, 58 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/mmc/tegra_mmc.c b/drivers/mmc/tegra_mmc.c
> index 1fd5592..e5d55b0 100644
> --- a/drivers/mmc/tegra_mmc.c
> +++ b/drivers/mmc/tegra_mmc.c
> @@ -26,6 +26,7 @@
>  #include <asm/arch-tegra/clk_rst.h>
>  #include <asm/arch-tegra/tegra_mmc.h>
>  #include <mmc.h>
> +#include <bouncebuf.h>

The order seems wrong here - I think bouncebuf and mmc should go above
the asm/ ones, and bouncebuf should be first.

>
>  /* support 4 mmc hosts */
>  struct mmc mmc_dev[4];
> @@ -66,14 +67,34 @@ static void tegra_get_setup(struct mmc_host *host, int dev_index)
>         host->reg = (struct tegra_mmc *)host->base;
>  }
>
> -static void mmc_prepare_data(struct mmc_host *host, struct mmc_data *data)
> +static void mmc_prepare_data(struct mmc_host *host, struct mmc_data *data,
> +                               struct bounce_buffer_state *bbstate)
>  {
>         unsigned char ctrl;
> +       void *buf;
> +       uint8_t bbflags;
> +       size_t len;
> +
> +       if (data->flags & MMC_DATA_READ) {
> +               buf = data->dest;
> +               bbflags = GEN_BB_WRITE;
> +       } else {
> +               buf = (void *)data->src;
> +               bbflags = GEN_BB_READ;
> +       }
> +       len = data->blocks * data->blocksize;
> +
> +       bounce_buffer_start(bbstate, buf, len, bbflags);
> +
> +       debug("buf: %08X, data->blocks: %u, data->blocksize: %u\n",
> +               (u32)bbstate->bounce_buffer, data->blocks, data->blocksize);
>
> -       debug("data->dest: %08X, data->blocks: %u, data->blocksize: %u\n",
> -       (u32)data->dest, data->blocks, data->blocksize);
> +       if (data->flags & MMC_DATA_WRITE)
> +               flush_dcache_range((ulong)bbstate->bounce_buffer,
> +                                       (ulong)bbstate->bounce_buffer +
> +                                               bbstate->len_aligned);
>
> -       writel((u32)data->dest, &host->reg->sysad);
> +       writel((u32)bbstate->bounce_buffer, &host->reg->sysad);
>         /*
>          * DMASEL[4:3]
>          * 00 = Selects SDMA
> @@ -114,14 +135,6 @@ static void mmc_set_transfer_mode(struct mmc_host *host, struct mmc_data *data)
>         if (data->flags & MMC_DATA_READ)
>                 mode |= TEGRA_MMC_TRNMOD_DATA_XFER_DIR_SEL_READ;
>
> -       if (data->flags & MMC_DATA_WRITE) {
> -               if ((uintptr_t)data->src & (ARCH_DMA_MINALIGN - 1))
> -                       printf("Warning: unaligned write to %p may fail\n",
> -                              data->src);
> -               flush_dcache_range((ulong)data->src, (ulong)data->src +
> -                       data->blocks * data->blocksize);
> -       }
> -
>         writew(mode, &host->reg->trnmod);
>  }
>
> @@ -164,6 +177,9 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>         int result;
>         unsigned int mask = 0;
>         unsigned int retry = 0x100000;
> +       struct bounce_buffer_state bbstate;
> +       int ret;
> +
>         debug(" mmc_send_cmd called\n");
>
>         result = mmc_wait_inhibit(host, cmd, data, 10 /* ms */);
> @@ -172,7 +188,7 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>                 return result;
>
>         if (data)
> -               mmc_prepare_data(host, data);
> +               mmc_prepare_data(host, data, &bbstate);
>
>         debug("cmd->arg: %08x\n", cmd->cmdarg);
>         writel(cmd->cmdarg, &host->reg->argument);
> @@ -180,8 +196,10 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>         if (data)
>                 mmc_set_transfer_mode(host, data);
>
> -       if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY))
> -               return -1;
> +       if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY)) {
> +               ret = -1;
> +               goto cleanup;
> +       }

You might consider putting this body in a function so you don't need
these two lines everywhere below.

>
>         /*
>          * CMDREG
> @@ -228,19 +246,22 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>         if (i == retry) {
>                 printf("%s: waiting for status update\n", __func__);
>                 writel(mask, &host->reg->norintsts);
> -               return TIMEOUT;
> +               ret = TIMEOUT;
> +               goto cleanup;
>         }
>
>         if (mask & TEGRA_MMC_NORINTSTS_CMD_TIMEOUT) {
>                 /* Timeout Error */
>                 debug("timeout: %08x cmd %d\n", mask, cmd->cmdidx);
>                 writel(mask, &host->reg->norintsts);
> -               return TIMEOUT;
> +               ret = TIMEOUT;
> +               goto cleanup;
>         } else if (mask & TEGRA_MMC_NORINTSTS_ERR_INTERRUPT) {
>                 /* Error Interrupt */
>                 debug("error: %08x cmd %d\n", mask, cmd->cmdidx);
>                 writel(mask, &host->reg->norintsts);
> -               return -1;
> +               ret = -1;
> +               goto cleanup;
>         }
>
>         if (cmd->resp_type & MMC_RSP_PRESENT) {
> @@ -269,7 +290,8 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>                         if (i == retry) {
>                                 printf("%s: card is still busy\n", __func__);
>                                 writel(mask, &host->reg->norintsts);
> -                               return TIMEOUT;
> +                               ret = TIMEOUT;
> +                               goto cleanup;
>                         }
>
>                         cmd->response[0] = readl(&host->reg->rspreg0);
> @@ -291,7 +313,8 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>                                 writel(mask, &host->reg->norintsts);
>                                 printf("%s: error during transfer: 0x%08x\n",
>                                                 __func__, mask);
> -                               return -1;
> +                               ret = -1;
> +                               goto cleanup;
>                         } else if (mask & TEGRA_MMC_NORINTSTS_DMA_INTERRUPT) {
>                                 /*
>                                  * DMA Interrupt, restart the transfer where
> @@ -318,22 +341,26 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>                                        readl(&host->reg->norintstsen),
>                                        readl(&host->reg->norintsigen),
>                                        readl(&host->reg->prnsts));
> -                               return -1;
> +                               ret = -1;
> +                               goto cleanup;
>                         }
>                 }
>                 writel(mask, &host->reg->norintsts);
>                 if (data->flags & MMC_DATA_READ) {
> -                       if ((uintptr_t)data->dest & (ARCH_DMA_MINALIGN - 1))
> -                               printf("Warning: unaligned read from %p "
> -                                       "may fail\n", data->dest);
> -                       invalidate_dcache_range((ulong)data->dest,
> -                               (ulong)data->dest +
> -                                       data->blocks * data->blocksize);
> +                       invalidate_dcache_range((ulong)bbstate.bounce_buffer,
> +                                               (ulong)bbstate.bounce_buffer +
> +                                                       bbstate.len_aligned);
>                 }
> +               bounce_buffer_stop(&bbstate);
>         }
>
>         udelay(1000);
>         return 0;
> +
> +cleanup:
> +       if (data)
> +               bounce_buffer_stop(&bbstate);
> +       return ret;
>  }
>
>  static void mmc_change_clock(struct mmc_host *host, uint clock)
> diff --git a/include/configs/tegra-common-post.h b/include/configs/tegra-common-post.h
> index ab62e71..2d45933 100644
> --- a/include/configs/tegra-common-post.h
> +++ b/include/configs/tegra-common-post.h
> @@ -251,4 +251,8 @@
>
>  #endif /* CONFIG_SPL_BUILD */
>
> +#ifdef CONFIG_TEGRA_MMC
> +#define CONFIG_BOUNCE_BUFFER
> +#endif

Is there really any harm in just defining this always (say in the
tegra20-common.h)? The functions should be dropped if not used.

> +
>  #endif /* __TEGRA_COMMON_POST_H */
> --
> 1.7.0.4
>

Regards,
Simon
Stephen Warren Nov. 6, 2012, 6:50 p.m. UTC | #2
On 11/05/2012 05:00 PM, Simon Glass wrote:
> Hi Stephen,
> 
> On Mon, Nov 5, 2012 at 3:04 PM, Stephen Warren <swarren@wwwdotorg.org> wrote:
>> From: Stephen Warren <swarren@nvidia.com>
>>
>> Tegra's MMC driver does DMA, and hence needs cache-aligned buffers. In
>> some cases (e.g. user load commands) this cannot be guaranteed by callers
>> of the MMC APIs. To solve this, modify the Tegra MMC driver to use the
>> new bounce_buffer_*() APIs.

>> diff --git a/drivers/mmc/tegra_mmc.c b/drivers/mmc/tegra_mmc.c

>>  #include <asm/arch-tegra/clk_rst.h>
>>  #include <asm/arch-tegra/tegra_mmc.h>
>>  #include <mmc.h>
>> +#include <bouncebuf.h>
> 
> The order seems wrong here - I think bouncebuf and mmc should go above
> the asm/ ones, and bouncebuf should be first.

Is there a defined order for header files? I suppose I should try and
read and remember more documentation!

>> @@ -180,8 +196,10 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>>         if (data)
>>                 mmc_set_transfer_mode(host, data);
>>
>> -       if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY))
>> -               return -1;
>> +       if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY)) {
>> +               ret = -1;
>> +               goto cleanup;
>> +       }
> 
> You might consider putting this body in a function so you don't need
> these two lines everywhere below.

I'm not quite sure how a function would work here; a function can't
really goto. Do you mean a macro? I'd tend to this a macro would
obfuscate the pretty simple code.

Oh, perhaps you mean having a new top-level function that does:

bounce_buffer_start();
calls a function to do all the work
bounce_buffer_stop();

That would certainly simplify the patch.

>> diff --git a/include/configs/tegra-common-post.h b/include/configs/tegra-common-post.h

>> +#ifdef CONFIG_TEGRA_MMC
>> +#define CONFIG_BOUNCE_BUFFER
>> +#endif
> 
> Is there really any harm in just defining this always (say in the
> tegra20-common.h)? The functions should be dropped if not used.

I suppose it'd be fine to always enable this since the linker should
drop the functions when not referenced. Of course, that relies on
bouncebuf.o not having any global side-effects (e.g. registering things
via custom linker segments that are always pulled in). The code above
represents the actual dependency too; hopefully one day U-Boot will
sprout Kconfig, and that logic can be replaced by:

config TEGRA_MMC
    select BOUNCE_BUFFER
Simon Glass Nov. 6, 2012, 7:03 p.m. UTC | #3
Hi Stephen,

On Tue, Nov 6, 2012 at 10:50 AM, Stephen Warren <swarren@wwwdotorg.org> wrote:
> On 11/05/2012 05:00 PM, Simon Glass wrote:
>> Hi Stephen,
>>
>> On Mon, Nov 5, 2012 at 3:04 PM, Stephen Warren <swarren@wwwdotorg.org> wrote:
>>> From: Stephen Warren <swarren@nvidia.com>
>>>
>>> Tegra's MMC driver does DMA, and hence needs cache-aligned buffers. In
>>> some cases (e.g. user load commands) this cannot be guaranteed by callers
>>> of the MMC APIs. To solve this, modify the Tegra MMC driver to use the
>>> new bounce_buffer_*() APIs.
>
>>> diff --git a/drivers/mmc/tegra_mmc.c b/drivers/mmc/tegra_mmc.c
>
>>>  #include <asm/arch-tegra/clk_rst.h>
>>>  #include <asm/arch-tegra/tegra_mmc.h>
>>>  #include <mmc.h>
>>> +#include <bouncebuf.h>
>>
>> The order seems wrong here - I think bouncebuf and mmc should go above
>> the asm/ ones, and bouncebuf should be first.
>
> Is there a defined order for header files? I suppose I should try and
> read and remember more documentation!

According to Mike, "the order should generally be:
        - stuff in include/
        - stuff in sys/
        - stuff in linux/
        - stuff in asm/
each sub-region can be sorted, but i don't think we should go against the
subdir rule"

>
>>> @@ -180,8 +196,10 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>>>         if (data)
>>>                 mmc_set_transfer_mode(host, data);
>>>
>>> -       if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY))
>>> -               return -1;
>>> +       if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY)) {
>>> +               ret = -1;
>>> +               goto cleanup;
>>> +       }
>>
>> You might consider putting this body in a function so you don't need
>> these two lines everywhere below.
>
> I'm not quite sure how a function would work here; a function can't
> really goto. Do you mean a macro? I'd tend to this a macro would
> obfuscate the pretty simple code.
>
> Oh, perhaps you mean having a new top-level function that does:
>
> bounce_buffer_start();
> calls a function to do all the work
> bounce_buffer_stop();
>
> That would certainly simplify the patch.

Yes that's what I meant.

>
>>> diff --git a/include/configs/tegra-common-post.h b/include/configs/tegra-common-post.h
>
>>> +#ifdef CONFIG_TEGRA_MMC
>>> +#define CONFIG_BOUNCE_BUFFER
>>> +#endif
>>
>> Is there really any harm in just defining this always (say in the
>> tegra20-common.h)? The functions should be dropped if not used.
>
> I suppose it'd be fine to always enable this since the linker should
> drop the functions when not referenced. Of course, that relies on
> bouncebuf.o not having any global side-effects (e.g. registering things
> via custom linker segments that are always pulled in). The code above
> represents the actual dependency too; hopefully one day U-Boot will
> sprout Kconfig, and that logic can be replaced by:
>
> config TEGRA_MMC
>     select BOUNCE_BUFFER

Yes, maybe it isn't that simple in general, but it seems that it might
be safe with Tegra at least.

Regards,
Simon
diff mbox

Patch

diff --git a/drivers/mmc/tegra_mmc.c b/drivers/mmc/tegra_mmc.c
index 1fd5592..e5d55b0 100644
--- a/drivers/mmc/tegra_mmc.c
+++ b/drivers/mmc/tegra_mmc.c
@@ -26,6 +26,7 @@ 
 #include <asm/arch-tegra/clk_rst.h>
 #include <asm/arch-tegra/tegra_mmc.h>
 #include <mmc.h>
+#include <bouncebuf.h>
 
 /* support 4 mmc hosts */
 struct mmc mmc_dev[4];
@@ -66,14 +67,34 @@  static void tegra_get_setup(struct mmc_host *host, int dev_index)
 	host->reg = (struct tegra_mmc *)host->base;
 }
 
-static void mmc_prepare_data(struct mmc_host *host, struct mmc_data *data)
+static void mmc_prepare_data(struct mmc_host *host, struct mmc_data *data,
+				struct bounce_buffer_state *bbstate)
 {
 	unsigned char ctrl;
+	void *buf;
+	uint8_t bbflags;
+	size_t len;
+
+	if (data->flags & MMC_DATA_READ) {
+		buf = data->dest;
+		bbflags = GEN_BB_WRITE;
+	} else {
+		buf = (void *)data->src;
+		bbflags = GEN_BB_READ;
+	}
+	len = data->blocks * data->blocksize;
+
+	bounce_buffer_start(bbstate, buf, len, bbflags);
+
+	debug("buf: %08X, data->blocks: %u, data->blocksize: %u\n",
+		(u32)bbstate->bounce_buffer, data->blocks, data->blocksize);
 
-	debug("data->dest: %08X, data->blocks: %u, data->blocksize: %u\n",
-	(u32)data->dest, data->blocks, data->blocksize);
+	if (data->flags & MMC_DATA_WRITE)
+		flush_dcache_range((ulong)bbstate->bounce_buffer,
+					(ulong)bbstate->bounce_buffer +
+						bbstate->len_aligned);
 
-	writel((u32)data->dest, &host->reg->sysad);
+	writel((u32)bbstate->bounce_buffer, &host->reg->sysad);
 	/*
 	 * DMASEL[4:3]
 	 * 00 = Selects SDMA
@@ -114,14 +135,6 @@  static void mmc_set_transfer_mode(struct mmc_host *host, struct mmc_data *data)
 	if (data->flags & MMC_DATA_READ)
 		mode |= TEGRA_MMC_TRNMOD_DATA_XFER_DIR_SEL_READ;
 
-	if (data->flags & MMC_DATA_WRITE) {
-		if ((uintptr_t)data->src & (ARCH_DMA_MINALIGN - 1))
-			printf("Warning: unaligned write to %p may fail\n",
-			       data->src);
-		flush_dcache_range((ulong)data->src, (ulong)data->src +
-			data->blocks * data->blocksize);
-	}
-
 	writew(mode, &host->reg->trnmod);
 }
 
@@ -164,6 +177,9 @@  static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 	int result;
 	unsigned int mask = 0;
 	unsigned int retry = 0x100000;
+	struct bounce_buffer_state bbstate;
+	int ret;
+
 	debug(" mmc_send_cmd called\n");
 
 	result = mmc_wait_inhibit(host, cmd, data, 10 /* ms */);
@@ -172,7 +188,7 @@  static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 		return result;
 
 	if (data)
-		mmc_prepare_data(host, data);
+		mmc_prepare_data(host, data, &bbstate);
 
 	debug("cmd->arg: %08x\n", cmd->cmdarg);
 	writel(cmd->cmdarg, &host->reg->argument);
@@ -180,8 +196,10 @@  static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 	if (data)
 		mmc_set_transfer_mode(host, data);
 
-	if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY))
-		return -1;
+	if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY)) {
+		ret = -1;
+		goto cleanup;
+	}
 
 	/*
 	 * CMDREG
@@ -228,19 +246,22 @@  static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 	if (i == retry) {
 		printf("%s: waiting for status update\n", __func__);
 		writel(mask, &host->reg->norintsts);
-		return TIMEOUT;
+		ret = TIMEOUT;
+		goto cleanup;
 	}
 
 	if (mask & TEGRA_MMC_NORINTSTS_CMD_TIMEOUT) {
 		/* Timeout Error */
 		debug("timeout: %08x cmd %d\n", mask, cmd->cmdidx);
 		writel(mask, &host->reg->norintsts);
-		return TIMEOUT;
+		ret = TIMEOUT;
+		goto cleanup;
 	} else if (mask & TEGRA_MMC_NORINTSTS_ERR_INTERRUPT) {
 		/* Error Interrupt */
 		debug("error: %08x cmd %d\n", mask, cmd->cmdidx);
 		writel(mask, &host->reg->norintsts);
-		return -1;
+		ret = -1;
+		goto cleanup;
 	}
 
 	if (cmd->resp_type & MMC_RSP_PRESENT) {
@@ -269,7 +290,8 @@  static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 			if (i == retry) {
 				printf("%s: card is still busy\n", __func__);
 				writel(mask, &host->reg->norintsts);
-				return TIMEOUT;
+				ret = TIMEOUT;
+				goto cleanup;
 			}
 
 			cmd->response[0] = readl(&host->reg->rspreg0);
@@ -291,7 +313,8 @@  static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 				writel(mask, &host->reg->norintsts);
 				printf("%s: error during transfer: 0x%08x\n",
 						__func__, mask);
-				return -1;
+				ret = -1;
+				goto cleanup;
 			} else if (mask & TEGRA_MMC_NORINTSTS_DMA_INTERRUPT) {
 				/*
 				 * DMA Interrupt, restart the transfer where
@@ -318,22 +341,26 @@  static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 				       readl(&host->reg->norintstsen),
 				       readl(&host->reg->norintsigen),
 				       readl(&host->reg->prnsts));
-				return -1;
+				ret = -1;
+				goto cleanup;
 			}
 		}
 		writel(mask, &host->reg->norintsts);
 		if (data->flags & MMC_DATA_READ) {
-			if ((uintptr_t)data->dest & (ARCH_DMA_MINALIGN - 1))
-				printf("Warning: unaligned read from %p "
-					"may fail\n", data->dest);
-			invalidate_dcache_range((ulong)data->dest,
-				(ulong)data->dest +
-					data->blocks * data->blocksize);
+			invalidate_dcache_range((ulong)bbstate.bounce_buffer,
+						(ulong)bbstate.bounce_buffer +
+							bbstate.len_aligned);
 		}
+		bounce_buffer_stop(&bbstate);
 	}
 
 	udelay(1000);
 	return 0;
+
+cleanup:
+	if (data)
+		bounce_buffer_stop(&bbstate);
+	return ret;
 }
 
 static void mmc_change_clock(struct mmc_host *host, uint clock)
diff --git a/include/configs/tegra-common-post.h b/include/configs/tegra-common-post.h
index ab62e71..2d45933 100644
--- a/include/configs/tegra-common-post.h
+++ b/include/configs/tegra-common-post.h
@@ -251,4 +251,8 @@ 
 
 #endif /* CONFIG_SPL_BUILD */
 
+#ifdef CONFIG_TEGRA_MMC
+#define CONFIG_BOUNCE_BUFFER
+#endif
+
 #endif /* __TEGRA_COMMON_POST_H */