diff --git a/drivers/mmc/tegra_mmc.c b/drivers/mmc/tegra_mmc.c
index 1fd5592..e5d55b0 100644
--- a/drivers/mmc/tegra_mmc.c
+++ b/drivers/mmc/tegra_mmc.c
@@ -26,6 +26,7 @@
 #include <asm/arch-tegra/clk_rst.h>
 #include <asm/arch-tegra/tegra_mmc.h>
 #include <mmc.h>
+#include <bouncebuf.h>
 
 /* support 4 mmc hosts */
 struct mmc mmc_dev[4];
@@ -66,14 +67,34 @@ static void tegra_get_setup(struct mmc_host *host, int dev_index)
 	host->reg = (struct tegra_mmc *)host->base;
 }
 
-static void mmc_prepare_data(struct mmc_host *host, struct mmc_data *data)
+static void mmc_prepare_data(struct mmc_host *host, struct mmc_data *data,
+				struct bounce_buffer_state *bbstate)
 {
 	unsigned char ctrl;
+	void *buf;
+	uint8_t bbflags;
+	size_t len;
+
+	if (data->flags & MMC_DATA_READ) {
+		buf = data->dest;
+		bbflags = GEN_BB_WRITE;
+	} else {
+		buf = (void *)data->src;
+		bbflags = GEN_BB_READ;
+	}
+	len = data->blocks * data->blocksize;
+
+	bounce_buffer_start(bbstate, buf, len, bbflags);
+
+	debug("buf: %08X, data->blocks: %u, data->blocksize: %u\n",
+		(u32)bbstate->bounce_buffer, data->blocks, data->blocksize);
 
-	debug("data->dest: %08X, data->blocks: %u, data->blocksize: %u\n",
-	(u32)data->dest, data->blocks, data->blocksize);
+	if (data->flags & MMC_DATA_WRITE)
+		flush_dcache_range((ulong)bbstate->bounce_buffer,
+					(ulong)bbstate->bounce_buffer +
+						bbstate->len_aligned);
 
-	writel((u32)data->dest, &host->reg->sysad);
+	writel((u32)bbstate->bounce_buffer, &host->reg->sysad);
 	/*
 	 * DMASEL[4:3]
 	 * 00 = Selects SDMA
@@ -114,14 +135,6 @@ static void mmc_set_transfer_mode(struct mmc_host *host, struct mmc_data *data)
 	if (data->flags & MMC_DATA_READ)
 		mode |= TEGRA_MMC_TRNMOD_DATA_XFER_DIR_SEL_READ;
 
-	if (data->flags & MMC_DATA_WRITE) {
-		if ((uintptr_t)data->src & (ARCH_DMA_MINALIGN - 1))
-			printf("Warning: unaligned write to %p may fail\n",
-			       data->src);
-		flush_dcache_range((ulong)data->src, (ulong)data->src +
-			data->blocks * data->blocksize);
-	}
-
 	writew(mode, &host->reg->trnmod);
 }
 
@@ -164,6 +177,9 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 	int result;
 	unsigned int mask = 0;
 	unsigned int retry = 0x100000;
+	struct bounce_buffer_state bbstate;
+	int ret;
+
 	debug(" mmc_send_cmd called\n");
 
 	result = mmc_wait_inhibit(host, cmd, data, 10 /* ms */);
@@ -172,7 +188,7 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 		return result;
 
 	if (data)
-		mmc_prepare_data(host, data);
+		mmc_prepare_data(host, data, &bbstate);
 
 	debug("cmd->arg: %08x\n", cmd->cmdarg);
 	writel(cmd->cmdarg, &host->reg->argument);
@@ -180,8 +196,10 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 	if (data)
 		mmc_set_transfer_mode(host, data);
 
-	if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY))
-		return -1;
+	if ((cmd->resp_type & MMC_RSP_136) && (cmd->resp_type & MMC_RSP_BUSY)) {
+		ret = -1;
+		goto cleanup;
+	}
 
 	/*
 	 * CMDREG
@@ -228,19 +246,22 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 	if (i == retry) {
 		printf("%s: waiting for status update\n", __func__);
 		writel(mask, &host->reg->norintsts);
-		return TIMEOUT;
+		ret = TIMEOUT;
+		goto cleanup;
 	}
 
 	if (mask & TEGRA_MMC_NORINTSTS_CMD_TIMEOUT) {
 		/* Timeout Error */
 		debug("timeout: %08x cmd %d\n", mask, cmd->cmdidx);
 		writel(mask, &host->reg->norintsts);
-		return TIMEOUT;
+		ret = TIMEOUT;
+		goto cleanup;
 	} else if (mask & TEGRA_MMC_NORINTSTS_ERR_INTERRUPT) {
 		/* Error Interrupt */
 		debug("error: %08x cmd %d\n", mask, cmd->cmdidx);
 		writel(mask, &host->reg->norintsts);
-		return -1;
+		ret = -1;
+		goto cleanup;
 	}
 
 	if (cmd->resp_type & MMC_RSP_PRESENT) {
@@ -269,7 +290,8 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 			if (i == retry) {
 				printf("%s: card is still busy\n", __func__);
 				writel(mask, &host->reg->norintsts);
-				return TIMEOUT;
+				ret = TIMEOUT;
+				goto cleanup;
 			}
 
 			cmd->response[0] = readl(&host->reg->rspreg0);
@@ -291,7 +313,8 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 				writel(mask, &host->reg->norintsts);
 				printf("%s: error during transfer: 0x%08x\n",
 						__func__, mask);
-				return -1;
+				ret = -1;
+				goto cleanup;
 			} else if (mask & TEGRA_MMC_NORINTSTS_DMA_INTERRUPT) {
 				/*
 				 * DMA Interrupt, restart the transfer where
@@ -318,22 +341,26 @@ static int mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 				       readl(&host->reg->norintstsen),
 				       readl(&host->reg->norintsigen),
 				       readl(&host->reg->prnsts));
-				return -1;
+				ret = -1;
+				goto cleanup;
 			}
 		}
 		writel(mask, &host->reg->norintsts);
 		if (data->flags & MMC_DATA_READ) {
-			if ((uintptr_t)data->dest & (ARCH_DMA_MINALIGN - 1))
-				printf("Warning: unaligned read from %p "
-					"may fail\n", data->dest);
-			invalidate_dcache_range((ulong)data->dest,
-				(ulong)data->dest +
-					data->blocks * data->blocksize);
+			invalidate_dcache_range((ulong)bbstate.bounce_buffer,
+						(ulong)bbstate.bounce_buffer +
+							bbstate.len_aligned);
 		}
+		bounce_buffer_stop(&bbstate);
 	}
 
 	udelay(1000);
 	return 0;
+
+cleanup:
+	if (data)
+		bounce_buffer_stop(&bbstate);
+	return ret;
 }
 
 static void mmc_change_clock(struct mmc_host *host, uint clock)
diff --git a/include/configs/tegra-common-post.h b/include/configs/tegra-common-post.h
index ab62e71..2d45933 100644
--- a/include/configs/tegra-common-post.h
+++ b/include/configs/tegra-common-post.h
@@ -251,4 +251,8 @@
 
 #endif /* CONFIG_SPL_BUILD */
 
+#ifdef CONFIG_TEGRA_MMC
+#define CONFIG_BOUNCE_BUFFER
+#endif
+
 #endif /* __TEGRA_COMMON_POST_H */
