diff mbox series

mmc: rockchip_sdhci: Use bounce buffer in SPL to fix read performance

Message ID 20240205182736.2770679-1-jonas@kwiboo.se
State Deferred
Delegated to: Kever Yang
Headers show
Series mmc: rockchip_sdhci: Use bounce buffer in SPL to fix read performance | expand

Commit Message

Jonas Karlman Feb. 5, 2024, 6:27 p.m. UTC
The commit 3b804b370dd8 ("mmc: rockchip_sdhci: Disable DMA mode using a
device tree property") and commit 2cc6cde647e2 ("mmc: rockchip_sdhci:
Limit number of blocks read in a single command") implemented a
workaround to fix loading a part of TF-A into SRAM from eMMC in SPL.

This has resulted in very slow read performance of FIT from eMMC in SPL
on RK3588.

Change to make use of a bounce buffer to significantly improve the read
performance when malloc_limit is large enough, and use PIO mode as a
fallback.

Also update the size of align_buffer to use SDHCI_DEFAULT_BOUNDARY_SIZE
instead of a hardcoded 512 * 1024.

Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
---
- This has been tested on RK3588 boards with malloc_limit above 512 KiB
  and on RK3399 boards with less than 512 KiB malloc_limit.
- Together with HS200 mode [1] and D-cache [2] enabled in SPL, loading of
  FIT can be very fast on RK3588.

[1] https://patchwork.ozlabs.org/patch/1895043/
[2] https://patchwork.ozlabs.org/patch/1891025/
---
 drivers/mmc/rockchip_sdhci.c | 20 ++++++++++++++++----
 drivers/mmc/sdhci.c          |  2 +-
 2 files changed, 17 insertions(+), 5 deletions(-)

Comments

Jonas Karlman Feb. 18, 2024, 10:03 p.m. UTC | #1
Hi all,

Please ignore this patch for now.

Reading more than 4 blocks with a single CMD18 command in PIO mode seems
to work much better after the HS200 mode series [1] together with a fix
so that the stack and simple malloc heap do not overlap.

I will send a different patch in the next few days.

Regards,
Jonas

On 2024-02-05 19:27, Jonas Karlman wrote:
> The commit 3b804b370dd8 ("mmc: rockchip_sdhci: Disable DMA mode using a
> device tree property") and commit 2cc6cde647e2 ("mmc: rockchip_sdhci:
> Limit number of blocks read in a single command") implemented a
> workaround to fix loading a part of TF-A into SRAM from eMMC in SPL.
> 
> This has resulted in very slow read performance of FIT from eMMC in SPL
> on RK3588.
> 
> Change to make use of a bounce buffer to significantly improve the read
> performance when malloc_limit is large enough and use PIO mode as fall
> back.
> 
> Also update the size of align_buffer to use SDHCI_DEFAULT_BOUNDARY_SIZE
> instead of a hardcoded 512 * 1024.
> 
> Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
> ---
> - This has been tested on RK3588 boards with malloc_limit above 512 KiB
>   and on RK3399 boards with less than 512 KiB malloc_limit.
> - Together with HS200 mode [1] and D-cache [2] enabled in SPL loading of
>   FIT can be very fast on RK3588.
> 
> [1] https://patchwork.ozlabs.org/patch/1895043/
> [2] https://patchwork.ozlabs.org/patch/1891025/
> ---
>  drivers/mmc/rockchip_sdhci.c | 20 ++++++++++++++++----
>  drivers/mmc/sdhci.c          |  2 +-
>  2 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/mmc/rockchip_sdhci.c b/drivers/mmc/rockchip_sdhci.c
> index 706fb1235796..528544b1eff8 100644
> --- a/drivers/mmc/rockchip_sdhci.c
> +++ b/drivers/mmc/rockchip_sdhci.c
> @@ -21,6 +21,9 @@
>  #include <syscon.h>
>  #include <asm/arch-rockchip/clock.h>
>  #include <asm/arch-rockchip/hardware.h>
> +#include <asm/global_data.h>
> +
> +DECLARE_GLOBAL_DATA_PTR;
>  
>  /* DWCMSHC specific Mode Select value */
>  #define DWCMSHC_CTRL_HS400		0x7
> @@ -591,12 +594,21 @@ static int rockchip_sdhci_probe(struct udevice *dev)
>  		return ret;
>  
>  	/*
> -	 * Disable use of DMA and force use of PIO mode in SPL to fix an issue
> -	 * where loading part of TF-A into SRAM using DMA silently fails.
> +	 * Use a bounce buffer or PIO mode in SPL to fix an issue where loading
> +	 * part of TF-A directly into SRAM using DMA silently fails.
>  	 */
>  	if (IS_ENABLED(CONFIG_SPL_BUILD) &&
> -	    dev_read_bool(dev, "u-boot,spl-fifo-mode"))
> -		host->flags &= ~USE_DMA;
> +	    dev_read_bool(dev, "u-boot,spl-fifo-mode")) {
> +		if (CONFIG_IS_ENABLED(SYS_MALLOC_F) &&
> +		    gd->malloc_limit > SDHCI_DEFAULT_BOUNDARY_SIZE) {
> +			cfg->b_max = SDHCI_DEFAULT_BOUNDARY_SIZE /
> +				     MMC_MAX_BLOCK_LEN;
> +			host->force_align_buffer = true;
> +			host->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR;
> +		} else {
> +			host->flags &= ~USE_DMA;
> +		}
> +	}
>  
>  	/*
>  	 * Reading more than 4 blocks with a single CMD18 command in PIO mode
> diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c
> index 0178ed8a11e1..cc855dc7ba3d 100644
> --- a/drivers/mmc/sdhci.c
> +++ b/drivers/mmc/sdhci.c
> @@ -736,7 +736,7 @@ static int sdhci_init(struct mmc *mmc)
>  	host->force_align_buffer = true;
>  #else
>  	if (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) {
> -		host->align_buffer = memalign(8, 512 * 1024);
> +		host->align_buffer = memalign(8, SDHCI_DEFAULT_BOUNDARY_SIZE);
>  		if (!host->align_buffer) {
>  			printf("%s: Aligned buffer alloc failed!!!\n",
>  			       __func__);
diff mbox series

Patch

diff --git a/drivers/mmc/rockchip_sdhci.c b/drivers/mmc/rockchip_sdhci.c
index 706fb1235796..528544b1eff8 100644
--- a/drivers/mmc/rockchip_sdhci.c
+++ b/drivers/mmc/rockchip_sdhci.c
@@ -21,6 +21,9 @@ 
 #include <syscon.h>
 #include <asm/arch-rockchip/clock.h>
 #include <asm/arch-rockchip/hardware.h>
+#include <asm/global_data.h>
+
+DECLARE_GLOBAL_DATA_PTR;
 
 /* DWCMSHC specific Mode Select value */
 #define DWCMSHC_CTRL_HS400		0x7
@@ -591,12 +594,21 @@  static int rockchip_sdhci_probe(struct udevice *dev)
 		return ret;
 
 	/*
-	 * Disable use of DMA and force use of PIO mode in SPL to fix an issue
-	 * where loading part of TF-A into SRAM using DMA silently fails.
+	 * Use a bounce buffer or PIO mode in SPL to fix an issue where loading
+	 * part of TF-A directly into SRAM using DMA silently fails.
 	 */
 	if (IS_ENABLED(CONFIG_SPL_BUILD) &&
-	    dev_read_bool(dev, "u-boot,spl-fifo-mode"))
-		host->flags &= ~USE_DMA;
+	    dev_read_bool(dev, "u-boot,spl-fifo-mode")) {
+		if (CONFIG_IS_ENABLED(SYS_MALLOC_F) &&
+		    gd->malloc_limit > SDHCI_DEFAULT_BOUNDARY_SIZE) {
+			cfg->b_max = SDHCI_DEFAULT_BOUNDARY_SIZE /
+				     MMC_MAX_BLOCK_LEN;
+			host->force_align_buffer = true;
+			host->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR;
+		} else {
+			host->flags &= ~USE_DMA;
+		}
+	}
 
 	/*
 	 * Reading more than 4 blocks with a single CMD18 command in PIO mode
diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c
index 0178ed8a11e1..cc855dc7ba3d 100644
--- a/drivers/mmc/sdhci.c
+++ b/drivers/mmc/sdhci.c
@@ -736,7 +736,7 @@  static int sdhci_init(struct mmc *mmc)
 	host->force_align_buffer = true;
 #else
 	if (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) {
-		host->align_buffer = memalign(8, 512 * 1024);
+		host->align_buffer = memalign(8, SDHCI_DEFAULT_BOUNDARY_SIZE);
 		if (!host->align_buffer) {
 			printf("%s: Aligned buffer alloc failed!!!\n",
 			       __func__);