diff mbox series

Revert "lib: sparse: Make CHUNK_TYPE_RAW buffer aligned"

Message ID 20221118121323.4009193-1-gary.bisson@boundarydevices.com
State Superseded
Delegated to: Tom Rini
Headers show
Series Revert "lib: sparse: Make CHUNK_TYPE_RAW buffer aligned" | expand

Commit Message

Gary Bisson Nov. 18, 2022, 12:13 p.m. UTC
This reverts commit 62649165cb02ab95b57360bb362886935f524f26.

The patch decreased the write performance quite a bit.
Here is an example on an i.MX 8M Quad platform.
- Before the revert:
Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.113s]
Writing 'vendor'                                   OKAY [128.335s]
Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  0.802s]
Writing 'vendor'                                   OKAY [ 27.902s]
- After the revert:
Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.310s]
Writing 'vendor'                                   OKAY [ 18.041s]
Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  1.244s]
Writing 'vendor'                                   OKAY [  2.663s]

Considering that the reverted patch only copies the buffer around to avoid a
warning message about misaligned buffers, let's keep the best performance.

Signed-off-by: Gary Bisson <gary.bisson@boundarydevices.com>
Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
---
 lib/image-sparse.c | 69 ++++++----------------------------------------
 1 file changed, 8 insertions(+), 61 deletions(-)

Comments

Sean Anderson Nov. 18, 2022, 3:36 p.m. UTC | #1
On 11/18/22 07:13, Gary Bisson wrote:
> This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
> 
> The patch decreased the write performance quite a bit.
> Here is an example on an i.MX 8M Quad platform.
> - Before the revert:
> Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.113s]
> Writing 'vendor'                                   OKAY [128.335s]
> Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  0.802s]
> Writing 'vendor'                                   OKAY [ 27.902s]
> - After the revert:
> Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.310s]
> Writing 'vendor'                                   OKAY [ 18.041s]
> Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  1.244s]
> Writing 'vendor'                                   OKAY [  2.663s]
> 
> Considering that the patch only moves buffer around to avoid a warning
> message about misaligned buffers, let's keep the best performances.

So what is the point of this warning?

--Sean

> Signed-off-by: Gary Bisson <gary.bisson@boundarydevices.com>
> Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
> ---
>  lib/image-sparse.c | 69 ++++++----------------------------------------
>  1 file changed, 8 insertions(+), 61 deletions(-)
> 
> diff --git a/lib/image-sparse.c b/lib/image-sparse.c
> index 5ec0f94ab3e..d80fdbbf58e 100644
> --- a/lib/image-sparse.c
> +++ b/lib/image-sparse.c
> @@ -46,66 +46,9 @@
>  #include <asm/cache.h>
>  
>  #include <linux/math64.h>
> -#include <linux/err.h>
>  
>  static void default_log(const char *ignored, char *response) {}
>  
> -static lbaint_t write_sparse_chunk_raw(struct sparse_storage *info,
> -				       lbaint_t blk, lbaint_t blkcnt,
> -				       void *data,
> -				       char *response)
> -{
> -	lbaint_t n = blkcnt, write_blks, blks = 0, aligned_buf_blks = 100;
> -	uint32_t *aligned_buf = NULL;
> -
> -	if (CONFIG_IS_ENABLED(SYS_DCACHE_OFF)) {
> -		write_blks = info->write(info, blk, n, data);
> -		if (write_blks < n)
> -			goto write_fail;
> -
> -		return write_blks;
> -	}
> -
> -	aligned_buf = memalign(ARCH_DMA_MINALIGN, info->blksz * aligned_buf_blks);
> -	if (!aligned_buf) {
> -		info->mssg("Malloc failed for: CHUNK_TYPE_RAW", response);
> -		return -ENOMEM;
> -	}
> -
> -	while (blkcnt > 0) {
> -		n = min(aligned_buf_blks, blkcnt);
> -		memcpy(aligned_buf, data, n * info->blksz);
> -
> -		/* write_blks might be > n due to NAND bad-blocks */
> -		write_blks = info->write(info, blk + blks, n, aligned_buf);
> -		if (write_blks < n) {
> -			free(aligned_buf);
> -			goto write_fail;
> -		}
> -
> -		blks += write_blks;
> -		data += n * info->blksz;
> -		blkcnt -= n;
> -	}
> -
> -	free(aligned_buf);
> -	return blks;
> -
> -write_fail:
> -	if (IS_ERR_VALUE(write_blks)) {
> -		printf("%s: Write failed, block #" LBAFU " [" LBAFU "] (%lld)\n",
> -		       __func__, blk + blks, n, (long long)write_blks);
> -		info->mssg("flash write failure", response);
> -		return write_blks;
> -	}
> -
> -	/* write_blks < n */
> -	printf("%s: Write failed, block #" LBAFU " [" LBAFU "]\n",
> -	       __func__, blk + blks, n);
> -	info->mssg("flash write failure(incomplete)", response);
> -	return -1;
> -}
> -
>  int write_sparse_image(struct sparse_storage *info,
>  		       const char *part_name, void *data, char *response)
>  {
> @@ -209,11 +152,15 @@ int write_sparse_image(struct sparse_storage *info,
>  				return -1;
>  			}
>  
> -			blks = write_sparse_chunk_raw(info, blk, blkcnt,
> -						      data, response);
> -			if (blks < 0)
> +			blks = info->write(info, blk, blkcnt, data);
> +			/* blks might be > blkcnt (eg. NAND bad-blocks) */
> +			if (blks < blkcnt) {
> +				printf("%s: %s" LBAFU " [" LBAFU "]\n",
> +				       __func__, "Write failed, block #",
> +				       blk, blks);
> +				info->mssg("flash write failure", response);
>  				return -1;
> -
> +			}
>  			blk += blks;
>  			bytes_written += ((u64)blkcnt) * info->blksz;
>  			total_blocks += chunk_header->chunk_sz;
Gary Bisson Nov. 21, 2022, 2:50 p.m. UTC | #2
Hi,

On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
> On 11/18/22 07:13, Gary Bisson wrote:
> > This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
> > 
> > The patch decreased the write performance quite a bit.
> > Here is an example on an i.MX 8M Quad platform.
> > - Before the revert:
> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.113s]
> > Writing 'vendor'                                   OKAY [128.335s]
> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  0.802s]
> > Writing 'vendor'                                   OKAY [ 27.902s]
> > - After the revert:
> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.310s]
> > Writing 'vendor'                                   OKAY [ 18.041s]
> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  1.244s]
> > Writing 'vendor'                                   OKAY [  2.663s]
> > 
> > Considering that the patch only moves buffer around to avoid a warning
> > message about misaligned buffers, let's keep the best performances.
> 
> So what is the point of this warning?

Well, the warning does say something true: the cache operation is not
aligned. Better ask Simon, as he's the one who changed the print from a
debug to warn_non_spl one:
bcc53bf0958 arm: Show cache warnings in U-Boot proper only

BTW, in my case I couldn't see the misaligned messages, yet I saw the
performance hit described above.

Regards,
Gary
Sean Anderson Nov. 21, 2022, 3:09 p.m. UTC | #3
On 11/21/22 09:50, Gary Bisson wrote:
> Hi,
> 
> On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
>> On 11/18/22 07:13, Gary Bisson wrote:
>> > This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
>> > 
>> > The patch decreased the write performance quite a bit.
>> > Here is an example on an i.MX 8M Quad platform.
>> > - Before the revert:
>> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.113s]
>> > Writing 'vendor'                                   OKAY [128.335s]
>> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  0.802s]
>> > Writing 'vendor'                                   OKAY [ 27.902s]
>> > - After the revert:
>> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.310s]
>> > Writing 'vendor'                                   OKAY [ 18.041s]
>> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  1.244s]
>> > Writing 'vendor'                                   OKAY [  2.663s]
>> > 
>> > Considering that the patch only moves buffer around to avoid a warning
>> > message about misaligned buffers, let's keep the best performances.
>> 
>> So what is the point of this warning?
> 
> Well the warning does say something true that the cache operation is not
> aligned. Better ask Simon as he's the one who changed the print from a
> debug to warn_non_spl one:
> bcc53bf0958 arm: Show cache warnings in U-Boot proper only
> 
> BTW, in my case I couldn't see the misaligned messages, yet I saw the
> performance hit described above.

Maybe it is better to keep this as a Kconfig? Some arches may support
unaligned access but others may not. I wonder if we have something like
this already.

--Sean
Mattijs Korpershoek June 16, 2023, 11:56 a.m. UTC | #4
Hi Gary, Sean,

On lun., nov. 21, 2022 at 10:09, Sean Anderson <sean.anderson@seco.com> wrote:

> On 11/21/22 09:50, Gary Bisson wrote:
>> Hi,
>> 
>> On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
>>> On 11/18/22 07:13, Gary Bisson wrote:
>>> > This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
>>> > 
>>> > The patch decreased the write performance quite a bit.
>>> > Here is an example on an i.MX 8M Quad platform.
>>> > - Before the revert:
>>> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.113s]
>>> > Writing 'vendor'                                   OKAY [128.335s]
>>> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  0.802s]
>>> > Writing 'vendor'                                   OKAY [ 27.902s]
>>> > - After the revert:
>>> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.310s]
>>> > Writing 'vendor'                                   OKAY [ 18.041s]
>>> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  1.244s]
>>> > Writing 'vendor'                                   OKAY [  2.663s]
>>> > 
>>> > Considering that the patch only moves buffer around to avoid a warning
>>> > message about misaligned buffers, let's keep the best performances.
>>> 
>>> So what is the point of this warning?
>> 
>> Well the warning does say something true that the cache operation is not
>> aligned. Better ask Simon as he's the one who changed the print from a
>> debug to warn_non_spl one:
>> bcc53bf0958 arm: Show cache warnings in U-Boot proper only
>> 
>> BTW, in my case I couldn't see the misaligned messages, yet I saw the
>> performance hit described above.

I can also reproduce this problem on the AM62x SK EVM.

Before the revert:
Sending sparse 'super' 1/2 (768793 KB)             OKAY [ 23.954s]
Writing 'super'                                    OKAY [ 75.926s]
Sending sparse 'super' 2/2 (629819 KB)             OKAY [ 19.641s]
Writing 'super'                                    OKAY [ 62.849s]
Finished. Total time: 182.474s

After the revert:
Sending sparse 'super' 1/2 (768793 KB)             OKAY [ 23.895s]
Writing 'super'                                    OKAY [ 12.961s]
Sending sparse 'super' 2/2 (629819 KB)             OKAY [ 19.562s]
Writing 'super'                                    OKAY [ 12.805s]
Finished. Total time: 69.327s

And like Gary, I did not observe the misaligned messages.

Did we come up with a solution for this performance regression?

I will continue looking on my end but please let me know if you already
solved this.

Thanks,

Mattijs

>
> Maybe it is better to keep this as a Kconfig? Some arches may support
> unaligned access but others may not. I wonder if we have something like
> this already.
>
> --Sean
Mattijs Korpershoek June 16, 2023, 1:50 p.m. UTC | #5
On ven., juin 16, 2023 at 13:56, Mattijs Korpershoek <mkorpershoek@baylibre.com> wrote:

> Hi Gary, Sean,
>
> On lun., nov. 21, 2022 at 10:09, Sean Anderson <sean.anderson@seco.com> wrote:
>
>> On 11/21/22 09:50, Gary Bisson wrote:
>>> Hi,
>>> 
>>> On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
>>>> On 11/18/22 07:13, Gary Bisson wrote:
>>>> > This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
>>>> > 
>>>> > The patch decreased the write performance quite a bit.
>>>> > Here is an example on an i.MX 8M Quad platform.
>>>> > - Before the revert:
>>>> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.113s]
>>>> > Writing 'vendor'                                   OKAY [128.335s]
>>>> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  0.802s]
>>>> > Writing 'vendor'                                   OKAY [ 27.902s]
>>>> > - After the revert:
>>>> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.310s]
>>>> > Writing 'vendor'                                   OKAY [ 18.041s]
>>>> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  1.244s]
>>>> > Writing 'vendor'                                   OKAY [  2.663s]
>>>> > 
>>>> > Considering that the patch only moves buffer around to avoid a warning
>>>> > message about misaligned buffers, let's keep the best performances.
>>>> 
>>>> So what is the point of this warning?
>>> 
>>> Well the warning does say something true that the cache operation is not
>>> aligned. Better ask Simon as he's the one who changed the print from a
>>> debug to warn_non_spl one:
>>> bcc53bf0958 arm: Show cache warnings in U-Boot proper only
>>> 
>>> BTW, in my case I couldn't see the misaligned messages, yet I saw the
>>> performance hit described above.
>
> I also reproduce this problem on AM62x SK EVM.
>
> Before the revert:
> Sending sparse 'super' 1/2 (768793 KB)             OKAY [ 23.954s]
> Writing 'super'                                    OKAY [ 75.926s]
> Sending sparse 'super' 2/2 (629819 KB)             OKAY [ 19.641s]
> Writing 'super'                                    OKAY [ 62.849s]
> Finished. Total time: 182.474s
>
> After the revert:
> Sending sparse 'super' 1/2 (768793 KB)             OKAY [ 23.895s]
> Writing 'super'                                    OKAY [ 12.961s]
> Sending sparse 'super' 2/2 (629819 KB)             OKAY [ 19.562s]
> Writing 'super'                                    OKAY [ 12.805s]
> Finished. Total time: 69.327s
>
> And like Gary, I did not observe the misaligned messages.
>
> Did we come up with a solution for this performance regression?
>
> I will continue looking on my end but please let me know if you already
> solved this.

Replying to myself here. My attempt at solving this problem has been
submitted here:

https://lore.kernel.org/r/20230616-sparse-flash-fix-v1-1-6bafeacc567b@baylibre.com


>
> Thanks,
>
> Mattijs
>
>>
>> Maybe it is better to keep this as a Kconfig? Some arches may support
>> unaligned access but others may not. I wonder if we have something like
>> this already.
>>
>> --Sean
Tom Rini June 17, 2023, 2:55 p.m. UTC | #6
On Fri, Jun 16, 2023 at 03:50:06PM +0200, Mattijs Korpershoek wrote:
> On ven., juin 16, 2023 at 13:56, Mattijs Korpershoek <mkorpershoek@baylibre.com> wrote:
> 
> > Hi Gary, Sean,
> >
> > On lun., nov. 21, 2022 at 10:09, Sean Anderson <sean.anderson@seco.com> wrote:
> >
> >> On 11/21/22 09:50, Gary Bisson wrote:
> >>> Hi,
> >>> 
> >>> On Fri, Nov 18, 2022 at 10:36:58AM -0500, Sean Anderson wrote:
> >>>> On 11/18/22 07:13, Gary Bisson wrote:
> >>>> > This reverts commit 62649165cb02ab95b57360bb362886935f524f26.
> >>>> > 
> >>>> > The patch decreased the write performance quite a bit.
> >>>> > Here is an example on an i.MX 8M Quad platform.
> >>>> > - Before the revert:
> >>>> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.113s]
> >>>> > Writing 'vendor'                                   OKAY [128.335s]
> >>>> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  0.802s]
> >>>> > Writing 'vendor'                                   OKAY [ 27.902s]
> >>>> > - After the revert:
> >>>> > Sending sparse 'vendor' 1/2 (516436 KB)            OKAY [  5.310s]
> >>>> > Writing 'vendor'                                   OKAY [ 18.041s]
> >>>> > Sending sparse 'vendor' 2/2 (76100 KB)             OKAY [  1.244s]
> >>>> > Writing 'vendor'                                   OKAY [  2.663s]
> >>>> > 
> >>>> > Considering that the patch only moves buffer around to avoid a warning
> >>>> > message about misaligned buffers, let's keep the best performances.
> >>>> 
> >>>> So what is the point of this warning?
> >>> 
> >>> Well the warning does say something true that the cache operation is not
> >>> aligned. Better ask Simon as he's the one who changed the print from a
> >>> debug to warn_non_spl one:
> >>> bcc53bf0958 arm: Show cache warnings in U-Boot proper only
> >>> 
> >>> BTW, in my case I couldn't see the misaligned messages, yet I saw the
> >>> performance hit described above.
> >
> > I also reproduce this problem on AM62x SK EVM.
> >
> > Before the revert:
> > Sending sparse 'super' 1/2 (768793 KB)             OKAY [ 23.954s]
> > Writing 'super'                                    OKAY [ 75.926s]
> > Sending sparse 'super' 2/2 (629819 KB)             OKAY [ 19.641s]
> > Writing 'super'                                    OKAY [ 62.849s]
> > Finished. Total time: 182.474s
> >
> > After the revert:
> > Sending sparse 'super' 1/2 (768793 KB)             OKAY [ 23.895s]
> > Writing 'super'                                    OKAY [ 12.961s]
> > Sending sparse 'super' 2/2 (629819 KB)             OKAY [ 19.562s]
> > Writing 'super'                                    OKAY [ 12.805s]
> > Finished. Total time: 69.327s
> >
> > And like Gary, I did not observe the misaligned messages.
> >
> > Did we come up with a solution for this performance regression?
> >
> > I will continue looking on my end but please let me know if you already
> > solved this.
> 
> Answering to myself here. My attempt of solving this problem has been
> submitted here:
> 
> https://lore.kernel.org/r/20230616-sparse-flash-fix-v1-1-6bafeacc567b@baylibre.com

Thanks for digging in to this!
diff mbox series

Patch

diff --git a/lib/image-sparse.c b/lib/image-sparse.c
index 5ec0f94ab3e..d80fdbbf58e 100644
--- a/lib/image-sparse.c
+++ b/lib/image-sparse.c
@@ -46,66 +46,9 @@ 
 #include <asm/cache.h>
 
 #include <linux/math64.h>
-#include <linux/err.h>
 
 static void default_log(const char *ignored, char *response) {}
 
-static lbaint_t write_sparse_chunk_raw(struct sparse_storage *info,
-				       lbaint_t blk, lbaint_t blkcnt,
-				       void *data,
-				       char *response)
-{
-	lbaint_t n = blkcnt, write_blks, blks = 0, aligned_buf_blks = 100;
-	uint32_t *aligned_buf = NULL;
-
-	if (CONFIG_IS_ENABLED(SYS_DCACHE_OFF)) {
-		write_blks = info->write(info, blk, n, data);
-		if (write_blks < n)
-			goto write_fail;
-
-		return write_blks;
-	}
-
-	aligned_buf = memalign(ARCH_DMA_MINALIGN, info->blksz * aligned_buf_blks);
-	if (!aligned_buf) {
-		info->mssg("Malloc failed for: CHUNK_TYPE_RAW", response);
-		return -ENOMEM;
-	}
-
-	while (blkcnt > 0) {
-		n = min(aligned_buf_blks, blkcnt);
-		memcpy(aligned_buf, data, n * info->blksz);
-
-		/* write_blks might be > n due to NAND bad-blocks */
-		write_blks = info->write(info, blk + blks, n, aligned_buf);
-		if (write_blks < n) {
-			free(aligned_buf);
-			goto write_fail;
-		}
-
-		blks += write_blks;
-		data += n * info->blksz;
-		blkcnt -= n;
-	}
-
-	free(aligned_buf);
-	return blks;
-
-write_fail:
-	if (IS_ERR_VALUE(write_blks)) {
-		printf("%s: Write failed, block #" LBAFU " [" LBAFU "] (%lld)\n",
-		       __func__, blk + blks, n, (long long)write_blks);
-		info->mssg("flash write failure", response);
-		return write_blks;
-	}
-
-	/* write_blks < n */
-	printf("%s: Write failed, block #" LBAFU " [" LBAFU "]\n",
-	       __func__, blk + blks, n);
-	info->mssg("flash write failure(incomplete)", response);
-	return -1;
-}
-
 int write_sparse_image(struct sparse_storage *info,
 		       const char *part_name, void *data, char *response)
 {
@@ -209,11 +152,15 @@  int write_sparse_image(struct sparse_storage *info,
 				return -1;
 			}
 
-			blks = write_sparse_chunk_raw(info, blk, blkcnt,
-						      data, response);
-			if (blks < 0)
+			blks = info->write(info, blk, blkcnt, data);
+			/* blks might be > blkcnt (eg. NAND bad-blocks) */
+			if (blks < blkcnt) {
+				printf("%s: %s" LBAFU " [" LBAFU "]\n",
+				       __func__, "Write failed, block #",
+				       blk, blks);
+				info->mssg("flash write failure", response);
 				return -1;
-
+			}
 			blk += blks;
 			bytes_written += ((u64)blkcnt) * info->blksz;
 			total_blocks += chunk_header->chunk_sz;