diff mbox series

[U-Boot,1/2] fs: fat: dynamically allocate memory for temporary buffer

Message ID 1547707948-21484-1-git-send-email-tien.fong.chee@intel.com
State Changes Requested
Delegated to: Marek Vasut
Headers show
Series [U-Boot,1/2] fs: fat: dynamically allocate memory for temporary buffer | expand

Commit Message

Chee, Tien Fong Jan. 17, 2019, 6:52 a.m. UTC
From: Stefan Agner <stefan.ag...@toradex.com>

Drop the statically allocated get_contents_vfatname_block and
dynamically allocate a buffer only if required. This saves
64KiB of memory.

Signed-off-by: Stefan Agner <stefan.ag...@toradex.com>
Signed-off-by: Tien Fong Chee <tien.fong.chee@intel.com>
---
 fs/fat/fat.c |   19 +++++++++++++------
 1 files changed, 13 insertions(+), 6 deletions(-)

Comments

Marek Vasut Jan. 18, 2019, 6:26 a.m. UTC | #1
On 1/17/19 7:52 AM, tien.fong.chee@intel.com wrote:
> From: Stefan Agner <stefan.ag...@toradex.com>
> 
> Drop the statically allocated get_contents_vfatname_block and
> dynamically allocate a buffer only if required. This saves
> 64KiB of memory.
> 
> Signed-off-by: Stefan Agner <stefan.ag...@toradex.com>
> Signed-off-by: Tien Fong Chee <tien.fong.chee@intel.com>
> ---
>  fs/fat/fat.c |   19 +++++++++++++------
>  1 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/fat/fat.c b/fs/fat/fat.c
> index 8803fb4..aa4636d 100644
> --- a/fs/fat/fat.c
> +++ b/fs/fat/fat.c
> @@ -307,9 +307,6 @@ get_cluster(fsdata *mydata, __u32 clustnum, __u8 *buffer, unsigned long size)
>   * into 'buffer'.
>   * Update the number of bytes read in *gotsize or return -1 on fatal errors.
>   */
> -__u8 get_contents_vfatname_block[MAX_CLUSTSIZE]
> -	__aligned(ARCH_DMA_MINALIGN);
> -
>  static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos,
>  			__u8 *buffer, loff_t maxsize, loff_t *gotsize)
>  {
> @@ -320,7 +317,7 @@ static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos,
>  	loff_t actsize;
>  
>  	*gotsize = 0;
> -	debug("Filesize: %llu bytes\n", filesize);
> +	debug("Filesize: %llu bytes, starting at pos %llu\n", filesize, pos);

This looks like a separate change.

>  	if (pos >= filesize) {
>  		debug("Read position past EOF: %llu\n", pos);
> @@ -352,15 +349,25 @@ static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos,
>  
>  	/* align to beginning of next cluster if any */
>  	if (pos) {
> +		__u8 *tmp_buffer;
> +
> +		tmp_buffer = malloc_cache_aligned(MAX_CLUSTSIZE);

Don't you know the cluster size by now ?

> +		if (!tmp_buffer) {
> +			debug("Error: allocating buffer\n");
> +			return -ENOMEM;
> +		}
> +
>  		actsize = min(filesize, (loff_t)bytesperclust);
> -		if (get_cluster(mydata, curclust, get_contents_vfatname_block,
> +		if (get_cluster(mydata, curclust, tmp_buffer,
>  				(int)actsize) != 0) {
>  			printf("Error reading cluster\n");
> +			free(tmp_buffer);
>  			return -1;
>  		}
>  		filesize -= actsize;
>  		actsize -= pos;
> -		memcpy(buffer, get_contents_vfatname_block + pos, actsize);
> +		memcpy(buffer, tmp_buffer + pos, actsize);
> +		free(tmp_buffer);

How many times is this malloc()/free() called ? I wonder how much this
slows things down and how much it fragments the heap. Maybe the amount
of calls to this can be reduced somehow.

>  		*gotsize += actsize;
>  		if (!filesize)
>  			return 0;
>
Chee, Tien Fong Jan. 24, 2019, 6:26 a.m. UTC | #2
On Fri, 2019-01-18 at 07:26 +0100, Marek Vasut wrote:
> On 1/17/19 7:52 AM, tien.fong.chee@intel.com wrote:
> > 
> > From: Stefan Agner <stefan.ag...@toradex.com>
> > 
> > Drop the statically allocated get_contents_vfatname_block and
> > dynamically allocate a buffer only if required. This saves
> > 64KiB of memory.
> > 
> > Signed-off-by: Stefan Agner <stefan.ag...@toradex.com>
> > Signed-off-by: Tien Fong Chee <tien.fong.chee@intel.com>
> > ---
> >  fs/fat/fat.c |   19 +++++++++++++------
> >  1 files changed, 13 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/fat/fat.c b/fs/fat/fat.c
> > index 8803fb4..aa4636d 100644
> > --- a/fs/fat/fat.c
> > +++ b/fs/fat/fat.c
> > @@ -307,9 +307,6 @@ get_cluster(fsdata *mydata, __u32 clustnum,
> > __u8 *buffer, unsigned long size)
> >   * into 'buffer'.
> >   * Update the number of bytes read in *gotsize or return -1 on
> > fatal errors.
> >   */
> > -__u8 get_contents_vfatname_block[MAX_CLUSTSIZE]
> > -	__aligned(ARCH_DMA_MINALIGN);
> > -
> >  static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t
> > pos,
> >  			__u8 *buffer, loff_t maxsize, loff_t
> > *gotsize)
> >  {
> > @@ -320,7 +317,7 @@ static int get_contents(fsdata *mydata,
> > dir_entry *dentptr, loff_t pos,
> >  	loff_t actsize;
> >  
> >  	*gotsize = 0;
> > -	debug("Filesize: %llu bytes\n", filesize);
> > +	debug("Filesize: %llu bytes, starting at pos %llu\n",
> > filesize, pos);
> This looks like a separate change.
Okay.
> 
> > 
> >  	if (pos >= filesize) {
> >  		debug("Read position past EOF: %llu\n", pos);
> > @@ -352,15 +349,25 @@ static int get_contents(fsdata *mydata,
> > dir_entry *dentptr, loff_t pos,
> >  
> >  	/* align to beginning of next cluster if any */
> >  	if (pos) {
> > +		__u8 *tmp_buffer;
> > +
> > +		tmp_buffer = malloc_cache_aligned(MAX_CLUSTSIZE);
> Don't you know the cluster size by now ?
Yeah, i think so, we can run allocate memory based on actsize. Please
correct me if i'm wrong.
> 
> > 
> > +		if (!tmp_buffer) {
> > +			debug("Error: allocating buffer\n");
> > +			return -ENOMEM;
> > +		}
> > +
> >  		actsize = min(filesize, (loff_t)bytesperclust);
> > -		if (get_cluster(mydata, curclust,
> > get_contents_vfatname_block,
> > +		if (get_cluster(mydata, curclust, tmp_buffer,
> >  				(int)actsize) != 0) {
> >  			printf("Error reading cluster\n");
> > +			free(tmp_buffer);
> >  			return -1;
> >  		}
> >  		filesize -= actsize;
> >  		actsize -= pos;
> > -		memcpy(buffer, get_contents_vfatname_block + pos,
> > actsize);
> > +		memcpy(buffer, tmp_buffer + pos, actsize);
> > +		free(tmp_buffer);
> How many times is this malloc()/free() called ? I wonder how much
> this
> slows things down and how much it fragments the heap. Maybe the
> amount
> of calls to this can be reduced somehow.
There is performance penalty for when reading file based on offset
chunk by chunk at small memory such as OCRAM. However, i believe this
doesn't impact in most use cases because most of them running in large
DDR size. Most of the time, the reading of the file is starting from
offset zero(no memory allocation is required).

I can "feel" that is not much difference actually in performance based
on print out responding for my chunk by chunk file reading use case in
these changes. But these changes with [patch 2/2] would help to
maximize reusable from memory pool, which lead to only 1 block of max
cluster is required allocated in memory pool instead of two blocks.

What do you think?
> 
> > 
> >  		*gotsize += actsize;
> >  		if (!filesize)
> >  			return 0;
> > 
>
diff mbox series

Patch

diff --git a/fs/fat/fat.c b/fs/fat/fat.c
index 8803fb4..aa4636d 100644
--- a/fs/fat/fat.c
+++ b/fs/fat/fat.c
@@ -307,9 +307,6 @@  get_cluster(fsdata *mydata, __u32 clustnum, __u8 *buffer, unsigned long size)
  * into 'buffer'.
  * Update the number of bytes read in *gotsize or return -1 on fatal errors.
  */
-__u8 get_contents_vfatname_block[MAX_CLUSTSIZE]
-	__aligned(ARCH_DMA_MINALIGN);
-
 static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos,
 			__u8 *buffer, loff_t maxsize, loff_t *gotsize)
 {
@@ -320,7 +317,7 @@  static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos,
 	loff_t actsize;
 
 	*gotsize = 0;
-	debug("Filesize: %llu bytes\n", filesize);
+	debug("Filesize: %llu bytes, starting at pos %llu\n", filesize, pos);
 
 	if (pos >= filesize) {
 		debug("Read position past EOF: %llu\n", pos);
@@ -352,15 +349,25 @@  static int get_contents(fsdata *mydata, dir_entry *dentptr, loff_t pos,
 
 	/* align to beginning of next cluster if any */
 	if (pos) {
+		__u8 *tmp_buffer;
+
+		tmp_buffer = malloc_cache_aligned(MAX_CLUSTSIZE);
+		if (!tmp_buffer) {
+			debug("Error: allocating buffer\n");
+			return -ENOMEM;
+		}
+
 		actsize = min(filesize, (loff_t)bytesperclust);
-		if (get_cluster(mydata, curclust, get_contents_vfatname_block,
+		if (get_cluster(mydata, curclust, tmp_buffer,
 				(int)actsize) != 0) {
 			printf("Error reading cluster\n");
+			free(tmp_buffer);
 			return -1;
 		}
 		filesize -= actsize;
 		actsize -= pos;
-		memcpy(buffer, get_contents_vfatname_block + pos, actsize);
+		memcpy(buffer, tmp_buffer + pos, actsize);
+		free(tmp_buffer);
 		*gotsize += actsize;
 		if (!filesize)
 			return 0;