diff mbox

[U-Boot] malloc_simple: Add simple malloc free function

Message ID 1470194652-2461-1-git-send-email-clsee@altera.com
State Deferred
Delegated to: Tom Rini
Headers show

Commit Message

Chin Liang See Aug. 3, 2016, 3:24 a.m. UTC
Enable a simple malloc implementation which will minimize
memory usage prior relocation. This is essential as memory
available prior location is internal memory and limited in
size.

This implementation will stored last 2 usage of malloc. When
free is invoked and the free address matched, we shall revert
to previous value of gd->malloc_ptr that we stored.

Signed-off-by: Chin Liang See <clsee@altera.com>
Cc: Marek Vasut <marex@denx.de>
Cc: Simon Glass <sjg@chromium.org>
Cc: Tom Rini <trini@konsulko.com>
Cc: Dinh Nguyen <dinguyen@opensource.altera.com>
Cc: Tien Fong Chee <tfchee@altera.com>
---
 common/dlmalloc.c                 |  6 ++++--
 common/malloc_simple.c            | 34 ++++++++++++++++++++++++++++++++++
 include/asm-generic/global_data.h |  2 ++
 include/malloc.h                  |  3 ++-
 4 files changed, 42 insertions(+), 3 deletions(-)

Comments

Marek Vasut Aug. 3, 2016, 6:58 a.m. UTC | #1
On 08/03/2016 05:24 AM, Chin Liang See wrote:
> Enable a simple malloc implementation which will minimize
> memory usage prior relocation. This is essential as memory
> available prior location is internal memory and limited in
> size.
> 
> This implementation will stored last 2 usage of malloc. When
> free is invoked and the free address matched, we shall revert
> to previous value of gd->malloc_ptr that we stored.

This looks unnecessarily convoluted and fragile design.
What problem do you observe and on what platform ?

> Signed-off-by: Chin Liang See <clsee@altera.com>
> Cc: Marek Vasut <marex@denx.de>
> Cc: Simon Glass <sjg@chromium.org>
> Cc: Tom Rini <trini@konsulko.com>
> Cc: Dinh Nguyen <dinguyen@opensource.altera.com>
> Cc: Tien Fong Chee <tfchee@altera.com>
> ---
>  common/dlmalloc.c                 |  6 ++++--
>  common/malloc_simple.c            | 34 ++++++++++++++++++++++++++++++++++
>  include/asm-generic/global_data.h |  2 ++
>  include/malloc.h                  |  3 ++-
>  4 files changed, 42 insertions(+), 3 deletions(-)
> 
> diff --git a/common/dlmalloc.c b/common/dlmalloc.c
> index adc680e..ba42d0d 100644
> --- a/common/dlmalloc.c
> +++ b/common/dlmalloc.c
> @@ -1523,9 +1523,11 @@ void fREe(mem) Void_t* mem;
>    int       islr;      /* track whether merging with last_remainder */
>  
>  #ifdef CONFIG_SYS_MALLOC_F_LEN
> -	/* free() is a no-op - all the memory will be freed on relocation */
> -	if (!(gd->flags & GD_FLG_FULL_MALLOC_INIT))
> +	/* Invoke free_simple. All the memory will be freed on relocation */
> +	if (!(gd->flags & GD_FLG_FULL_MALLOC_INIT)) {
> +		free_simple(mem);
>  		return;
> +	}
>  #endif
>  
>    if (mem == NULL)                              /* free(0) has no effect */
> diff --git a/common/malloc_simple.c b/common/malloc_simple.c
> index 0f6bcbc..383a546 100644
> --- a/common/malloc_simple.c
> +++ b/common/malloc_simple.c
> @@ -26,6 +26,18 @@ void *malloc_simple(size_t bytes)
>  		return NULL;
>  	}
>  	ptr = map_sysmem(gd->malloc_base + gd->malloc_ptr, bytes);
> +
> +	/* implement a simple free mechanism which stored last 2 usage */
> +	if (gd->malloc_free_addr[0] != 0) {
> +		/* shifting as we are storing depth of 2 */
> +		gd->malloc_free_addr[1] = gd->malloc_free_addr[0];
> +		gd->malloc_free_ptr[1] = gd->malloc_free_ptr[0];
> +	}
> +	/* saving last malloc address for malloc free */
> +	gd->malloc_free_addr[0] = ptr;
> +	/* saving last malloc_ptr prior allocation for malloc free */
> +	gd->malloc_free_ptr[0] = gd->malloc_ptr;
> +
>  	gd->malloc_ptr = ALIGN(new_ptr, sizeof(new_ptr));
>  	debug("%lx\n", (ulong)ptr);
>  
> @@ -42,6 +54,18 @@ void *memalign_simple(size_t align, size_t bytes)
>  	if (new_ptr > gd->malloc_limit)
>  		return NULL;
>  	ptr = map_sysmem(addr, bytes);
> +
> +	/* implement a simple free mechanism which stored last 2 usage */
> +	if (gd->malloc_free_addr[0] != 0) {
> +		/* shifting as we are storing depth of 2 */
> +		gd->malloc_free_addr[1] = gd->malloc_free_addr[0];
> +		gd->malloc_free_ptr[1] = gd->malloc_free_ptr[0];
> +	}
> +	/* saving last malloc address for malloc free */
> +	gd->malloc_free_addr[0] = ptr;
> +	/* saving last malloc_ptr prior allocation for malloc free */
> +	gd->malloc_free_ptr[0] = gd->malloc_ptr;
> +
>  	gd->malloc_ptr = ALIGN(new_ptr, sizeof(new_ptr));
>  
>  	return ptr;
> @@ -59,3 +83,13 @@ void *calloc(size_t nmemb, size_t elem_size)
>  	return ptr;
>  }
>  #endif
> +
> +void free_simple(Void_t* mem)
> +{
> +	if (mem == gd->malloc_free_addr[0]) {
> +		gd->malloc_ptr = gd->malloc_free_ptr[0];
> +		/* shifting as we are storing depth of 2 */
> +		gd->malloc_free_addr[0] = gd->malloc_free_addr[1];
> +		gd->malloc_free_ptr[0] = gd->malloc_free_ptr[1];
> +	}
> +}
> diff --git a/include/asm-generic/global_data.h b/include/asm-generic/global_data.h
> index a6d1d2a..9380772 100644
> --- a/include/asm-generic/global_data.h
> +++ b/include/asm-generic/global_data.h
> @@ -90,6 +90,8 @@ typedef struct global_data {
>  	unsigned long malloc_base;	/* base address of early malloc() */
>  	unsigned long malloc_limit;	/* limit address */
>  	unsigned long malloc_ptr;	/* current address */
> +	void *malloc_free_addr[2];	/* Last malloc pointer address*/
> +	unsigned long malloc_free_ptr[2];	/* Last malloc_ptr */
>  #endif
>  #ifdef CONFIG_PCI
>  	struct pci_controller *hose;	/* PCI hose for early use */
> diff --git a/include/malloc.h b/include/malloc.h
> index 8175c75..02651aa 100644
> --- a/include/malloc.h
> +++ b/include/malloc.h
> @@ -876,7 +876,7 @@ extern Void_t*     sbrk();
>  #define malloc malloc_simple
>  #define realloc realloc_simple
>  #define memalign memalign_simple
> -static inline void free(void *ptr) {}
> +#define free free_simple
>  void *calloc(size_t nmemb, size_t size);
>  void *memalign_simple(size_t alignment, size_t bytes);
>  void *realloc_simple(void *ptr, size_t size);
> @@ -913,6 +913,7 @@ int initf_malloc(void);
>  
>  /* Simple versions which can be used when space is tight */
>  void *malloc_simple(size_t size);
> +void free_simple(Void_t* mem);
>  
>  #pragma GCC visibility push(hidden)
>  # if __STD_C
>
Chin Liang See Aug. 3, 2016, 7:30 a.m. UTC | #2
Hi Marek,

On Wed, 2016-08-03 at 08:58 +0200, Marek Vasut wrote:
> On 08/03/2016 05:24 AM, Chin Liang See wrote:
> > Enable a simple malloc implementation which will minimize
> > memory usage prior relocation. This is essential as memory
> > available prior location is internal memory and limited in
> > size.
> > 
> > This implementation will stored last 2 usage of malloc. When
> > free is invoked and the free address matched, we shall revert
> > to previous value of gd->malloc_ptr that we stored.
> 
> This looks unnecessarily convoluted and fragile design.
> What problem do you observe and on what platform ?

Actually this for our Arria10 SoC device. In order to get DDR working,
we need to program FPGA. To improve the usability, we put the FPGA
programming file (RBF) into FAT partition.

In that sense, we would need to use FAT driver prior relocation / DDR
available. Due to that, the malloc usage is high and memory available
is limited prior DDR available. 

The simple malloc helps but without the free, its consumed way too much
memory than saving. Hence this simple malloc free patch help. So I
believe this would benefits those who are executing complex operation
prior relocation :) 

Thanks
Chin Liang
Marek Vasut Aug. 3, 2016, 7:53 a.m. UTC | #3
On 08/03/2016 09:30 AM, Chin Liang See wrote:
> Hi Marek,

Hi,

> On Wed, 2016-08-03 at 08:58 +0200, Marek Vasut wrote:
>> On 08/03/2016 05:24 AM, Chin Liang See wrote:
>>> Enable a simple malloc implementation which will minimize
>>> memory usage prior relocation. This is essential as memory
>>> available prior location is internal memory and limited in
>>> size.
>>>
>>> This implementation will stored last 2 usage of malloc. When
>>> free is invoked and the free address matched, we shall revert
>>> to previous value of gd->malloc_ptr that we stored.
>>
>> This looks unnecessarily convoluted and fragile design.
>> What problem do you observe and on what platform ?
> 
> Actually this for our Arria10 SoC device. In order to get DDR working,
> we need to program FPGA. To improve the usability, we put the FPGA
> programming file (RBF) into FAT partition.
> 
> In that sense, we would need to use FAT driver prior relocation / DDR
> available. Due to that, the malloc usage is high and memory available
> is limited prior DDR available. 

I was under the impression that you have 256kiB of SRAM on the A10.
SPL should consume about 64 kiB tops, including support for loading
from VFAT. So you have 192 kiB available, how is that not enough ?

> The simple malloc helps but without the free, its consumed way too much
> memory than saving. Hence this simple malloc free patch help. So I
> believe this would benefits those who are executing complex operation
> prior relocation :) 

I believe you need to identify who is calling malloc() to obtain big
buffers without recycling them.

Your design breaks in the scenario where someone does big malloc
followed by two small mallocs if I understand it correctly. This
doesn't scale and is a hack.

> Thanks
> Chin Liang
>
Chin Liang See Aug. 3, 2016, 1:41 p.m. UTC | #4
On Wed, 2016-08-03 at 09:53 +0200, Marek Vasut wrote:
> On 08/03/2016 09:30 AM, Chin Liang See wrote:
> > Hi Marek,
> 
> Hi,
> 
> > On Wed, 2016-08-03 at 08:58 +0200, Marek Vasut wrote:
> > > On 08/03/2016 05:24 AM, Chin Liang See wrote:
> > > > Enable a simple malloc implementation which will minimize
> > > > memory usage prior relocation. This is essential as memory
> > > > available prior location is internal memory and limited in
> > > > size.
> > > > 
> > > > This implementation will stored last 2 usage of malloc. When
> > > > free is invoked and the free address matched, we shall revert
> > > > to previous value of gd->malloc_ptr that we stored.
> > > 
> > > This looks unnecessarily convoluted and fragile design.
> > > What problem do you observe and on what platform ?
> > 
> > Actually this for our Arria10 SoC device. In order to get DDR
> > working,
> > we need to program FPGA. To improve the usability, we put the FPGA
> > programming file (RBF) into FAT partition.
> > 
> > In that sense, we would need to use FAT driver prior relocation /
> > DDR
> > available. Due to that, the malloc usage is high and memory
> > available
> > is limited prior DDR available. 
> 
> I was under the impression that you have 256kiB of SRAM on the A10.
> SPL should consume about 64 kiB tops, including support for loading
> from VFAT. So you have 192 kiB available, how is that not enough ?

But I presume we won't want to limit that minimum 256MB of internal
memory needed for simple malloc usage, right?

> 
> > The simple malloc helps but without the free, its consumed way too
> > much
> > memory than saving. Hence this simple malloc free patch help. So I
> > believe this would benefits those who are executing complex
> > operation
> > prior relocation :) 
> 
> I believe you need to identify who is calling malloc() to obtain big
> buffers without recycling them.
> 

It's the fat driver which is utilizing the malloc.

> Your design breaks in the scenario where someone does big malloc
> followed by two small mallocs if I understand it correctly. This
> doesn't scale and is a hack.
> 

Actually the proposed free is a simple implementation which acts as
stack push and pop with depth of 2. This is to enhance existing
implementation which don't handle the pop. This get worst especially
dealing with fat driver.

Thanks
Chin Liang

> > Thanks
> > Chin Liang
> > 
> 
>
Marek Vasut Aug. 3, 2016, 1:57 p.m. UTC | #5
On 08/03/2016 03:41 PM, Chin Liang See wrote:
> On Wed, 2016-08-03 at 09:53 +0200, Marek Vasut wrote:
>> On 08/03/2016 09:30 AM, Chin Liang See wrote:
>>> Hi Marek,
>>
>> Hi,
>>
>>> On Wed, 2016-08-03 at 08:58 +0200, Marek Vasut wrote:
>>>> On 08/03/2016 05:24 AM, Chin Liang See wrote:
>>>>> Enable a simple malloc implementation which will minimize
>>>>> memory usage prior relocation. This is essential as memory
>>>>> available prior location is internal memory and limited in
>>>>> size.
>>>>>
>>>>> This implementation will stored last 2 usage of malloc. When
>>>>> free is invoked and the free address matched, we shall revert
>>>>> to previous value of gd->malloc_ptr that we stored.
>>>>
>>>> This looks unnecessarily convoluted and fragile design.
>>>> What problem do you observe and on what platform ?
>>>
>>> Actually this for our Arria10 SoC device. In order to get DDR
>>> working,
>>> we need to program FPGA. To improve the usability, we put the FPGA
>>> programming file (RBF) into FAT partition.
>>>
>>> In that sense, we would need to use FAT driver prior relocation /
>>> DDR
>>> available. Due to that, the malloc usage is high and memory
>>> available
>>> is limited prior DDR available. 
>>
>> I was under the impression that you have 256kiB of SRAM on the A10.
>> SPL should consume about 64 kiB tops, including support for loading
>> from VFAT. So you have 192 kiB available, how is that not enough ?
> 
> But I presume we won't want to limit that minimum 256MB of internal
> memory needed for simple malloc usage, right?

Err, 256 MiB ? I think 192k of malloc area must be _plenty_ for SPL
either way you slice it.

>>
>>> The simple malloc helps but without the free, its consumed way too
>>> much
>>> memory than saving. Hence this simple malloc free patch help. So I
>>> believe this would benefits those who are executing complex
>>> operation
>>> prior relocation :) 
>>
>> I believe you need to identify who is calling malloc() to obtain big
>> buffers without recycling them.
>>
> 
> It's the fat driver which is utilizing the malloc.

So fat is allocating stuff without freeing it ? I wonder if we should
either fix fat or use full malloc in SPL on A10 . I am not really fond
of adding more stuff into simple malloc (to keep it small and simple).

>> Your design breaks in the scenario where someone does big malloc
>> followed by two small mallocs if I understand it correctly. This
>> doesn't scale and is a hack.
>>
> 
> Actually the proposed free is a simple implementation which acts as
> stack push and pop with depth of 2. This is to enhance existing
> implementation which don't handle the pop. This get worst especially
> dealing with fat driver.

Well, how does it handle my example? It doesn't and it fails to help in
such case, right ?

> Thanks
> Chin Liang
> 
>>> Thanks
>>> Chin Liang
>>>
>>
>>
Chin Liang See Aug. 3, 2016, 3:22 p.m. UTC | #6
On Wed, 2016-08-03 at 15:57 +0200, Marek Vasut wrote:
> On 08/03/2016 03:41 PM, Chin Liang See wrote:
> > On Wed, 2016-08-03 at 09:53 +0200, Marek Vasut wrote:
> > > On 08/03/2016 09:30 AM, Chin Liang See wrote:
> > > > Hi Marek,
> > > 
> > > Hi,
> > > 
> > > > On Wed, 2016-08-03 at 08:58 +0200, Marek Vasut wrote:
> > > > > On 08/03/2016 05:24 AM, Chin Liang See wrote:
> > > > > > Enable a simple malloc implementation which will minimize
> > > > > > memory usage prior relocation. This is essential as memory
> > > > > > available prior location is internal memory and limited in
> > > > > > size.
> > > > > > 
> > > > > > This implementation will stored last 2 usage of malloc.
> > > > > > When
> > > > > > free is invoked and the free address matched, we shall
> > > > > > revert
> > > > > > to previous value of gd->malloc_ptr that we stored.
> > > > > 
> > > > > This looks unnecessarily convoluted and fragile design.
> > > > > What problem do you observe and on what platform ?
> > > > 
> > > > Actually this for our Arria10 SoC device. In order to get DDR
> > > > working,
> > > > we need to program FPGA. To improve the usability, we put the
> > > > FPGA
> > > > programming file (RBF) into FAT partition.
> > > > 
> > > > In that sense, we would need to use FAT driver prior relocation
> > > > /
> > > > DDR
> > > > available. Due to that, the malloc usage is high and memory
> > > > available
> > > > is limited prior DDR available. 
> > > 
> > > I was under the impression that you have 256kiB of SRAM on the
> > > A10.
> > > SPL should consume about 64 kiB tops, including support for
> > > loading
> > > from VFAT. So you have 192 kiB available, how is that not enough
> > > ?
> > 
> > But I presume we won't want to limit that minimum 256MB of internal
> > memory needed for simple malloc usage, right?
> 
> Err, 256 MiB ? I think 192k of malloc area must be _plenty_ for SPL
> either way you slice it.
> 
> > > 
> > > > The simple malloc helps but without the free, its consumed way
> > > > too
> > > > much
> > > > memory than saving. Hence this simple malloc free patch help.
> > > > So I
> > > > believe this would benefits those who are executing complex
> > > > operation
> > > > prior relocation :) 
> > > 
> > > I believe you need to identify who is calling malloc() to obtain
> > > big
> > > buffers without recycling them.
> > > 
> > 
> > It's the fat driver which is utilizing the malloc.
> 
> So fat is allocating stuff without freeing it ? I wonder if we should
> either fix fat or use full malloc in SPL on A10 . I am not really
> fond
> of adding more stuff into simple malloc (to keep it small and 
> simple).

Actually fat driver is good where it invoke malloc and free during the
operation. Just that with existing malloc, there is no free
implementation and memory keep "push" every time malloc invoked.

> 
> > > Your design breaks in the scenario where someone does big malloc
> > > followed by two small mallocs if I understand it correctly. This
> > > doesn't scale and is a hack.
> > > 
> > 
> > Actually the proposed free is a simple implementation which acts as
> > stack push and pop with depth of 2. This is to enhance existing
> > implementation which don't handle the pop. This get worst
> > especially
> > dealing with fat driver.
> 
> Well, how does it handle my example? It doesn't and it fails to help
> in
> such case, right ?

I was thinking what is the correct depth while trying to keep things
simple. From the FAT access testing with SD and eMMC, depth of 2 works
well by cutting lot of memory consumption by simple malloc
implementation. Any thoughts whether should have more flexibility?

Thanks
Chin Liang

> 
> > Thanks
> > Chin Liang
> > 
> > > > Thanks
> > > > Chin Liang
> > > > 
> > > 
> > > 
> 
>
Simon Glass Aug. 4, 2016, 1:17 a.m. UTC | #7
Hi,

On 2 August 2016 at 21:24, Chin Liang See <clsee@altera.com> wrote:
> Enable a simple malloc implementation which will minimize
> memory usage prior relocation. This is essential as memory
> available prior location is internal memory and limited in
> size.
>
> This implementation will stored last 2 usage of malloc. When
> free is invoked and the free address matched, we shall revert
> to previous value of gd->malloc_ptr that we stored.

I'm really not keen on this patch. Can we not adjust the FAT code to
avoid repeated malloc()/free()?


>
> Signed-off-by: Chin Liang See <clsee@altera.com>
> Cc: Marek Vasut <marex@denx.de>
> Cc: Simon Glass <sjg@chromium.org>
> Cc: Tom Rini <trini@konsulko.com>
> Cc: Dinh Nguyen <dinguyen@opensource.altera.com>
> Cc: Tien Fong Chee <tfchee@altera.com>
> ---
>  common/dlmalloc.c                 |  6 ++++--
>  common/malloc_simple.c            | 34 ++++++++++++++++++++++++++++++++++
>  include/asm-generic/global_data.h |  2 ++
>  include/malloc.h                  |  3 ++-
>  4 files changed, 42 insertions(+), 3 deletions(-)

Regards,
Simon
Marek Vasut Aug. 4, 2016, 5:30 a.m. UTC | #8
On 08/03/2016 05:22 PM, Chin Liang See wrote:

Hi,

[...]
>>> It's the fat driver which is utilizing the malloc.
>>
>> So fat is allocating stuff without freeing it ? I wonder if we should
>> either fix fat or use full malloc in SPL on A10 . I am not really
>> fond
>> of adding more stuff into simple malloc (to keep it small and 
>> simple).
> 
> Actually fat driver is good where it invoke malloc and free during the
> operation. Just that with existing malloc, there is no free
> implementation and memory keep "push" every time malloc invoked.

And I agree with Simon that we should look into the FAT driver and fix
it. Is that not possible ?

>>>> Your design breaks in the scenario where someone does big malloc
>>>> followed by two small mallocs if I understand it correctly. This
>>>> doesn't scale and is a hack.
>>>>
>>>
>>> Actually the proposed free is a simple implementation which acts as
>>> stack push and pop with depth of 2. This is to enhance existing
>>> implementation which don't handle the pop. This get worst
>>> especially
>>> dealing with fat driver.
>>
>> Well, how does it handle my example? It doesn't and it fails to help
>> in
>> such case, right ?
> 
> I was thinking what is the correct depth while trying to keep things
> simple. From the FAT access testing with SD and eMMC, depth of 2 works
> well by cutting lot of memory consumption by simple malloc
> implementation. Any thoughts whether should have more flexibility?

You still didn't answer my question -- how will this handle my example
usecase ?
Chin Liang See Aug. 4, 2016, 3:09 p.m. UTC | #9
On Wed, 2016-08-03 at 19:17 -0600, Simon Glass wrote:
> Hi,
> 

Hi Simon,

> On 2 August 2016 at 21:24, Chin Liang See <clsee@altera.com> wrote:
> > Enable a simple malloc implementation which will minimize
> > memory usage prior relocation. This is essential as memory
> > available prior location is internal memory and limited in
> > size.
> > 
> > This implementation will stored last 2 usage of malloc. When
> > free is invoked and the free address matched, we shall revert
> > to previous value of gd->malloc_ptr that we stored.
> 
> I'm really not keen on this patch. Can we not adjust the FAT code to
> avoid repeated malloc()/free()?
> 

Sure and let me take a look on enhancing the FAT driver for the
malloc/free aspect.

Thanks
Chin Liang


> 
> > 
> > Signed-off-by: Chin Liang See <clsee@altera.com>
> > Cc: Marek Vasut <marex@denx.de>
> > Cc: Simon Glass <sjg@chromium.org>
> > Cc: Tom Rini <trini@konsulko.com>
> > Cc: Dinh Nguyen <dinguyen@opensource.altera.com>
> > Cc: Tien Fong Chee <tfchee@altera.com>
> > ---
> >  common/dlmalloc.c                 |  6 ++++--
> >  common/malloc_simple.c            | 34
> > ++++++++++++++++++++++++++++++++++
> >  include/asm-generic/global_data.h |  2 ++
> >  include/malloc.h                  |  3 ++-
> >  4 files changed, 42 insertions(+), 3 deletions(-)
> 
> Regards,
> Simon
Chin Liang See Aug. 4, 2016, 3:12 p.m. UTC | #10
On Thu, 2016-08-04 at 07:30 +0200, Marek Vasut wrote:
> On 08/03/2016 05:22 PM, Chin Liang See wrote:
> 
> Hi,

Hi Marek,

> 
> [...]
> > > > It's the fat driver which is utilizing the malloc.
> > > 
> > > So fat is allocating stuff without freeing it ? I wonder if we
> > > should
> > > either fix fat or use full malloc in SPL on A10 . I am not really
> > > fond
> > > of adding more stuff into simple malloc (to keep it small and 
> > > simple).
> > 
> > Actually fat driver is good where it invoke malloc and free during
> > the
> > operation. Just that with existing malloc, there is no free
> > implementation and memory keep "push" every time malloc invoked.
> 
> And I agree with Simon that we should look into the FAT driver and
> fix
> it. Is that not possible ?


Definitely as seems everyone believe this should be the right way to
go.

> 
> > > > > Your design breaks in the scenario where someone does big
> > > > > malloc
> > > > > followed by two small mallocs if I understand it correctly.
> > > > > This
> > > > > doesn't scale and is a hack.
> > > > > 
> > > > 
> > > > Actually the proposed free is a simple implementation which
> > > > acts as
> > > > stack push and pop with depth of 2. This is to enhance existing
> > > > implementation which don't handle the pop. This get worst
> > > > especially
> > > > dealing with fat driver.
> > > 
> > > Well, how does it handle my example? It doesn't and it fails to
> > > help
> > > in
> > > such case, right ?
> > 
> > I was thinking what is the correct depth while trying to keep
> > things
> > simple. From the FAT access testing with SD and eMMC, depth of 2
> > works
> > well by cutting lot of memory consumption by simple malloc
> > implementation. Any thoughts whether should have more flexibility?
> 
> You still didn't answer my question -- how will this handle my
> example
> usecase ?
> 

I did and wonder my email server having issue again. For this case,
yah, it will not being handle as we try to keep it simple by having
depth of 2. FYI, the 2 is being derived from my experiment of FAT
driver access with SD and eMMC devices. We can enhance that but
probably that might not a good idea per Simon suggested.

Thanks
Chin Liang
Marek Vasut Aug. 4, 2016, 3:26 p.m. UTC | #11
On 08/04/2016 05:12 PM, Chin Liang See wrote:
> On Thu, 2016-08-04 at 07:30 +0200, Marek Vasut wrote:
>> On 08/03/2016 05:22 PM, Chin Liang See wrote:
>>
>> Hi,
> 
> Hi Marek,
> 
>>
>> [...]
>>>>> It's the fat driver which is utilizing the malloc.
>>>>
>>>> So fat is allocating stuff without freeing it ? I wonder if we
>>>> should
>>>> either fix fat or use full malloc in SPL on A10 . I am not really
>>>> fond
>>>> of adding more stuff into simple malloc (to keep it small and 
>>>> simple).
>>>
>>> Actually fat driver is good where it invoke malloc and free during
>>> the
>>> operation. Just that with existing malloc, there is no free
>>> implementation and memory keep "push" every time malloc invoked.
>>
>> And I agree with Simon that we should look into the FAT driver and
>> fix
>> it. Is that not possible ?
> 
> 
> Definitely as seems everyone believe this should be the right way to
> go.

Indeed

>>
>>>>>> Your design breaks in the scenario where someone does big
>>>>>> malloc
>>>>>> followed by two small mallocs if I understand it correctly.
>>>>>> This
>>>>>> doesn't scale and is a hack.
>>>>>>
>>>>>
>>>>> Actually the proposed free is a simple implementation which
>>>>> acts as
>>>>> stack push and pop with depth of 2. This is to enhance existing
>>>>> implementation which don't handle the pop. This get worst
>>>>> especially
>>>>> dealing with fat driver.
>>>>
>>>> Well, how does it handle my example? It doesn't and it fails to
>>>> help
>>>> in
>>>> such case, right ?
>>>
>>> I was thinking what is the correct depth while trying to keep
>>> things
>>> simple. From the FAT access testing with SD and eMMC, depth of 2
>>> works
>>> well by cutting lot of memory consumption by simple malloc
>>> implementation. Any thoughts whether should have more flexibility?
>>
>> You still didn't answer my question -- how will this handle my
>> example
>> usecase ?
>>
> 
> I did and wonder my email server having issue again. For this case,
> yah, it will not being handle as we try to keep it simple by having
> depth of 2. FYI, the 2 is being derived from my experiment of FAT
> driver access with SD and eMMC devices. We can enhance that but
> probably that might not a good idea per Simon suggested.

How would you enhance it such that I cannot find counter-example ? :)
diff mbox

Patch

diff --git a/common/dlmalloc.c b/common/dlmalloc.c
index adc680e..ba42d0d 100644
--- a/common/dlmalloc.c
+++ b/common/dlmalloc.c
@@ -1523,9 +1523,11 @@  void fREe(mem) Void_t* mem;
   int       islr;      /* track whether merging with last_remainder */
 
 #ifdef CONFIG_SYS_MALLOC_F_LEN
-	/* free() is a no-op - all the memory will be freed on relocation */
-	if (!(gd->flags & GD_FLG_FULL_MALLOC_INIT))
+	/* Invoke free_simple. All the memory will be freed on relocation */
+	if (!(gd->flags & GD_FLG_FULL_MALLOC_INIT)) {
+		free_simple(mem);
 		return;
+	}
 #endif
 
   if (mem == NULL)                              /* free(0) has no effect */
diff --git a/common/malloc_simple.c b/common/malloc_simple.c
index 0f6bcbc..383a546 100644
--- a/common/malloc_simple.c
+++ b/common/malloc_simple.c
@@ -26,6 +26,18 @@  void *malloc_simple(size_t bytes)
 		return NULL;
 	}
 	ptr = map_sysmem(gd->malloc_base + gd->malloc_ptr, bytes);
+
+	/* implement a simple free mechanism which stored last 2 usage */
+	if (gd->malloc_free_addr[0] != 0) {
+		/* shifting as we are storing depth of 2 */
+		gd->malloc_free_addr[1] = gd->malloc_free_addr[0];
+		gd->malloc_free_ptr[1] = gd->malloc_free_ptr[0];
+	}
+	/* saving last malloc address for malloc free */
+	gd->malloc_free_addr[0] = ptr;
+	/* saving last malloc_ptr prior allocation for malloc free */
+	gd->malloc_free_ptr[0] = gd->malloc_ptr;
+
 	gd->malloc_ptr = ALIGN(new_ptr, sizeof(new_ptr));
 	debug("%lx\n", (ulong)ptr);
 
@@ -42,6 +54,18 @@  void *memalign_simple(size_t align, size_t bytes)
 	if (new_ptr > gd->malloc_limit)
 		return NULL;
 	ptr = map_sysmem(addr, bytes);
+
+	/* implement a simple free mechanism which stored last 2 usage */
+	if (gd->malloc_free_addr[0] != 0) {
+		/* shifting as we are storing depth of 2 */
+		gd->malloc_free_addr[1] = gd->malloc_free_addr[0];
+		gd->malloc_free_ptr[1] = gd->malloc_free_ptr[0];
+	}
+	/* saving last malloc address for malloc free */
+	gd->malloc_free_addr[0] = ptr;
+	/* saving last malloc_ptr prior allocation for malloc free */
+	gd->malloc_free_ptr[0] = gd->malloc_ptr;
+
 	gd->malloc_ptr = ALIGN(new_ptr, sizeof(new_ptr));
 
 	return ptr;
@@ -59,3 +83,13 @@  void *calloc(size_t nmemb, size_t elem_size)
 	return ptr;
 }
 #endif
+
+void free_simple(Void_t* mem)
+{
+	if (mem == gd->malloc_free_addr[0]) {
+		gd->malloc_ptr = gd->malloc_free_ptr[0];
+		/* shifting as we are storing depth of 2 */
+		gd->malloc_free_addr[0] = gd->malloc_free_addr[1];
+		gd->malloc_free_ptr[0] = gd->malloc_free_ptr[1];
+	}
+}
diff --git a/include/asm-generic/global_data.h b/include/asm-generic/global_data.h
index a6d1d2a..9380772 100644
--- a/include/asm-generic/global_data.h
+++ b/include/asm-generic/global_data.h
@@ -90,6 +90,8 @@  typedef struct global_data {
 	unsigned long malloc_base;	/* base address of early malloc() */
 	unsigned long malloc_limit;	/* limit address */
 	unsigned long malloc_ptr;	/* current address */
+	void *malloc_free_addr[2];	/* Last malloc pointer address*/
+	unsigned long malloc_free_ptr[2];	/* Last malloc_ptr */
 #endif
 #ifdef CONFIG_PCI
 	struct pci_controller *hose;	/* PCI hose for early use */
diff --git a/include/malloc.h b/include/malloc.h
index 8175c75..02651aa 100644
--- a/include/malloc.h
+++ b/include/malloc.h
@@ -876,7 +876,7 @@  extern Void_t*     sbrk();
 #define malloc malloc_simple
 #define realloc realloc_simple
 #define memalign memalign_simple
-static inline void free(void *ptr) {}
+#define free free_simple
 void *calloc(size_t nmemb, size_t size);
 void *memalign_simple(size_t alignment, size_t bytes);
 void *realloc_simple(void *ptr, size_t size);
@@ -913,6 +913,7 @@  int initf_malloc(void);
 
 /* Simple versions which can be used when space is tight */
 void *malloc_simple(size_t size);
+void free_simple(Void_t* mem);
 
 #pragma GCC visibility push(hidden)
 # if __STD_C