diff mbox series

riscv: bypass malloc when spl fit boots from ram

Message ID 20221222072152.2624-1-rick@andestech.com
State Superseded
Delegated to: Andes
Headers show
Series riscv: bypass malloc when spl fit boots from ram | expand

Commit Message

Rick Chen Dec. 22, 2022, 7:21 a.m. UTC
When fit image boots from ram, the payload will
be prepared in the address of SPL_LOAD_FIT_ADDRESS.
In spl fit generic flow, it will malloc another
memory address and copy whole fit image to this
malloc address.  But it is un-necessary for booting
from RAM.

This patch improves this flow by declare the
board_spl_fit_buffer_addr() to replace the original one.
The larger image size (eq: Kernel Image 10~20MB), it
can save more booting time.

Also enhance memcpy function by checking source and
destination address. If they are the same address,
just return and don't copy data anymore.

Signed-off-by: Rick Chen <rick@andestech.com>
---
 arch/riscv/lib/memcpy.S |  2 ++
 arch/riscv/lib/spl.c    | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

Comments

Samuel Holland Dec. 28, 2022, 2:47 a.m. UTC | #1
On 12/22/22 01:21, Rick Chen wrote:
> When fit image boots from ram, the payload will
> be prepared in the address of SPL_LOAD_FIT_ADDRESS.
> In spl fit generic flow, it will malloc another
> memory address and copy whole fit image to this
> malloc address.  But it is un-necessary for booting
> from RAM.

This should mostly be solved by using `mkimage -E` or binman's
`fit,external-offset`. Then only the FIT structure, without any data,
will be copied.

> This patch improves this flow by declare the
> board_spl_fit_buffer_addr() to replace the original one.
> The larger image size (eq: Kernel Image 10~20MB), it
> can save more booting time.
> 
> Also enhance memcpy function by checking source and
> destination address. If they are the same address,
> just return and don't copy data anymore.
> 
> Signed-off-by: Rick Chen <rick@andestech.com>
> ---
>  arch/riscv/lib/memcpy.S |  2 ++
>  arch/riscv/lib/spl.c    | 16 ++++++++++++++++
>  2 files changed, 18 insertions(+)
> 
> diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
> index 00672c19ad..9884077c93 100644
> --- a/arch/riscv/lib/memcpy.S
> +++ b/arch/riscv/lib/memcpy.S
> @@ -9,6 +9,7 @@
>  /* void *memcpy(void *, const void *, size_t) */
>  ENTRY(__memcpy)
>  WEAK(memcpy)
> +	beq	a0, a1, .copy_end

I see one call to memcpy() in common/spl/spl_ram.c. I think it would
make more sense to do the check there instead of adding a branch to
every memcpy() call.

>  	/* Save for return value */
>  	mv	t6, a0
>  
> @@ -121,6 +122,7 @@ WEAK(memcpy)
>  2:
>  
>  	mv	a0, t6
> +.copy_end:
>  	ret
>  
>  .Lmisaligned_word_copy:
> diff --git a/arch/riscv/lib/spl.c b/arch/riscv/lib/spl.c
> index f4d3b67e5d..18f86ee207 100644
> --- a/arch/riscv/lib/spl.c
> +++ b/arch/riscv/lib/spl.c
> @@ -61,3 +61,19 @@ void __noreturn jump_to_image_no_args(struct spl_image_info *spl_image)
>  #endif
>  	image_entry(gd->arch.boot_hart, fdt_blob);
>  }
> +
> +#ifdef CONFIG_SPL_RAM_SUPPORT
> +struct legacy_img_hdr *spl_get_load_buffer(ssize_t offset, size_t size)
> +{
> +	return (void *)(CONFIG_SPL_LOAD_FIT_ADDRESS + offset);
> +}
> +
> +void *board_spl_fit_buffer_addr(ulong fit_size, int sectors, int bl_len)
> +{
> +	void *buf;
> +
> +	buf = spl_get_load_buffer(0, sectors * bl_len);
> +
> +	return buf;
> +}
> +#endif

You cannot provide strong definitions of these functions at an
architecture level, as this prevents any board from overriding them.

Regards,
Samuel
Rick Chen Dec. 28, 2022, 3:22 a.m. UTC | #2
Hi Samuel,

Samuel Holland <samuel@sholland.org> 於 2022年12月28日 週三 上午10:47寫道:
>
> On 12/22/22 01:21, Rick Chen wrote:
> > When fit image boots from ram, the payload will
> > be prepared in the address of SPL_LOAD_FIT_ADDRESS.
> > In spl fit generic flow, it will malloc another
> > memory address and copy whole fit image to this
> > malloc address.  But it is un-necessary for booting
> > from RAM.
>
> This should mostly be solved by using `mkimage -E` or binman's
> `fit,external-offset`. Then only the FIT structure, without any data,
> will be copied.
>
> > This patch improves this flow by declare the
> > board_spl_fit_buffer_addr() to replace the original one.
> > The larger image size (eq: Kernel Image 10~20MB), it
> > can save more booting time.
> >
> > Also enhance memcpy function by checking source and
> > destination address. If they are the same address,
> > just return and don't copy data anymore.
> >
> > Signed-off-by: Rick Chen <rick@andestech.com>
> > ---
> >  arch/riscv/lib/memcpy.S |  2 ++
> >  arch/riscv/lib/spl.c    | 16 ++++++++++++++++
> >  2 files changed, 18 insertions(+)
> >
> > diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
> > index 00672c19ad..9884077c93 100644
> > --- a/arch/riscv/lib/memcpy.S
> > +++ b/arch/riscv/lib/memcpy.S
> > @@ -9,6 +9,7 @@
> >  /* void *memcpy(void *, const void *, size_t) */
> >  ENTRY(__memcpy)
> >  WEAK(memcpy)
> > +     beq     a0, a1, .copy_end
>
> I see one call to memcpy() in common/spl/spl_ram.c. I think it would
> make more sense to do the check there instead of adding a branch to
> every memcpy() call.

There is not only one memcpy being called in spl_ram.c during spl
booting progress, but also in spl_fit.c .
Otherwise I prefer not to modify it in generic flow level but arch
level, because the src and dest checking
may not be necessary for other booting devices.

>
> >       /* Save for return value */
> >       mv      t6, a0
> >
> > @@ -121,6 +122,7 @@ WEAK(memcpy)
> >  2:
> >
> >       mv      a0, t6
> > +.copy_end:
> >       ret
> >
> >  .Lmisaligned_word_copy:
> > diff --git a/arch/riscv/lib/spl.c b/arch/riscv/lib/spl.c
> > index f4d3b67e5d..18f86ee207 100644
> > --- a/arch/riscv/lib/spl.c
> > +++ b/arch/riscv/lib/spl.c
> > @@ -61,3 +61,19 @@ void __noreturn jump_to_image_no_args(struct spl_image_info *spl_image)
> >  #endif
> >       image_entry(gd->arch.boot_hart, fdt_blob);
> >  }
> > +
> > +#ifdef CONFIG_SPL_RAM_SUPPORT
> > +struct legacy_img_hdr *spl_get_load_buffer(ssize_t offset, size_t size)
> > +{
> > +     return (void *)(CONFIG_SPL_LOAD_FIT_ADDRESS + offset);
> > +}
> > +
> > +void *board_spl_fit_buffer_addr(ulong fit_size, int sectors, int bl_len)
> > +{
> > +     void *buf;
> > +
> > +     buf = spl_get_load_buffer(0, sectors * bl_len);
> > +
> > +     return buf;
> > +}
> > +#endif
>
> You cannot provide strong definitions of these functions at an
> architecture level, as this prevents any board from overriding them.

Originally I wanted to put it in arch/riscv/cpu/ax25/spl.c .
However It is a fundamental improvement for spl_ram flow, but not
likely a private change.
That's why I put it here, so that all boards of RISC-V can be benefited.

Thanks,
Rick

>
> Regards,
> Samuel
>
Samuel Holland Dec. 28, 2022, 4:26 a.m. UTC | #3
On 12/27/22 21:22, Rick Chen wrote:
> Hi Samuel,
> 
> Samuel Holland <samuel@sholland.org> 於 2022年12月28日 週三 上午10:47寫道:
>>
>> On 12/22/22 01:21, Rick Chen wrote:
>>> When fit image boots from ram, the payload will
>>> be prepared in the address of SPL_LOAD_FIT_ADDRESS.
>>> In spl fit generic flow, it will malloc another
>>> memory address and copy whole fit image to this
>>> malloc address.  But it is un-necessary for booting
>>> from RAM.
>>
>> This should mostly be solved by using `mkimage -E` or binman's
>> `fit,external-offset`. Then only the FIT structure, without any data,
>> will be copied.
>>
>>> This patch improves this flow by declare the
>>> board_spl_fit_buffer_addr() to replace the original one.
>>> The larger image size (eq: Kernel Image 10~20MB), it
>>> can save more booting time.
>>>
>>> Also enhance memcpy function by checking source and
>>> destination address. If they are the same address,
>>> just return and don't copy data anymore.
>>>
>>> Signed-off-by: Rick Chen <rick@andestech.com>
>>> ---
>>>  arch/riscv/lib/memcpy.S |  2 ++
>>>  arch/riscv/lib/spl.c    | 16 ++++++++++++++++
>>>  2 files changed, 18 insertions(+)
>>>
>>> diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
>>> index 00672c19ad..9884077c93 100644
>>> --- a/arch/riscv/lib/memcpy.S
>>> +++ b/arch/riscv/lib/memcpy.S
>>> @@ -9,6 +9,7 @@
>>>  /* void *memcpy(void *, const void *, size_t) */
>>>  ENTRY(__memcpy)
>>>  WEAK(memcpy)
>>> +     beq     a0, a1, .copy_end
>>
>> I see one call to memcpy() in common/spl/spl_ram.c. I think it would
>> make more sense to do the check there instead of adding a branch to
>> every memcpy() call.
> 
> There is not only one memcpy being called in spl_ram.c during spl
> booting progress, but also in spl_fit.c .

Either your FIT uses external data, and the one in spl_ram.c is trivial,
or it uses internal data, and the one in spl_fit.c is unavoidable.
Either way, there is only one place where this change makes a difference.

> Otherwise I prefer not to modify it in generic flow level but arch
> level, because the src and dest checking
> may not be necessary for other booting devices.

I see arch/arm/lib/memcpy.S has similar logic, so other code may be
depending on this optimization, and there is precedent for this change.

>>
>>>       /* Save for return value */
>>>       mv      t6, a0
>>>
>>> @@ -121,6 +122,7 @@ WEAK(memcpy)
>>>  2:
>>>
>>>       mv      a0, t6
>>> +.copy_end:
>>>       ret
>>>
>>>  .Lmisaligned_word_copy:
>>> diff --git a/arch/riscv/lib/spl.c b/arch/riscv/lib/spl.c
>>> index f4d3b67e5d..18f86ee207 100644
>>> --- a/arch/riscv/lib/spl.c
>>> +++ b/arch/riscv/lib/spl.c
>>> @@ -61,3 +61,19 @@ void __noreturn jump_to_image_no_args(struct spl_image_info *spl_image)
>>>  #endif
>>>       image_entry(gd->arch.boot_hart, fdt_blob);
>>>  }
>>> +
>>> +#ifdef CONFIG_SPL_RAM_SUPPORT
>>> +struct legacy_img_hdr *spl_get_load_buffer(ssize_t offset, size_t size)
>>> +{
>>> +     return (void *)(CONFIG_SPL_LOAD_FIT_ADDRESS + offset);
>>> +}
>>> +
>>> +void *board_spl_fit_buffer_addr(ulong fit_size, int sectors, int bl_len)
>>> +{
>>> +     void *buf;
>>> +
>>> +     buf = spl_get_load_buffer(0, sectors * bl_len);
>>> +
>>> +     return buf;
>>> +}
>>> +#endif
>>
>> You cannot provide strong definitions of these functions at an
>> architecture level, as this prevents any board from overriding them.
> 
> Originally I wanted to put it in arch/riscv/cpu/ax25/spl.c .
> However It is a fundamental improvement for spl_ram flow, but not
> likely a private change.
> That's why I put it here, so that all boards of RISC-V can be benefited.

This change isn't in any way specific to RISC-V. If you want to make
this generic, it would belong in spl_ram.c. But I still think it is
inappropriate to provide a strong definition of board_* functions
outside board code.

Regards,
Samuel
Rick Chen Dec. 28, 2022, 5:12 a.m. UTC | #4
> On 12/27/22 21:22, Rick Chen wrote:
> > Hi Samuel,
> >
> > Samuel Holland <samuel@sholland.org> 於 2022年12月28日 週三 上午10:47寫道:
> >>
> >> On 12/22/22 01:21, Rick Chen wrote:
> >>> When fit image boots from ram, the payload will
> >>> be prepared in the address of SPL_LOAD_FIT_ADDRESS.
> >>> In spl fit generic flow, it will malloc another
> >>> memory address and copy whole fit image to this
> >>> malloc address.  But it is un-necessary for booting
> >>> from RAM.
> >>
> >> This should mostly be solved by using `mkimage -E` or binman's
> >> `fit,external-offset`. Then only the FIT structure, without any data,
> >> will be copied.
> >>
> >>> This patch improves this flow by declare the
> >>> board_spl_fit_buffer_addr() to replace the original one.
> >>> The larger image size (eq: Kernel Image 10~20MB), it
> >>> can save more booting time.
> >>>
> >>> Also enhance memcpy function by checking source and
> >>> destination address. If they are the same address,
> >>> just return and don't copy data anymore.
> >>>
> >>> Signed-off-by: Rick Chen <rick@andestech.com>
> >>> ---
> >>>  arch/riscv/lib/memcpy.S |  2 ++
> >>>  arch/riscv/lib/spl.c    | 16 ++++++++++++++++
> >>>  2 files changed, 18 insertions(+)
> >>>
> >>> diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
> >>> index 00672c19ad..9884077c93 100644
> >>> --- a/arch/riscv/lib/memcpy.S
> >>> +++ b/arch/riscv/lib/memcpy.S
> >>> @@ -9,6 +9,7 @@
> >>>  /* void *memcpy(void *, const void *, size_t) */
> >>>  ENTRY(__memcpy)
> >>>  WEAK(memcpy)
> >>> +     beq     a0, a1, .copy_end
> >>
> >> I see one call to memcpy() in common/spl/spl_ram.c. I think it would
> >> make more sense to do the check there instead of adding a branch to
> >> every memcpy() call.
> >
> > There is not only one memcpy being called in spl_ram.c during spl
> > booting progress, but also in spl_fit.c .
>
> Either your FIT uses external data, and the one in spl_ram.c is trivial,
> or it uses internal data, and the one in spl_fit.c is unavoidable.
> Either way, there is only one place where this change makes a difference.
>
> > Otherwise I prefer not to modify it in generic flow level but arch
> > level, because the src and dest checking
> > may not be necessary for other booting devices.
>
> I see arch/arm/lib/memcpy.S has similar logic, so other code may be
> depending on this optimization, and there is precedent for this change.
>
> >>
> >>>       /* Save for return value */
> >>>       mv      t6, a0
> >>>
> >>> @@ -121,6 +122,7 @@ WEAK(memcpy)
> >>>  2:
> >>>
> >>>       mv      a0, t6
> >>> +.copy_end:
> >>>       ret
> >>>
> >>>  .Lmisaligned_word_copy:
> >>> diff --git a/arch/riscv/lib/spl.c b/arch/riscv/lib/spl.c
> >>> index f4d3b67e5d..18f86ee207 100644
> >>> --- a/arch/riscv/lib/spl.c
> >>> +++ b/arch/riscv/lib/spl.c
> >>> @@ -61,3 +61,19 @@ void __noreturn jump_to_image_no_args(struct spl_image_info *spl_image)
> >>>  #endif
> >>>       image_entry(gd->arch.boot_hart, fdt_blob);
> >>>  }
> >>> +
> >>> +#ifdef CONFIG_SPL_RAM_SUPPORT
> >>> +struct legacy_img_hdr *spl_get_load_buffer(ssize_t offset, size_t size)
> >>> +{
> >>> +     return (void *)(CONFIG_SPL_LOAD_FIT_ADDRESS + offset);
> >>> +}
> >>> +
> >>> +void *board_spl_fit_buffer_addr(ulong fit_size, int sectors, int bl_len)
> >>> +{
> >>> +     void *buf;
> >>> +
> >>> +     buf = spl_get_load_buffer(0, sectors * bl_len);
> >>> +
> >>> +     return buf;
> >>> +}
> >>> +#endif
> >>
> >> You cannot provide strong definitions of these functions at an
> >> architecture level, as this prevents any board from overriding them.
> >
> > Originally I wanted to put it in arch/riscv/cpu/ax25/spl.c .
> > However It is a fundamental improvement for spl_ram flow, but not
> > likely a private change.
> > That's why I put it here, so that all boards of RISC-V can be benefited.
>
> This change isn't in any way specific to RISC-V. If you want to make
> this generic, it would belong in spl_ram.c. But I still think it is
> inappropriate to provide a strong definition of board_* functions
> outside board code.

OK, I will move it to arch/riscv/cpu/ax25/spl.c.

>
> Regards,
> Samuel
>
diff mbox series

Patch

diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
index 00672c19ad..9884077c93 100644
--- a/arch/riscv/lib/memcpy.S
+++ b/arch/riscv/lib/memcpy.S
@@ -9,6 +9,7 @@ 
 /* void *memcpy(void *, const void *, size_t) */
 ENTRY(__memcpy)
 WEAK(memcpy)
+	beq	a0, a1, .copy_end
 	/* Save for return value */
 	mv	t6, a0
 
@@ -121,6 +122,7 @@  WEAK(memcpy)
 2:
 
 	mv	a0, t6
+.copy_end:
 	ret
 
 .Lmisaligned_word_copy:
diff --git a/arch/riscv/lib/spl.c b/arch/riscv/lib/spl.c
index f4d3b67e5d..18f86ee207 100644
--- a/arch/riscv/lib/spl.c
+++ b/arch/riscv/lib/spl.c
@@ -61,3 +61,19 @@  void __noreturn jump_to_image_no_args(struct spl_image_info *spl_image)
 #endif
 	image_entry(gd->arch.boot_hart, fdt_blob);
 }
+
+#ifdef CONFIG_SPL_RAM_SUPPORT
+struct legacy_img_hdr *spl_get_load_buffer(ssize_t offset, size_t size)
+{
+	return (void *)(CONFIG_SPL_LOAD_FIT_ADDRESS + offset);
+}
+
+void *board_spl_fit_buffer_addr(ulong fit_size, int sectors, int bl_len)
+{
+	void *buf;
+
+	buf = spl_get_load_buffer(0, sectors * bl_len);
+
+	return buf;
+}
+#endif