diff mbox

rtas-nvram: optimize erase

Message ID 1462428819-11150-1-git-send-email-nikunj@linux.vnet.ibm.com
State Superseded
Headers show

Commit Message

Nikunj A Dadhania May 5, 2016, 6:13 a.m. UTC
As this was done at byte granularity, erasing the complete NVRAM (64K by
default) took a lot of time. To reduce the number of RTAS calls — one per
byte written, which is expensive — the erase is now done in blocks of 1024
bytes.

After this patch there is ~450msec improvement during boot. Default qemu
booting does not provide file backed nvram, so every boot there would be
full erase of 64K.

Before this patch:

real	0m2.214s
user	0m0.015s
sys	  0m0.006s

real	0m2.222s
user	0m0.014s
sys	  0m0.005s

real	0m2.201s
user	0m0.010s
sys	  0m0.005s

After this patch:

real	0m1.762s
user	0m0.014s
sys	  0m0.006s

real	0m1.773s
user	0m0.011s
sys   0m0.004s

real	0m1.754s
user	0m0.013s
sys	  0m0.005s

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 lib/libnvram/nvram.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

Comments

Thomas Huth May 9, 2016, 9:54 a.m. UTC | #1
On 05.05.2016 08:13, Nikunj A Dadhania wrote:
> As this was done at byte granularity, erasing complete nvram(64K
> default) took a lot of time. To reduce the number of rtas call per byte
> write which is expensive, the erase is done in a block of 1024.
> 
> After this patch there is ~450msec improvement during boot. Default qemu
> booting does not provide file backed nvram, so every boot there would be
> full erase of 64K.

Wow, that's a pretty good improvement!

> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  lib/libnvram/nvram.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/lib/libnvram/nvram.c b/lib/libnvram/nvram.c
> index 473814e..5a895ee 100644
> --- a/lib/libnvram/nvram.c
> +++ b/lib/libnvram/nvram.c
> @@ -80,6 +80,8 @@ static volatile uint8_t nvram[NVRAM_LENGTH]; /* FAKE */
>  
>  #elif defined(RTAS_NVRAM)
>  
> +#define RTAS_ERASE_BUF_SZ 1024
> +unsigned char erase_buf[RTAS_ERASE_BUF_SZ] = {0};

No need for the "= {0}" here since you memset() the buffer later anyway.

Actually, could you maybe move this buffer into the erase_nvram()
function so that it gets a stack variable, so we can save this memory
from the global space? It should work, at least if you'd decrease
RTAS_ERASE_BUF_SZ to 512, since this is the size that is used in
fast_rfill() for a local array, too.

>  static inline void nvram_fetch(unsigned int offset, void *buf, unsigned int len)
>  {
>   	struct hv_rtas_call rtas = {
> @@ -372,9 +374,18 @@ partition_t get_partition_fs(char *name, int namelen)
>  void erase_nvram(int offset, int len)
>  {
>  	int i;
> +#ifdef RTAS_NVRAM
> +	int chunk;
>  
> +	memset(erase_buf, 0, RTAS_ERASE_BUF_SZ);
> +	for (i = len; i > 0; i -= RTAS_ERASE_BUF_SZ, offset += RTAS_ERASE_BUF_SZ) {
> +		chunk = (i > RTAS_ERASE_BUF_SZ)? RTAS_ERASE_BUF_SZ : i;
> +		nvram_store(offset, erase_buf, chunk);
> +	}
> +#else
>  	for (i=offset; i<offset+len; i++)
>  		nvram_write_byte(i, 0);
> +#endif
>  }

Yet another idea: There seems to be buffer called "nvram_buffer"
available in nvram.c already, which can be used as scratch space?
(I hope I understood that code right...)

So maybe that buffer could be used to clear the whole area at once?
Something like:

void erase_nvram(int offset, int len)
{
 	int i;

#ifdef RTAS_NVRAM
	uint8_t *erase_buf = get_nvram_buffer(len);
	if (erase_buf) {
		/* Speed up by erasing all memory at once */
		memset(erase_buf, 0, len);
		nvram_store(offset, erase_buf, len);
		free_nvram_buffer(erase_buf);
		return;
	}
	/* If get_nvram_buffer failed, fall through to default code */
#endif

	for (i=offset; i<offset+len; i++)
		nvram_write_byte(i, 0);
}

That whole concept with the nvram_buffer looks a little bit badly
documented to me, but as far as I understood the code, it could work?

 Thomas
Nikunj A Dadhania May 9, 2016, 10:36 a.m. UTC | #2
Thomas Huth <thuth@redhat.com> writes:

> On 05.05.2016 08:13, Nikunj A Dadhania wrote:
>> As this was done at byte granularity, erasing complete nvram(64K
>> default) took a lot of time. To reduce the number of rtas call per byte
>> write which is expensive, the erase is done in a block of 1024.
>> 
>> After this patch there is ~450msec improvement during boot. Default qemu
>> booting does not provide file backed nvram, so every boot there would be
>> full erase of 64K.
>
> Wow, that's a pretty good improvement!
>
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---
>>  lib/libnvram/nvram.c | 11 +++++++++++
>>  1 file changed, 11 insertions(+)
>> 
>> diff --git a/lib/libnvram/nvram.c b/lib/libnvram/nvram.c
>> index 473814e..5a895ee 100644
>> --- a/lib/libnvram/nvram.c
>> +++ b/lib/libnvram/nvram.c
>> @@ -80,6 +80,8 @@ static volatile uint8_t nvram[NVRAM_LENGTH]; /* FAKE */
>>  
>>  #elif defined(RTAS_NVRAM)
>>  
>> +#define RTAS_ERASE_BUF_SZ 1024
>> +unsigned char erase_buf[RTAS_ERASE_BUF_SZ] = {0};
>
> No need for the "= {0}" here since you memset() the buffer later anyway.
>
> Actually, could you maybe move this buffer into the erase_nvram()
> function so that it gets a stack variable, so we can save this memory
> from the global space? It should work, at least if you'd decrease
> RTAS_ERASE_BUF_SZ to 512, since this is the size that is used in
> fast_rfill() for a local array, too.
>
>>  static inline void nvram_fetch(unsigned int offset, void *buf, unsigned int len)
>>  {
>>   	struct hv_rtas_call rtas = {
>> @@ -372,9 +374,18 @@ partition_t get_partition_fs(char *name, int namelen)
>>  void erase_nvram(int offset, int len)
>>  {
>>  	int i;
>> +#ifdef RTAS_NVRAM
>> +	int chunk;
>>  
>> +	memset(erase_buf, 0, RTAS_ERASE_BUF_SZ);
>> +	for (i = len; i > 0; i -= RTAS_ERASE_BUF_SZ, offset += RTAS_ERASE_BUF_SZ) {
>> +		chunk = (i > RTAS_ERASE_BUF_SZ)? RTAS_ERASE_BUF_SZ : i;
>> +		nvram_store(offset, erase_buf, chunk);
>> +	}
>> +#else
>>  	for (i=offset; i<offset+len; i++)
>>  		nvram_write_byte(i, 0);
>> +#endif
>>  }
>
> Yet another idea: There seems to be buffer called "nvram_buffer"
> available in nvram.c already, which can be used as scratch space?
> (I hope I understood that code right...)

Yes, I have cross-checked: board-qemu/slof/rtas-nvram.fs allocates
memory equal to nvram_size. This is then initialized via the nvram_init
call, which sets both nvram_buffer and NVRAM_LENGTH.

> So maybe that buffer could be used to clear the whole area at once?
> Something like:
>
> void erase_nvram(int offset, int len)
> {
>  	int i;
>
> #ifdef RTAS_NVRAM
> 	uint8_t *erase_buf = get_nvram_buffer(len);
> 	if (erase_buf) {
> 		/* Speed up by erasing all memory at once */
> 		memset(erase_buf, 0, len);
> 		nvram_store(offset, erase_buf, len);
> 		free_nvram_buffer(erase_buf);
> 		return;
> 	}
> 	/* If get_nvram_buffer failed, fall through to default code */
> #endif
>
> 	for (i=offset; i<offset+len; i++)
> 		nvram_write_byte(i, 0);
> }
>
> That whole concept with the nvram_buffer looks a little bit badly
> documented to me, but as far as I understood the code, it could work?

Yes, it should work; I will post the patch after testing.

Regards
Nikunj
diff mbox

Patch

diff --git a/lib/libnvram/nvram.c b/lib/libnvram/nvram.c
index 473814e..5a895ee 100644
--- a/lib/libnvram/nvram.c
+++ b/lib/libnvram/nvram.c
@@ -80,6 +80,8 @@  static volatile uint8_t nvram[NVRAM_LENGTH]; /* FAKE */
 
 #elif defined(RTAS_NVRAM)
 
+#define RTAS_ERASE_BUF_SZ 1024
+unsigned char erase_buf[RTAS_ERASE_BUF_SZ] = {0};
 static inline void nvram_fetch(unsigned int offset, void *buf, unsigned int len)
 {
  	struct hv_rtas_call rtas = {
@@ -372,9 +374,18 @@  partition_t get_partition_fs(char *name, int namelen)
 void erase_nvram(int offset, int len)
 {
 	int i;
+#ifdef RTAS_NVRAM
+	int chunk;
 
+	memset(erase_buf, 0, RTAS_ERASE_BUF_SZ);
+	for (i = len; i > 0; i -= RTAS_ERASE_BUF_SZ, offset += RTAS_ERASE_BUF_SZ) {
+		chunk = (i > RTAS_ERASE_BUF_SZ)? RTAS_ERASE_BUF_SZ : i;
+		nvram_store(offset, erase_buf, chunk);
+	}
+#else
 	for (i=offset; i<offset+len; i++)
 		nvram_write_byte(i, 0);
+#endif
 }
 
 void wipe_nvram(void)