diff mbox series

[RFC,v2,1/1] Fix missing __udivdi3 in SquashFS implementation.

Message ID 20200930154511.630103-2-mc5686@mclink.it
State Superseded
Delegated to: Tom Rini
Headers show
Series Current strategy is to use right/left shift to implement div/mult | expand

Commit Message

Mauro Condarelli Sept. 30, 2020, 3:45 p.m. UTC
Use right shift to avoid 64-bit divisions.

These divisions are needed to convert from file length (potentially
over 32-bit range) to block number, so result and remainder are
guaranteed to fit in 32-bit integers.

Signed-off-by: Mauro Condarelli <mc5686@mclink.it>
---

Changes in v2:
- replace division with right shift (Daniel Schwierzeck).
- remove vocore2-specific change (Daniel Schwierzeck).
- add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini).

 fs/squashfs/Kconfig      |  2 ++
 fs/squashfs/sqfs.c       | 30 +++++++++++++++---------------
 fs/squashfs/sqfs_inode.c |  6 +++---
 3 files changed, 20 insertions(+), 18 deletions(-)

Comments

Daniel Schwierzeck Sept. 30, 2020, 9:40 p.m. UTC | #1
Am Mittwoch, den 30.09.2020, 17:45 +0200 schrieb Mauro Condarelli:
> Use right shift to avoid 64-bit divisions.
> 
> These divisions are needed to convert from file length (potentially
> over 32-bit range) to block number, so result and remainder are
> guaranteed to fit in 32-bit integers.
> 
> Signed-off-by: Mauro Condarelli <mc5686@mclink.it>

the commit subject needs to be fixed. If you change a specific
subsystem, you need to add the according prefix. You can use the Git
log to figure out that prefix. Also you don't fix a missing __udivdi3 ,
you just want to avoid 64-bit divisions.

So maybe something like: "fs/squashfs: avoid 64-bit divisions on 32-bit 
systems"

> ---
> 
> Changes in v2:
> - replace division with right shift (Daniel Schwierzeck).
> - remove vocore2-specific change (Daniel Schwierzeck).
> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini).
> 
>  fs/squashfs/Kconfig      |  2 ++
>  fs/squashfs/sqfs.c       | 30 +++++++++++++++---------------
>  fs/squashfs/sqfs_inode.c |  6 +++---
>  3 files changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
> index 54ab1618f1..7c3f83d007 100644
> --- a/fs/squashfs/Kconfig
> +++ b/fs/squashfs/Kconfig
> @@ -9,3 +9,5 @@ config FS_SQUASHFS
>  	  filesystem use, for archival use (i.e. in cases where a .tar.gz file
>  	  may be used), and in constrained block device/memory systems (e.g.
>  	  embedded systems) where low overhead is needed.
> +	  WARNING: if compression is enabled SquashFS needs a large amount
> +	  of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000.
> diff --git a/fs/squashfs/sqfs.c b/fs/squashfs/sqfs.c
> index 15208b4dab..90e6848e6c 100644
> --- a/fs/squashfs/sqfs.c
> +++ b/fs/squashfs/sqfs.c
> @@ -85,10 +85,10 @@ static int sqfs_calc_n_blks(__le64 start, __le64 end, u64 *offset)
>  	u64 start_, table_size;
>  
>  	table_size = le64_to_cpu(end) - le64_to_cpu(start);
> -	start_ = le64_to_cpu(start) / ctxt.cur_dev->blksz;
> -	*offset = le64_to_cpu(start) - (start_ * ctxt.cur_dev->blksz);
> +	start_ = le64_to_cpu(start) >> ctxt.cur_dev->log2blksz;
> +	*offset = le64_to_cpu(start) - (start_ << ctxt.cur_dev->log2blksz);
>  
> -	return DIV_ROUND_UP(table_size + *offset, ctxt.cur_dev->blksz);
> +	return (table_size + *offset + ctxt.cur_dev->blksz - 1) >> ctxt.cur_dev->log2blksz;
>  }
>  
>  /*
> @@ -109,8 +109,8 @@ static int sqfs_frag_lookup(u32 inode_fragment_index,
>  	if (inode_fragment_index >= get_unaligned_le32(&sblk->fragments))
>  		return -EINVAL;
>  
> -	start = get_unaligned_le64(&sblk->fragment_table_start) /
> -		ctxt.cur_dev->blksz;
> +	start = get_unaligned_le64(&sblk->fragment_table_start) >>
> +		ctxt.cur_dev->log2blksz;
>  	n_blks = sqfs_calc_n_blks(sblk->fragment_table_start,
>  				  sblk->export_table_start,
>  				  &table_offset);
> @@ -135,7 +135,7 @@ static int sqfs_frag_lookup(u32 inode_fragment_index,
>  	start_block = get_unaligned_le64(table + table_offset + block *
>  					 sizeof(u64));
>  
> -	start = start_block / ctxt.cur_dev->blksz;
> +	start = start_block >> ctxt.cur_dev->log2blksz;
>  	n_blks = sqfs_calc_n_blks(cpu_to_le64(start_block),
>  				  sblk->fragment_table_start, &table_offset);
>  
> @@ -641,8 +641,8 @@ static int sqfs_read_inode_table(unsigned char **inode_table)
>  
>  	table_size = get_unaligned_le64(&sblk->directory_table_start) -
>  		get_unaligned_le64(&sblk->inode_table_start);
> -	start = get_unaligned_le64(&sblk->inode_table_start) /
> -		ctxt.cur_dev->blksz;
> +	start = get_unaligned_le64(&sblk->inode_table_start) >>
> +		ctxt.cur_dev->log2blksz;
>  	n_blks = sqfs_calc_n_blks(sblk->inode_table_start,
>  				  sblk->directory_table_start, &table_offset);
>  
> @@ -725,8 +725,8 @@ static int sqfs_read_directory_table(unsigned char **dir_table, u32 **pos_list)
>  	/* DIRECTORY TABLE */
>  	table_size = get_unaligned_le64(&sblk->fragment_table_start) -
>  		get_unaligned_le64(&sblk->directory_table_start);
> -	start = get_unaligned_le64(&sblk->directory_table_start) /
> -		ctxt.cur_dev->blksz;
> +	start = get_unaligned_le64(&sblk->directory_table_start) >>
> +		ctxt.cur_dev->log2blksz;
>  	n_blks = sqfs_calc_n_blks(sblk->directory_table_start,
>  				  sblk->fragment_table_start, &table_offset);
>  
> @@ -1334,11 +1334,11 @@ int sqfs_read(const char *filename, void *buf, loff_t offset, loff_t len,
>  	}
>  
>  	for (j = 0; j < datablk_count; j++) {
> -		start = data_offset / ctxt.cur_dev->blksz;
> +		start = data_offset >> ctxt.cur_dev->log2blksz;
>  		table_size = SQFS_BLOCK_SIZE(finfo.blk_sizes[j]);
>  		table_offset = data_offset - (start * ctxt.cur_dev->blksz);
> -		n_blks = DIV_ROUND_UP(table_size + table_offset,
> -				      ctxt.cur_dev->blksz);
> +		n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
> +			ctxt.cur_dev->log2blksz;
>  
>  		data_buffer = malloc_cache_aligned(n_blks * ctxt.cur_dev->blksz);
>  
> @@ -1388,10 +1388,10 @@ int sqfs_read(const char *filename, void *buf, loff_t offset, loff_t len,
>  		goto free_buffer;
>  	}
>  
> -	start = frag_entry.start / ctxt.cur_dev->blksz;
> +	start = frag_entry.start >> ctxt.cur_dev->log2blksz;
>  	table_size = SQFS_BLOCK_SIZE(frag_entry.size);
>  	table_offset = frag_entry.start - (start * ctxt.cur_dev->blksz);
> -	n_blks = DIV_ROUND_UP(table_size + table_offset, ctxt.cur_dev->blksz);
> +	n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> ctxt.cur_dev->log2blksz;
>  
>  	fragment = malloc_cache_aligned(n_blks * ctxt.cur_dev->blksz);
>  
> diff --git a/fs/squashfs/sqfs_inode.c b/fs/squashfs/sqfs_inode.c
> index 1368f3063c..5dbf945c80 100644
> --- a/fs/squashfs/sqfs_inode.c
> +++ b/fs/squashfs/sqfs_inode.c
> @@ -10,7 +10,7 @@
>  #include <stdint.h>
>  #include <stdio.h>
>  #include <stdlib.h>
> -#include <string.h>
> +#include <part.h>
>  
>  #include "sqfs_decompressor.h"
>  #include "sqfs_filesystem.h"
> @@ -68,9 +68,9 @@ int sqfs_inode_size(struct squashfs_base_inode *inode, u32 blk_size)
>  		unsigned int blk_list_size;
>  
>  		if (fragment == 0xFFFFFFFF)
> -			blk_list_size = DIV_ROUND_UP(file_size, blk_size);
> +			blk_list_size = (file_size + blk_size - 1) >> LOG2(blk_size);
>  		else
> -			blk_list_size = file_size / blk_size;
> +			blk_list_size = file_size > LOG2(blk_size);

one > is missing:

   blk_list_size = file_size >> LOG2(blk_size);

>  
>  		return sizeof(*lreg) + blk_list_size * sizeof(u32);
>  	}
Thomas Petazzoni Oct. 1, 2020, 7:28 a.m. UTC | #2
Hello,

On Wed, 30 Sep 2020 17:45:11 +0200
Mauro Condarelli <mc5686@mclink.it> wrote:

> Use right shift to avoid 64-bit divisions.
> 
> These divisions are needed to convert from file length (potentially
> over 32-bit range) to block number, so result and remainder are
> guaranteed to fit in 32-bit integers.
> 
> Signed-off-by: Mauro Condarelli <mc5686@mclink.it>

Perhaps the commit log should contain an example U-Boot
configuration/platform where it fails to build. Indeed, we did test the
SquashFS code on ARM 32-bit, and it built and worked fine.

> ---
> 
> Changes in v2:
> - replace division with right shift (Daniel Schwierzeck).
> - remove vocore2-specific change (Daniel Schwierzeck).
> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini).
> 
>  fs/squashfs/Kconfig      |  2 ++
>  fs/squashfs/sqfs.c       | 30 +++++++++++++++---------------
>  fs/squashfs/sqfs_inode.c |  6 +++---
>  3 files changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
> index 54ab1618f1..7c3f83d007 100644
> --- a/fs/squashfs/Kconfig
> +++ b/fs/squashfs/Kconfig
> @@ -9,3 +9,5 @@ config FS_SQUASHFS
>  	  filesystem use, for archival use (i.e. in cases where a .tar.gz file
>  	  may be used), and in constrained block device/memory systems (e.g.
>  	  embedded systems) where low overhead is needed.
> +	  WARNING: if compression is enabled SquashFS needs a large amount
> +	  of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000.

This change is completely unrelated, and should be in a separate patch.


> -		n_blks = DIV_ROUND_UP(table_size + table_offset,
> -				      ctxt.cur_dev->blksz);
> +		n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
> +			ctxt.cur_dev->log2blksz;

I understand why you have to add blksz - 1 before doing the shift, but
I find that it's overall a lot less readable/clear. Is there a way to
do better ?

We could use DIV_ROUND_UP_ULL() I guess.


>  		else
> -			blk_list_size = file_size / blk_size;
> +			blk_list_size = file_size > LOG2(blk_size);

Very bad mistake here: > should be >>.

Best regards,

Thomas
Miquel Raynal Oct. 1, 2020, 7:59 a.m. UTC | #3
Hello,

Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct
2020 09:28:41 +0200:

> Hello,
> 
> On Wed, 30 Sep 2020 17:45:11 +0200
> Mauro Condarelli <mc5686@mclink.it> wrote:
> 
> > Use right shift to avoid 64-bit divisions.
> > 
> > These divisions are needed to convert from file length (potentially
> > over 32-bit range) to block number, so result and remainder are
> > guaranteed to fit in 32-bit integers.
> > 
> > Signed-off-by: Mauro Condarelli <mc5686@mclink.it>  
> 
> Perhaps the commit log should contain an example U-Boot
> configuration/platform where it fails to build. Indeed, we did test the
> SquashFS code on ARM 32-bit, and it built and worked fine.
> 
> > ---
> > 
> > Changes in v2:
> > - replace division with right shift (Daniel Schwierzeck).
> > - remove vocore2-specific change (Daniel Schwierzeck).
> > - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini).
> > 
> >  fs/squashfs/Kconfig      |  2 ++
> >  fs/squashfs/sqfs.c       | 30 +++++++++++++++---------------
> >  fs/squashfs/sqfs_inode.c |  6 +++---
> >  3 files changed, 20 insertions(+), 18 deletions(-)
> > 
> > diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
> > index 54ab1618f1..7c3f83d007 100644
> > --- a/fs/squashfs/Kconfig
> > +++ b/fs/squashfs/Kconfig
> > @@ -9,3 +9,5 @@ config FS_SQUASHFS
> >  	  filesystem use, for archival use (i.e. in cases where a .tar.gz file
> >  	  may be used), and in constrained block device/memory systems (e.g.
> >  	  embedded systems) where low overhead is needed.
> > +	  WARNING: if compression is enabled SquashFS needs a large amount
> > +	  of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000.  
> 
> This change is completely unrelated, and should be in a separate patch.

I was about to tell you the same thing, this warning is useful but
should definitely lay into its own commit.

> > -		n_blks = DIV_ROUND_UP(table_size + table_offset,
> > -				      ctxt.cur_dev->blksz);
> > +		n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
> > +			ctxt.cur_dev->log2blksz;  
> 
> I understand why you have to add blksz - 1 before doing the shift, but
> I find that it's overall a lot less readable/clear. Is there a way to
> do better ?
> 
> We could use DIV_ROUND_UP_ULL() I guess.
> 
> 
> >  		else
> > -			blk_list_size = file_size / blk_size;
> > +			blk_list_size = file_size > LOG2(blk_size);  
> 
> Very bad mistake here: > should be >>.

I personally highly dislike replacing divisions into shifts. I think
it's too much effort when trying to understand code you did not write
yourself. Is it possible to use something like do_div? plus, you can
check the remainder to be 0 in this case.

Thanks,
Miquèl
Mauro Condarelli Oct. 1, 2020, 8:41 a.m. UTC | #4
Thanks for Your review.

On 10/1/20 9:59 AM, Miquel Raynal wrote:
> Hello,
>
> Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct
> 2020 09:28:41 +0200:
>
>> Hello,
>>
>> On Wed, 30 Sep 2020 17:45:11 +0200
>> Mauro Condarelli <mc5686@mclink.it> wrote:
>>
>>> Use right shift to avoid 64-bit divisions.
>>>
>>> These divisions are needed to convert from file length (potentially
>>> over 32-bit range) to block number, so result and remainder are
>>> guaranteed to fit in 32-bit integers.
>>>
>>> Signed-off-by: Mauro Condarelli <mc5686@mclink.it>  
>> Perhaps the commit log should contain an example U-Boot
>> configuration/platform where it fails to build. Indeed, we did test the
>> SquashFS code on ARM 32-bit, and it built and worked fine.
This fails on mips32, specifically for the vocore2 board.
Problem here is __udivdi3 is not defined for this architecture
while it is for ARM32.
My (limited) understanding suggests it should be removed for ARM
since its usage has been (is being?) weeded out from both kernel
and u-boot. This is not my call, though.

I will add a note to v3.

>>
>>> ---
>>>
>>> Changes in v2:
>>> - replace division with right shift (Daniel Schwierzeck).
>>> - remove vocore2-specific change (Daniel Schwierzeck).
>>> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini).
>>>
>>>  fs/squashfs/Kconfig      |  2 ++
>>>  fs/squashfs/sqfs.c       | 30 +++++++++++++++---------------
>>>  fs/squashfs/sqfs_inode.c |  6 +++---
>>>  3 files changed, 20 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
>>> index 54ab1618f1..7c3f83d007 100644
>>> --- a/fs/squashfs/Kconfig
>>> +++ b/fs/squashfs/Kconfig
>>> @@ -9,3 +9,5 @@ config FS_SQUASHFS
>>>  	  filesystem use, for archival use (i.e. in cases where a .tar.gz file
>>>  	  may be used), and in constrained block device/memory systems (e.g.
>>>  	  embedded systems) where low overhead is needed.
>>> +	  WARNING: if compression is enabled SquashFS needs a large amount
>>> +	  of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000.  
>> This change is completely unrelated, and should be in a separate patch.
> I was about to tell you the same thing, this warning is useful but
> should definitely lay into its own commit.
Will do in v3.


>>> -		n_blks = DIV_ROUND_UP(table_size + table_offset,
>>> -				      ctxt.cur_dev->blksz);
>>> +		n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
>>> +			ctxt.cur_dev->log2blksz;  
>> I understand why you have to add blksz - 1 before doing the shift, but
>> I find that it's overall a lot less readable/clear. Is there a way to
>> do better ?
>>
>> We could use DIV_ROUND_UP_ULL() I guess.
I did not do this because DIV_ROUND_UP() is in a global (non-architecture-specific)
header and it unconditionally uses division; I did not know how to handle this.
Would a comment suffice to clarify intent? Something like:

n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
	  ctxt.cur_dev->log2blksz; /* ROUND_UP division */

Note: this problem stays even if we roll-back to use do_div(); see below.

>>>  		else
>>> -			blk_list_size = file_size / blk_size;
>>> +			blk_list_size = file_size > LOG2(blk_size);  
>> Very bad mistake here: > should be >>.
Sorry, my bad.
Corrected for v3.

> I personally highly dislike replacing divisions into shifts. I think
> it's too much effort when trying to understand code you did not write
> yourself. Is it possible to use something like do_div? plus, you can
> check the remainder to be 0 in this case.
Please make up your mind about this.
I personally tend to agree with Miquèl and my v1 patch used
do_div() exclusively (i did not use even the lldiv() wrapper), but
I will not insist either way... just let me know what's considered
better.

>
> Thanks,
> Miquèl
Many thanks
Mauro
Mauro Condarelli Oct. 1, 2020, 8:53 a.m. UTC | #5
Correcting myself.
See below.

On 10/1/20 10:41 AM, Mauro Condarelli wrote:
> Thanks for Your review.
>
> On 10/1/20 9:59 AM, Miquel Raynal wrote:
>> Hello,
>>
>> Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct
>> 2020 09:28:41 +0200:
>>
>>> Hello,
>>>
>>> On Wed, 30 Sep 2020 17:45:11 +0200
>>> Mauro Condarelli <mc5686@mclink.it> wrote:
>>>
>>>> Use right shift to avoid 64-bit divisions.
>>>>
>>>> These divisions are needed to convert from file length (potentially
>>>> over 32-bit range) to block number, so result and remainder are
>>>> guaranteed to fit in 32-bit integers.
>>>>
>>>> Signed-off-by: Mauro Condarelli <mc5686@mclink.it>  
>>> Perhaps the commit log should contain an example U-Boot
>>> configuration/platform where it fails to build. Indeed, we did test the
>>> SquashFS code on ARM 32-bit, and it built and worked fine.
> This fails on mips32, specifically for the vocore2 board.
> Problem here is __udivdi3 is not defined for this architecture
> while it is for ARM32.
> My (limited) understanding suggests it should be removed for ARM
> since its usage has been (is being?) weeded out from both kernel
> and u-boot. This is not my call, though.
>
> I will add a note to v3.
>
>>>> ---
>>>>
>>>> Changes in v2:
>>>> - replace division with right shift (Daniel Schwierzeck).
>>>> - remove vocore2-specific change (Daniel Schwierzeck).
>>>> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini).
>>>>
>>>>  fs/squashfs/Kconfig      |  2 ++
>>>>  fs/squashfs/sqfs.c       | 30 +++++++++++++++---------------
>>>>  fs/squashfs/sqfs_inode.c |  6 +++---
>>>>  3 files changed, 20 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
>>>> index 54ab1618f1..7c3f83d007 100644
>>>> --- a/fs/squashfs/Kconfig
>>>> +++ b/fs/squashfs/Kconfig
>>>> @@ -9,3 +9,5 @@ config FS_SQUASHFS
>>>>  	  filesystem use, for archival use (i.e. in cases where a .tar.gz file
>>>>  	  may be used), and in constrained block device/memory systems (e.g.
>>>>  	  embedded systems) where low overhead is needed.
>>>> +	  WARNING: if compression is enabled SquashFS needs a large amount
>>>> +	  of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000.  
>>> This change is completely unrelated, and should be in a separate patch.
>> I was about to tell you the same thing, this warning is useful but
>> should definitely lay into its own commit.
> Will do in v3.
>
>
>>>> -		n_blks = DIV_ROUND_UP(table_size + table_offset,
>>>> -				      ctxt.cur_dev->blksz);
>>>> +		n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
>>>> +			ctxt.cur_dev->log2blksz;  
>>> I understand why you have to add blksz - 1 before doing the shift, but
>>> I find that it's overall a lot less readable/clear. Is there a way to
>>> do better ?
>>>
>>> We could use DIV_ROUND_UP_ULL() I guess.
> I did not do this because DIV_ROUND_UP() is in a global (non-architecture-specific)
> header and it unconditionally uses division; I did not know how to handle this.
> Would a comment suffice to clarify intent? Something like:
>
> n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
> 	  ctxt.cur_dev->log2blksz; /* ROUND_UP division */
>
> Note: this problem stays even if we roll-back to use do_div(); see below.
actually include/linux/kernel.h defines both DIV_ROUND_DOWN_ULL()
and DIV_ROUND_UP_ULL() I suppose we should use those in all cases.


>>>>  		else
>>>> -			blk_list_size = file_size / blk_size;
>>>> +			blk_list_size = file_size > LOG2(blk_size);  
>>> Very bad mistake here: > should be >>.
> Sorry, my bad.
> Corrected for v3.
>
>> I personally highly dislike replacing divisions into shifts. I think
>> it's too much effort when trying to understand code you did not write
>> yourself. Is it possible to use something like do_div? plus, you can
>> check the remainder to be 0 in this case.
> Please make up your mind about this.
> I personally tend to agree with Miquèl and my v1 patch used
> do_div() exclusively (i did not use even the lldiv() wrapper), but
> I will not insist either way... just let me know what's considered
> better.
As said above: it seems using the macros is both "standard" and
safer than using shifts.
If I get a go-ahead I'll use those macros in v3.

>
>> Thanks,
>> Miquèl
> Many thanks
> Mauro
Miquel Raynal Oct. 1, 2020, 8:56 a.m. UTC | #6
Hi Mauro,

Mauro Condarelli <mc5686@mclink.it> wrote on Thu, 1 Oct 2020 10:53:30
+0200:

> Correcting myself.
> See below.
> 
> On 10/1/20 10:41 AM, Mauro Condarelli wrote:
> > Thanks for Your review.
> >
> > On 10/1/20 9:59 AM, Miquel Raynal wrote:  
> >> Hello,
> >>
> >> Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct
> >> 2020 09:28:41 +0200:
> >>  
> >>> Hello,
> >>>
> >>> On Wed, 30 Sep 2020 17:45:11 +0200
> >>> Mauro Condarelli <mc5686@mclink.it> wrote:
> >>>  
> >>>> Use right shift to avoid 64-bit divisions.
> >>>>
> >>>> These divisions are needed to convert from file length (potentially
> >>>> over 32-bit range) to block number, so result and remainder are
> >>>> guaranteed to fit in 32-bit integers.
> >>>>
> >>>> Signed-off-by: Mauro Condarelli <mc5686@mclink.it>    
> >>> Perhaps the commit log should contain an example U-Boot
> >>> configuration/platform where it fails to build. Indeed, we did test the
> >>> SquashFS code on ARM 32-bit, and it built and worked fine.  
> > This fails on mips32, specifically for the vocore2 board.
> > Problem here is __udivdi3 is not defined for this architecture
> > while it is for ARM32.
> > My (limited) understanding suggests it should be removed for ARM
> > since its usage has been (is being?) weeded out from both kernel
> > and u-boot. This is not my call, though.
> >
> > I will add a note to v3.
> >  
> >>>> ---
> >>>>
> >>>> Changes in v2:
> >>>> - replace division with right shift (Daniel Schwierzeck).
> >>>> - remove vocore2-specific change (Daniel Schwierzeck).
> >>>> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini).
> >>>>
> >>>>  fs/squashfs/Kconfig      |  2 ++
> >>>>  fs/squashfs/sqfs.c       | 30 +++++++++++++++---------------
> >>>>  fs/squashfs/sqfs_inode.c |  6 +++---
> >>>>  3 files changed, 20 insertions(+), 18 deletions(-)
> >>>>
> >>>> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
> >>>> index 54ab1618f1..7c3f83d007 100644
> >>>> --- a/fs/squashfs/Kconfig
> >>>> +++ b/fs/squashfs/Kconfig
> >>>> @@ -9,3 +9,5 @@ config FS_SQUASHFS
> >>>>  	  filesystem use, for archival use (i.e. in cases where a .tar.gz file
> >>>>  	  may be used), and in constrained block device/memory systems (e.g.
> >>>>  	  embedded systems) where low overhead is needed.
> >>>> +	  WARNING: if compression is enabled SquashFS needs a large amount
> >>>> +	  of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000.    
> >>> This change is completely unrelated, and should be in a separate patch.  
> >> I was about to tell you the same thing, this warning is useful but
> >> should definitely lay into its own commit.  
> > Will do in v3.
> >
> >  
> >>>> -		n_blks = DIV_ROUND_UP(table_size + table_offset,
> >>>> -				      ctxt.cur_dev->blksz);
> >>>> +		n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
> >>>> +			ctxt.cur_dev->log2blksz;    
> >>> I understand why you have to add blksz - 1 before doing the shift, but
> >>> I find that it's overall a lot less readable/clear. Is there a way to
> >>> do better ?
> >>>
> >>> We could use DIV_ROUND_UP_ULL() I guess.  
> > I did not do this because DIV_ROUND_UP() is in a global (non-architecture-specific)
> > header and it unconditionally uses division; I did not know how to handle this.
> > Would a comment suffice to clarify intent? Something like:
> >
> > n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
> > 	  ctxt.cur_dev->log2blksz; /* ROUND_UP division */
> >
> > Note: this problem stays even if we roll-back to use do_div(); see below.  
> actually include/linux/kernel.h defines both DIV_ROUND_DOWN_ULL()
> and DIV_ROUND_UP_ULL() I suppose we should use those in all cases.
> 
> 
> >>>>  		else
> >>>> -			blk_list_size = file_size / blk_size;
> >>>> +			blk_list_size = file_size > LOG2(blk_size);    
> >>> Very bad mistake here: > should be >>.  
> > Sorry, my bad.
> > Corrected for v3.
> >  
> >> I personally highly dislike replacing divisions into shifts. I think
> >> it's too much effort when trying to understand code you did not write
> >> yourself. Is it possible to use something like do_div? plus, you can
> >> check the remainder to be 0 in this case.  
> > Please make up your mind about this.
> > I personally tend to agree with Miquèl and my v1 patch used
> > do_div() exclusively (i did not use even the lldiv() wrapper), but
> > I will not insist either way... just let me know what's considered
> > better.  
> As said above: it seems using the macros is both "standard" and
> safer than using shifts.
> If I get a go-ahead I'll use those macros in v3.

I think we all agree using these macros is much nicer, readable and
probably almost as fast.

Thanks,
Miquèl
Mauro Condarelli Oct. 1, 2020, 10:17 a.m. UTC | #7
Ok, Thanks.
Patch is ready, I'll send it after some extended testing on my system (vocore2).

Regards
Mauro

On 10/1/20 10:56 AM, Miquel Raynal wrote:
> Hi Mauro,
>
> Mauro Condarelli <mc5686@mclink.it> wrote on Thu, 1 Oct 2020 10:53:30
> +0200:
>
>> Correcting myself.
>> See below.
>>
>> On 10/1/20 10:41 AM, Mauro Condarelli wrote:
>>> Thanks for Your review.
>>>
>>> On 10/1/20 9:59 AM, Miquel Raynal wrote:  
>>>> Hello,
>>>>
>>>> Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct
>>>> 2020 09:28:41 +0200:
>>>>  
>>>>> Hello,
>>>>>
>>>>> On Wed, 30 Sep 2020 17:45:11 +0200
>>>>> Mauro Condarelli <mc5686@mclink.it> wrote:
>>>>>  
>>>>>> Use right shift to avoid 64-bit divisions.
>>>>>>
>>>>>> These divisions are needed to convert from file length (potentially
>>>>>> over 32-bit range) to block number, so result and remainder are
>>>>>> guaranteed to fit in 32-bit integers.
>>>>>>
>>>>>> Signed-off-by: Mauro Condarelli <mc5686@mclink.it>    
>>>>> Perhaps the commit log should contain an example U-Boot
>>>>> configuration/platform where it fails to build. Indeed, we did test the
>>>>> SquashFS code on ARM 32-bit, and it built and worked fine.  
>>> This fails on mips32, specifically for the vocore2 board.
>>> Problem here is __udivdi3 is not defined for this architecture
>>> while it is for ARM32.
>>> My (limited) understanding suggests it should be removed for ARM
>>> since its usage has been (is being?) weeded out from both kernel
>>> and u-boot. This is not my call, though.
>>>
>>> I will add a note to v3.
>>>  
>>>>>> ---
>>>>>>
>>>>>> Changes in v2:
>>>>>> - replace division with right shift (Daniel Schwierzeck).
>>>>>> - remove vocore2-specific change (Daniel Schwierzeck).
>>>>>> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini).
>>>>>>
>>>>>>  fs/squashfs/Kconfig      |  2 ++
>>>>>>  fs/squashfs/sqfs.c       | 30 +++++++++++++++---------------
>>>>>>  fs/squashfs/sqfs_inode.c |  6 +++---
>>>>>>  3 files changed, 20 insertions(+), 18 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
>>>>>> index 54ab1618f1..7c3f83d007 100644
>>>>>> --- a/fs/squashfs/Kconfig
>>>>>> +++ b/fs/squashfs/Kconfig
>>>>>> @@ -9,3 +9,5 @@ config FS_SQUASHFS
>>>>>>  	  filesystem use, for archival use (i.e. in cases where a .tar.gz file
>>>>>>  	  may be used), and in constrained block device/memory systems (e.g.
>>>>>>  	  embedded systems) where low overhead is needed.
>>>>>> +	  WARNING: if compression is enabled SquashFS needs a large amount
>>>>>> +	  of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000.    
>>>>> This change is completely unrelated, and should be in a separate patch.  
>>>> I was about to tell you the same thing, this warning is useful but
>>>> should definitely lay into its own commit.  
>>> Will do in v3.
>>>
>>>  
>>>>>> -		n_blks = DIV_ROUND_UP(table_size + table_offset,
>>>>>> -				      ctxt.cur_dev->blksz);
>>>>>> +		n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
>>>>>> +			ctxt.cur_dev->log2blksz;    
>>>>> I understand why you have to add blksz - 1 before doing the shift, but
>>>>> I find that it's overall a lot less readable/clear. Is there a way to
>>>>> do better ?
>>>>>
>>>>> We could use DIV_ROUND_UP_ULL() I guess.  
>>> I did not do this because DIV_ROUND_UP() is in a global (non-architecture-specific)
>>> header and it unconditionally uses division; I did not know how to handle this.
>>> Would a comment suffice to clarify intent? Something like:
>>>
>>> n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
>>> 	  ctxt.cur_dev->log2blksz; /* ROUND_UP division */
>>>
>>> Note: this problem stays even if we roll-back to use do_div(); see below.  
>> actually include/linux/kernel.h defines both DIV_ROUND_DOWN_ULL()
>> and DIV_ROUND_UP_ULL() I suppose we should use those in all cases.
>>
>>
>>>>>>  		else
>>>>>> -			blk_list_size = file_size / blk_size;
>>>>>> +			blk_list_size = file_size > LOG2(blk_size);    
>>>>> Very bad mistake here: > should be >>.  
>>> Sorry, my bad.
>>> Corrected for v3.
>>>  
>>>> I personally highly dislike replacing divisions into shifts. I think
>>>> it's too much effort when trying to understand code you did not write
>>>> yourself. Is it possible to use something like do_div? plus, you can
>>>> check the remainder to be 0 in this case.  
>>> Please make up your mind about this.
>>> I personally tend to agree with Miquèl and my v1 patch used
>>> do_div() exclusively (i did not use even the lldiv() wrapper), but
>>> I will not insist either way... just let me know what's considered
>>> better.  
>> As said above: it seems using the macros is both "standard" and
>> safer than using shifts.
>> If I get a go-ahead I'll use those macros in v3.
> I think we all agree using these macros is much nicer, readable and
> probably almost as fast.
>
> Thanks,
> Miquèl
>
diff mbox series

Patch

diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 54ab1618f1..7c3f83d007 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -9,3 +9,5 @@  config FS_SQUASHFS
 	  filesystem use, for archival use (i.e. in cases where a .tar.gz file
 	  may be used), and in constrained block device/memory systems (e.g.
 	  embedded systems) where low overhead is needed.
+	  WARNING: if compression is enabled SquashFS needs a large amount
+	  of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000.
diff --git a/fs/squashfs/sqfs.c b/fs/squashfs/sqfs.c
index 15208b4dab..90e6848e6c 100644
--- a/fs/squashfs/sqfs.c
+++ b/fs/squashfs/sqfs.c
@@ -85,10 +85,10 @@  static int sqfs_calc_n_blks(__le64 start, __le64 end, u64 *offset)
 	u64 start_, table_size;
 
 	table_size = le64_to_cpu(end) - le64_to_cpu(start);
-	start_ = le64_to_cpu(start) / ctxt.cur_dev->blksz;
-	*offset = le64_to_cpu(start) - (start_ * ctxt.cur_dev->blksz);
+	start_ = le64_to_cpu(start) >> ctxt.cur_dev->log2blksz;
+	*offset = le64_to_cpu(start) - (start_ << ctxt.cur_dev->log2blksz);
 
-	return DIV_ROUND_UP(table_size + *offset, ctxt.cur_dev->blksz);
+	return (table_size + *offset + ctxt.cur_dev->blksz - 1) >> ctxt.cur_dev->log2blksz;
 }
 
 /*
@@ -109,8 +109,8 @@  static int sqfs_frag_lookup(u32 inode_fragment_index,
 	if (inode_fragment_index >= get_unaligned_le32(&sblk->fragments))
 		return -EINVAL;
 
-	start = get_unaligned_le64(&sblk->fragment_table_start) /
-		ctxt.cur_dev->blksz;
+	start = get_unaligned_le64(&sblk->fragment_table_start) >>
+		ctxt.cur_dev->log2blksz;
 	n_blks = sqfs_calc_n_blks(sblk->fragment_table_start,
 				  sblk->export_table_start,
 				  &table_offset);
@@ -135,7 +135,7 @@  static int sqfs_frag_lookup(u32 inode_fragment_index,
 	start_block = get_unaligned_le64(table + table_offset + block *
 					 sizeof(u64));
 
-	start = start_block / ctxt.cur_dev->blksz;
+	start = start_block >> ctxt.cur_dev->log2blksz;
 	n_blks = sqfs_calc_n_blks(cpu_to_le64(start_block),
 				  sblk->fragment_table_start, &table_offset);
 
@@ -641,8 +641,8 @@  static int sqfs_read_inode_table(unsigned char **inode_table)
 
 	table_size = get_unaligned_le64(&sblk->directory_table_start) -
 		get_unaligned_le64(&sblk->inode_table_start);
-	start = get_unaligned_le64(&sblk->inode_table_start) /
-		ctxt.cur_dev->blksz;
+	start = get_unaligned_le64(&sblk->inode_table_start) >>
+		ctxt.cur_dev->log2blksz;
 	n_blks = sqfs_calc_n_blks(sblk->inode_table_start,
 				  sblk->directory_table_start, &table_offset);
 
@@ -725,8 +725,8 @@  static int sqfs_read_directory_table(unsigned char **dir_table, u32 **pos_list)
 	/* DIRECTORY TABLE */
 	table_size = get_unaligned_le64(&sblk->fragment_table_start) -
 		get_unaligned_le64(&sblk->directory_table_start);
-	start = get_unaligned_le64(&sblk->directory_table_start) /
-		ctxt.cur_dev->blksz;
+	start = get_unaligned_le64(&sblk->directory_table_start) >>
+		ctxt.cur_dev->log2blksz;
 	n_blks = sqfs_calc_n_blks(sblk->directory_table_start,
 				  sblk->fragment_table_start, &table_offset);
 
@@ -1334,11 +1334,11 @@  int sqfs_read(const char *filename, void *buf, loff_t offset, loff_t len,
 	}
 
 	for (j = 0; j < datablk_count; j++) {
-		start = data_offset / ctxt.cur_dev->blksz;
+		start = data_offset >> ctxt.cur_dev->log2blksz;
 		table_size = SQFS_BLOCK_SIZE(finfo.blk_sizes[j]);
 		table_offset = data_offset - (start * ctxt.cur_dev->blksz);
-		n_blks = DIV_ROUND_UP(table_size + table_offset,
-				      ctxt.cur_dev->blksz);
+		n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >>
+			ctxt.cur_dev->log2blksz;
 
 		data_buffer = malloc_cache_aligned(n_blks * ctxt.cur_dev->blksz);
 
@@ -1388,10 +1388,10 @@  int sqfs_read(const char *filename, void *buf, loff_t offset, loff_t len,
 		goto free_buffer;
 	}
 
-	start = frag_entry.start / ctxt.cur_dev->blksz;
+	start = frag_entry.start >> ctxt.cur_dev->log2blksz;
 	table_size = SQFS_BLOCK_SIZE(frag_entry.size);
 	table_offset = frag_entry.start - (start * ctxt.cur_dev->blksz);
-	n_blks = DIV_ROUND_UP(table_size + table_offset, ctxt.cur_dev->blksz);
+	n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> ctxt.cur_dev->log2blksz;
 
 	fragment = malloc_cache_aligned(n_blks * ctxt.cur_dev->blksz);
 
diff --git a/fs/squashfs/sqfs_inode.c b/fs/squashfs/sqfs_inode.c
index 1368f3063c..5dbf945c80 100644
--- a/fs/squashfs/sqfs_inode.c
+++ b/fs/squashfs/sqfs_inode.c
@@ -10,7 +10,7 @@ 
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
+#include <part.h>
 
 #include "sqfs_decompressor.h"
 #include "sqfs_filesystem.h"
@@ -68,9 +68,9 @@  int sqfs_inode_size(struct squashfs_base_inode *inode, u32 blk_size)
 		unsigned int blk_list_size;
 
 		if (fragment == 0xFFFFFFFF)
-			blk_list_size = DIV_ROUND_UP(file_size, blk_size);
+			blk_list_size = (file_size + blk_size - 1) >> LOG2(blk_size);
 		else
-			blk_list_size = file_size / blk_size;
+			blk_list_size = file_size > LOG2(blk_size);
 
 		return sizeof(*lreg) + blk_list_size * sizeof(u32);
 	}