Message ID | 20200930154511.630103-2-mc5686@mclink.it |
---|---|
State | Superseded |
Delegated to: | Tom Rini |
Headers | show |
Series | Current strategy is to use right/left shift to implement div/mult | expand |
Am Mittwoch, den 30.09.2020, 17:45 +0200 schrieb Mauro Condarelli: > Use right shift to avoid 64-bit divisions. > > These divisions are needed to convert from file length (potentially > over 32-bit range) to block number, so result and remainder are > guaranteed to fit in 32-bit integers. > > Signed-off-by: Mauro Condarelli <mc5686@mclink.it> the commit subject needs to be fixed. If you change a specific subsystem, you need to add the according prefix. You can use the Git log to figure out that prefix. Also you don't fix a missing __udivdi3 , you just want to avoid 64-bit divisions. So maybe something like: "fs/squashfs: avoid 64-bit divisions on 32-bit systems" > --- > > Changes in v2: > - replace division with right shift (Daniel Schwierzeck). > - remove vocore2-specific change (Daniel Schwierzeck). > - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini). > > fs/squashfs/Kconfig | 2 ++ > fs/squashfs/sqfs.c | 30 +++++++++++++++--------------- > fs/squashfs/sqfs_inode.c | 6 +++--- > 3 files changed, 20 insertions(+), 18 deletions(-) > > diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig > index 54ab1618f1..7c3f83d007 100644 > --- a/fs/squashfs/Kconfig > +++ b/fs/squashfs/Kconfig > @@ -9,3 +9,5 @@ config FS_SQUASHFS > filesystem use, for archival use (i.e. in cases where a .tar.gz file > may be used), and in constrained block device/memory systems (e.g. > embedded systems) where low overhead is needed. > + WARNING: if compression is enabled SquashFS needs a large amount > + of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000. > diff --git a/fs/squashfs/sqfs.c b/fs/squashfs/sqfs.c > index 15208b4dab..90e6848e6c 100644 > --- a/fs/squashfs/sqfs.c > +++ b/fs/squashfs/sqfs.c > @@ -85,10 +85,10 @@ static int sqfs_calc_n_blks(__le64 start, __le64 end, u64 *offset) > u64 start_, table_size; > > table_size = le64_to_cpu(end) - le64_to_cpu(start); > - start_ = le64_to_cpu(start) / ctxt.cur_dev->blksz; > - *offset = le64_to_cpu(start) - (start_ * ctxt.cur_dev->blksz); > + start_ = le64_to_cpu(start) >> ctxt.cur_dev->log2blksz; > + *offset = le64_to_cpu(start) - (start_ << ctxt.cur_dev->log2blksz); > > - return DIV_ROUND_UP(table_size + *offset, ctxt.cur_dev->blksz); > + return (table_size + *offset + ctxt.cur_dev->blksz - 1) >> ctxt.cur_dev->log2blksz; > } > > /* > @@ -109,8 +109,8 @@ static int sqfs_frag_lookup(u32 inode_fragment_index, > if (inode_fragment_index >= get_unaligned_le32(&sblk->fragments)) > return -EINVAL; > > - start = get_unaligned_le64(&sblk->fragment_table_start) / > - ctxt.cur_dev->blksz; > + start = get_unaligned_le64(&sblk->fragment_table_start) >> > + ctxt.cur_dev->log2blksz; > n_blks = sqfs_calc_n_blks(sblk->fragment_table_start, > sblk->export_table_start, > &table_offset); > @@ -135,7 +135,7 @@ static int sqfs_frag_lookup(u32 inode_fragment_index, > start_block = get_unaligned_le64(table + table_offset + block * > sizeof(u64)); > > - start = start_block / ctxt.cur_dev->blksz; > + start = start_block >> ctxt.cur_dev->log2blksz; > n_blks = sqfs_calc_n_blks(cpu_to_le64(start_block), > sblk->fragment_table_start, &table_offset); > > @@ -641,8 +641,8 @@ static int sqfs_read_inode_table(unsigned char **inode_table) > > table_size = get_unaligned_le64(&sblk->directory_table_start) - > get_unaligned_le64(&sblk->inode_table_start); > - start = get_unaligned_le64(&sblk->inode_table_start) / > - ctxt.cur_dev->blksz; > + start = get_unaligned_le64(&sblk->inode_table_start) >> > + ctxt.cur_dev->log2blksz; > n_blks = sqfs_calc_n_blks(sblk->inode_table_start, > sblk->directory_table_start, &table_offset); > > @@ -725,8 +725,8 @@ static int sqfs_read_directory_table(unsigned char **dir_table, u32 **pos_list) > /* DIRECTORY TABLE */ > table_size = get_unaligned_le64(&sblk->fragment_table_start) - > get_unaligned_le64(&sblk->directory_table_start); > - start = get_unaligned_le64(&sblk->directory_table_start) / > - ctxt.cur_dev->blksz; > + start = get_unaligned_le64(&sblk->directory_table_start) >> > + ctxt.cur_dev->log2blksz; > n_blks = sqfs_calc_n_blks(sblk->directory_table_start, > sblk->fragment_table_start, &table_offset); > > @@ -1334,11 +1334,11 @@ int sqfs_read(const char *filename, void *buf, loff_t offset, loff_t len, > } > > for (j = 0; j < datablk_count; j++) { > - start = data_offset / ctxt.cur_dev->blksz; > + start = data_offset >> ctxt.cur_dev->log2blksz; > table_size = SQFS_BLOCK_SIZE(finfo.blk_sizes[j]); > table_offset = data_offset - (start * ctxt.cur_dev->blksz); > - n_blks = DIV_ROUND_UP(table_size + table_offset, > - ctxt.cur_dev->blksz); > + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> > + ctxt.cur_dev->log2blksz; > > data_buffer = malloc_cache_aligned(n_blks * ctxt.cur_dev->blksz); > > @@ -1388,10 +1388,10 @@ int sqfs_read(const char *filename, void *buf, loff_t offset, loff_t len, > goto free_buffer; > } > > - start = frag_entry.start / ctxt.cur_dev->blksz; > + start = frag_entry.start >> ctxt.cur_dev->log2blksz; > table_size = SQFS_BLOCK_SIZE(frag_entry.size); > table_offset = frag_entry.start - (start * ctxt.cur_dev->blksz); > - n_blks = DIV_ROUND_UP(table_size + table_offset, ctxt.cur_dev->blksz); > + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> ctxt.cur_dev->log2blksz; > > fragment = malloc_cache_aligned(n_blks * ctxt.cur_dev->blksz); > > diff --git a/fs/squashfs/sqfs_inode.c b/fs/squashfs/sqfs_inode.c > index 1368f3063c..5dbf945c80 100644 > --- a/fs/squashfs/sqfs_inode.c > +++ b/fs/squashfs/sqfs_inode.c > @@ -10,7 +10,7 @@ > #include <stdint.h> > #include <stdio.h> > #include <stdlib.h> > -#include <string.h> > +#include <part.h> > > #include "sqfs_decompressor.h" > #include "sqfs_filesystem.h" > @@ -68,9 +68,9 @@ int sqfs_inode_size(struct squashfs_base_inode *inode, u32 blk_size) > unsigned int blk_list_size; > > if (fragment == 0xFFFFFFFF) > - blk_list_size = DIV_ROUND_UP(file_size, blk_size); > + blk_list_size = (file_size + blk_size - 1) >> LOG2(blk_size); > else > - blk_list_size = file_size / blk_size; > + blk_list_size = file_size > LOG2(blk_size); one > is missing: blk_list_size = file_size >> LOG2(blk_size); > > return sizeof(*lreg) + blk_list_size * sizeof(u32); > }
Hello, On Wed, 30 Sep 2020 17:45:11 +0200 Mauro Condarelli <mc5686@mclink.it> wrote: > Use right shift to avoid 64-bit divisions. > > These divisions are needed to convert from file length (potentially > over 32-bit range) to block number, so result and remainder are > guaranteed to fit in 32-bit integers. > > Signed-off-by: Mauro Condarelli <mc5686@mclink.it> Perhaps the commit log should contain an example U-Boot configuration/platform where it fails to build. Indeed, we did test the SquashFS code on ARM 32-bit, and it built and worked fine. > --- > > Changes in v2: > - replace division with right shift (Daniel Schwierzeck). > - remove vocore2-specific change (Daniel Schwierzeck). > - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini). > > fs/squashfs/Kconfig | 2 ++ > fs/squashfs/sqfs.c | 30 +++++++++++++++--------------- > fs/squashfs/sqfs_inode.c | 6 +++--- > 3 files changed, 20 insertions(+), 18 deletions(-) > > diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig > index 54ab1618f1..7c3f83d007 100644 > --- a/fs/squashfs/Kconfig > +++ b/fs/squashfs/Kconfig > @@ -9,3 +9,5 @@ config FS_SQUASHFS > filesystem use, for archival use (i.e. in cases where a .tar.gz file > may be used), and in constrained block device/memory systems (e.g. > embedded systems) where low overhead is needed. > + WARNING: if compression is enabled SquashFS needs a large amount > + of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000. This change is completely unrelated, and should be in a separate patch. > - n_blks = DIV_ROUND_UP(table_size + table_offset, > - ctxt.cur_dev->blksz); > + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> > + ctxt.cur_dev->log2blksz; I understand why you have to add blksz - 1 before doing the shift, but I find that it's overall a lot less readable/clear. Is there a way to do better ? We could use DIV_ROUND_UP_ULL() I guess. > else > - blk_list_size = file_size / blk_size; > + blk_list_size = file_size > LOG2(blk_size); Very bad mistake here: > should be >>. Best regards, Thomas
Hello, Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct 2020 09:28:41 +0200: > Hello, > > On Wed, 30 Sep 2020 17:45:11 +0200 > Mauro Condarelli <mc5686@mclink.it> wrote: > > > Use right shift to avoid 64-bit divisions. > > > > These divisions are needed to convert from file length (potentially > > over 32-bit range) to block number, so result and remainder are > > guaranteed to fit in 32-bit integers. > > > > Signed-off-by: Mauro Condarelli <mc5686@mclink.it> > > Perhaps the commit log should contain an example U-Boot > configuration/platform where it fails to build. Indeed, we did test the > SquashFS code on ARM 32-bit, and it built and worked fine. > > > --- > > > > Changes in v2: > > - replace division with right shift (Daniel Schwierzeck). > > - remove vocore2-specific change (Daniel Schwierzeck). > > - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini). > > > > fs/squashfs/Kconfig | 2 ++ > > fs/squashfs/sqfs.c | 30 +++++++++++++++--------------- > > fs/squashfs/sqfs_inode.c | 6 +++--- > > 3 files changed, 20 insertions(+), 18 deletions(-) > > > > diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig > > index 54ab1618f1..7c3f83d007 100644 > > --- a/fs/squashfs/Kconfig > > +++ b/fs/squashfs/Kconfig > > @@ -9,3 +9,5 @@ config FS_SQUASHFS > > filesystem use, for archival use (i.e. in cases where a .tar.gz file > > may be used), and in constrained block device/memory systems (e.g. > > embedded systems) where low overhead is needed. > > + WARNING: if compression is enabled SquashFS needs a large amount > > + of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000. > > This change is completely unrelated, and should be in a separate patch. I was about to tell you the same thing, this warning is useful but should definitely lay into its own commit. > > - n_blks = DIV_ROUND_UP(table_size + table_offset, > > - ctxt.cur_dev->blksz); > > + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> > > + ctxt.cur_dev->log2blksz; > > I understand why you have to add blksz - 1 before doing the shift, but > I find that it's overall a lot less readable/clear. Is there a way to > do better ? > > We could use DIV_ROUND_UP_ULL() I guess. > > > > else > > - blk_list_size = file_size / blk_size; > > + blk_list_size = file_size > LOG2(blk_size); > > Very bad mistake here: > should be >>. I personally highly dislike replacing divisions into shifts. I think it's too much effort when trying to understand code you did not write yourself. Is it possible to use something like do_div? plus, you can check the remainder to be 0 in this case. Thanks, Miquèl
Thanks for Your review. On 10/1/20 9:59 AM, Miquel Raynal wrote: > Hello, > > Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct > 2020 09:28:41 +0200: > >> Hello, >> >> On Wed, 30 Sep 2020 17:45:11 +0200 >> Mauro Condarelli <mc5686@mclink.it> wrote: >> >>> Use right shift to avoid 64-bit divisions. >>> >>> These divisions are needed to convert from file length (potentially >>> over 32-bit range) to block number, so result and remainder are >>> guaranteed to fit in 32-bit integers. >>> >>> Signed-off-by: Mauro Condarelli <mc5686@mclink.it> >> Perhaps the commit log should contain an example U-Boot >> configuration/platform where it fails to build. Indeed, we did test the >> SquashFS code on ARM 32-bit, and it built and worked fine. This fails on mips32, specifically for the vocore2 board. Problem here is __udivdi3 is not defined for this architecture while it is for ARM32. My (limited) understanding suggests it should be removed for ARM since its usage has been (is being?) weeded out from both kernel and u-boot. This is not my call, though. I will add a note to v3. >> >>> --- >>> >>> Changes in v2: >>> - replace division with right shift (Daniel Schwierzeck). >>> - remove vocore2-specific change (Daniel Schwierzeck). >>> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini). >>> >>> fs/squashfs/Kconfig | 2 ++ >>> fs/squashfs/sqfs.c | 30 +++++++++++++++--------------- >>> fs/squashfs/sqfs_inode.c | 6 +++--- >>> 3 files changed, 20 insertions(+), 18 deletions(-) >>> >>> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig >>> index 54ab1618f1..7c3f83d007 100644 >>> --- a/fs/squashfs/Kconfig >>> +++ b/fs/squashfs/Kconfig >>> @@ -9,3 +9,5 @@ config FS_SQUASHFS >>> filesystem use, for archival use (i.e. in cases where a .tar.gz file >>> may be used), and in constrained block device/memory systems (e.g. >>> embedded systems) where low overhead is needed. >>> + WARNING: if compression is enabled SquashFS needs a large amount >>> + of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000. >> This change is completely unrelated, and should be in a separate patch. > I was about to tell you the same thing, this warning is useful but > should definitely lay into its own commit. Will do in v3. >>> - n_blks = DIV_ROUND_UP(table_size + table_offset, >>> - ctxt.cur_dev->blksz); >>> + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> >>> + ctxt.cur_dev->log2blksz; >> I understand why you have to add blksz - 1 before doing the shift, but >> I find that it's overall a lot less readable/clear. Is there a way to >> do better ? >> >> We could use DIV_ROUND_UP_ULL() I guess. I did not do this because DIV_ROUND_UP() is in a global (non-architecture-specific) header and it unconditionally uses division; I did not know how to handle this. Would a comment suffice to clarify intent? Something like: n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> ctxt.cur_dev->log2blksz; /* ROUND_UP division */ Note: this problem stays even if we roll-back to use do_div(); see below. >>> else >>> - blk_list_size = file_size / blk_size; >>> + blk_list_size = file_size > LOG2(blk_size); >> Very bad mistake here: > should be >>. Sorry, my bad. Corrected for v3. > I personally highly dislike replacing divisions into shifts. I think > it's too much effort when trying to understand code you did not write > yourself. Is it possible to use something like do_div? plus, you can > check the remainder to be 0 in this case. Please make up your mind about this. I personally tend to agree with Miquèl and my v1 patch used do_div() exclusively (i did not use even the lldiv() wrapper), but I will not insist either way... just let me know what's considered better. > > Thanks, > Miquèl Many thanks Mauro
Correcting myself. See below. On 10/1/20 10:41 AM, Mauro Condarelli wrote: > Thanks for Your review. > > On 10/1/20 9:59 AM, Miquel Raynal wrote: >> Hello, >> >> Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct >> 2020 09:28:41 +0200: >> >>> Hello, >>> >>> On Wed, 30 Sep 2020 17:45:11 +0200 >>> Mauro Condarelli <mc5686@mclink.it> wrote: >>> >>>> Use right shift to avoid 64-bit divisions. >>>> >>>> These divisions are needed to convert from file length (potentially >>>> over 32-bit range) to block number, so result and remainder are >>>> guaranteed to fit in 32-bit integers. >>>> >>>> Signed-off-by: Mauro Condarelli <mc5686@mclink.it> >>> Perhaps the commit log should contain an example U-Boot >>> configuration/platform where it fails to build. Indeed, we did test the >>> SquashFS code on ARM 32-bit, and it built and worked fine. > This fails on mips32, specifically for the vocore2 board. > Problem here is __udivdi3 is not defined for this architecture > while it is for ARM32. > My (limited) understanding suggests it should be removed for ARM > since its usage has been (is being?) weeded out from both kernel > and u-boot. This is not my call, though. > > I will add a note to v3. > >>>> --- >>>> >>>> Changes in v2: >>>> - replace division with right shift (Daniel Schwierzeck). >>>> - remove vocore2-specific change (Daniel Schwierzeck). >>>> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini). >>>> >>>> fs/squashfs/Kconfig | 2 ++ >>>> fs/squashfs/sqfs.c | 30 +++++++++++++++--------------- >>>> fs/squashfs/sqfs_inode.c | 6 +++--- >>>> 3 files changed, 20 insertions(+), 18 deletions(-) >>>> >>>> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig >>>> index 54ab1618f1..7c3f83d007 100644 >>>> --- a/fs/squashfs/Kconfig >>>> +++ b/fs/squashfs/Kconfig >>>> @@ -9,3 +9,5 @@ config FS_SQUASHFS >>>> filesystem use, for archival use (i.e. in cases where a .tar.gz file >>>> may be used), and in constrained block device/memory systems (e.g. >>>> embedded systems) where low overhead is needed. >>>> + WARNING: if compression is enabled SquashFS needs a large amount >>>> + of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000. >>> This change is completely unrelated, and should be in a separate patch. >> I was about to tell you the same thing, this warning is useful but >> should definitely lay into its own commit. > Will do in v3. > > >>>> - n_blks = DIV_ROUND_UP(table_size + table_offset, >>>> - ctxt.cur_dev->blksz); >>>> + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> >>>> + ctxt.cur_dev->log2blksz; >>> I understand why you have to add blksz - 1 before doing the shift, but >>> I find that it's overall a lot less readable/clear. Is there a way to >>> do better ? >>> >>> We could use DIV_ROUND_UP_ULL() I guess. > I did not do this because DIV_ROUND_UP() is in a global (non-architecture-specific) > header and it unconditionally uses division; I did not know how to handle this. > Would a comment suffice to clarify intent? Something like: > > n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> > ctxt.cur_dev->log2blksz; /* ROUND_UP division */ > > Note: this problem stays even if we roll-back to use do_div(); see below. actually include/linux/kernel.h defines both DIV_ROUND_DOWN_ULL() and DIV_ROUND_UP_ULL() I suppose we should use those in all cases. >>>> else >>>> - blk_list_size = file_size / blk_size; >>>> + blk_list_size = file_size > LOG2(blk_size); >>> Very bad mistake here: > should be >>. > Sorry, my bad. > Corrected for v3. > >> I personally highly dislike replacing divisions into shifts. I think >> it's too much effort when trying to understand code you did not write >> yourself. Is it possible to use something like do_div? plus, you can >> check the remainder to be 0 in this case. > Please make up your mind about this. > I personally tend to agree with Miquèl and my v1 patch used > do_div() exclusively (i did not use even the lldiv() wrapper), but > I will not insist either way... just let me know what's considered > better. As said above: it seems using the macros is both "standard" and safer than using shifts. If I get a go-ahead I'll use those macros in v3. > >> Thanks, >> Miquèl > Many thanks > Mauro
Hi Mauro, Mauro Condarelli <mc5686@mclink.it> wrote on Thu, 1 Oct 2020 10:53:30 +0200: > Correcting myself. > See below. > > On 10/1/20 10:41 AM, Mauro Condarelli wrote: > > Thanks for Your review. > > > > On 10/1/20 9:59 AM, Miquel Raynal wrote: > >> Hello, > >> > >> Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct > >> 2020 09:28:41 +0200: > >> > >>> Hello, > >>> > >>> On Wed, 30 Sep 2020 17:45:11 +0200 > >>> Mauro Condarelli <mc5686@mclink.it> wrote: > >>> > >>>> Use right shift to avoid 64-bit divisions. > >>>> > >>>> These divisions are needed to convert from file length (potentially > >>>> over 32-bit range) to block number, so result and remainder are > >>>> guaranteed to fit in 32-bit integers. > >>>> > >>>> Signed-off-by: Mauro Condarelli <mc5686@mclink.it> > >>> Perhaps the commit log should contain an example U-Boot > >>> configuration/platform where it fails to build. Indeed, we did test the > >>> SquashFS code on ARM 32-bit, and it built and worked fine. > > This fails on mips32, specifically for the vocore2 board. > > Problem here is __udivdi3 is not defined for this architecture > > while it is for ARM32. > > My (limited) understanding suggests it should be removed for ARM > > since its usage has been (is being?) weeded out from both kernel > > and u-boot. This is not my call, though. > > > > I will add a note to v3. > > > >>>> --- > >>>> > >>>> Changes in v2: > >>>> - replace division with right shift (Daniel Schwierzeck). > >>>> - remove vocore2-specific change (Daniel Schwierzeck). > >>>> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini). > >>>> > >>>> fs/squashfs/Kconfig | 2 ++ > >>>> fs/squashfs/sqfs.c | 30 +++++++++++++++--------------- > >>>> fs/squashfs/sqfs_inode.c | 6 +++--- > >>>> 3 files changed, 20 insertions(+), 18 deletions(-) > >>>> > >>>> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig > >>>> index 54ab1618f1..7c3f83d007 100644 > >>>> --- a/fs/squashfs/Kconfig > >>>> +++ b/fs/squashfs/Kconfig > >>>> @@ -9,3 +9,5 @@ config FS_SQUASHFS > >>>> filesystem use, for archival use (i.e. in cases where a .tar.gz file > >>>> may be used), and in constrained block device/memory systems (e.g. > >>>> embedded systems) where low overhead is needed. > >>>> + WARNING: if compression is enabled SquashFS needs a large amount > >>>> + of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000. > >>> This change is completely unrelated, and should be in a separate patch. > >> I was about to tell you the same thing, this warning is useful but > >> should definitely lay into its own commit. > > Will do in v3. > > > > > >>>> - n_blks = DIV_ROUND_UP(table_size + table_offset, > >>>> - ctxt.cur_dev->blksz); > >>>> + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> > >>>> + ctxt.cur_dev->log2blksz; > >>> I understand why you have to add blksz - 1 before doing the shift, but > >>> I find that it's overall a lot less readable/clear. Is there a way to > >>> do better ? > >>> > >>> We could use DIV_ROUND_UP_ULL() I guess. > > I did not do this because DIV_ROUND_UP() is in a global (non-architecture-specific) > > header and it unconditionally uses division; I did not know how to handle this. > > Would a comment suffice to clarify intent? Something like: > > > > n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> > > ctxt.cur_dev->log2blksz; /* ROUND_UP division */ > > > > Note: this problem stays even if we roll-back to use do_div(); see below. > actually include/linux/kernel.h defines both DIV_ROUND_DOWN_ULL() > and DIV_ROUND_UP_ULL() I suppose we should use those in all cases. > > > >>>> else > >>>> - blk_list_size = file_size / blk_size; > >>>> + blk_list_size = file_size > LOG2(blk_size); > >>> Very bad mistake here: > should be >>. > > Sorry, my bad. > > Corrected for v3. > > > >> I personally highly dislike replacing divisions into shifts. I think > >> it's too much effort when trying to understand code you did not write > >> yourself. Is it possible to use something like do_div? plus, you can > >> check the remainder to be 0 in this case. > > Please make up your mind about this. > > I personally tend to agree with Miquèl and my v1 patch used > > do_div() exclusively (i did not use even the lldiv() wrapper), but > > I will not insist either way... just let me know what's considered > > better. > As said above: it seems using the macros is both "standard" and > safer than using shifts. > If I get a go-ahead I'll use those macros in v3. I think we all agree using these macros is much nicer, readable and probably almost as fast. Thanks, Miquèl
Ok, Thanks. Patch is ready, I'll send it after some extended testing on my system (vocore2). Regards Mauro On 10/1/20 10:56 AM, Miquel Raynal wrote: > Hi Mauro, > > Mauro Condarelli <mc5686@mclink.it> wrote on Thu, 1 Oct 2020 10:53:30 > +0200: > >> Correcting myself. >> See below. >> >> On 10/1/20 10:41 AM, Mauro Condarelli wrote: >>> Thanks for Your review. >>> >>> On 10/1/20 9:59 AM, Miquel Raynal wrote: >>>> Hello, >>>> >>>> Thomas Petazzoni <thomas.petazzoni@bootlin.com> wrote on Thu, 1 Oct >>>> 2020 09:28:41 +0200: >>>> >>>>> Hello, >>>>> >>>>> On Wed, 30 Sep 2020 17:45:11 +0200 >>>>> Mauro Condarelli <mc5686@mclink.it> wrote: >>>>> >>>>>> Use right shift to avoid 64-bit divisions. >>>>>> >>>>>> These divisions are needed to convert from file length (potentially >>>>>> over 32-bit range) to block number, so result and remainder are >>>>>> guaranteed to fit in 32-bit integers. >>>>>> >>>>>> Signed-off-by: Mauro Condarelli <mc5686@mclink.it> >>>>> Perhaps the commit log should contain an example U-Boot >>>>> configuration/platform where it fails to build. Indeed, we did test the >>>>> SquashFS code on ARM 32-bit, and it built and worked fine. >>> This fails on mips32, specifically for the vocore2 board. >>> Problem here is __udivdi3 is not defined for this architecture >>> while it is for ARM32. >>> My (limited) understanding suggests it should be removed for ARM >>> since its usage has been (is being?) weeded out from both kernel >>> and u-boot. This is not my call, though. >>> >>> I will add a note to v3. >>> >>>>>> --- >>>>>> >>>>>> Changes in v2: >>>>>> - replace division with right shift (Daniel Schwierzeck). >>>>>> - remove vocore2-specific change (Daniel Schwierzeck). >>>>>> - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini). >>>>>> >>>>>> fs/squashfs/Kconfig | 2 ++ >>>>>> fs/squashfs/sqfs.c | 30 +++++++++++++++--------------- >>>>>> fs/squashfs/sqfs_inode.c | 6 +++--- >>>>>> 3 files changed, 20 insertions(+), 18 deletions(-) >>>>>> >>>>>> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig >>>>>> index 54ab1618f1..7c3f83d007 100644 >>>>>> --- a/fs/squashfs/Kconfig >>>>>> +++ b/fs/squashfs/Kconfig >>>>>> @@ -9,3 +9,5 @@ config FS_SQUASHFS >>>>>> filesystem use, for archival use (i.e. in cases where a .tar.gz file >>>>>> may be used), and in constrained block device/memory systems (e.g. >>>>>> embedded systems) where low overhead is needed. >>>>>> + WARNING: if compression is enabled SquashFS needs a large amount >>>>>> + of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000. >>>>> This change is completely unrelated, and should be in a separate patch. >>>> I was about to tell you the same thing, this warning is useful but >>>> should definitely lay into its own commit. >>> Will do in v3. >>> >>> >>>>>> - n_blks = DIV_ROUND_UP(table_size + table_offset, >>>>>> - ctxt.cur_dev->blksz); >>>>>> + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> >>>>>> + ctxt.cur_dev->log2blksz; >>>>> I understand why you have to add blksz - 1 before doing the shift, but >>>>> I find that it's overall a lot less readable/clear. Is there a way to >>>>> do better ? >>>>> >>>>> We could use DIV_ROUND_UP_ULL() I guess. >>> I did not do this because DIV_ROUND_UP() is in a global (non-architecture-specific) >>> header and it unconditionally uses division; I did not know how to handle this. >>> Would a comment suffice to clarify intent? Something like: >>> >>> n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> >>> ctxt.cur_dev->log2blksz; /* ROUND_UP division */ >>> >>> Note: this problem stays even if we roll-back to use do_div(); see below. >> actually include/linux/kernel.h defines both DIV_ROUND_DOWN_ULL() >> and DIV_ROUND_UP_ULL() I suppose we should use those in all cases. >> >> >>>>>> else >>>>>> - blk_list_size = file_size / blk_size; >>>>>> + blk_list_size = file_size > LOG2(blk_size); >>>>> Very bad mistake here: > should be >>. >>> Sorry, my bad. >>> Corrected for v3. >>> >>>> I personally highly dislike replacing divisions into shifts. I think >>>> it's too much effort when trying to understand code you did not write >>>> yourself. Is it possible to use something like do_div? plus, you can >>>> check the remainder to be 0 in this case. >>> Please make up your mind about this. >>> I personally tend to agree with Miquèl and my v1 patch used >>> do_div() exclusively (i did not use even the lldiv() wrapper), but >>> I will not insist either way... just let me know what's considered >>> better. >> As said above: it seems using the macros is both "standard" and >> safer than using shifts. >> If I get a go-ahead I'll use those macros in v3. > I think we all agree using these macros is much nicer, readable and > probably almost as fast. > > Thanks, > Miquèl >
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 54ab1618f1..7c3f83d007 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -9,3 +9,5 @@ config FS_SQUASHFS filesystem use, for archival use (i.e. in cases where a .tar.gz file may be used), and in constrained block device/memory systems (e.g. embedded systems) where low overhead is needed. + WARNING: if compression is enabled SquashFS needs a large amount + of dynamic memory; make sure CONFIG_SYS_MALLOC_LEN >= 0x4000. diff --git a/fs/squashfs/sqfs.c b/fs/squashfs/sqfs.c index 15208b4dab..90e6848e6c 100644 --- a/fs/squashfs/sqfs.c +++ b/fs/squashfs/sqfs.c @@ -85,10 +85,10 @@ static int sqfs_calc_n_blks(__le64 start, __le64 end, u64 *offset) u64 start_, table_size; table_size = le64_to_cpu(end) - le64_to_cpu(start); - start_ = le64_to_cpu(start) / ctxt.cur_dev->blksz; - *offset = le64_to_cpu(start) - (start_ * ctxt.cur_dev->blksz); + start_ = le64_to_cpu(start) >> ctxt.cur_dev->log2blksz; + *offset = le64_to_cpu(start) - (start_ << ctxt.cur_dev->log2blksz); - return DIV_ROUND_UP(table_size + *offset, ctxt.cur_dev->blksz); + return (table_size + *offset + ctxt.cur_dev->blksz - 1) >> ctxt.cur_dev->log2blksz; } /* @@ -109,8 +109,8 @@ static int sqfs_frag_lookup(u32 inode_fragment_index, if (inode_fragment_index >= get_unaligned_le32(&sblk->fragments)) return -EINVAL; - start = get_unaligned_le64(&sblk->fragment_table_start) / - ctxt.cur_dev->blksz; + start = get_unaligned_le64(&sblk->fragment_table_start) >> + ctxt.cur_dev->log2blksz; n_blks = sqfs_calc_n_blks(sblk->fragment_table_start, sblk->export_table_start, &table_offset); @@ -135,7 +135,7 @@ static int sqfs_frag_lookup(u32 inode_fragment_index, start_block = get_unaligned_le64(table + table_offset + block * sizeof(u64)); - start = start_block / ctxt.cur_dev->blksz; + start = start_block >> ctxt.cur_dev->log2blksz; n_blks = sqfs_calc_n_blks(cpu_to_le64(start_block), sblk->fragment_table_start, &table_offset); @@ -641,8 +641,8 @@ static int sqfs_read_inode_table(unsigned char **inode_table) table_size = get_unaligned_le64(&sblk->directory_table_start) - get_unaligned_le64(&sblk->inode_table_start); - start = get_unaligned_le64(&sblk->inode_table_start) / - ctxt.cur_dev->blksz; + start = get_unaligned_le64(&sblk->inode_table_start) >> + ctxt.cur_dev->log2blksz; n_blks = sqfs_calc_n_blks(sblk->inode_table_start, sblk->directory_table_start, &table_offset); @@ -725,8 +725,8 @@ static int sqfs_read_directory_table(unsigned char **dir_table, u32 **pos_list) /* DIRECTORY TABLE */ table_size = get_unaligned_le64(&sblk->fragment_table_start) - get_unaligned_le64(&sblk->directory_table_start); - start = get_unaligned_le64(&sblk->directory_table_start) / - ctxt.cur_dev->blksz; + start = get_unaligned_le64(&sblk->directory_table_start) >> + ctxt.cur_dev->log2blksz; n_blks = sqfs_calc_n_blks(sblk->directory_table_start, sblk->fragment_table_start, &table_offset); @@ -1334,11 +1334,11 @@ int sqfs_read(const char *filename, void *buf, loff_t offset, loff_t len, } for (j = 0; j < datablk_count; j++) { - start = data_offset / ctxt.cur_dev->blksz; + start = data_offset >> ctxt.cur_dev->log2blksz; table_size = SQFS_BLOCK_SIZE(finfo.blk_sizes[j]); table_offset = data_offset - (start * ctxt.cur_dev->blksz); - n_blks = DIV_ROUND_UP(table_size + table_offset, - ctxt.cur_dev->blksz); + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> + ctxt.cur_dev->log2blksz; data_buffer = malloc_cache_aligned(n_blks * ctxt.cur_dev->blksz); @@ -1388,10 +1388,10 @@ int sqfs_read(const char *filename, void *buf, loff_t offset, loff_t len, goto free_buffer; } - start = frag_entry.start / ctxt.cur_dev->blksz; + start = frag_entry.start >> ctxt.cur_dev->log2blksz; table_size = SQFS_BLOCK_SIZE(frag_entry.size); table_offset = frag_entry.start - (start * ctxt.cur_dev->blksz); - n_blks = DIV_ROUND_UP(table_size + table_offset, ctxt.cur_dev->blksz); + n_blks = (table_size + table_offset + ctxt.cur_dev->blksz - 1) >> ctxt.cur_dev->log2blksz; fragment = malloc_cache_aligned(n_blks * ctxt.cur_dev->blksz); diff --git a/fs/squashfs/sqfs_inode.c b/fs/squashfs/sqfs_inode.c index 1368f3063c..5dbf945c80 100644 --- a/fs/squashfs/sqfs_inode.c +++ b/fs/squashfs/sqfs_inode.c @@ -10,7 +10,7 @@ #include <stdint.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> +#include <part.h> #include "sqfs_decompressor.h" #include "sqfs_filesystem.h" @@ -68,9 +68,9 @@ int sqfs_inode_size(struct squashfs_base_inode *inode, u32 blk_size) unsigned int blk_list_size; if (fragment == 0xFFFFFFFF) - blk_list_size = DIV_ROUND_UP(file_size, blk_size); + blk_list_size = (file_size + blk_size - 1) >> LOG2(blk_size); else - blk_list_size = file_size / blk_size; + blk_list_size = file_size > LOG2(blk_size); return sizeof(*lreg) + blk_list_size * sizeof(u32); }
Use right shift to avoid 64-bit divisions. These divisions are needed to convert from file length (potentially over 32-bit range) to block number, so result and remainder are guaranteed to fit in 32-bit integers. Signed-off-by: Mauro Condarelli <mc5686@mclink.it> --- Changes in v2: - replace division with right shift (Daniel Schwierzeck). - remove vocore2-specific change (Daniel Schwierzeck). - add warning to Kconfig about CONFIG_SYS_MALLOC_LEN (Tom Rini). fs/squashfs/Kconfig | 2 ++ fs/squashfs/sqfs.c | 30 +++++++++++++++--------------- fs/squashfs/sqfs_inode.c | 6 +++--- 3 files changed, 20 insertions(+), 18 deletions(-)