Message ID | 1307453714-2741-1-git-send-email-lczerner@redhat.com |
---|---|
State | Accepted, archived |
Headers | show |
On 6/7/11 8:35 AM, Lukas Czerner wrote: > For a long time now orlov is the default block allocator in the ext4. It > performs better than the old one and no one seems to claim otherwise so > we can safely drop it and make oldalloc and orlov mount option > deprecated. > > This is a part of the effort to reduce number of ext4 options hence the > test matrix. > > Signed-off-by: Lukas Czerner <lczerner@redhat.com> Seems like a good idea to me. But I'm doing a little digging into why find_group_flex() was there; why all that flex_bg-related inode allocation work for a deprecated option? commit 772cb7c83ba256a11c7bf99a11bef3858d23767c Author: Jose R. Santos <jrs@us.ibm.com> Date: Fri Jul 11 19:27:31 2008 -0400 ext4: New inode allocation for FLEX_BG meta-data groups. This patch mostly controls the way inode are allocated in order to make ialloc aware of flex_bg block group grouping. It achieves this by bypassing the Orlov allocator when block group meta-data are packed toghether through mke2fs. <snip> find_group_flex() used to be called by ext4_new_inode() regardless of OLDALLOC, (I think) so just want to see for sure what happened to that plan... -eric > --- > Documentation/filesystems/ext4.txt | 8 -- > fs/ext4/ext4.h | 1 - > fs/ext4/ialloc.c | 136 +----------------------------------- > fs/ext4/super.c | 8 +- > 4 files changed, 7 insertions(+), 146 deletions(-) > > diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt > index 3ae9bc9..ec469fa 100644 > --- a/Documentation/filesystems/ext4.txt > +++ b/Documentation/filesystems/ext4.txt > @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum > table readahead algorithm will pre-read into > the buffer cache. The default value is 32 blocks. > > -orlov (*) This enables the new Orlov block allocator. It is > - enabled by default. > - > -oldalloc This disables the Orlov block allocator and enables > - the old block allocator. Orlov should have better > - performance - we'd like to get some feedback if it's > - the contrary for you. > - > user_xattr Enables Extended User Attributes. Additionally, you > need to have extended attribute support enabled in the > kernel configuration (CONFIG_EXT4_FS_XATTR). See the > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 1921392..7e0b8aa 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -884,7 +884,6 @@ struct ext4_inode_info { > /* > * Mount flags > */ > -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ > #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ > #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ > #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ > diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > index 21bb2f6..0b5ec23 100644 > --- a/fs/ext4/ialloc.c > +++ b/fs/ext4/ialloc.c > @@ -293,118 +293,6 @@ error_return: > ext4_std_error(sb, fatal); > } > > -/* > - * There are two policies for allocating an inode. If the new inode is > - * a directory, then a forward search is made for a block group with both > - * free space and a low directory-to-inode ratio; if that fails, then of > - * the groups with above-average free space, that group with the fewest > - * directories already is chosen. > - * > - * For other inodes, search forward from the parent directory\'s block > - * group to find a free inode. > - */ > -static int find_group_dir(struct super_block *sb, struct inode *parent, > - ext4_group_t *best_group) > -{ > - ext4_group_t ngroups = ext4_get_groups_count(sb); > - unsigned int freei, avefreei; > - struct ext4_group_desc *desc, *best_desc = NULL; > - ext4_group_t group; > - int ret = -1; > - > - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); > - avefreei = freei / ngroups; > - > - for (group = 0; group < ngroups; group++) { > - desc = ext4_get_group_desc(sb, group, NULL); > - if (!desc || !ext4_free_inodes_count(sb, desc)) > - continue; > - if (ext4_free_inodes_count(sb, desc) < avefreei) > - continue; > - if (!best_desc || > - (ext4_free_blks_count(sb, desc) > > - ext4_free_blks_count(sb, best_desc))) { > - *best_group = group; > - best_desc = desc; > - ret = 0; > - } > - } > - return ret; > -} > - > -#define free_block_ratio 10 > - > -static int find_group_flex(struct super_block *sb, struct inode *parent, > - ext4_group_t *best_group) > -{ > - struct ext4_sb_info *sbi = EXT4_SB(sb); > - struct ext4_group_desc *desc; > - struct flex_groups *flex_group = sbi->s_flex_groups; > - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; > - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); > - ext4_group_t ngroups = ext4_get_groups_count(sb); > - int flex_size = ext4_flex_bg_size(sbi); > - ext4_group_t best_flex = parent_fbg_group; > - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; > - int flexbg_free_blocks; > - int flex_freeb_ratio; > - ext4_group_t n_fbg_groups; > - ext4_group_t i; > - > - n_fbg_groups = (ngroups + flex_size - 1) >> > - sbi->s_log_groups_per_flex; > - > -find_close_to_parent: > - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); > - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > - if (atomic_read(&flex_group[best_flex].free_inodes) && > - flex_freeb_ratio > free_block_ratio) > - goto found_flexbg; > - > - if (best_flex && best_flex == parent_fbg_group) { > - best_flex--; > - goto find_close_to_parent; > - } > - > - for (i = 0; i < n_fbg_groups; i++) { > - if (i == parent_fbg_group || i == parent_fbg_group - 1) > - continue; > - > - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); > - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > - > - if (flex_freeb_ratio > free_block_ratio && > - (atomic_read(&flex_group[i].free_inodes))) { > - best_flex = i; > - goto found_flexbg; > - } > - > - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || > - ((atomic_read(&flex_group[i].free_blocks) > > - atomic_read(&flex_group[best_flex].free_blocks)) && > - atomic_read(&flex_group[i].free_inodes))) > - best_flex = i; > - } > - > - if (!atomic_read(&flex_group[best_flex].free_inodes) || > - !atomic_read(&flex_group[best_flex].free_blocks)) > - return -1; > - > -found_flexbg: > - for (i = best_flex * flex_size; i < ngroups && > - i < (best_flex + 1) * flex_size; i++) { > - desc = ext4_get_group_desc(sb, i, NULL); > - if (ext4_free_inodes_count(sb, desc)) { > - *best_group = i; > - goto out; > - } > - } > - > - return -1; > -out: > - return 0; > -} > - > struct orlov_stats { > __u32 free_inodes; > __u32 free_blocks; > @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > struct inode *ret; > ext4_group_t i; > int free = 0; > - static int once = 1; > ext4_group_t flex_group; > > /* Cannot create files in a deleted directory */ > @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > goto got_group; > } > > - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { > - ret2 = find_group_flex(sb, dir, &group); > - if (ret2 == -1) { > - ret2 = find_group_other(sb, dir, &group, mode); > - if (ret2 == 0 && once) { > - once = 0; > - printk(KERN_NOTICE "ext4: find_group_flex " > - "failed, fallback succeeded dir %lu\n", > - dir->i_ino); > - } > - } > - goto got_group; > - } > - > - if (S_ISDIR(mode)) { > - if (test_opt(sb, OLDALLOC)) > - ret2 = find_group_dir(sb, dir, &group); > - else > - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > - } else > + if (S_ISDIR(mode)) > + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > + else > ret2 = find_group_other(sb, dir, &group, mode); > > got_group: > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index cc5c157..e1f8f73 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) > seq_puts(seq, ",nouid32"); > if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) > seq_puts(seq, ",debug"); > - if (test_opt(sb, OLDALLOC)) > - seq_puts(seq, ",oldalloc"); > #ifdef CONFIG_EXT4_FS_XATTR > if (test_opt(sb, XATTR_USER)) > seq_puts(seq, ",user_xattr"); > @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, > set_opt(sb, DEBUG); > break; > case Opt_oldalloc: > - set_opt(sb, OLDALLOC); > + ext4_msg(sb, KERN_WARNING, > + "Ignoring deprecated oldalloc option"); > break; > case Opt_orlov: > - clear_opt(sb, OLDALLOC); > + ext4_msg(sb, KERN_WARNING, > + "Ignoring deprecated orlov option"); > break; > #ifdef CONFIG_EXT4_FS_XATTR > case Opt_user_xattr: -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 6/7/11 9:50 AM, Eric Sandeen wrote: > On 6/7/11 8:35 AM, Lukas Czerner wrote: >> For a long time now orlov is the default block allocator in the ext4. It >> performs better than the old one and no one seems to claim otherwise so >> we can safely drop it and make oldalloc and orlov mount option >> deprecated. >> >> This is a part of the effort to reduce number of ext4 options hence the >> test matrix. >> >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > Seems like a good idea to me. > > But I'm doing a little digging into why find_group_flex() was there; > why all that flex_bg-related inode allocation work for a deprecated option? > > commit 772cb7c83ba256a11c7bf99a11bef3858d23767c > Author: Jose R. Santos <jrs@us.ibm.com> > Date: Fri Jul 11 19:27:31 2008 -0400 > > ext4: New inode allocation for FLEX_BG meta-data groups. > > This patch mostly controls the way inode are allocated in order to > make ialloc aware of flex_bg block group grouping. It achieves this > by bypassing the Orlov allocator when block group meta-data are packed > toghether through mke2fs. <snip> > > find_group_flex() used to be called by ext4_new_inode() regardless of > OLDALLOC, (I think) so just want to see for sure what happened to that plan... Ah, ok: commit a4912123b688e057084e6557cef8924f7ae5bbde Author: Theodore Ts'o <tytso@mit.edu> Date: Thu Mar 12 12:18:34 2009 -0400 ext4: New inode/block allocation algorithms for flex_bg filesystems The find_group_flex() inode allocator is now only used if the filesystem is mounted using the "oldalloc" mount option. It is replaced with the original Orlov allocator that has been updated for flex_bg filesystems <snip> So: Reviewed-by: Eric Sandeen <sandeen@redhat.com> > -eric > >> --- >> Documentation/filesystems/ext4.txt | 8 -- >> fs/ext4/ext4.h | 1 - >> fs/ext4/ialloc.c | 136 +----------------------------------- >> fs/ext4/super.c | 8 +- >> 4 files changed, 7 insertions(+), 146 deletions(-) >> >> diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt >> index 3ae9bc9..ec469fa 100644 >> --- a/Documentation/filesystems/ext4.txt >> +++ b/Documentation/filesystems/ext4.txt >> @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum >> table readahead algorithm will pre-read into >> the buffer cache. The default value is 32 blocks. >> >> -orlov (*) This enables the new Orlov block allocator. It is >> - enabled by default. >> - >> -oldalloc This disables the Orlov block allocator and enables >> - the old block allocator. Orlov should have better >> - performance - we'd like to get some feedback if it's >> - the contrary for you. >> - >> user_xattr Enables Extended User Attributes. Additionally, you >> need to have extended attribute support enabled in the >> kernel configuration (CONFIG_EXT4_FS_XATTR). See the >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h >> index 1921392..7e0b8aa 100644 >> --- a/fs/ext4/ext4.h >> +++ b/fs/ext4/ext4.h >> @@ -884,7 +884,6 @@ struct ext4_inode_info { >> /* >> * Mount flags >> */ >> -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ >> #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ >> #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ >> #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ >> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c >> index 21bb2f6..0b5ec23 100644 >> --- a/fs/ext4/ialloc.c >> +++ b/fs/ext4/ialloc.c >> @@ -293,118 +293,6 @@ error_return: >> ext4_std_error(sb, fatal); >> } >> >> -/* >> - * There are two policies for allocating an inode. If the new inode is >> - * a directory, then a forward search is made for a block group with both >> - * free space and a low directory-to-inode ratio; if that fails, then of >> - * the groups with above-average free space, that group with the fewest >> - * directories already is chosen. >> - * >> - * For other inodes, search forward from the parent directory\'s block >> - * group to find a free inode. >> - */ >> -static int find_group_dir(struct super_block *sb, struct inode *parent, >> - ext4_group_t *best_group) >> -{ >> - ext4_group_t ngroups = ext4_get_groups_count(sb); >> - unsigned int freei, avefreei; >> - struct ext4_group_desc *desc, *best_desc = NULL; >> - ext4_group_t group; >> - int ret = -1; >> - >> - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); >> - avefreei = freei / ngroups; >> - >> - for (group = 0; group < ngroups; group++) { >> - desc = ext4_get_group_desc(sb, group, NULL); >> - if (!desc || !ext4_free_inodes_count(sb, desc)) >> - continue; >> - if (ext4_free_inodes_count(sb, desc) < avefreei) >> - continue; >> - if (!best_desc || >> - (ext4_free_blks_count(sb, desc) > >> - ext4_free_blks_count(sb, best_desc))) { >> - *best_group = group; >> - best_desc = desc; >> - ret = 0; >> - } >> - } >> - return ret; >> -} >> - >> -#define free_block_ratio 10 >> - >> -static int find_group_flex(struct super_block *sb, struct inode *parent, >> - ext4_group_t *best_group) >> -{ >> - struct ext4_sb_info *sbi = EXT4_SB(sb); >> - struct ext4_group_desc *desc; >> - struct flex_groups *flex_group = sbi->s_flex_groups; >> - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; >> - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); >> - ext4_group_t ngroups = ext4_get_groups_count(sb); >> - int flex_size = ext4_flex_bg_size(sbi); >> - ext4_group_t best_flex = parent_fbg_group; >> - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; >> - int flexbg_free_blocks; >> - int flex_freeb_ratio; >> - ext4_group_t n_fbg_groups; >> - ext4_group_t i; >> - >> - n_fbg_groups = (ngroups + flex_size - 1) >> >> - sbi->s_log_groups_per_flex; >> - >> -find_close_to_parent: >> - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; >> - if (atomic_read(&flex_group[best_flex].free_inodes) && >> - flex_freeb_ratio > free_block_ratio) >> - goto found_flexbg; >> - >> - if (best_flex && best_flex == parent_fbg_group) { >> - best_flex--; >> - goto find_close_to_parent; >> - } >> - >> - for (i = 0; i < n_fbg_groups; i++) { >> - if (i == parent_fbg_group || i == parent_fbg_group - 1) >> - continue; >> - >> - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; >> - >> - if (flex_freeb_ratio > free_block_ratio && >> - (atomic_read(&flex_group[i].free_inodes))) { >> - best_flex = i; >> - goto found_flexbg; >> - } >> - >> - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || >> - ((atomic_read(&flex_group[i].free_blocks) > >> - atomic_read(&flex_group[best_flex].free_blocks)) && >> - atomic_read(&flex_group[i].free_inodes))) >> - best_flex = i; >> - } >> - >> - if (!atomic_read(&flex_group[best_flex].free_inodes) || >> - !atomic_read(&flex_group[best_flex].free_blocks)) >> - return -1; >> - >> -found_flexbg: >> - for (i = best_flex * flex_size; i < ngroups && >> - i < (best_flex + 1) * flex_size; i++) { >> - desc = ext4_get_group_desc(sb, i, NULL); >> - if (ext4_free_inodes_count(sb, desc)) { >> - *best_group = i; >> - goto out; >> - } >> - } >> - >> - return -1; >> -out: >> - return 0; >> -} >> - >> struct orlov_stats { >> __u32 free_inodes; >> __u32 free_blocks; >> @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, >> struct inode *ret; >> ext4_group_t i; >> int free = 0; >> - static int once = 1; >> ext4_group_t flex_group; >> >> /* Cannot create files in a deleted directory */ >> @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, >> goto got_group; >> } >> >> - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { >> - ret2 = find_group_flex(sb, dir, &group); >> - if (ret2 == -1) { >> - ret2 = find_group_other(sb, dir, &group, mode); >> - if (ret2 == 0 && once) { >> - once = 0; >> - printk(KERN_NOTICE "ext4: find_group_flex " >> - "failed, fallback succeeded dir %lu\n", >> - dir->i_ino); >> - } >> - } >> - goto got_group; >> - } >> - >> - if (S_ISDIR(mode)) { >> - if (test_opt(sb, OLDALLOC)) >> - ret2 = find_group_dir(sb, dir, &group); >> - else >> - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); >> - } else >> + if (S_ISDIR(mode)) >> + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); >> + else >> ret2 = find_group_other(sb, dir, &group, mode); >> >> got_group: >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c >> index cc5c157..e1f8f73 100644 >> --- a/fs/ext4/super.c >> +++ b/fs/ext4/super.c >> @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) >> seq_puts(seq, ",nouid32"); >> if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) >> seq_puts(seq, ",debug"); >> - if (test_opt(sb, OLDALLOC)) >> - seq_puts(seq, ",oldalloc"); >> #ifdef CONFIG_EXT4_FS_XATTR >> if (test_opt(sb, XATTR_USER)) >> seq_puts(seq, ",user_xattr"); >> @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, >> set_opt(sb, DEBUG); >> break; >> case Opt_oldalloc: >> - set_opt(sb, OLDALLOC); >> + ext4_msg(sb, KERN_WARNING, >> + "Ignoring deprecated oldalloc option"); >> break; >> case Opt_orlov: >> - clear_opt(sb, OLDALLOC); >> + ext4_msg(sb, KERN_WARNING, >> + "Ignoring deprecated orlov option"); >> break; >> #ifdef CONFIG_EXT4_FS_XATTR >> case Opt_user_xattr: > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 7 Jun 2011, Eric Sandeen wrote: > On 6/7/11 9:50 AM, Eric Sandeen wrote: > > On 6/7/11 8:35 AM, Lukas Czerner wrote: > >> For a long time now orlov is the default block allocator in the ext4. It > >> performs better than the old one and no one seems to claim otherwise so > >> we can safely drop it and make oldalloc and orlov mount option > >> deprecated. > >> > >> This is a part of the effort to reduce number of ext4 options hence the > >> test matrix. > >> > >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > > > Seems like a good idea to me. > > > > But I'm doing a little digging into why find_group_flex() was there; > > why all that flex_bg-related inode allocation work for a deprecated option? > > > > commit 772cb7c83ba256a11c7bf99a11bef3858d23767c > > Author: Jose R. Santos <jrs@us.ibm.com> > > Date: Fri Jul 11 19:27:31 2008 -0400 > > > > ext4: New inode allocation for FLEX_BG meta-data groups. > > > > This patch mostly controls the way inode are allocated in order to > > make ialloc aware of flex_bg block group grouping. It achieves this > > by bypassing the Orlov allocator when block group meta-data are packed > > toghether through mke2fs. <snip> > > > > find_group_flex() used to be called by ext4_new_inode() regardless of > > OLDALLOC, (I think) so just want to see for sure what happened to that plan... > > Ah, ok: > > commit a4912123b688e057084e6557cef8924f7ae5bbde > Author: Theodore Ts'o <tytso@mit.edu> > Date: Thu Mar 12 12:18:34 2009 -0400 > > ext4: New inode/block allocation algorithms for flex_bg filesystems > > The find_group_flex() inode allocator is now only used if the > filesystem is mounted using the "oldalloc" mount option. It is > replaced with the original Orlov allocator that has been updated for > flex_bg filesystems <snip> > > So: > > Reviewed-by: Eric Sandeen <sandeen@redhat.com> Thanks Eric, but I need to take it back for the moment. You've pointed me to more code which is not needed anymore, so I have to update the patch to remove all the useless pieces. Thanks! -Lukas > > > > -eric > > > >> --- > >> Documentation/filesystems/ext4.txt | 8 -- > >> fs/ext4/ext4.h | 1 - > >> fs/ext4/ialloc.c | 136 +----------------------------------- > >> fs/ext4/super.c | 8 +- > >> 4 files changed, 7 insertions(+), 146 deletions(-) > >> > >> diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt > >> index 3ae9bc9..ec469fa 100644 > >> --- a/Documentation/filesystems/ext4.txt > >> +++ b/Documentation/filesystems/ext4.txt > >> @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum > >> table readahead algorithm will pre-read into > >> the buffer cache. The default value is 32 blocks. > >> > >> -orlov (*) This enables the new Orlov block allocator. It is > >> - enabled by default. > >> - > >> -oldalloc This disables the Orlov block allocator and enables > >> - the old block allocator. Orlov should have better > >> - performance - we'd like to get some feedback if it's > >> - the contrary for you. > >> - > >> user_xattr Enables Extended User Attributes. Additionally, you > >> need to have extended attribute support enabled in the > >> kernel configuration (CONFIG_EXT4_FS_XATTR). See the > >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > >> index 1921392..7e0b8aa 100644 > >> --- a/fs/ext4/ext4.h > >> +++ b/fs/ext4/ext4.h > >> @@ -884,7 +884,6 @@ struct ext4_inode_info { > >> /* > >> * Mount flags > >> */ > >> -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ > >> #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ > >> #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ > >> #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ > >> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > >> index 21bb2f6..0b5ec23 100644 > >> --- a/fs/ext4/ialloc.c > >> +++ b/fs/ext4/ialloc.c > >> @@ -293,118 +293,6 @@ error_return: > >> ext4_std_error(sb, fatal); > >> } > >> > >> -/* > >> - * There are two policies for allocating an inode. If the new inode is > >> - * a directory, then a forward search is made for a block group with both > >> - * free space and a low directory-to-inode ratio; if that fails, then of > >> - * the groups with above-average free space, that group with the fewest > >> - * directories already is chosen. > >> - * > >> - * For other inodes, search forward from the parent directory\'s block > >> - * group to find a free inode. > >> - */ > >> -static int find_group_dir(struct super_block *sb, struct inode *parent, > >> - ext4_group_t *best_group) > >> -{ > >> - ext4_group_t ngroups = ext4_get_groups_count(sb); > >> - unsigned int freei, avefreei; > >> - struct ext4_group_desc *desc, *best_desc = NULL; > >> - ext4_group_t group; > >> - int ret = -1; > >> - > >> - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); > >> - avefreei = freei / ngroups; > >> - > >> - for (group = 0; group < ngroups; group++) { > >> - desc = ext4_get_group_desc(sb, group, NULL); > >> - if (!desc || !ext4_free_inodes_count(sb, desc)) > >> - continue; > >> - if (ext4_free_inodes_count(sb, desc) < avefreei) > >> - continue; > >> - if (!best_desc || > >> - (ext4_free_blks_count(sb, desc) > > >> - ext4_free_blks_count(sb, best_desc))) { > >> - *best_group = group; > >> - best_desc = desc; > >> - ret = 0; > >> - } > >> - } > >> - return ret; > >> -} > >> - > >> -#define free_block_ratio 10 > >> - > >> -static int find_group_flex(struct super_block *sb, struct inode *parent, > >> - ext4_group_t *best_group) > >> -{ > >> - struct ext4_sb_info *sbi = EXT4_SB(sb); > >> - struct ext4_group_desc *desc; > >> - struct flex_groups *flex_group = sbi->s_flex_groups; > >> - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; > >> - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); > >> - ext4_group_t ngroups = ext4_get_groups_count(sb); > >> - int flex_size = ext4_flex_bg_size(sbi); > >> - ext4_group_t best_flex = parent_fbg_group; > >> - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; > >> - int flexbg_free_blocks; > >> - int flex_freeb_ratio; > >> - ext4_group_t n_fbg_groups; > >> - ext4_group_t i; > >> - > >> - n_fbg_groups = (ngroups + flex_size - 1) >> > >> - sbi->s_log_groups_per_flex; > >> - > >> -find_close_to_parent: > >> - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); > >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > >> - if (atomic_read(&flex_group[best_flex].free_inodes) && > >> - flex_freeb_ratio > free_block_ratio) > >> - goto found_flexbg; > >> - > >> - if (best_flex && best_flex == parent_fbg_group) { > >> - best_flex--; > >> - goto find_close_to_parent; > >> - } > >> - > >> - for (i = 0; i < n_fbg_groups; i++) { > >> - if (i == parent_fbg_group || i == parent_fbg_group - 1) > >> - continue; > >> - > >> - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); > >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > >> - > >> - if (flex_freeb_ratio > free_block_ratio && > >> - (atomic_read(&flex_group[i].free_inodes))) { > >> - best_flex = i; > >> - goto found_flexbg; > >> - } > >> - > >> - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || > >> - ((atomic_read(&flex_group[i].free_blocks) > > >> - atomic_read(&flex_group[best_flex].free_blocks)) && > >> - atomic_read(&flex_group[i].free_inodes))) > >> - best_flex = i; > >> - } > >> - > >> - if (!atomic_read(&flex_group[best_flex].free_inodes) || > >> - !atomic_read(&flex_group[best_flex].free_blocks)) > >> - return -1; > >> - > >> -found_flexbg: > >> - for (i = best_flex * flex_size; i < ngroups && > >> - i < (best_flex + 1) * flex_size; i++) { > >> - desc = ext4_get_group_desc(sb, i, NULL); > >> - if (ext4_free_inodes_count(sb, desc)) { > >> - *best_group = i; > >> - goto out; > >> - } > >> - } > >> - > >> - return -1; > >> -out: > >> - return 0; > >> -} > >> - > >> struct orlov_stats { > >> __u32 free_inodes; > >> __u32 free_blocks; > >> @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > >> struct inode *ret; > >> ext4_group_t i; > >> int free = 0; > >> - static int once = 1; > >> ext4_group_t flex_group; > >> > >> /* Cannot create files in a deleted directory */ > >> @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > >> goto got_group; > >> } > >> > >> - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { > >> - ret2 = find_group_flex(sb, dir, &group); > >> - if (ret2 == -1) { > >> - ret2 = find_group_other(sb, dir, &group, mode); > >> - if (ret2 == 0 && once) { > >> - once = 0; > >> - printk(KERN_NOTICE "ext4: find_group_flex " > >> - "failed, fallback succeeded dir %lu\n", > >> - dir->i_ino); > >> - } > >> - } > >> - goto got_group; > >> - } > >> - > >> - if (S_ISDIR(mode)) { > >> - if (test_opt(sb, OLDALLOC)) > >> - ret2 = find_group_dir(sb, dir, &group); > >> - else > >> - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > >> - } else > >> + if (S_ISDIR(mode)) > >> + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > >> + else > >> ret2 = find_group_other(sb, dir, &group, mode); > >> > >> got_group: > >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c > >> index cc5c157..e1f8f73 100644 > >> --- a/fs/ext4/super.c > >> +++ b/fs/ext4/super.c > >> @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) > >> seq_puts(seq, ",nouid32"); > >> if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) > >> seq_puts(seq, ",debug"); > >> - if (test_opt(sb, OLDALLOC)) > >> - seq_puts(seq, ",oldalloc"); > >> #ifdef CONFIG_EXT4_FS_XATTR > >> if (test_opt(sb, XATTR_USER)) > >> seq_puts(seq, ",user_xattr"); > >> @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, > >> set_opt(sb, DEBUG); > >> break; > >> case Opt_oldalloc: > >> - set_opt(sb, OLDALLOC); > >> + ext4_msg(sb, KERN_WARNING, > >> + "Ignoring deprecated oldalloc option"); > >> break; > >> case Opt_orlov: > >> - clear_opt(sb, OLDALLOC); > >> + ext4_msg(sb, KERN_WARNING, > >> + "Ignoring deprecated orlov option"); > >> break; > >> #ifdef CONFIG_EXT4_FS_XATTR > >> case Opt_user_xattr: > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > >
On Tue, 7 Jun 2011, Lukas Czerner wrote: > On Tue, 7 Jun 2011, Eric Sandeen wrote: > > > On 6/7/11 9:50 AM, Eric Sandeen wrote: > > > On 6/7/11 8:35 AM, Lukas Czerner wrote: > > >> For a long time now orlov is the default block allocator in the ext4. It > > >> performs better than the old one and no one seems to claim otherwise so > > >> we can safely drop it and make oldalloc and orlov mount option > > >> deprecated. > > >> > > >> This is a part of the effort to reduce number of ext4 options hence the > > >> test matrix. > > >> > > >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > > > > > Seems like a good idea to me. > > > > > > But I'm doing a little digging into why find_group_flex() was there; > > > why all that flex_bg-related inode allocation work for a deprecated option? > > > > > > commit 772cb7c83ba256a11c7bf99a11bef3858d23767c > > > Author: Jose R. Santos <jrs@us.ibm.com> > > > Date: Fri Jul 11 19:27:31 2008 -0400 > > > > > > ext4: New inode allocation for FLEX_BG meta-data groups. > > > > > > This patch mostly controls the way inode are allocated in order to > > > make ialloc aware of flex_bg block group grouping. It achieves this > > > by bypassing the Orlov allocator when block group meta-data are packed > > > toghether through mke2fs. <snip> > > > > > > find_group_flex() used to be called by ext4_new_inode() regardless of > > > OLDALLOC, (I think) so just want to see for sure what happened to that plan... > > > > Ah, ok: > > > > commit a4912123b688e057084e6557cef8924f7ae5bbde > > Author: Theodore Ts'o <tytso@mit.edu> > > Date: Thu Mar 12 12:18:34 2009 -0400 > > > > ext4: New inode/block allocation algorithms for flex_bg filesystems > > > > The find_group_flex() inode allocator is now only used if the > > filesystem is mounted using the "oldalloc" mount option. It is > > replaced with the original Orlov allocator that has been updated for > > flex_bg filesystems <snip> > > > > So: > > > > Reviewed-by: Eric Sandeen <sandeen@redhat.com> > > Thanks Eric, but I need to take it back for the moment. You've pointed > me to more code which is not needed anymore, so I have to update the patch > to remove all the useless pieces. > > Thanks! > -Lukas Nope, I was wrong, there is no more code to remove wrt. oldalloc. So the patch is fine. Sorry for the noise. Thanks! -Lukas > > > > > > > > -eric > > > > > >> --- > > >> Documentation/filesystems/ext4.txt | 8 -- > > >> fs/ext4/ext4.h | 1 - > > >> fs/ext4/ialloc.c | 136 +----------------------------------- > > >> fs/ext4/super.c | 8 +- > > >> 4 files changed, 7 insertions(+), 146 deletions(-) > > >> > > >> diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt > > >> index 3ae9bc9..ec469fa 100644 > > >> --- a/Documentation/filesystems/ext4.txt > > >> +++ b/Documentation/filesystems/ext4.txt > > >> @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum > > >> table readahead algorithm will pre-read into > > >> the buffer cache. The default value is 32 blocks. > > >> > > >> -orlov (*) This enables the new Orlov block allocator. It is > > >> - enabled by default. > > >> - > > >> -oldalloc This disables the Orlov block allocator and enables > > >> - the old block allocator. Orlov should have better > > >> - performance - we'd like to get some feedback if it's > > >> - the contrary for you. > > >> - > > >> user_xattr Enables Extended User Attributes. Additionally, you > > >> need to have extended attribute support enabled in the > > >> kernel configuration (CONFIG_EXT4_FS_XATTR). See the > > >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > > >> index 1921392..7e0b8aa 100644 > > >> --- a/fs/ext4/ext4.h > > >> +++ b/fs/ext4/ext4.h > > >> @@ -884,7 +884,6 @@ struct ext4_inode_info { > > >> /* > > >> * Mount flags > > >> */ > > >> -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ > > >> #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ > > >> #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ > > >> #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ > > >> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > > >> index 21bb2f6..0b5ec23 100644 > > >> --- a/fs/ext4/ialloc.c > > >> +++ b/fs/ext4/ialloc.c > > >> @@ -293,118 +293,6 @@ error_return: > > >> ext4_std_error(sb, fatal); > > >> } > > >> > > >> -/* > > >> - * There are two policies for allocating an inode. If the new inode is > > >> - * a directory, then a forward search is made for a block group with both > > >> - * free space and a low directory-to-inode ratio; if that fails, then of > > >> - * the groups with above-average free space, that group with the fewest > > >> - * directories already is chosen. > > >> - * > > >> - * For other inodes, search forward from the parent directory\'s block > > >> - * group to find a free inode. > > >> - */ > > >> -static int find_group_dir(struct super_block *sb, struct inode *parent, > > >> - ext4_group_t *best_group) > > >> -{ > > >> - ext4_group_t ngroups = ext4_get_groups_count(sb); > > >> - unsigned int freei, avefreei; > > >> - struct ext4_group_desc *desc, *best_desc = NULL; > > >> - ext4_group_t group; > > >> - int ret = -1; > > >> - > > >> - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); > > >> - avefreei = freei / ngroups; > > >> - > > >> - for (group = 0; group < ngroups; group++) { > > >> - desc = ext4_get_group_desc(sb, group, NULL); > > >> - if (!desc || !ext4_free_inodes_count(sb, desc)) > > >> - continue; > > >> - if (ext4_free_inodes_count(sb, desc) < avefreei) > > >> - continue; > > >> - if (!best_desc || > > >> - (ext4_free_blks_count(sb, desc) > > > >> - ext4_free_blks_count(sb, best_desc))) { > > >> - *best_group = group; > > >> - best_desc = desc; > > >> - ret = 0; > > >> - } > > >> - } > > >> - return ret; > > >> -} > > >> - > > >> -#define free_block_ratio 10 > > >> - > > >> -static int find_group_flex(struct super_block *sb, struct inode *parent, > > >> - ext4_group_t *best_group) > > >> -{ > > >> - struct ext4_sb_info *sbi = EXT4_SB(sb); > > >> - struct ext4_group_desc *desc; > > >> - struct flex_groups *flex_group = sbi->s_flex_groups; > > >> - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; > > >> - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); > > >> - ext4_group_t ngroups = ext4_get_groups_count(sb); > > >> - int flex_size = ext4_flex_bg_size(sbi); > > >> - ext4_group_t best_flex = parent_fbg_group; > > >> - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; > > >> - int flexbg_free_blocks; > > >> - int flex_freeb_ratio; > > >> - ext4_group_t n_fbg_groups; > > >> - ext4_group_t i; > > >> - > > >> - n_fbg_groups = (ngroups + flex_size - 1) >> > > >> - sbi->s_log_groups_per_flex; > > >> - > > >> -find_close_to_parent: > > >> - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); > > >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > > >> - if (atomic_read(&flex_group[best_flex].free_inodes) && > > >> - flex_freeb_ratio > free_block_ratio) > > >> - goto found_flexbg; > > >> - > > >> - if (best_flex && best_flex == parent_fbg_group) { > > >> - best_flex--; > > >> - goto find_close_to_parent; > > >> - } > > >> - > > >> - for (i = 0; i < n_fbg_groups; i++) { > > >> - if (i == parent_fbg_group || i == parent_fbg_group - 1) > > >> - continue; > > >> - > > >> - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); > > >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > > >> - > > >> - if (flex_freeb_ratio > free_block_ratio && > > >> - (atomic_read(&flex_group[i].free_inodes))) { > > >> - best_flex = i; > > >> - goto found_flexbg; > > >> - } > > >> - > > >> - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || > > >> - ((atomic_read(&flex_group[i].free_blocks) > > > >> - atomic_read(&flex_group[best_flex].free_blocks)) && > > >> - atomic_read(&flex_group[i].free_inodes))) > > >> - best_flex = i; > > >> - } > > >> - > > >> - if (!atomic_read(&flex_group[best_flex].free_inodes) || > > >> - !atomic_read(&flex_group[best_flex].free_blocks)) > > >> - return -1; > > >> - > > >> -found_flexbg: > > >> - for (i = best_flex * flex_size; i < ngroups && > > >> - i < (best_flex + 1) * flex_size; i++) { > > >> - desc = ext4_get_group_desc(sb, i, NULL); > > >> - if (ext4_free_inodes_count(sb, desc)) { > > >> - *best_group = i; > > >> - goto out; > > >> - } > > >> - } > > >> - > > >> - return -1; > > >> -out: > > >> - return 0; > > >> -} > > >> - > > >> struct orlov_stats { > > >> __u32 free_inodes; > > >> __u32 free_blocks; > > >> @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > > >> struct inode *ret; > > >> ext4_group_t i; > > >> int free = 0; > > >> - static int once = 1; > > >> ext4_group_t flex_group; > > >> > > >> /* Cannot create files in a deleted directory */ > > >> @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > > >> goto got_group; > > >> } > > >> > > >> - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { > > >> - ret2 = find_group_flex(sb, dir, &group); > > >> - if (ret2 == -1) { > > >> - ret2 = find_group_other(sb, dir, &group, mode); > > >> - if (ret2 == 0 && once) { > > >> - once = 0; > > >> - printk(KERN_NOTICE "ext4: find_group_flex " > > >> - "failed, fallback succeeded dir %lu\n", > > >> - dir->i_ino); > > >> - } > > >> - } > > >> - goto got_group; > > >> - } > > >> - > > >> - if (S_ISDIR(mode)) { > > >> - if (test_opt(sb, OLDALLOC)) > > >> - ret2 = find_group_dir(sb, dir, &group); > > >> - else > > >> - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > > >> - } else > > >> + if (S_ISDIR(mode)) > > >> + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > > >> + else > > >> ret2 = find_group_other(sb, dir, &group, mode); > > >> > > >> got_group: > > >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c > > >> index cc5c157..e1f8f73 100644 > > >> --- a/fs/ext4/super.c > > >> +++ b/fs/ext4/super.c > > >> @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) > > >> seq_puts(seq, ",nouid32"); > > >> if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) > > >> seq_puts(seq, ",debug"); > > >> - if (test_opt(sb, OLDALLOC)) > > >> - seq_puts(seq, ",oldalloc"); > > >> #ifdef CONFIG_EXT4_FS_XATTR > > >> if (test_opt(sb, XATTR_USER)) > > >> seq_puts(seq, ",user_xattr"); > > >> @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, > > >> set_opt(sb, DEBUG); > > >> break; > > >> case Opt_oldalloc: > > >> - set_opt(sb, OLDALLOC); > > >> + ext4_msg(sb, KERN_WARNING, > > >> + "Ignoring deprecated oldalloc option"); > > >> break; > > >> case Opt_orlov: > > >> - clear_opt(sb, OLDALLOC); > > >> + ext4_msg(sb, KERN_WARNING, > > >> + "Ignoring deprecated orlov option"); > > >> break; > > >> #ifdef CONFIG_EXT4_FS_XATTR > > >> case Opt_user_xattr: > > > > > > -- > > > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > > > the body of a message to majordomo@vger.kernel.org > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > > > >
On Tue, 7 Jun 2011, Lukas Czerner wrote: > For a long time now orlov is the default block allocator in the ext4. It > performs better than the old one and no one seems to claim otherwise so > we can safely drop it and make oldalloc and orlov mount option > deprecated. > > This is a part of the effort to reduce number of ext4 options hence the > test matrix. > > Signed-off-by: Lukas Czerner <lczerner@redhat.com> ping > --- > Documentation/filesystems/ext4.txt | 8 -- > fs/ext4/ext4.h | 1 - > fs/ext4/ialloc.c | 136 +----------------------------------- > fs/ext4/super.c | 8 +- > 4 files changed, 7 insertions(+), 146 deletions(-) > > diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt > index 3ae9bc9..ec469fa 100644 > --- a/Documentation/filesystems/ext4.txt > +++ b/Documentation/filesystems/ext4.txt > @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum > table readahead algorithm will pre-read into > the buffer cache. The default value is 32 blocks. > > -orlov (*) This enables the new Orlov block allocator. It is > - enabled by default. > - > -oldalloc This disables the Orlov block allocator and enables > - the old block allocator. Orlov should have better > - performance - we'd like to get some feedback if it's > - the contrary for you. > - > user_xattr Enables Extended User Attributes. Additionally, you > need to have extended attribute support enabled in the > kernel configuration (CONFIG_EXT4_FS_XATTR). See the > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 1921392..7e0b8aa 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -884,7 +884,6 @@ struct ext4_inode_info { > /* > * Mount flags > */ > -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ > #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ > #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ > #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ > diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > index 21bb2f6..0b5ec23 100644 > --- a/fs/ext4/ialloc.c > +++ b/fs/ext4/ialloc.c > @@ -293,118 +293,6 @@ error_return: > ext4_std_error(sb, fatal); > } > > -/* > - * There are two policies for allocating an inode. If the new inode is > - * a directory, then a forward search is made for a block group with both > - * free space and a low directory-to-inode ratio; if that fails, then of > - * the groups with above-average free space, that group with the fewest > - * directories already is chosen. > - * > - * For other inodes, search forward from the parent directory\'s block > - * group to find a free inode. > - */ > -static int find_group_dir(struct super_block *sb, struct inode *parent, > - ext4_group_t *best_group) > -{ > - ext4_group_t ngroups = ext4_get_groups_count(sb); > - unsigned int freei, avefreei; > - struct ext4_group_desc *desc, *best_desc = NULL; > - ext4_group_t group; > - int ret = -1; > - > - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); > - avefreei = freei / ngroups; > - > - for (group = 0; group < ngroups; group++) { > - desc = ext4_get_group_desc(sb, group, NULL); > - if (!desc || !ext4_free_inodes_count(sb, desc)) > - continue; > - if (ext4_free_inodes_count(sb, desc) < avefreei) > - continue; > - if (!best_desc || > - (ext4_free_blks_count(sb, desc) > > - ext4_free_blks_count(sb, best_desc))) { > - *best_group = group; > - best_desc = desc; > - ret = 0; > - } > - } > - return ret; > -} > - > -#define free_block_ratio 10 > - > -static int find_group_flex(struct super_block *sb, struct inode *parent, > - ext4_group_t *best_group) > -{ > - struct ext4_sb_info *sbi = EXT4_SB(sb); > - struct ext4_group_desc *desc; > - struct flex_groups *flex_group = sbi->s_flex_groups; > - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; > - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); > - ext4_group_t ngroups = ext4_get_groups_count(sb); > - int flex_size = ext4_flex_bg_size(sbi); > - ext4_group_t best_flex = parent_fbg_group; > - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; > - int flexbg_free_blocks; > - int flex_freeb_ratio; > - ext4_group_t n_fbg_groups; > - ext4_group_t i; > - > - n_fbg_groups = (ngroups + flex_size - 1) >> > - sbi->s_log_groups_per_flex; > - > -find_close_to_parent: > - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); > - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > - if (atomic_read(&flex_group[best_flex].free_inodes) && > - flex_freeb_ratio > free_block_ratio) > - goto found_flexbg; > - > - if (best_flex && best_flex == parent_fbg_group) { > - best_flex--; > - goto find_close_to_parent; > - } > - > - for (i = 0; i < n_fbg_groups; i++) { > - if (i == parent_fbg_group || i == parent_fbg_group - 1) > - continue; > - > - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); > - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > - > - if (flex_freeb_ratio > free_block_ratio && > - (atomic_read(&flex_group[i].free_inodes))) { > - best_flex = i; > - goto found_flexbg; > - } > - > - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || > - ((atomic_read(&flex_group[i].free_blocks) > > - atomic_read(&flex_group[best_flex].free_blocks)) && > - atomic_read(&flex_group[i].free_inodes))) > - best_flex = i; > - } > - > - if (!atomic_read(&flex_group[best_flex].free_inodes) || > - !atomic_read(&flex_group[best_flex].free_blocks)) > - return -1; > - > -found_flexbg: > - for (i = best_flex * flex_size; i < ngroups && > - i < (best_flex + 1) * flex_size; i++) { > - desc = ext4_get_group_desc(sb, i, NULL); > - if (ext4_free_inodes_count(sb, desc)) { > - *best_group = i; > - goto out; > - } > - } > - > - return -1; > -out: > - return 0; > -} > - > struct orlov_stats { > __u32 free_inodes; > __u32 free_blocks; > @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > struct inode *ret; > ext4_group_t i; > int free = 0; > - static int once = 1; > ext4_group_t flex_group; > > /* Cannot create files in a deleted directory */ > @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > goto got_group; > } > > - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { > - ret2 = find_group_flex(sb, dir, &group); > - if (ret2 == -1) { > - ret2 = find_group_other(sb, dir, &group, mode); > - if (ret2 == 0 && once) { > - once = 0; > - printk(KERN_NOTICE "ext4: find_group_flex " > - "failed, fallback succeeded dir %lu\n", > - dir->i_ino); > - } > - } > - goto got_group; > - } > - > - if (S_ISDIR(mode)) { > - if (test_opt(sb, OLDALLOC)) > - ret2 = find_group_dir(sb, dir, &group); > - else > - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > - } else > + if (S_ISDIR(mode)) > + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > + else > ret2 = find_group_other(sb, dir, &group, mode); > > got_group: > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index cc5c157..e1f8f73 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) > seq_puts(seq, ",nouid32"); > if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) > seq_puts(seq, ",debug"); > - if (test_opt(sb, OLDALLOC)) > - seq_puts(seq, ",oldalloc"); > #ifdef CONFIG_EXT4_FS_XATTR > if (test_opt(sb, XATTR_USER)) > seq_puts(seq, ",user_xattr"); > @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, > set_opt(sb, DEBUG); > break; > case Opt_oldalloc: > - set_opt(sb, OLDALLOC); > + ext4_msg(sb, KERN_WARNING, > + "Ignoring deprecated oldalloc option"); > break; > case Opt_orlov: > - clear_opt(sb, OLDALLOC); > + ext4_msg(sb, KERN_WARNING, > + "Ignoring deprecated orlov option"); > break; > #ifdef CONFIG_EXT4_FS_XATTR > case Opt_user_xattr: >
On 2011-08-11, at 8:58 AM, Lukas Czerner wrote: > On Tue, 7 Jun 2011, Lukas Czerner wrote: >> For a long time now orlov is the default block allocator in the ext4. It >> performs better than the old one and no one seems to claim otherwise so >> we can safely drop it and make oldalloc and orlov mount option >> deprecated. >> >> This is a part of the effort to reduce number of ext4 options hence the >> test matrix. >> >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > ping I'm OK with removing this, I don't think anyone uses it, and it has almost no meaning with flex_bg anyway. That said, "orlov" is also mostly meaningless with flex_bg as well, since there is very little real benefit/affinity from inodes being "close" to their data blocks. We gain far more benefit from keeping the inodes together than spreading them out and keeping them close to the data blocks. >> --- >> Documentation/filesystems/ext4.txt | 8 -- >> fs/ext4/ext4.h | 1 - >> fs/ext4/ialloc.c | 136 +----------------------------------- >> fs/ext4/super.c | 8 +- >> 4 files changed, 7 insertions(+), 146 deletions(-) >> >> diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt >> index 3ae9bc9..ec469fa 100644 >> --- a/Documentation/filesystems/ext4.txt >> +++ b/Documentation/filesystems/ext4.txt >> @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum >> table readahead algorithm will pre-read into >> the buffer cache. The default value is 32 blocks. >> >> -orlov (*) This enables the new Orlov block allocator. It is >> - enabled by default. >> - >> -oldalloc This disables the Orlov block allocator and enables >> - the old block allocator. Orlov should have better >> - performance - we'd like to get some feedback if it's >> - the contrary for you. >> - >> user_xattr Enables Extended User Attributes. Additionally, you >> need to have extended attribute support enabled in the >> kernel configuration (CONFIG_EXT4_FS_XATTR). See the >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h >> index 1921392..7e0b8aa 100644 >> --- a/fs/ext4/ext4.h >> +++ b/fs/ext4/ext4.h >> @@ -884,7 +884,6 @@ struct ext4_inode_info { >> /* >> * Mount flags >> */ >> -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ >> #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ >> #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ >> #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ >> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c >> index 21bb2f6..0b5ec23 100644 >> --- a/fs/ext4/ialloc.c >> +++ b/fs/ext4/ialloc.c >> @@ -293,118 +293,6 @@ error_return: >> ext4_std_error(sb, fatal); >> } >> >> -/* >> - * There are two policies for allocating an inode. If the new inode is >> - * a directory, then a forward search is made for a block group with both >> - * free space and a low directory-to-inode ratio; if that fails, then of >> - * the groups with above-average free space, that group with the fewest >> - * directories already is chosen. >> - * >> - * For other inodes, search forward from the parent directory\'s block >> - * group to find a free inode. >> - */ >> -static int find_group_dir(struct super_block *sb, struct inode *parent, >> - ext4_group_t *best_group) >> -{ >> - ext4_group_t ngroups = ext4_get_groups_count(sb); >> - unsigned int freei, avefreei; >> - struct ext4_group_desc *desc, *best_desc = NULL; >> - ext4_group_t group; >> - int ret = -1; >> - >> - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); >> - avefreei = freei / ngroups; >> - >> - for (group = 0; group < ngroups; group++) { >> - desc = ext4_get_group_desc(sb, group, NULL); >> - if (!desc || !ext4_free_inodes_count(sb, desc)) >> - continue; >> - if (ext4_free_inodes_count(sb, desc) < avefreei) >> - continue; >> - if (!best_desc || >> - (ext4_free_blks_count(sb, desc) > >> - ext4_free_blks_count(sb, best_desc))) { >> - *best_group = group; >> - best_desc = desc; >> - ret = 0; >> - } >> - } >> - return ret; >> -} >> - >> -#define free_block_ratio 10 >> - >> -static int find_group_flex(struct super_block *sb, struct inode *parent, >> - ext4_group_t *best_group) >> -{ >> - struct ext4_sb_info *sbi = EXT4_SB(sb); >> - struct ext4_group_desc *desc; >> - struct flex_groups *flex_group = sbi->s_flex_groups; >> - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; >> - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); >> - ext4_group_t ngroups = ext4_get_groups_count(sb); >> - int flex_size = ext4_flex_bg_size(sbi); >> - ext4_group_t best_flex = parent_fbg_group; >> - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; >> - int flexbg_free_blocks; >> - int flex_freeb_ratio; >> - ext4_group_t n_fbg_groups; >> - ext4_group_t i; >> - >> - n_fbg_groups = (ngroups + flex_size - 1) >> >> - sbi->s_log_groups_per_flex; >> - >> -find_close_to_parent: >> - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; >> - if (atomic_read(&flex_group[best_flex].free_inodes) && >> - flex_freeb_ratio > free_block_ratio) >> - goto found_flexbg; >> - >> - if (best_flex && best_flex == parent_fbg_group) { >> - best_flex--; >> - goto find_close_to_parent; >> - } >> - >> - for (i = 0; i < n_fbg_groups; i++) { >> - if (i == parent_fbg_group || i == parent_fbg_group - 1) >> - continue; >> - >> - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; >> - >> - if (flex_freeb_ratio > free_block_ratio && >> - (atomic_read(&flex_group[i].free_inodes))) { >> - best_flex = i; >> - goto found_flexbg; >> - } >> - >> - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || >> - ((atomic_read(&flex_group[i].free_blocks) > >> - atomic_read(&flex_group[best_flex].free_blocks)) && >> - atomic_read(&flex_group[i].free_inodes))) >> - best_flex = i; >> - } >> - >> - if (!atomic_read(&flex_group[best_flex].free_inodes) || >> - !atomic_read(&flex_group[best_flex].free_blocks)) >> - return -1; >> - >> -found_flexbg: >> - for (i = best_flex * flex_size; i < ngroups && >> - i < (best_flex + 1) * flex_size; i++) { >> - desc = ext4_get_group_desc(sb, i, NULL); >> - if (ext4_free_inodes_count(sb, desc)) { >> - *best_group = i; >> - goto out; >> - } >> - } >> - >> - return -1; >> -out: >> - return 0; >> -} >> - >> struct orlov_stats { >> __u32 free_inodes; >> __u32 free_blocks; >> @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, >> struct inode *ret; >> ext4_group_t i; >> int free = 0; >> - static int once = 1; >> ext4_group_t flex_group; >> >> /* Cannot create files in a deleted directory */ >> @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, >> goto got_group; >> } >> >> - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { >> - ret2 = find_group_flex(sb, dir, &group); >> - if (ret2 == -1) { >> - ret2 = find_group_other(sb, dir, &group, mode); >> - if (ret2 == 0 && once) { >> - once = 0; >> - printk(KERN_NOTICE "ext4: find_group_flex " >> - "failed, fallback succeeded dir %lu\n", >> - dir->i_ino); >> - } >> - } >> - goto got_group; >> - } >> - >> - if (S_ISDIR(mode)) { >> - if (test_opt(sb, OLDALLOC)) >> - ret2 = find_group_dir(sb, dir, &group); >> - else >> - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); >> - } else >> + if (S_ISDIR(mode)) >> + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); >> + else >> ret2 = find_group_other(sb, dir, &group, mode); >> >> got_group: >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c >> index cc5c157..e1f8f73 100644 >> --- a/fs/ext4/super.c >> +++ b/fs/ext4/super.c >> @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) >> seq_puts(seq, ",nouid32"); >> if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) >> seq_puts(seq, ",debug"); >> - if (test_opt(sb, OLDALLOC)) >> - seq_puts(seq, ",oldalloc"); >> #ifdef CONFIG_EXT4_FS_XATTR >> if (test_opt(sb, XATTR_USER)) >> seq_puts(seq, ",user_xattr"); >> @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, >> set_opt(sb, DEBUG); >> break; >> case Opt_oldalloc: >> - set_opt(sb, OLDALLOC); >> + ext4_msg(sb, KERN_WARNING, >> + "Ignoring deprecated oldalloc option"); >> break; >> case Opt_orlov: >> - clear_opt(sb, OLDALLOC); >> + ext4_msg(sb, KERN_WARNING, >> + "Ignoring deprecated orlov option"); >> break; >> #ifdef CONFIG_EXT4_FS_XATTR >> case Opt_user_xattr: >> > > -- > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html Cheers, Andreas -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 11 Aug 2011, Andreas Dilger wrote: > On 2011-08-11, at 8:58 AM, Lukas Czerner wrote: > > On Tue, 7 Jun 2011, Lukas Czerner wrote: > >> For a long time now orlov is the default block allocator in the ext4. It > >> performs better than the old one and no one seems to claim otherwise so > >> we can safely drop it and make oldalloc and orlov mount option > >> deprecated. > >> > >> This is a part of the effort to reduce number of ext4 options hence the > >> test matrix. > >> > >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > > > ping > > I'm OK with removing this, I don't think anyone uses it, and it has almost > no meaning with flex_bg anyway. > > That said, "orlov" is also mostly meaningless with flex_bg as well, since > there is very little real benefit/affinity from inodes being "close" to > their data blocks. We gain far more benefit from keeping the inodes > together than spreading them out and keeping them close to the data blocks. What about removing it for ext3 as well ? I can prepare a patch. Also note that there is a bug in the OLDALLOC where if there is approximately the same number of inodes in all of the allocation groups it might result in the state where no group has less free inode count than the average, hence we get ENOSPC even though there is enough space for the inode to be allocated. It is unlikely, but it is there. So Ted, could you take the patch ? Thanks! -Lukas > > > >> --- > >> Documentation/filesystems/ext4.txt | 8 -- > >> fs/ext4/ext4.h | 1 - > >> fs/ext4/ialloc.c | 136 +----------------------------------- > >> fs/ext4/super.c | 8 +- > >> 4 files changed, 7 insertions(+), 146 deletions(-) > >> > >> diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt > >> index 3ae9bc9..ec469fa 100644 > >> --- a/Documentation/filesystems/ext4.txt > >> +++ b/Documentation/filesystems/ext4.txt > >> @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum > >> table readahead algorithm will pre-read into > >> the buffer cache. The default value is 32 blocks. > >> > >> -orlov (*) This enables the new Orlov block allocator. It is > >> - enabled by default. > >> - > >> -oldalloc This disables the Orlov block allocator and enables > >> - the old block allocator. Orlov should have better > >> - performance - we'd like to get some feedback if it's > >> - the contrary for you. > >> - > >> user_xattr Enables Extended User Attributes. Additionally, you > >> need to have extended attribute support enabled in the > >> kernel configuration (CONFIG_EXT4_FS_XATTR). See the > >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > >> index 1921392..7e0b8aa 100644 > >> --- a/fs/ext4/ext4.h > >> +++ b/fs/ext4/ext4.h > >> @@ -884,7 +884,6 @@ struct ext4_inode_info { > >> /* > >> * Mount flags > >> */ > >> -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ > >> #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ > >> #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ > >> #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ > >> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > >> index 21bb2f6..0b5ec23 100644 > >> --- a/fs/ext4/ialloc.c > >> +++ b/fs/ext4/ialloc.c > >> @@ -293,118 +293,6 @@ error_return: > >> ext4_std_error(sb, fatal); > >> } > >> > >> -/* > >> - * There are two policies for allocating an inode. If the new inode is > >> - * a directory, then a forward search is made for a block group with both > >> - * free space and a low directory-to-inode ratio; if that fails, then of > >> - * the groups with above-average free space, that group with the fewest > >> - * directories already is chosen. > >> - * > >> - * For other inodes, search forward from the parent directory\'s block > >> - * group to find a free inode. > >> - */ > >> -static int find_group_dir(struct super_block *sb, struct inode *parent, > >> - ext4_group_t *best_group) > >> -{ > >> - ext4_group_t ngroups = ext4_get_groups_count(sb); > >> - unsigned int freei, avefreei; > >> - struct ext4_group_desc *desc, *best_desc = NULL; > >> - ext4_group_t group; > >> - int ret = -1; > >> - > >> - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); > >> - avefreei = freei / ngroups; > >> - > >> - for (group = 0; group < ngroups; group++) { > >> - desc = ext4_get_group_desc(sb, group, NULL); > >> - if (!desc || !ext4_free_inodes_count(sb, desc)) > >> - continue; > >> - if (ext4_free_inodes_count(sb, desc) < avefreei) > >> - continue; > >> - if (!best_desc || > >> - (ext4_free_blks_count(sb, desc) > > >> - ext4_free_blks_count(sb, best_desc))) { > >> - *best_group = group; > >> - best_desc = desc; > >> - ret = 0; > >> - } > >> - } > >> - return ret; > >> -} > >> - > >> -#define free_block_ratio 10 > >> - > >> -static int find_group_flex(struct super_block *sb, struct inode *parent, > >> - ext4_group_t *best_group) > >> -{ > >> - struct ext4_sb_info *sbi = EXT4_SB(sb); > >> - struct ext4_group_desc *desc; > >> - struct flex_groups *flex_group = sbi->s_flex_groups; > >> - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; > >> - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); > >> - ext4_group_t ngroups = ext4_get_groups_count(sb); > >> - int flex_size = ext4_flex_bg_size(sbi); > >> - ext4_group_t best_flex = parent_fbg_group; > >> - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; > >> - int flexbg_free_blocks; > >> - int flex_freeb_ratio; > >> - ext4_group_t n_fbg_groups; > >> - ext4_group_t i; > >> - > >> - n_fbg_groups = (ngroups + flex_size - 1) >> > >> - sbi->s_log_groups_per_flex; > >> - > >> -find_close_to_parent: > >> - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); > >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > >> - if (atomic_read(&flex_group[best_flex].free_inodes) && > >> - flex_freeb_ratio > free_block_ratio) > >> - goto found_flexbg; > >> - > >> - if (best_flex && best_flex == parent_fbg_group) { > >> - best_flex--; > >> - goto find_close_to_parent; > >> - } > >> - > >> - for (i = 0; i < n_fbg_groups; i++) { > >> - if (i == parent_fbg_group || i == parent_fbg_group - 1) > >> - continue; > >> - > >> - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); > >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > >> - > >> - if (flex_freeb_ratio > free_block_ratio && > >> - (atomic_read(&flex_group[i].free_inodes))) { > >> - best_flex = i; > >> - goto found_flexbg; > >> - } > >> - > >> - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || > >> - ((atomic_read(&flex_group[i].free_blocks) > > >> - atomic_read(&flex_group[best_flex].free_blocks)) && > >> - atomic_read(&flex_group[i].free_inodes))) > >> - best_flex = i; > >> - } > >> - > >> - if (!atomic_read(&flex_group[best_flex].free_inodes) || > >> - !atomic_read(&flex_group[best_flex].free_blocks)) > >> - return -1; > >> - > >> -found_flexbg: > >> - for (i = best_flex * flex_size; i < ngroups && > >> - i < (best_flex + 1) * flex_size; i++) { > >> - desc = ext4_get_group_desc(sb, i, NULL); > >> - if (ext4_free_inodes_count(sb, desc)) { > >> - *best_group = i; > >> - goto out; > >> - } > >> - } > >> - > >> - return -1; > >> -out: > >> - return 0; > >> -} > >> - > >> struct orlov_stats { > >> __u32 free_inodes; > >> __u32 free_blocks; > >> @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > >> struct inode *ret; > >> ext4_group_t i; > >> int free = 0; > >> - static int once = 1; > >> ext4_group_t flex_group; > >> > >> /* Cannot create files in a deleted directory */ > >> @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > >> goto got_group; > >> } > >> > >> - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { > >> - ret2 = find_group_flex(sb, dir, &group); > >> - if (ret2 == -1) { > >> - ret2 = find_group_other(sb, dir, &group, mode); > >> - if (ret2 == 0 && once) { > >> - once = 0; > >> - printk(KERN_NOTICE "ext4: find_group_flex " > >> - "failed, fallback succeeded dir %lu\n", > >> - dir->i_ino); > >> - } > >> - } > >> - goto got_group; > >> - } > >> - > >> - if (S_ISDIR(mode)) { > >> - if (test_opt(sb, OLDALLOC)) > >> - ret2 = find_group_dir(sb, dir, &group); > >> - else > >> - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > >> - } else > >> + if (S_ISDIR(mode)) > >> + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > >> + else > >> ret2 = find_group_other(sb, dir, &group, mode); > >> > >> got_group: > >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c > >> index cc5c157..e1f8f73 100644 > >> --- a/fs/ext4/super.c > >> +++ b/fs/ext4/super.c > >> @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) > >> seq_puts(seq, ",nouid32"); > >> if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) > >> seq_puts(seq, ",debug"); > >> - if (test_opt(sb, OLDALLOC)) > >> - seq_puts(seq, ",oldalloc"); > >> #ifdef CONFIG_EXT4_FS_XATTR > >> if (test_opt(sb, XATTR_USER)) > >> seq_puts(seq, ",user_xattr"); > >> @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, > >> set_opt(sb, DEBUG); > >> break; > >> case Opt_oldalloc: > >> - set_opt(sb, OLDALLOC); > >> + ext4_msg(sb, KERN_WARNING, > >> + "Ignoring deprecated oldalloc option"); > >> break; > >> case Opt_orlov: > >> - clear_opt(sb, OLDALLOC); > >> + ext4_msg(sb, KERN_WARNING, > >> + "Ignoring deprecated orlov option"); > >> break; > >> #ifdef CONFIG_EXT4_FS_XATTR > >> case Opt_user_xattr: > >> > > > > -- > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > Cheers, Andreas > > > > > >
On Mon 15-08-11 16:21:27, Lukas Czerner wrote: > On Thu, 11 Aug 2011, Andreas Dilger wrote: > > On 2011-08-11, at 8:58 AM, Lukas Czerner wrote: > > > On Tue, 7 Jun 2011, Lukas Czerner wrote: > > >> For a long time now orlov is the default block allocator in the ext4. It > > >> performs better than the old one and no one seems to claim otherwise so > > >> we can safely drop it and make oldalloc and orlov mount option > > >> deprecated. > > >> > > >> This is a part of the effort to reduce number of ext4 options hence the > > >> test matrix. > > >> > > >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > > > > > ping > > > > I'm OK with removing this, I don't think anyone uses it, and it has almost > > no meaning with flex_bg anyway. > > > > That said, "orlov" is also mostly meaningless with flex_bg as well, since > > there is very little real benefit/affinity from inodes being "close" to > > their data blocks. We gain far more benefit from keeping the inodes > > together than spreading them out and keeping them close to the data blocks. > > What about removing it for ext3 as well ? I can prepare a patch. OK, let's start warning the option is deprecated and will be removed from ext3. We can remove it after 2-3 releases... > Also note that there is a bug in the OLDALLOC where if there is > approximately the same number of inodes in all of the allocation groups > it might result in the state where no group has less free inode count > than the average, hence we get ENOSPC even though there is enough space > for the inode to be allocated. It is unlikely, but it is there. Fix for this would be nice. > So Ted, could you take the patch ? ext3 patches go through my tree. Honza
On Mon, 15 Aug 2011, Jan Kara wrote: > On Mon 15-08-11 16:21:27, Lukas Czerner wrote: > > On Thu, 11 Aug 2011, Andreas Dilger wrote: > > > On 2011-08-11, at 8:58 AM, Lukas Czerner wrote: > > > > On Tue, 7 Jun 2011, Lukas Czerner wrote: > > > >> For a long time now orlov is the default block allocator in the ext4. It > > > >> performs better than the old one and no one seems to claim otherwise so > > > >> we can safely drop it and make oldalloc and orlov mount option > > > >> deprecated. > > > >> > > > >> This is a part of the effort to reduce number of ext4 options hence the > > > >> test matrix. > > > >> > > > >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > > > > > > > ping > > > > > > I'm OK with removing this, I don't think anyone uses it, and it has almost > > > no meaning with flex_bg anyway. > > > > > > That said, "orlov" is also mostly meaningless with flex_bg as well, since > > > there is very little real benefit/affinity from inodes being "close" to > > > their data blocks. We gain far more benefit from keeping the inodes > > > together than spreading them out and keeping them close to the data blocks. > > > > What about removing it for ext3 as well ? I can prepare a patch. > OK, let's start warning the option is deprecated and will be removed from > ext3. We can remove it after 2-3 releases... Is that really necessary ? It is not like we are removing a feature which would not work anymore. > > > Also note that there is a bug in the OLDALLOC where if there is > > approximately the same number of inodes in all of the allocation groups > > it might result in the state where no group has less free inode count > > than the average, hence we get ENOSPC even though there is enough space > > for the inode to be allocated. It is unlikely, but it is there. > Fix for this would be nice. Will, I was kind of hoping that we will ditch that instead of fixing it, but I can do that if it is not going away right now. > > > So Ted, could you take the patch ? > ext3 patches go through my tree. I know, the was meant to the patch that started this thread which is for ext4. > > Honza > Thanks! -Lukas -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon 15-08-11 17:50:53, Lukas Czerner wrote: > On Mon, 15 Aug 2011, Jan Kara wrote: > > On Mon 15-08-11 16:21:27, Lukas Czerner wrote: > > > On Thu, 11 Aug 2011, Andreas Dilger wrote: > > > > On 2011-08-11, at 8:58 AM, Lukas Czerner wrote: > > > > > On Tue, 7 Jun 2011, Lukas Czerner wrote: > > > > >> For a long time now orlov is the default block allocator in the ext4. It > > > > >> performs better than the old one and no one seems to claim otherwise so > > > > >> we can safely drop it and make oldalloc and orlov mount option > > > > >> deprecated. > > > > >> > > > > >> This is a part of the effort to reduce number of ext4 options hence the > > > > >> test matrix. > > > > >> > > > > >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > > > > > > > > > ping > > > > > > > > I'm OK with removing this, I don't think anyone uses it, and it has almost > > > > no meaning with flex_bg anyway. > > > > > > > > That said, "orlov" is also mostly meaningless with flex_bg as well, since > > > > there is very little real benefit/affinity from inodes being "close" to > > > > their data blocks. We gain far more benefit from keeping the inodes > > > > together than spreading them out and keeping them close to the data blocks. > > > > > > What about removing it for ext3 as well ? I can prepare a patch. > > OK, let's start warning the option is deprecated and will be removed from > > ext3. We can remove it after 2-3 releases... > > Is that really necessary ? It is not like we are removing a feature which > would not work anymore. Well, we are removing a mount option so if someone is using it e.g. in /etc/fstab, his machine will fail to mount the filesystem. Or did I misunderstood your intention? Honza -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 15 Aug 2011, Jan Kara wrote: > On Mon 15-08-11 17:50:53, Lukas Czerner wrote: > > On Mon, 15 Aug 2011, Jan Kara wrote: > > > On Mon 15-08-11 16:21:27, Lukas Czerner wrote: > > > > On Thu, 11 Aug 2011, Andreas Dilger wrote: > > > > > On 2011-08-11, at 8:58 AM, Lukas Czerner wrote: > > > > > > On Tue, 7 Jun 2011, Lukas Czerner wrote: > > > > > >> For a long time now orlov is the default block allocator in the ext4. It > > > > > >> performs better than the old one and no one seems to claim otherwise so > > > > > >> we can safely drop it and make oldalloc and orlov mount option > > > > > >> deprecated. > > > > > >> > > > > > >> This is a part of the effort to reduce number of ext4 options hence the > > > > > >> test matrix. > > > > > >> > > > > > >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > > > > > > > > > > > ping > > > > > > > > > > I'm OK with removing this, I don't think anyone uses it, and it has almost > > > > > no meaning with flex_bg anyway. > > > > > > > > > > That said, "orlov" is also mostly meaningless with flex_bg as well, since > > > > > there is very little real benefit/affinity from inodes being "close" to > > > > > their data blocks. We gain far more benefit from keeping the inodes > > > > > together than spreading them out and keeping them close to the data blocks. > > > > > > > > What about removing it for ext3 as well ? I can prepare a patch. > > > OK, let's start warning the option is deprecated and will be removed from > > > ext3. We can remove it after 2-3 releases... > > > > Is that really necessary ? It is not like we are removing a feature which > > would not work anymore. > Well, we are removing a mount option so if someone is using it e.g. in > /etc/fstab, his machine will fail to mount the filesystem. Or did I > misunderstood your intention? It will not fail to boot, see the patch at the beginning of the thread. It will just print KERN_WARNING that we are ignoring this option. Since we are not removing a feature that will be missing, we can do that. Also note that this is the same thing what we have done to nobh option, and there are probably even other examples. Thanks! -Lukas > > Honza > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue 16-08-11 10:40:12, Lukas Czerner wrote: > On Mon, 15 Aug 2011, Jan Kara wrote: > > > On Mon 15-08-11 17:50:53, Lukas Czerner wrote: > > > On Mon, 15 Aug 2011, Jan Kara wrote: > > > > On Mon 15-08-11 16:21:27, Lukas Czerner wrote: > > > > > On Thu, 11 Aug 2011, Andreas Dilger wrote: > > > > > > On 2011-08-11, at 8:58 AM, Lukas Czerner wrote: > > > > > > > On Tue, 7 Jun 2011, Lukas Czerner wrote: > > > > > > >> For a long time now orlov is the default block allocator in the ext4. It > > > > > > >> performs better than the old one and no one seems to claim otherwise so > > > > > > >> we can safely drop it and make oldalloc and orlov mount option > > > > > > >> deprecated. > > > > > > >> > > > > > > >> This is a part of the effort to reduce number of ext4 options hence the > > > > > > >> test matrix. > > > > > > >> > > > > > > >> Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > > > > > > > > > > > > > ping > > > > > > > > > > > > I'm OK with removing this, I don't think anyone uses it, and it has almost > > > > > > no meaning with flex_bg anyway. > > > > > > > > > > > > That said, "orlov" is also mostly meaningless with flex_bg as well, since > > > > > > there is very little real benefit/affinity from inodes being "close" to > > > > > > their data blocks. We gain far more benefit from keeping the inodes > > > > > > together than spreading them out and keeping them close to the data blocks. > > > > > > > > > > What about removing it for ext3 as well ? I can prepare a patch. > > > > OK, let's start warning the option is deprecated and will be removed from > > > > ext3. We can remove it after 2-3 releases... > > > > > > Is that really necessary ? It is not like we are removing a feature which > > > would not work anymore. > > Well, we are removing a mount option so if someone is using it e.g. in > > /etc/fstab, his machine will fail to mount the filesystem. Or did I > > misunderstood your intention? > > It will not fail to boot, see the patch at the beginning of the thread. > It will just print KERN_WARNING that we are ignoring this option. Since > we are not removing a feature that will be missing, we can do that. Also > note that this is the same thing what we have done to nobh option, and > there are probably even other examples. Ah, OK. That would be fine I guess. I can take such patch. Honza
On Tue, 7 Jun 2011, Lukas Czerner wrote: > For a long time now orlov is the default block allocator in the ext4. It > performs better than the old one and no one seems to claim otherwise so > we can safely drop it and make oldalloc and orlov mount option > deprecated. > > This is a part of the effort to reduce number of ext4 options hence the > test matrix. ping > > Signed-off-by: Lukas Czerner <lczerner@redhat.com> > --- > Documentation/filesystems/ext4.txt | 8 -- > fs/ext4/ext4.h | 1 - > fs/ext4/ialloc.c | 136 +----------------------------------- > fs/ext4/super.c | 8 +- > 4 files changed, 7 insertions(+), 146 deletions(-) > > diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt > index 3ae9bc9..ec469fa 100644 > --- a/Documentation/filesystems/ext4.txt > +++ b/Documentation/filesystems/ext4.txt > @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum > table readahead algorithm will pre-read into > the buffer cache. The default value is 32 blocks. > > -orlov (*) This enables the new Orlov block allocator. It is > - enabled by default. > - > -oldalloc This disables the Orlov block allocator and enables > - the old block allocator. Orlov should have better > - performance - we'd like to get some feedback if it's > - the contrary for you. > - > user_xattr Enables Extended User Attributes. Additionally, you > need to have extended attribute support enabled in the > kernel configuration (CONFIG_EXT4_FS_XATTR). See the > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 1921392..7e0b8aa 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -884,7 +884,6 @@ struct ext4_inode_info { > /* > * Mount flags > */ > -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ > #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ > #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ > #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ > diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > index 21bb2f6..0b5ec23 100644 > --- a/fs/ext4/ialloc.c > +++ b/fs/ext4/ialloc.c > @@ -293,118 +293,6 @@ error_return: > ext4_std_error(sb, fatal); > } > > -/* > - * There are two policies for allocating an inode. If the new inode is > - * a directory, then a forward search is made for a block group with both > - * free space and a low directory-to-inode ratio; if that fails, then of > - * the groups with above-average free space, that group with the fewest > - * directories already is chosen. > - * > - * For other inodes, search forward from the parent directory\'s block > - * group to find a free inode. > - */ > -static int find_group_dir(struct super_block *sb, struct inode *parent, > - ext4_group_t *best_group) > -{ > - ext4_group_t ngroups = ext4_get_groups_count(sb); > - unsigned int freei, avefreei; > - struct ext4_group_desc *desc, *best_desc = NULL; > - ext4_group_t group; > - int ret = -1; > - > - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); > - avefreei = freei / ngroups; > - > - for (group = 0; group < ngroups; group++) { > - desc = ext4_get_group_desc(sb, group, NULL); > - if (!desc || !ext4_free_inodes_count(sb, desc)) > - continue; > - if (ext4_free_inodes_count(sb, desc) < avefreei) > - continue; > - if (!best_desc || > - (ext4_free_blks_count(sb, desc) > > - ext4_free_blks_count(sb, best_desc))) { > - *best_group = group; > - best_desc = desc; > - ret = 0; > - } > - } > - return ret; > -} > - > -#define free_block_ratio 10 > - > -static int find_group_flex(struct super_block *sb, struct inode *parent, > - ext4_group_t *best_group) > -{ > - struct ext4_sb_info *sbi = EXT4_SB(sb); > - struct ext4_group_desc *desc; > - struct flex_groups *flex_group = sbi->s_flex_groups; > - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; > - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); > - ext4_group_t ngroups = ext4_get_groups_count(sb); > - int flex_size = ext4_flex_bg_size(sbi); > - ext4_group_t best_flex = parent_fbg_group; > - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; > - int flexbg_free_blocks; > - int flex_freeb_ratio; > - ext4_group_t n_fbg_groups; > - ext4_group_t i; > - > - n_fbg_groups = (ngroups + flex_size - 1) >> > - sbi->s_log_groups_per_flex; > - > -find_close_to_parent: > - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); > - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > - if (atomic_read(&flex_group[best_flex].free_inodes) && > - flex_freeb_ratio > free_block_ratio) > - goto found_flexbg; > - > - if (best_flex && best_flex == parent_fbg_group) { > - best_flex--; > - goto find_close_to_parent; > - } > - > - for (i = 0; i < n_fbg_groups; i++) { > - if (i == parent_fbg_group || i == parent_fbg_group - 1) > - continue; > - > - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); > - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > - > - if (flex_freeb_ratio > free_block_ratio && > - (atomic_read(&flex_group[i].free_inodes))) { > - best_flex = i; > - goto found_flexbg; > - } > - > - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || > - ((atomic_read(&flex_group[i].free_blocks) > > - atomic_read(&flex_group[best_flex].free_blocks)) && > - atomic_read(&flex_group[i].free_inodes))) > - best_flex = i; > - } > - > - if (!atomic_read(&flex_group[best_flex].free_inodes) || > - !atomic_read(&flex_group[best_flex].free_blocks)) > - return -1; > - > -found_flexbg: > - for (i = best_flex * flex_size; i < ngroups && > - i < (best_flex + 1) * flex_size; i++) { > - desc = ext4_get_group_desc(sb, i, NULL); > - if (ext4_free_inodes_count(sb, desc)) { > - *best_group = i; > - goto out; > - } > - } > - > - return -1; > -out: > - return 0; > -} > - > struct orlov_stats { > __u32 free_inodes; > __u32 free_blocks; > @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > struct inode *ret; > ext4_group_t i; > int free = 0; > - static int once = 1; > ext4_group_t flex_group; > > /* Cannot create files in a deleted directory */ > @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > goto got_group; > } > > - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { > - ret2 = find_group_flex(sb, dir, &group); > - if (ret2 == -1) { > - ret2 = find_group_other(sb, dir, &group, mode); > - if (ret2 == 0 && once) { > - once = 0; > - printk(KERN_NOTICE "ext4: find_group_flex " > - "failed, fallback succeeded dir %lu\n", > - dir->i_ino); > - } > - } > - goto got_group; > - } > - > - if (S_ISDIR(mode)) { > - if (test_opt(sb, OLDALLOC)) > - ret2 = find_group_dir(sb, dir, &group); > - else > - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > - } else > + if (S_ISDIR(mode)) > + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > + else > ret2 = find_group_other(sb, dir, &group, mode); > > got_group: > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index cc5c157..e1f8f73 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) > seq_puts(seq, ",nouid32"); > if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) > seq_puts(seq, ",debug"); > - if (test_opt(sb, OLDALLOC)) > - seq_puts(seq, ",oldalloc"); > #ifdef CONFIG_EXT4_FS_XATTR > if (test_opt(sb, XATTR_USER)) > seq_puts(seq, ",user_xattr"); > @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, > set_opt(sb, DEBUG); > break; > case Opt_oldalloc: > - set_opt(sb, OLDALLOC); > + ext4_msg(sb, KERN_WARNING, > + "Ignoring deprecated oldalloc option"); > break; > case Opt_orlov: > - clear_opt(sb, OLDALLOC); > + ext4_msg(sb, KERN_WARNING, > + "Ignoring deprecated orlov option"); > break; > #ifdef CONFIG_EXT4_FS_XATTR > case Opt_user_xattr: >
On Thu, 1 Sep 2011, Lukas Czerner wrote: > On Tue, 7 Jun 2011, Lukas Czerner wrote: > > > For a long time now orlov is the default block allocator in the ext4. It > > performs better than the old one and no one seems to claim otherwise so > > we can safely drop it and make oldalloc and orlov mount option > > deprecated. > > > > This is a part of the effort to reduce number of ext4 options hence the > > test matrix. > > ping ping^2 > > > > > Signed-off-by: Lukas Czerner <lczerner@redhat.com> > > --- > > Documentation/filesystems/ext4.txt | 8 -- > > fs/ext4/ext4.h | 1 - > > fs/ext4/ialloc.c | 136 +----------------------------------- > > fs/ext4/super.c | 8 +- > > 4 files changed, 7 insertions(+), 146 deletions(-) > > > > diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt > > index 3ae9bc9..ec469fa 100644 > > --- a/Documentation/filesystems/ext4.txt > > +++ b/Documentation/filesystems/ext4.txt > > @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum > > table readahead algorithm will pre-read into > > the buffer cache. The default value is 32 blocks. > > > > -orlov (*) This enables the new Orlov block allocator. It is > > - enabled by default. > > - > > -oldalloc This disables the Orlov block allocator and enables > > - the old block allocator. Orlov should have better > > - performance - we'd like to get some feedback if it's > > - the contrary for you. > > - > > user_xattr Enables Extended User Attributes. Additionally, you > > need to have extended attribute support enabled in the > > kernel configuration (CONFIG_EXT4_FS_XATTR). See the > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > > index 1921392..7e0b8aa 100644 > > --- a/fs/ext4/ext4.h > > +++ b/fs/ext4/ext4.h > > @@ -884,7 +884,6 @@ struct ext4_inode_info { > > /* > > * Mount flags > > */ > > -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ > > #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ > > #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ > > #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ > > diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > > index 21bb2f6..0b5ec23 100644 > > --- a/fs/ext4/ialloc.c > > +++ b/fs/ext4/ialloc.c > > @@ -293,118 +293,6 @@ error_return: > > ext4_std_error(sb, fatal); > > } > > > > -/* > > - * There are two policies for allocating an inode. If the new inode is > > - * a directory, then a forward search is made for a block group with both > > - * free space and a low directory-to-inode ratio; if that fails, then of > > - * the groups with above-average free space, that group with the fewest > > - * directories already is chosen. > > - * > > - * For other inodes, search forward from the parent directory\'s block > > - * group to find a free inode. > > - */ > > -static int find_group_dir(struct super_block *sb, struct inode *parent, > > - ext4_group_t *best_group) > > -{ > > - ext4_group_t ngroups = ext4_get_groups_count(sb); > > - unsigned int freei, avefreei; > > - struct ext4_group_desc *desc, *best_desc = NULL; > > - ext4_group_t group; > > - int ret = -1; > > - > > - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); > > - avefreei = freei / ngroups; > > - > > - for (group = 0; group < ngroups; group++) { > > - desc = ext4_get_group_desc(sb, group, NULL); > > - if (!desc || !ext4_free_inodes_count(sb, desc)) > > - continue; > > - if (ext4_free_inodes_count(sb, desc) < avefreei) > > - continue; > > - if (!best_desc || > > - (ext4_free_blks_count(sb, desc) > > > - ext4_free_blks_count(sb, best_desc))) { > > - *best_group = group; > > - best_desc = desc; > > - ret = 0; > > - } > > - } > > - return ret; > > -} > > - > > -#define free_block_ratio 10 > > - > > -static int find_group_flex(struct super_block *sb, struct inode *parent, > > - ext4_group_t *best_group) > > -{ > > - struct ext4_sb_info *sbi = EXT4_SB(sb); > > - struct ext4_group_desc *desc; > > - struct flex_groups *flex_group = sbi->s_flex_groups; > > - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; > > - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); > > - ext4_group_t ngroups = ext4_get_groups_count(sb); > > - int flex_size = ext4_flex_bg_size(sbi); > > - ext4_group_t best_flex = parent_fbg_group; > > - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; > > - int flexbg_free_blocks; > > - int flex_freeb_ratio; > > - ext4_group_t n_fbg_groups; > > - ext4_group_t i; > > - > > - n_fbg_groups = (ngroups + flex_size - 1) >> > > - sbi->s_log_groups_per_flex; > > - > > -find_close_to_parent: > > - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); > > - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > > - if (atomic_read(&flex_group[best_flex].free_inodes) && > > - flex_freeb_ratio > free_block_ratio) > > - goto found_flexbg; > > - > > - if (best_flex && best_flex == parent_fbg_group) { > > - best_flex--; > > - goto find_close_to_parent; > > - } > > - > > - for (i = 0; i < n_fbg_groups; i++) { > > - if (i == parent_fbg_group || i == parent_fbg_group - 1) > > - continue; > > - > > - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); > > - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > > - > > - if (flex_freeb_ratio > free_block_ratio && > > - (atomic_read(&flex_group[i].free_inodes))) { > > - best_flex = i; > > - goto found_flexbg; > > - } > > - > > - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || > > - ((atomic_read(&flex_group[i].free_blocks) > > > - atomic_read(&flex_group[best_flex].free_blocks)) && > > - atomic_read(&flex_group[i].free_inodes))) > > - best_flex = i; > > - } > > - > > - if (!atomic_read(&flex_group[best_flex].free_inodes) || > > - !atomic_read(&flex_group[best_flex].free_blocks)) > > - return -1; > > - > > -found_flexbg: > > - for (i = best_flex * flex_size; i < ngroups && > > - i < (best_flex + 1) * flex_size; i++) { > > - desc = ext4_get_group_desc(sb, i, NULL); > > - if (ext4_free_inodes_count(sb, desc)) { > > - *best_group = i; > > - goto out; > > - } > > - } > > - > > - return -1; > > -out: > > - return 0; > > -} > > - > > struct orlov_stats { > > __u32 free_inodes; > > __u32 free_blocks; > > @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > > struct inode *ret; > > ext4_group_t i; > > int free = 0; > > - static int once = 1; > > ext4_group_t flex_group; > > > > /* Cannot create files in a deleted directory */ > > @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > > goto got_group; > > } > > > > - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { > > - ret2 = find_group_flex(sb, dir, &group); > > - if (ret2 == -1) { > > - ret2 = find_group_other(sb, dir, &group, mode); > > - if (ret2 == 0 && once) { > > - once = 0; > > - printk(KERN_NOTICE "ext4: find_group_flex " > > - "failed, fallback succeeded dir %lu\n", > > - dir->i_ino); > > - } > > - } > > - goto got_group; > > - } > > - > > - if (S_ISDIR(mode)) { > > - if (test_opt(sb, OLDALLOC)) > > - ret2 = find_group_dir(sb, dir, &group); > > - else > > - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > > - } else > > + if (S_ISDIR(mode)) > > + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > > + else > > ret2 = find_group_other(sb, dir, &group, mode); > > > > got_group: > > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > > index cc5c157..e1f8f73 100644 > > --- a/fs/ext4/super.c > > +++ b/fs/ext4/super.c > > @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) > > seq_puts(seq, ",nouid32"); > > if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) > > seq_puts(seq, ",debug"); > > - if (test_opt(sb, OLDALLOC)) > > - seq_puts(seq, ",oldalloc"); > > #ifdef CONFIG_EXT4_FS_XATTR > > if (test_opt(sb, XATTR_USER)) > > seq_puts(seq, ",user_xattr"); > > @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, > > set_opt(sb, DEBUG); > > break; > > case Opt_oldalloc: > > - set_opt(sb, OLDALLOC); > > + ext4_msg(sb, KERN_WARNING, > > + "Ignoring deprecated oldalloc option"); > > break; > > case Opt_orlov: > > - clear_opt(sb, OLDALLOC); > > + ext4_msg(sb, KERN_WARNING, > > + "Ignoring deprecated orlov option"); > > break; > > #ifdef CONFIG_EXT4_FS_XATTR > > case Opt_user_xattr: > > > >
On Thu, Aug 11, 2011 at 03:05:55PM -0600, Andreas Dilger wrote: > > That said, "orlov" is also mostly meaningless with flex_bg as well, since > there is very little real benefit/affinity from inodes being "close" to > their data blocks. We gain far more benefit from keeping the inodes > together than spreading them out and keeping them close to the data blocks. I'm going to be applying the patch remove the oldalloc allocator, but just for the record --- Orlov *does* matter (which is why it's a good thing the default allocator also uses an Orlov-like approach that's flex_bg aware). In fact, we saw significant latency drops that grew over time as more and more blocks got allocated. I traced it back to using bigalloc, which increased the average distance between the inode tables and the data blocks. So as you use larger bigalloc cluster sizes, which increases the block group size, it's a good idea to decrease the flex_bg size. Which is another way of saying that Orlov matters. - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 3ae9bc9..ec469fa 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum table readahead algorithm will pre-read into the buffer cache. The default value is 32 blocks. -orlov (*) This enables the new Orlov block allocator. It is - enabled by default. - -oldalloc This disables the Orlov block allocator and enables - the old block allocator. Orlov should have better - performance - we'd like to get some feedback if it's - the contrary for you. - user_xattr Enables Extended User Attributes. Additionally, you need to have extended attribute support enabled in the kernel configuration (CONFIG_EXT4_FS_XATTR). See the diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1921392..7e0b8aa 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -884,7 +884,6 @@ struct ext4_inode_info { /* * Mount flags */ -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 21bb2f6..0b5ec23 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -293,118 +293,6 @@ error_return: ext4_std_error(sb, fatal); } -/* - * There are two policies for allocating an inode. If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of - * the groups with above-average free space, that group with the fewest - * directories already is chosen. - * - * For other inodes, search forward from the parent directory\'s block - * group to find a free inode. - */ -static int find_group_dir(struct super_block *sb, struct inode *parent, - ext4_group_t *best_group) -{ - ext4_group_t ngroups = ext4_get_groups_count(sb); - unsigned int freei, avefreei; - struct ext4_group_desc *desc, *best_desc = NULL; - ext4_group_t group; - int ret = -1; - - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); - avefreei = freei / ngroups; - - for (group = 0; group < ngroups; group++) { - desc = ext4_get_group_desc(sb, group, NULL); - if (!desc || !ext4_free_inodes_count(sb, desc)) - continue; - if (ext4_free_inodes_count(sb, desc) < avefreei) - continue; - if (!best_desc || - (ext4_free_blks_count(sb, desc) > - ext4_free_blks_count(sb, best_desc))) { - *best_group = group; - best_desc = desc; - ret = 0; - } - } - return ret; -} - -#define free_block_ratio 10 - -static int find_group_flex(struct super_block *sb, struct inode *parent, - ext4_group_t *best_group) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_group_desc *desc; - struct flex_groups *flex_group = sbi->s_flex_groups; - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); - ext4_group_t ngroups = ext4_get_groups_count(sb); - int flex_size = ext4_flex_bg_size(sbi); - ext4_group_t best_flex = parent_fbg_group; - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; - int flexbg_free_blocks; - int flex_freeb_ratio; - ext4_group_t n_fbg_groups; - ext4_group_t i; - - n_fbg_groups = (ngroups + flex_size - 1) >> - sbi->s_log_groups_per_flex; - -find_close_to_parent: - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; - if (atomic_read(&flex_group[best_flex].free_inodes) && - flex_freeb_ratio > free_block_ratio) - goto found_flexbg; - - if (best_flex && best_flex == parent_fbg_group) { - best_flex--; - goto find_close_to_parent; - } - - for (i = 0; i < n_fbg_groups; i++) { - if (i == parent_fbg_group || i == parent_fbg_group - 1) - continue; - - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; - - if (flex_freeb_ratio > free_block_ratio && - (atomic_read(&flex_group[i].free_inodes))) { - best_flex = i; - goto found_flexbg; - } - - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || - ((atomic_read(&flex_group[i].free_blocks) > - atomic_read(&flex_group[best_flex].free_blocks)) && - atomic_read(&flex_group[i].free_inodes))) - best_flex = i; - } - - if (!atomic_read(&flex_group[best_flex].free_inodes) || - !atomic_read(&flex_group[best_flex].free_blocks)) - return -1; - -found_flexbg: - for (i = best_flex * flex_size; i < ngroups && - i < (best_flex + 1) * flex_size; i++) { - desc = ext4_get_group_desc(sb, i, NULL); - if (ext4_free_inodes_count(sb, desc)) { - *best_group = i; - goto out; - } - } - - return -1; -out: - return 0; -} - struct orlov_stats { __u32 free_inodes; __u32 free_blocks; @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, struct inode *ret; ext4_group_t i; int free = 0; - static int once = 1; ext4_group_t flex_group; /* Cannot create files in a deleted directory */ @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, goto got_group; } - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { - ret2 = find_group_flex(sb, dir, &group); - if (ret2 == -1) { - ret2 = find_group_other(sb, dir, &group, mode); - if (ret2 == 0 && once) { - once = 0; - printk(KERN_NOTICE "ext4: find_group_flex " - "failed, fallback succeeded dir %lu\n", - dir->i_ino); - } - } - goto got_group; - } - - if (S_ISDIR(mode)) { - if (test_opt(sb, OLDALLOC)) - ret2 = find_group_dir(sb, dir, &group); - else - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); - } else + if (S_ISDIR(mode)) + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); + else ret2 = find_group_other(sb, dir, &group, mode); got_group: diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cc5c157..e1f8f73 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",nouid32"); if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) seq_puts(seq, ",debug"); - if (test_opt(sb, OLDALLOC)) - seq_puts(seq, ",oldalloc"); #ifdef CONFIG_EXT4_FS_XATTR if (test_opt(sb, XATTR_USER)) seq_puts(seq, ",user_xattr"); @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, set_opt(sb, DEBUG); break; case Opt_oldalloc: - set_opt(sb, OLDALLOC); + ext4_msg(sb, KERN_WARNING, + "Ignoring deprecated oldalloc option"); break; case Opt_orlov: - clear_opt(sb, OLDALLOC); + ext4_msg(sb, KERN_WARNING, + "Ignoring deprecated orlov option"); break; #ifdef CONFIG_EXT4_FS_XATTR case Opt_user_xattr:
For a long time now orlov is the default block allocator in the ext4. It performs better than the old one and no one seems to claim otherwise so we can safely drop it and make oldalloc and orlov mount option deprecated. This is a part of the effort to reduce number of ext4 options hence the test matrix. Signed-off-by: Lukas Czerner <lczerner@redhat.com> --- Documentation/filesystems/ext4.txt | 8 -- fs/ext4/ext4.h | 1 - fs/ext4/ialloc.c | 136 +----------------------------------- fs/ext4/super.c | 8 +- 4 files changed, 7 insertions(+), 146 deletions(-)