Message ID | 20130620164249.GC4982@thunk.org |
---|---|
State | New, archived |
Headers | show |
Hi Ted, On Thu, Jun 20, 2013 at 12:42:49PM -0400, Theodore Ts'o wrote: > I've been carrying a patch in the unstable portion of the patch series > for a while now to debug problems with delayed allocation. This > allows us to observe the state of which inodes have inodes subject for > delayed allocation, and how many data/metadata blocks have been > reserved. > > I've finally cleaned it up enough that it's something where I wouldn't > feel terrible dropping it into the mainline kernel. (It's still a > little gross, but it's not truly horrifying any more.) > > What do people think? Is this something that's worth having in the > kernel sources? Or shall I continue carrying it as an out-of-tree > debugging patch? I think it is worth having it in the kernel source. But before we apply this patch, it seems that we need to solve some problems. 1. Now when we read /proc/fs/ext4/{$DEV}/delalloc_debug, it will print the result in console. IMHO, I don't think it is a good choice. I prefer to print this result in debugfs or in sysfs. 2. If we want to gain this feature, we will enable EXT4_DEBUG option. But in a product system, we never enable it because of performance degradation. So I think that maybe we can compile it without EXT4_DEBUG option and dynamically enable/disalbe it. 3. Maybe we can provide a interface to let the user indicate which inode they want to observe. Finally, the patch itself still has two minor problems. We forget to call remove_proc_entry() in ext4_put_super(). Another problem is compile warnings. > > (Note: we can use similar technique to gain visibility into the status > the extent status LRU list.) I am happy to generate a patch for extent status LRU list. > > - Ted > > From f6417debc1c96a9dfa6b9f19da14eff35bf0f504 Mon Sep 17 00:00:00 2001 > From: Theodore Ts'o <tytso@mit.edu> > Date: Thu, 20 Jun 2013 12:35:39 -0400 > Subject: [PATCH] ext4: add delalloc debugging > > This adds a file in /proc/fs/ext4/<dev> which when opened for reading, > will trigger debugging code that dumps a lot of information about > inodes subject to delayed allocation to the console. > > Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> > --- > fs/ext4/super.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 77 insertions(+), 1 deletion(-) > > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index 85b3dd6..ecb8256 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -1832,6 +1832,74 @@ static const struct file_operations ext4_seq_options_fops = { > .release = single_release, > }; > > +#ifdef CONFIG_EXT4_DEBUG #ifndef MODULE > +static void print_inode_delalloc_info(struct inode *inode) > +{ > + if (!EXT4_I(inode)->i_reserved_data_blocks || > + !EXT4_I(inode)->i_reserved_meta_blocks) > + return; > + > + printk(KERN_DEBUG "ino %lu: %u %u\n", inode->i_ino, > + EXT4_I(inode)->i_reserved_data_blocks, > + EXT4_I(inode)->i_reserved_meta_blocks); > +} #endif > + > +static int debug_delalloc_show(struct seq_file *seq, void *offset) > +{ > + return 0; > +} > + > +static int options_delalloc_debug_open_fs(struct inode *proc_inode, > + struct file *file) > +{ > + struct super_block *sb = PDE_DATA(proc_inode); > + struct ext4_sb_info *sbi = EXT4_SB(sb); #ifndef MODULE > + struct inode *inode; > + extern spinlock_t inode_sb_list_lock; #endif Regards, - Zheng > + > + printk(KERN_DEBUG "EXT4-fs debug delalloc of %s\n", sb->s_id); > + printk(KERN_DEBUG "EXT4-fs: dirty clusters %lld free clusters %lld\n", > + percpu_counter_sum(&sbi->s_dirtyclusters_counter), > + percpu_counter_sum(&sbi->s_freeclusters_counter)); > + > +#ifndef MODULE > + spin_lock(&inode_sb_list_lock); > + if (!list_empty(&sb->s_bdi->wb.b_dirty)) { > + printk(KERN_DEBUG "s_bdi->wb.b_dirty list:\n"); > + list_for_each_entry(inode, &sb->s_bdi->wb.b_dirty, > + i_wb_list) { > + print_inode_delalloc_info(inode); > + } > + } > + if (!list_empty(&sb->s_bdi->wb.b_io)) { > + printk(KERN_DEBUG "s_bdi->wb.b_io list:\n"); > + list_for_each_entry(inode, &sb->s_bdi->wb.b_io, > + i_wb_list) { > + print_inode_delalloc_info(inode); > + } > + } > + if (!list_empty(&sb->s_bdi->wb.b_more_io)) { > + printk(KERN_DEBUG "s_bdi->wb.b_more_io list:\n"); > + list_for_each_entry(inode, &sb->s_bdi->wb.b_more_io, > + i_wb_list) { > + print_inode_delalloc_info(inode); > + } > + } > + spin_unlock(&inode_sb_list_lock); > + printk(KERN_DEBUG "ext4 debug delalloc done\n"); > +#endif > + return single_open(file, debug_delalloc_show, sb); > +} > + > +static const struct file_operations ext4_seq_delalloc_debug_fops = { > + .owner = THIS_MODULE, > + .open = options_delalloc_debug_open_fs, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = single_release, > +}; > +#endif > + > static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, > int read_only) > { > @@ -3764,9 +3832,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) > if (ext4_proc_root) > sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); > > - if (sbi->s_proc) > + if (sbi->s_proc) { > proc_create_data("options", S_IRUGO, sbi->s_proc, > &ext4_seq_options_fops, sb); > +#ifdef CONFIG_EXT4_DEBUG > + proc_create_data("delalloc_debug", S_IRUSR, sbi->s_proc, > + &ext4_seq_delalloc_debug_fops, sb); > +#endif > + } > > bgl_lock_init(sbi->s_blockgroup_lock); > > @@ -4149,6 +4222,9 @@ failed_mount: > crypto_free_shash(sbi->s_chksum_driver); > if (sbi->s_proc) { > remove_proc_entry("options", sbi->s_proc); > +#ifdef CONFIG_EXT4_DEBUG > + remove_proc_entry("delalloc_debug", sbi->s_proc); > +#endif > remove_proc_entry(sb->s_id, ext4_proc_root); > } > #ifdef CONFIG_QUOTA > -- > 1.7.12.rc0.22.gcdd159b > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 25 Jun 2013, Zheng Liu wrote: > Date: Tue, 25 Jun 2013 11:50:03 +0800 > From: Zheng Liu <gnehzuil.liu@gmail.com> > To: Theodore Ts'o <tytso@mit.edu> > Cc: linux-ext4@vger.kernel.org > Subject: Re: [PATCH FOR DISCUSSION] add delalloc debugging > > Hi Ted, > > On Thu, Jun 20, 2013 at 12:42:49PM -0400, Theodore Ts'o wrote: > > I've been carrying a patch in the unstable portion of the patch series > > for a while now to debug problems with delayed allocation. This > > allows us to observe the state of which inodes have inodes subject for > > delayed allocation, and how many data/metadata blocks have been > > reserved. > > > > I've finally cleaned it up enough that it's something where I wouldn't > > feel terrible dropping it into the mainline kernel. (It's still a > > little gross, but it's not truly horrifying any more.) > > > > What do people think? Is this something that's worth having in the > > kernel sources? Or shall I continue carrying it as an out-of-tree > > debugging patch? > > I think it is worth having it in the kernel source. But before we apply > this patch, it seems that we need to solve some problems. Yes, I very much like this patch. It's worth having and it can yield important information. > > 1. Now when we read /proc/fs/ext4/{$DEV}/delalloc_debug, it will print > the result in console. IMHO, I don't think it is a good choice. I > prefer to print this result in debugfs or in sysfs. I would prefer to print this info in debug_delalloc_show() so we can simply read the file instead of read the file + read the log. > > 2. If we want to gain this feature, we will enable EXT4_DEBUG option. > But in a product system, we never enable it because of performance > degradation. So I think that maybe we can compile it without EXT4_DEBUG > option and dynamically enable/disalbe it. I think that having this enabled only is EXT4_DEBUG is set is just fine. However if you want to be able to use it even without EXT4_DEBUG option we could just set proper permissions on the /proc/fs/ext4/{$DEV}/delalloc_debug so that only privileged user can open it. > > 3. Maybe we can provide a interface to let the user indicate which inode > they want to observe. you can grep for it, I do not think it's a big deal. > > Finally, the patch itself still has two minor problems. We forget > to call remove_proc_entry() in ext4_put_super(). Another problem is > compile warnings. > > > > > (Note: we can use similar technique to gain visibility into the status > > the extent status LRU list.) > > I am happy to generate a patch for extent status LRU list. > > > > > - Ted > > > > From f6417debc1c96a9dfa6b9f19da14eff35bf0f504 Mon Sep 17 00:00:00 2001 > > From: Theodore Ts'o <tytso@mit.edu> > > Date: Thu, 20 Jun 2013 12:35:39 -0400 > > Subject: [PATCH] ext4: add delalloc debugging > > > > This adds a file in /proc/fs/ext4/<dev> which when opened for reading, > > will trigger debugging code that dumps a lot of information about > > inodes subject to delayed allocation to the console. > > > > Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> > > --- > > fs/ext4/super.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 77 insertions(+), 1 deletion(-) > > > > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > > index 85b3dd6..ecb8256 100644 > > --- a/fs/ext4/super.c > > +++ b/fs/ext4/super.c > > @@ -1832,6 +1832,74 @@ static const struct file_operations ext4_seq_options_fops = { > > .release = single_release, > > }; > > > > +#ifdef CONFIG_EXT4_DEBUG > #ifndef MODULE > > +static void print_inode_delalloc_info(struct inode *inode) > > +{ > > + if (!EXT4_I(inode)->i_reserved_data_blocks || > > + !EXT4_I(inode)->i_reserved_meta_blocks) > > + return; > > + > > + printk(KERN_DEBUG "ino %lu: %u %u\n", inode->i_ino, > > + EXT4_I(inode)->i_reserved_data_blocks, > > + EXT4_I(inode)->i_reserved_meta_blocks); > > +} > #endif > > + > > +static int debug_delalloc_show(struct seq_file *seq, void *offset) > > +{ > > + return 0; > > +} > > + > > +static int options_delalloc_debug_open_fs(struct inode *proc_inode, > > + struct file *file) > > +{ > > + struct super_block *sb = PDE_DATA(proc_inode); > > + struct ext4_sb_info *sbi = EXT4_SB(sb); > #ifndef MODULE > > + struct inode *inode; > > + extern spinlock_t inode_sb_list_lock; > #endif > > Regards, > - Zheng > > > + > > + printk(KERN_DEBUG "EXT4-fs debug delalloc of %s\n", sb->s_id); > > + printk(KERN_DEBUG "EXT4-fs: dirty clusters %lld free clusters %lld\n", > > + percpu_counter_sum(&sbi->s_dirtyclusters_counter), > > + percpu_counter_sum(&sbi->s_freeclusters_counter)); > > + > > +#ifndef MODULE > > + spin_lock(&inode_sb_list_lock); > > + if (!list_empty(&sb->s_bdi->wb.b_dirty)) { > > + printk(KERN_DEBUG "s_bdi->wb.b_dirty list:\n"); > > + list_for_each_entry(inode, &sb->s_bdi->wb.b_dirty, > > + i_wb_list) { > > + print_inode_delalloc_info(inode); > > + } > > + } > > + if (!list_empty(&sb->s_bdi->wb.b_io)) { > > + printk(KERN_DEBUG "s_bdi->wb.b_io list:\n"); > > + list_for_each_entry(inode, &sb->s_bdi->wb.b_io, > > + i_wb_list) { > > + print_inode_delalloc_info(inode); > > + } > > + } > > + if (!list_empty(&sb->s_bdi->wb.b_more_io)) { > > + printk(KERN_DEBUG "s_bdi->wb.b_more_io list:\n"); > > + list_for_each_entry(inode, &sb->s_bdi->wb.b_more_io, > > + i_wb_list) { > > + print_inode_delalloc_info(inode); > > + } > > + } > > + spin_unlock(&inode_sb_list_lock); > > + printk(KERN_DEBUG "ext4 debug delalloc done\n"); > > +#endif > > + return single_open(file, debug_delalloc_show, sb); > > +} > > + > > +static const struct file_operations ext4_seq_delalloc_debug_fops = { > > + .owner = THIS_MODULE, > > + .open = options_delalloc_debug_open_fs, > > + .read = seq_read, > > + .llseek = seq_lseek, > > + .release = single_release, > > +}; > > +#endif > > + > > static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, > > int read_only) > > { > > @@ -3764,9 +3832,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) > > if (ext4_proc_root) > > sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); > > > > - if (sbi->s_proc) > > + if (sbi->s_proc) { > > proc_create_data("options", S_IRUGO, sbi->s_proc, > > &ext4_seq_options_fops, sb); > > +#ifdef CONFIG_EXT4_DEBUG > > + proc_create_data("delalloc_debug", S_IRUSR, sbi->s_proc, > > + &ext4_seq_delalloc_debug_fops, sb); > > +#endif > > + } > > > > bgl_lock_init(sbi->s_blockgroup_lock); > > > > @@ -4149,6 +4222,9 @@ failed_mount: > > crypto_free_shash(sbi->s_chksum_driver); > > if (sbi->s_proc) { > > remove_proc_entry("options", sbi->s_proc); > > +#ifdef CONFIG_EXT4_DEBUG > > + remove_proc_entry("delalloc_debug", sbi->s_proc); > > +#endif > > remove_proc_entry(sb->s_id, ext4_proc_root); > > } > > #ifdef CONFIG_QUOTA > > -- > > 1.7.12.rc0.22.gcdd159b > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jun 20, 2013 at 12:42:49PM -0400, Theodore Ts'o wrote: > I've been carrying a patch in the unstable portion of the patch series > for a while now to debug problems with delayed allocation. This > allows us to observe the state of which inodes have inodes subject for > delayed allocation, and how many data/metadata blocks have been > reserved. > > I've finally cleaned it up enough that it's something where I wouldn't > feel terrible dropping it into the mainline kernel. (It's still a > little gross, but it's not truly horrifying any more.) > > What do people think? Is this something that's worth having in the > kernel sources? Or shall I continue carrying it as an out-of-tree > debugging patch? > > (Note: we can use similar technique to gain visibility into the status > the extent status LRU list.) > > - Ted > > From f6417debc1c96a9dfa6b9f19da14eff35bf0f504 Mon Sep 17 00:00:00 2001 > From: Theodore Ts'o <tytso@mit.edu> > Date: Thu, 20 Jun 2013 12:35:39 -0400 > Subject: [PATCH] ext4: add delalloc debugging > > This adds a file in /proc/fs/ext4/<dev> which when opened for reading, > will trigger debugging code that dumps a lot of information about > inodes subject to delayed allocation to the console. > > Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> ..... > + > +#ifndef MODULE > + spin_lock(&inode_sb_list_lock); > + if (!list_empty(&sb->s_bdi->wb.b_dirty)) { NACK. Do not use VFS level locks and list walks in filesystem code, not even in debugging code. It's ok for you to do this in private patches and maintain such abuses yourself, but it's not acceptible for merging into the mainline tree... Besides, you haven't even used the correct lock - these bdi writeback lists are protected by the bdi writeback list lock, not the superblock inode list lock.... Cheers, Dave.
On Wed, Jun 26, 2013 at 02:31:14PM +0200, Lukáš Czerner wrote: > > 1. Now when we read /proc/fs/ext4/{$DEV}/delalloc_debug, it will print > > the result in console. IMHO, I don't think it is a good choice. I > > prefer to print this result in debugfs or in sysfs. > > I would prefer to print this info in debug_delalloc_show() so we can > simply read the file instead of read the file + read the log. This turns out to be tricky because we would need to grab all of this information and stash it in kernel allocated memory, since otherwise it might change out from under us if we need to read the information across multiple read requests to the /proc file. > I think that having this enabled only is EXT4_DEBUG is set is just > fine. However if you want to be able to use it even without EXT4_DEBUG > option we could just set proper permissions on the > /proc/fs/ext4/{$DEV}/delalloc_debug so that only privileged user can > open it. The file is mode 400 readable by root already. > > 3. Maybe we can provide a interface to let the user indicate which inode > > they want to observe. > > you can grep for it, I do not think it's a big deal. If we wanted to release the delalloc information for an inode, that's actually easier; we could do this via an ioctl. > > Finally, the patch itself still has two minor problems. We forget > > to call remove_proc_entry() in ext4_put_super(). Another problem is > > compile warnings. This were alrady fixed in the version which is in the ext4 patch queue. I noticed those issues shortly after I sent out the patch for discussion. - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jun 26, 2013 at 11:18:23PM +1000, Dave Chinner wrote: > Do not use VFS level locks and list walks in filesystem code, > not even in debugging code. It's ok for you to do this in private > patches and maintain such abuses yourself, but it's not acceptible > for merging into the mainline tree... Well, that's why I wasn't proposing that the code be enabled by default, and why I described it as "a little gross". > Besides, you haven't even used the correct lock - these bdi > writeback lists are protected by the bdi writeback list lock, not > the superblock inode list lock.... Thanks for pointing that out. I'll fix that. Cheers, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 85b3dd6..ecb8256 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1832,6 +1832,74 @@ static const struct file_operations ext4_seq_options_fops = { .release = single_release, }; +#ifdef CONFIG_EXT4_DEBUG +static void print_inode_delalloc_info(struct inode *inode) +{ + if (!EXT4_I(inode)->i_reserved_data_blocks || + !EXT4_I(inode)->i_reserved_meta_blocks) + return; + + printk(KERN_DEBUG "ino %lu: %u %u\n", inode->i_ino, + EXT4_I(inode)->i_reserved_data_blocks, + EXT4_I(inode)->i_reserved_meta_blocks); +} + +static int debug_delalloc_show(struct seq_file *seq, void *offset) +{ + return 0; +} + +static int options_delalloc_debug_open_fs(struct inode *proc_inode, + struct file *file) +{ + struct super_block *sb = PDE_DATA(proc_inode); + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct inode *inode; + extern spinlock_t inode_sb_list_lock; + + printk(KERN_DEBUG "EXT4-fs debug delalloc of %s\n", sb->s_id); + printk(KERN_DEBUG "EXT4-fs: dirty clusters %lld free clusters %lld\n", + percpu_counter_sum(&sbi->s_dirtyclusters_counter), + percpu_counter_sum(&sbi->s_freeclusters_counter)); + +#ifndef MODULE + spin_lock(&inode_sb_list_lock); + if (!list_empty(&sb->s_bdi->wb.b_dirty)) { + printk(KERN_DEBUG "s_bdi->wb.b_dirty list:\n"); + list_for_each_entry(inode, &sb->s_bdi->wb.b_dirty, + i_wb_list) { + print_inode_delalloc_info(inode); + } + } + if (!list_empty(&sb->s_bdi->wb.b_io)) { + printk(KERN_DEBUG "s_bdi->wb.b_io list:\n"); + list_for_each_entry(inode, &sb->s_bdi->wb.b_io, + i_wb_list) { + print_inode_delalloc_info(inode); + } + } + if (!list_empty(&sb->s_bdi->wb.b_more_io)) { + printk(KERN_DEBUG "s_bdi->wb.b_more_io list:\n"); + list_for_each_entry(inode, &sb->s_bdi->wb.b_more_io, + i_wb_list) { + print_inode_delalloc_info(inode); + } + } + spin_unlock(&inode_sb_list_lock); + printk(KERN_DEBUG "ext4 debug delalloc done\n"); +#endif + return single_open(file, debug_delalloc_show, sb); +} + +static const struct file_operations ext4_seq_delalloc_debug_fops = { + .owner = THIS_MODULE, + .open = options_delalloc_debug_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int read_only) { @@ -3764,9 +3832,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (ext4_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); - if (sbi->s_proc) + if (sbi->s_proc) { proc_create_data("options", S_IRUGO, sbi->s_proc, &ext4_seq_options_fops, sb); +#ifdef CONFIG_EXT4_DEBUG + proc_create_data("delalloc_debug", S_IRUSR, sbi->s_proc, + &ext4_seq_delalloc_debug_fops, sb); +#endif + } bgl_lock_init(sbi->s_blockgroup_lock); @@ -4149,6 +4222,9 @@ failed_mount: crypto_free_shash(sbi->s_chksum_driver); if (sbi->s_proc) { remove_proc_entry("options", sbi->s_proc); +#ifdef CONFIG_EXT4_DEBUG + remove_proc_entry("delalloc_debug", sbi->s_proc); +#endif remove_proc_entry(sb->s_id, ext4_proc_root); } #ifdef CONFIG_QUOTA