Patchwork Regular ext4 error warning with HD in USB dock

login
register
mail settings
Submitter Amir Goldstein
Date Jan. 2, 2011, 7:23 p.m.
Message ID <AANLkTimAe-p1-ApmnueybQY474J3_G9JL+8WJ2WTn+04@mail.gmail.com>
Download mbox | patch
Permalink /patch/77197/
State New
Headers show

Comments

Amir Goldstein - Jan. 2, 2011, 7:23 p.m.
On Sat, Jan 1, 2011 at 7:20 PM, Ric Wheeler <rwheeler@redhat.com> wrote:
> On 12/28/2010 05:30 AM, Amir Goldstein wrote:
>>
>> On Tue, Dec 28, 2010 at 10:19 AM, Rogier Wolff<R.E.Wolff@bitwizard.nl>
>>  wrote:
>>>
>>> On Mon, Dec 27, 2010 at 09:53:43PM -0500, Ted Ts'o wrote:
>>>>
>>>> On Tue, Dec 28, 2010 at 09:53:45AM +1100, Con Kolivas wrote:
>>>>>
>>>>> [1048401.773270] EXT4-fs (sde8): mounted filesystem with writeback data
>>>>> mode.
>>>>> Opts: (null)
>>>>> [1048702.736011] EXT4-fs (sde8): error count: 3
>>>>> [1048702.736016] EXT4-fs (sde8): initial error at 1289053677:
>>>>> ext4_journal_start_sb:251
>>>>> [1048702.736018] EXT4-fs (sde8): last error at 1289080948:
>>>>> ext4_put_super:719
>>>>
>>>> That's actually not an error.  It's a report which is generated every
>>>> 24 hours, indicating that there has been 3 errors since the last time
>>>> the error count has been cleared, with the first error taking place at
>>>> Sat Nov 6 10:27:57 2010 (US/Eastern) in the function
>>>> ext4_journal_start_sb(), at line 251, and the most recent error taking
>>>> place at Sat Nov 6 18:02:28 2010 (US/Eastern), in the function
>>>> ext4_put_super() at line 719.  This is a new feature which was added
>>>> in 2.6.36.
>>>
>>> Nice. But the issue you're not mentioning is: What errors could have
>>> happened on the 6th of november? Should Con worry about those errors?
>>>
>> Ted,
>>
>> I would like to use this opportunity to remind you about my
>> record_journal_errstr()
>> implementation, see:
>>
>> https://github.com/amir73il/ext4-snapshots/blob/next3-stable/fs/next3/super.c#L157
>>
>> It records the initial errors messages (which I found to be the most
>> interesting),
>> in a message buffer on the unused space after the journal super block
>> (3K on a 4K block fs).
>>
>> fsck prints out those messages and clears the buffer.
>> In under a year of Next3 fs in beta and production, it has helped me many
>> times
>> to analyse bugs post-mortem and find the problem.
>>
>> If there is demand, I can post the patch for Ext4.
>>
>> Amir.
>
> I do think that this sounds like a useful addition - should be very useful
> in doing post mortem analysis...
>
> Thanks!
>
> Ric
>
>

Hi Ric,

Attached are the patches for ext4 (against Ted's next branch) and
e2fsck (against 1.41.14).

e2fsck patch is running in production with e2fsprogs-1.41.9 and the
porting to 1.41.14 was trivial.
ext4 patch is a 'trivial+' port of the Next3 patch running in production.

Do you think you guys can take the patches for a test drive?

Thanks,
Amir.


ext4: record error messages in message buffer

Ext4 error messages are stored in the free space after the journal super block.
After journal recovery, the message buffer is copied from the journal super
block to the file system super block.

When error behavior is configured to remount-ro, the message buffer can be read
and displayed at a later time by e2fsck. The messages from the time of the
error are very helpful in post-mortem bug analysis.

Signed-off-by: Amir Goldstein <amir73il@users.sf.net>


@@ -311,6 +354,9 @@ void ext4_journal_abort_handle(const char *caller,
unsigned int line,
 	printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n",
 	       caller, line, errstr, err_fn);

+	/* record error message in journal super block */
+	ext4_record_journal_errstr(sb, caller, err_fn, errstr);
+
 	jbd2_journal_abort_handle(handle);
 }

@@ -398,6 +444,11 @@ void __ext4_error(struct super_block *sb, const
char *function,
 	       sb->s_id, function, line, current->comm, &vaf);
 	va_end(args);

+	/* record error message in journal super block */
+	va_start(args, fmt);
+	ext4_record_journal_err(sb, __func__, function, fmt, args);
+	va_end(args);
+
 	ext4_handle_error(sb);
 }

@@ -422,6 +473,11 @@ void ext4_error_inode(struct inode *inode, const
char *function,
 	printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
 	va_end(args);

+	/* record error message in journal super block */
+	va_start(args, fmt);
+	ext4_record_journal_err(inode->i_sb, __func__, function, fmt, args);
+	va_end(args);
+
 	ext4_handle_error(inode->i_sb);
 }

@@ -452,6 +508,11 @@ void ext4_error_file(struct file *file, const
char *function,
 	printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
 	va_end(args);

+	/* record error message in journal super block */
+	va_start(args, fmt);
+	ext4_record_journal_err(inode->i_sb, __func__, function, fmt, args);
+	va_end(args);
+
 	ext4_handle_error(inode->i_sb);
 }

@@ -510,6 +571,9 @@ void __ext4_std_error(struct super_block *sb,
const char *function,
 	       sb->s_id, function, line, errstr);
 	save_error_info(sb, function, line);

+	/* record error message in journal super block */
+	ext4_record_journal_errstr(sb, __func__, function, errstr);
+
 	ext4_handle_error(sb);
 }

@@ -536,6 +600,11 @@ void __ext4_abort(struct super_block *sb, const
char *function,
 	printk("\n");
 	va_end(args);

+	/* record error message in journal super block */
+	va_start(args, fmt);
+	ext4_record_journal_err(sb, __func__, function, fmt, args);
+	va_end(args);
+
 	if ((sb->s_flags & MS_RDONLY) == 0) {
 		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 		sb->s_flags |= MS_RDONLY;
@@ -602,6 +671,11 @@ __acquires(bitlock)
 	printk(KERN_CONT "%pV\n", &vaf);
 	va_end(args);

+	/* record error message in journal super block */
+	va_start(args, fmt);
+	ext4_record_journal_err(sb, __func__, function, fmt, args);
+	va_end(args);
+
 	if (test_opt(sb, ERRORS_CONT)) {
 		ext4_commit_super(sb, 0);
 		return;
@@ -4067,12 +4141,35 @@ static void ext4_clear_journal_err(struct
super_block *sb,
 	j_errno = jbd2_journal_errno(journal);
 	if (j_errno) {
 		char nbuf[16];
+		char *buf1, *buf2;
+		unsigned long offset1, offset2;
+		int len1, len2;
+
+		/* copy message buffer from journal to super block */
+		buf1 = (char *)journal->j_superblock;
+		offset1 = (unsigned long)buf1 % sb->s_blocksize;
+		buf1 += sizeof(journal_superblock_t);
+		offset1 += sizeof(journal_superblock_t);
+		len1 = sb->s_blocksize - offset1;
+		buf2 = (char *)EXT4_SB(sb)->s_es;
+		offset2 = (unsigned long)buf2 % sb->s_blocksize;
+		buf2 += sizeof(struct ext4_super_block);
+		offset2 += sizeof(struct ext4_super_block);
+		len2 = sb->s_blocksize - offset2;
+		if (len2 > len1)
+			len2 = len1;
+		if (len2 > 0 && *buf1)
+			memcpy(buf2, buf1, len2);

 		errstr = ext4_decode_error(sb, j_errno, nbuf);
 		ext4_warning(sb, "Filesystem error recorded "
 			     "from previous mount: %s", errstr);
 		ext4_warning(sb, "Marking fs in need of filesystem check.");

+		/* clear journal message buffer */
+		if (len1 > 0)
+			memset(buf1, 0, len1);
+
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 		ext4_commit_super(sb, 1);
Theodore Ts'o - Jan. 7, 2011, 5:26 a.m.
Am I missing something?  The kernel stores up to 3k worth of data, on
a 4k block file system.  Whereas e2fsck patch blindly assume 2k worhth
of data regardless of the block size.  The kernel patch looks ok, but
the e2fsprogs patch seems badly broken....

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o - Jan. 7, 2011, 5:28 a.m.
Also, the kernel patch seems to write the messages in 256-byte
records, whereas the e2fsck patch assumes the messages are packed
together in null-terminated packed lines....

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein - Jan. 7, 2011, 7:41 p.m.
On Fri, Jan 7, 2011 at 7:26 AM, Ted Ts'o <tytso@mit.edu> wrote:
> Am I missing something?  The kernel stores up to 3k worth of data, on
> a 4k block file system.  Whereas e2fsck patch blindly assume 2k worhth
> of data regardless of the block size.  The kernel patch looks ok, but
> the e2fsprogs patch seems badly broken....
>
>                                                - Ted
>

I thought that ext4 super block on 4K block is 1K at offset 1K, so
kernel only uses the remaining 2K.

It may very well be that e2fsck patch only works with 4K block size properly,
because I never tested it on other block sizes.

Amir.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein - Jan. 7, 2011, 7:43 p.m.
On Fri, Jan 7, 2011 at 7:28 AM, Ted Ts'o <tytso@mit.edu> wrote:
> Also, the kernel patch seems to write the messages in 256-byte
> records, whereas the e2fsck patch assumes the messages are packed
> together in null-terminated packed lines....
>
>                                                - Ted
>

I guess that is the reason I always see only the first message on fsck :-)
It was always enough information for me though...
feel free to amend to e2fsck patch to match the kernel side.

Thanks,
Amir.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein - Jan. 7, 2011, 8:39 p.m.
On Fri, Jan 7, 2011 at 9:43 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Fri, Jan 7, 2011 at 7:28 AM, Ted Ts'o <tytso@mit.edu> wrote:
>> Also, the kernel patch seems to write the messages in 256-byte
>> records, whereas the e2fsck patch assumes the messages are packed
>> together in null-terminated packed lines....

I checked again. e2fsck patch DOES read messages in 256-byte records:
+static void e2fsck_print_message_buffer(e2fsck_t ctx)
...
+#define MSGLEN 256
...
+		fputs(buf+offset, stdout);
+		offset += MSGLEN;

>>
>>                                                - Ted
>>
>
> I guess that is the reason I always see only the first message on fsck :-)
> It was always enough information for me though...
> feel free to amend to e2fsck patch to match the kernel side.
>
> Thanks,
> Amir.
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein - Jan. 7, 2011, 9:07 p.m.
On Fri, Jan 7, 2011 at 9:41 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Fri, Jan 7, 2011 at 7:26 AM, Ted Ts'o <tytso@mit.edu> wrote:
>> Am I missing something?  The kernel stores up to 3k worth of data, on
>> a 4k block file system.  Whereas e2fsck patch blindly assume 2k worhth
>> of data regardless of the block size.  The kernel patch looks ok, but
>> the e2fsprogs patch seems badly broken....

So it's not badly broken, it copies blocksize-2K, which is clumsily
written like this:
+	int len = ctx->fs->blocksize - 2*SUPERBLOCK_OFFSET;

After that, only 4K block and 8K block will have a leftover,
which will be copied from journal sb+1K to ext4 sb+1K.
Yes, 2K blocks - no message buffer for you!

The reason I am only copying 2K and throwing the extra K is this:
print_message_buffer() is called from check_super_block(),
*after* journal recovery, which was executed either by e2fsck itself
or (and more likely in a errors=remount-ro situation) by ext4
on read-only mount.
In the latter case, the extra K must be discarded, so I saw no reason
to write special code for the first case.
Neither did I find a good reason to complicate the recording code
and limit it to record only blocksize-2K.


>>
>>                                                - Ted
>>
>
> I thought that ext4 super block on 4K block is 1K at offset 1K, so
> kernel only uses the remaining 2K.
>
> It may very well be that e2fsck patch only works with 4K block size properly,
> because I never tested it on other block sizes.
>
> Amir.
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein - Jan. 7, 2011, 10:12 p.m.
On Fri, Jan 7, 2011 at 11:07 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Fri, Jan 7, 2011 at 9:41 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>> On Fri, Jan 7, 2011 at 7:26 AM, Ted Ts'o <tytso@mit.edu> wrote:
>>> Am I missing something?  The kernel stores up to 3k worth of data, on
>>> a 4k block file system.  Whereas e2fsck patch blindly assume 2k worhth
>>> of data regardless of the block size.  The kernel patch looks ok, but
>>> the e2fsprogs patch seems badly broken....
>
> So it's not badly broken, it copies blocksize-2K, which is clumsily
> written like this:
> +       int len = ctx->fs->blocksize - 2*SUPERBLOCK_OFFSET;
>
> After that, only 4K block and 8K block will have a leftover,
> which will be copied from journal sb+1K to ext4 sb+1K.
> Yes, 2K blocks - no message buffer for you!
>
> The reason I am only copying 2K and throwing the extra K is this:
> print_message_buffer() is called from check_super_block(),
> *after* journal recovery, which was executed either by e2fsck itself
> or (and more likely in a errors=remount-ro situation) by ext4
> on read-only mount.
> In the latter case, the extra K must be discarded, so I saw no reason
> to write special code for the first case.
> Neither did I find a good reason to complicate the recording code
> and limit it to record only blocksize-2K.
>
>

Ted,

I have a suggestion how to use the wasted extra K.

As I pointed out in the past, the first/last_error_xxx statistics are most
likely to be lost in errors=panic and errors=remount-ro (journal
recovery will override super block)
If you record this information in the last K of journal sb (even copy
the entire ext4 sb),
you can then override ext4 sb with the most up-to-date error stats
after journal recovery.

Amir.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rogier Wolff - Jan. 8, 2011, 8:05 a.m.
On Fri, Jan 07, 2011 at 11:07:23PM +0200, Amir Goldstein wrote:
> On Fri, Jan 7, 2011 at 9:41 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> > On Fri, Jan 7, 2011 at 7:26 AM, Ted Ts'o <tytso@mit.edu> wrote:
> >> Am I missing something?  The kernel stores up to 3k worth of data, on
> >> a 4k block file system.  Whereas e2fsck patch blindly assume 2k worhth
> >> of data regardless of the block size.  The kernel patch looks ok, but
> >> the e2fsprogs patch seems badly broken....
> 
> So it's not badly broken, it copies blocksize-2K, which is clumsily
> written like this:
> +	int len = ctx->fs->blocksize - 2*SUPERBLOCK_OFFSET;

So this should be: 

  int len = ctx->fs->blocksize - SUPERBLOCK_OFFSET - sizeof (<superblock>);

Although those two numbers are equal right now, there is no reason to
assume that they will remain so in the future. So if the superblock
size (or the offset) changes in the future, it's much better to have
programmed this so that it will keep on working as opposed to getting
to deal with ugly bugs in code that hasn't changed in years...

	Roger.
Amir Goldstein - Jan. 8, 2011, 8:28 p.m.
On Sat, Jan 8, 2011 at 12:12 AM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Fri, Jan 7, 2011 at 11:07 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>> On Fri, Jan 7, 2011 at 9:41 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>>> On Fri, Jan 7, 2011 at 7:26 AM, Ted Ts'o <tytso@mit.edu> wrote:
>>>> Am I missing something?  The kernel stores up to 3k worth of data, on
>>>> a 4k block file system.  Whereas e2fsck patch blindly assume 2k worhth
>>>> of data regardless of the block size.  The kernel patch looks ok, but
>>>> the e2fsprogs patch seems badly broken....
>>
>> So it's not badly broken, it copies blocksize-2K, which is clumsily
>> written like this:
>> +       int len = ctx->fs->blocksize - 2*SUPERBLOCK_OFFSET;
>>
>> After that, only 4K block and 8K block will have a leftover,
>> which will be copied from journal sb+1K to ext4 sb+1K.
>> Yes, 2K blocks - no message buffer for you!
>>
>> The reason I am only copying 2K and throwing the extra K is this:
>> print_message_buffer() is called from check_super_block(),
>> *after* journal recovery, which was executed either by e2fsck itself
>> or (and more likely in a errors=remount-ro situation) by ext4
>> on read-only mount.
>> In the latter case, the extra K must be discarded, so I saw no reason
>> to write special code for the first case.
>> Neither did I find a good reason to complicate the recording code
>> and limit it to record only blocksize-2K.
>>
>>
>
> Ted,
>
> I have a suggestion how to use the wasted extra K.
>
> As I pointed out in the past, the first/last_error_xxx statistics are most
> likely to be lost in errors=panic and errors=remount-ro (journal
> recovery will override super block)
> If you record this information in the last K of journal sb (even copy
> the entire ext4 sb),
> you can then override ext4 sb with the most up-to-date error stats
> after journal recovery.
>
> Amir.
>

Ted,

I just realize you did implement save&update of super block s_error_xxx fields.
However, I wonder if it is not a bug to call ext4_commit_super() from
save_error_info()
to commit the s_error_xxx fields in the first place.

The super block buffer is most likely participating in the current
transaction and should
not be committed to disk.

The alleged bug is likely to be hidden by the fact that the super
block has most likely
participated also in the last committed transaction, so a valid
version of it will most
likely override the invalid version.

I can imagine there could be a corner case, though, when committing
the super block
a head of transaction commit will result in inconsistencies.

Am I missing something?

Amir.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o - Jan. 8, 2011, 10 p.m.
On Sat, Jan 08, 2011 at 09:05:20AM +0100, Rogier Wolff wrote:
> Although those two numbers are equal right now, there is no reason to
> assume that they will remain so in the future. So if the superblock
> size (or the offset) changes in the future, it's much better to have
> programmed this so that it will keep on working as opposed to getting
> to deal with ugly bugs in code that hasn't changed in years...

No.  The superblock nor its offset will never change.  It's like the
syscall ABI, only worse.  If we changed it would break *everybody*.
Fortunately there is a huge amount of space left over in the 1024 byte
superblock.

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rogier Wolff - Jan. 9, 2011, 8:12 a.m.
On Sat, Jan 08, 2011 at 05:00:59PM -0500, Ted Ts'o wrote:
> On Sat, Jan 08, 2011 at 09:05:20AM +0100, Rogier Wolff wrote:
> > Although those two numbers are equal right now, there is no reason to
> > assume that they will remain so in the future. So if the superblock
> > size (or the offset) changes in the future, it's much better to have
> > programmed this so that it will keep on working as opposed to getting
> > to deal with ugly bugs in code that hasn't changed in years...
> 
> No.  The superblock nor its offset will never change.  It's like the
> syscall ABI, only worse.  If we changed it would break *everybody*.
> Fortunately there is a huge amount of space left over in the 1024 byte
> superblock.

It's called defensive programming. It prevents bugs before they
happen. By your reasoning you could've written 2048 or 0x800 there.

My version:

  - documents why something is subtracted from the blockszize and how
    much.
  - keeps on working even if the superblock would oddly change size
    in the future. Even if now you don't expect that to happen ever.


	Roger.
Theodore Ts'o - Jan. 9, 2011, 2:58 p.m.
On Sun, Jan 09, 2011 at 09:12:49AM +0100, Rogier Wolff wrote:
> > No.  The superblock nor its offset will never change.  It's like the
> > syscall ABI, only worse.  If we changed it would break *everybody*.
> > Fortunately there is a huge amount of space left over in the 1024 byte
> > superblock.
> 
> It's called defensive programming. It prevents bugs before they
> happen. By your reasoning you could've written 2048 or 0x800 there.

Defensive programming would be something like

	  BUG_ON(sizeof(struct ext4_super_block) != 1024);

(unfortunately #error sizeof(struct ext4_super_block) != 1024 won't
work since #error is handled by the preprocessor, and I don't think we
can trigger a compile-time warning for a structure size issue).

We could add that, if people like.  I do have regression tests (i.e.,
boot a system with ext4) which would die if anything like that
changed, though.

And yes, I have similar regression tests in e2fsprogs that would
trigger if the superblock size were to ever change.

	       		       	       - Ted

P.S.  The only way I can think of to do it at compile time would be to
build a test .o file with -g, and then use a program like pahole that
pulls the information out of the DWARF information.  Might actually be
a good thing to do that, since it could also be useful for automating
searches for unoptimize structures.  Unfortunately, many developers
don't have the DWARF utilities installed, so that would add a
dependency on the kernel build.

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Dilger - Jan. 10, 2011, 7:45 a.m.
On 2011-01-09, at 07:58, Ted Ts'o wrote:
> Defensive programming would be something like
> 
> 	  BUG_ON(sizeof(struct ext4_super_block) != 1024);
> 
> (unfortunately #error sizeof(struct ext4_super_block) != 1024 won't
> work since #error is handled by the preprocessor, and I don't think we
> can trigger a compile-time warning for a structure size issue).

We actually use a compile-time assertion in the Lustre code:

/*
* compile-time assertions. @cond has to be constant expression.
* ISO C Standard:
*
*        6.8.4.2  The switch statement
*
*       ....
*
*       [#3] The expression of each case label shall be an integer
*       constant expression and no two of the case constant expressions
*       in the same switch statement shall have the same value after
*       after conversion...
*/
#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } })


Of course, the "cond" expression must be resolvable at compile time, or
it will turn into a compile error because it is not legal to have a
variable "case".

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rogier Wolff - Jan. 10, 2011, 8:49 a.m.
On Sun, Jan 09, 2011 at 09:58:38AM -0500, Ted Ts'o wrote:
> On Sun, Jan 09, 2011 at 09:12:49AM +0100, Rogier Wolff wrote:
> > > No.  The superblock nor its offset will never change.  It's like the
> > > syscall ABI, only worse.  If we changed it would break *everybody*.
> > > Fortunately there is a huge amount of space left over in the 1024 byte
> > > superblock.
> > 
> > It's called defensive programming. It prevents bugs before they
> > happen. By your reasoning you could've written 2048 or 0x800 there.
> 
> Defensive programming would be something like
> 
> 	  BUG_ON(sizeof(struct ext4_super_block) != 1024);

It is one form of "defense", but not what I call defensive
programming. Defensive programming, is that you make things robust in
the the face of unexpected changes. 

If you do the BUG_ON and do this throughout the code, one day your
grandson will be increasing the superblock size. He'll fix all the
BUG_ON and your other "defensive" measures. But lo and behold. He's
human, and forgot one or two. Especially the run-time detections that
only get called occasionally (like in this case on an error
sitatuation) might take a while before they are noticed.

What use is it to turn a "we've found a serious error in your
filesystem, we strongly recommend you no longer write to it and run
fsck first" into a "system halted"?

What is wrong with just putting the right formula where it belongs?

We need to set the variable "len" to the amount of free space beyond
the superblock in the first block of the filesystem.  So we take the
size of the first block, subtract the size of the superblock and we
subtract the start of the superblock. It's as simple as that.


> We could add that, if people like.  I do have regression tests (i.e.,
> boot a system with ext4) which would die if anything like that
> changed, though.

How about


Makefile: 
	ext4.o: ...the objects.... testsbsize.out

	testsbsize.out:	testsbsize
		./testsbsize

(oh and something about useing "hostcc" for testsbsize). 

with testsbsize.c:
	#include <stdio.h>
	#include <...ext4....> 
	int main (int argc, char **argv)
	{
		if (sizeof (struct ext4_super_block) != 1024) {
			fprintf (stderr, "Superblock size is %d, should be 1024.\n", sizeof (struct ext4_super_block));
			exit (1);
		}
		exit (0);
	}

Roger.

Patch

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7728a4c..8888815 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -290,10 +290,53 @@  int __ext4_journal_stop(const char *where,
unsigned int line, handle_t *handle)
 	return err;
 }

+/* record error messages after journal super block */
+static void ext4_record_journal_err(struct super_block *sb, const char *where,
+		const char *function, const char *fmt, va_list args)
+{
+#define MSGLEN 256
+	journal_t *journal = EXT4_SB(sb)->s_journal;
+	char *buf;
+	unsigned long offset;
+	int len;
+	
+	if (!journal)
+		return;
+
+	buf = (char *)journal->j_superblock;
+	offset = (unsigned long)buf % sb->s_blocksize;
+	buf += sizeof(journal_superblock_t);
+	offset += sizeof(journal_superblock_t);
+
+	/* seek to end of message buffer */
+	while (offset < sb->s_blocksize && *buf) {
+		buf += MSGLEN;
+		offset += MSGLEN;
+	}
+
+	if (offset+MSGLEN > sb->s_blocksize)
+		/* no space left in message buffer */
+		return;
+
+	len = snprintf(buf, MSGLEN, "%s: %s: ", where, function);
+	len += vsnprintf(buf+len, MSGLEN-len, fmt, args);
+}
+
+static void ext4_record_journal_errstr(struct super_block *sb,
+		const char *where, const char *function, ...)
+{
+	va_list args;
+
+	va_start(args, function);
+	ext4_record_journal_err(sb, where, function, "%s\n", args);
+	va_end(args);
+}
+
 void ext4_journal_abort_handle(const char *caller, unsigned int line,
 			       const char *err_fn, struct buffer_head *bh,
 			       handle_t *handle, int err)
 {
+	struct super_block *sb = handle->h_transaction->t_journal->j_private;
 	char nbuf[16];
 	const char *errstr = ext4_decode_error(NULL, err, nbuf);