diff mbox

e2fsck: Fix incorrect interior node logical start values

Message ID 50A546D8.3020500@redhat.com
State Superseded, archived
Headers show

Commit Message

Eric Sandeen Nov. 15, 2012, 7:47 p.m. UTC
An index node's logical start (ei_block) should
match the logical start of the first node (index
or leaf) below it.  If we find a node whose start
does not match its parent, fix all of its parents
accordingly.

If it finds such a problem, we'll see:

Pass 1: Checking inodes, blocks, and sizes
Interior extent node level 0 of inode 274258:
Logical start 3666 does not match logical start 4093 at next level.  Fix<y>?

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

p.s. this works for me, but I am emphatically not an e2fsck
guru.  Please do review.  :)

I'm not sure if this should trigger re-traversal of the
entire extent tree after the fixup?


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Andreas Dilger Nov. 16, 2012, 2:54 a.m. UTC | #1
On 2012-11-15, at 12:47 PM, Eric Sandeen wrote:
> An index node's logical start (ei_block) should
> match the logical start of the first node (index
> or leaf) below it.  If we find a node whose start
> does not match its parent, fix all of its parents
> accordingly.

Eric,
could you please provide some background on why you were looking at this
and why it is a problem?  I could definitely understand that it would be
a problem if the parent index did not cover the child extent, but if the
parent extent is covering the child and has a "hole" at the start (maybe
due to some extent there being punched out) does this confuse the extent
handling logic in some manner?

I'd imagine that if there is a "hole" in the extent tree that it may get
filled in some time later, so updating one or more parent extents during
e2fsck will just need to be changed again later.  If I miss some obvious
point, then maybe I'll end up a bit more informed from your explanation.

Cheers, Andreas

> If it finds such a problem, we'll see:
> 
> Pass 1: Checking inodes, blocks, and sizes
> Interior extent node level 0 of inode 274258:
> Logical start 3666 does not match logical start 4093 at next level.  Fix<y>?
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> p.s. this works for me, but I am emphatically not an e2fsck
> guru.  Please do review.  :)
> 
> I'm not sure if this should trigger re-traversal of the
> entire extent tree after the fixup?
> 
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index a4bd956..7ff4021 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -1901,7 +1901,7 @@ static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
> 		}
> 
> 		if (problem) {
> -		report_problem:
> +report_problem:
> 			pctx->blk = extent.e_pblk;
> 			pctx->blk2 = extent.e_lblk;
> 			pctx->num = extent.e_len;
> @@ -1927,7 +1927,10 @@ fix_problem_now:
> 		}
> 
> 		if (!is_leaf) {
> +			blk64_t lblk;
> +
> 			blk = extent.e_pblk;
> +			lblk = extent.e_lblk;
> 			pctx->errcode = ext2fs_extent_get(ehandle,
> 						  EXT2_EXTENT_DOWN, &extent);
> 			if (pctx->errcode) {
> @@ -1940,6 +1943,18 @@ fix_problem_now:
> 					goto report_problem;
> 				return;
> 			}
> +			/* The next extent should match this index's logical start */
> +			if (extent.e_lblk != lblk) {
> +				struct ext2_extent_info info;
> +
> +				ext2fs_extent_get_info(ehandle, &info);
> +				pctx->blk = lblk;
> +				pctx->blk2 = extent.e_lblk;
> +				pctx->num = info.curr_level - 1;
> +				problem = PR_1_EXTENT_INDEX_START_INVALID;
> +				if (fix_problem(ctx, problem, pctx))
> +					ext2fs_extent_fix_parents(ehandle);
> +			}
> 			scan_extent_node(ctx, pctx, pb, extent.e_lblk, ehandle);
> 			if (pctx->errcode)
> 				return;
> diff --git a/e2fsck/problem.c b/e2fsck/problem.c
> index 78b05bb..9d4cd5f 100644
> --- a/e2fsck/problem.c
> +++ b/e2fsck/problem.c
> @@ -1000,6 +1000,14 @@ static struct e2fsck_problem problem_table[] = {
> 	     "@i %i does not match.  "),
> 	  PROMPT_FIX, 0 },
> 
> +	/*
> +	 * Interior extent node logical offset doesn't match first node below it
> +	 */
> +	{ PR_1_EXTENT_INDEX_START_INVALID,
> +	  N_("Interior @x node level %N of @i %i:\n"
> +	     "Logical start %b does not match logical start %c at next level.  "),
> +	  PROMPT_FIX, 0 },
> +
> 	/* Pass 1b errors */
> 
> 	/* Pass 1B: Rescan for duplicate/bad blocks */
> diff --git a/e2fsck/problem.h b/e2fsck/problem.h
> index 5caade4..a57b104 100644
> --- a/e2fsck/problem.h
> +++ b/e2fsck/problem.h
> @@ -586,6 +586,8 @@ struct problem_context {
> /* ea block passes checks, but checksum invalid */
> #define PR_1_EA_BLOCK_ONLY_CSUM_INVALID        0x01006C
> 
> +/* Index start doesn't match start of next extent down */
> +#define PR_1_EXTENT_INDEX_START_INVALID	0x01006D
> 
> /*
> * Pass 1b errors
> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
> index 9148d4e..ccd0936 100644
> --- a/lib/ext2fs/ext2fs.h
> +++ b/lib/ext2fs/ext2fs.h
> @@ -1157,6 +1157,7 @@ extern errcode_t ext2fs_extent_get_info(ext2_extent_handle_t handle,
> 					struct ext2_extent_info *info);
> extern errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle,
> 				    blk64_t blk);
> +extern errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle);
> 
> /* fileio.c */
> extern errcode_t ext2fs_file_open2(ext2_filsys fs, ext2_ino_t ino,
> diff --git a/lib/ext2fs/extent.c b/lib/ext2fs/extent.c
> index da82a2a..8c3b7b9 100644
> --- a/lib/ext2fs/extent.c
> +++ b/lib/ext2fs/extent.c
> @@ -722,7 +722,7 @@ errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle,
> * Safe to call for any position in node; if not at the first entry,
> * will  simply return.
> */
> -static errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle)
> +errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle)
> {
> 	int				retval = 0;
> 	blk64_t				start;
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen Nov. 16, 2012, 3:10 a.m. UTC | #2
On 11/15/12 8:54 PM, Andreas Dilger wrote:
> On 2012-11-15, at 12:47 PM, Eric Sandeen wrote:
>> An index node's logical start (ei_block) should
>> match the logical start of the first node (index
>> or leaf) below it.  If we find a node whose start
>> does not match its parent, fix all of its parents
>> accordingly.
> 
> Eric,
> could you please provide some background on why you were looking at this
> and why it is a problem?  

Sure, sorry.  Should have provided more info.  And I need to provide
a test fs & testcase too.

This was from the recent  "wayland / chromium" thread; I got an e2image
from Tim which showed the problem.  The extent tree looked something
like this; this is from the debugfs dump_extents command.  (Note:
only the logical start & physical block are on disk; the end block &
length of interior nodes are inferred in debugfs from adjacent nodes):

Level Entries       Logical          Physical Length Flags
 0/ 1   1/  2     0 -  3665 1114157             3666
 1/ 1   1/ 59     0 -   132  510721 -  510853    133
...
 1/ 1  58/ 59  3039 -  3664  573440 -  574065    626
 1/ 1  59/ 59  3665 -  4092  574066 -  574493    428
 0/ 1   2/  2  3666 -  9217  395702             5552
 1/ 1   1/307  4093 -  4093  574494 -  574494      1
...

so in this case, the 2nd interior node claims to cover logical
blocks 3666 onwards, but the last extent in the previous interior
node covers 3665->4092 - overlapping the 2nd interior node's 
claimed range.

If we then try to write to block 3666, the search for its extent
yields:

Nov 12 17:41:52 inode kernel: EXT4-fs error (device loop0): ext4_ext_search_left: inode #274258: (comm flush-7:0) ix (3666) != EXT_FIRST_INDEX (0) (depth 0)!
Nov 12 17:41:52 inode kernel: EXT4-fs (loop0): delayed block allocation failed for inode 274258 at logical offset 3666 with max blocks 1 with error -5

because the search has gotten off track thanks to the wrong information
in the 2nd interior node.

> I could definitely understand that it would be
> a problem if the parent index did not cover the child extent, but if the
> parent extent is covering the child and has a "hole" at the start (maybe
> due to some extent there being punched out) does this confuse the extent
> handling logic in some manner?

So, what I found was when the extent under one interior node
overlapped with the range stated in another interior node.  Which can't
be good at all.

I'm not certain about the hole case, but AFAICT it should never happen,
from looking at the code.  ext4_ext_correct_indexes() seems 
to cover this in the kernel, and ext2fs_extent_fix_parents() in
userspace.

I did test the hole punching case, and the parents seem to get
adjusted.

I'm not 100% sure of the original design intent.  Is Alex still around? ;)

> I'd imagine that if there is a "hole" in the extent tree that it may get
> filled in some time later, so updating one or more parent extents during
> e2fsck will just need to be changed again later.  If I miss some obvious
> point, then maybe I'll end up a bit more informed from your explanation.

From my reading of the code, the parent nodes should always start
at the same point as their first child.  I can see that more clearly
in the cases where it's explicitly set & fixed up after a change, but I
can't say for sure where (or if) it is required for proper functionality
elsewhere.

Thanks,
-Eric

> Cheers, Andreas
> 
>> If it finds such a problem, we'll see:
>>
>> Pass 1: Checking inodes, blocks, and sizes
>> Interior extent node level 0 of inode 274258:
>> Logical start 3666 does not match logical start 4093 at next level.  Fix<y>?
>>
>> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
>> ---
>>
>> p.s. this works for me, but I am emphatically not an e2fsck
>> guru.  Please do review.  :)
>>
>> I'm not sure if this should trigger re-traversal of the
>> entire extent tree after the fixup?
>>
>> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
>> index a4bd956..7ff4021 100644
>> --- a/e2fsck/pass1.c
>> +++ b/e2fsck/pass1.c
>> @@ -1901,7 +1901,7 @@ static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
>> 		}
>>
>> 		if (problem) {
>> -		report_problem:
>> +report_problem:
>> 			pctx->blk = extent.e_pblk;
>> 			pctx->blk2 = extent.e_lblk;
>> 			pctx->num = extent.e_len;
>> @@ -1927,7 +1927,10 @@ fix_problem_now:
>> 		}
>>
>> 		if (!is_leaf) {
>> +			blk64_t lblk;
>> +
>> 			blk = extent.e_pblk;
>> +			lblk = extent.e_lblk;
>> 			pctx->errcode = ext2fs_extent_get(ehandle,
>> 						  EXT2_EXTENT_DOWN, &extent);
>> 			if (pctx->errcode) {
>> @@ -1940,6 +1943,18 @@ fix_problem_now:
>> 					goto report_problem;
>> 				return;
>> 			}
>> +			/* The next extent should match this index's logical start */
>> +			if (extent.e_lblk != lblk) {
>> +				struct ext2_extent_info info;
>> +
>> +				ext2fs_extent_get_info(ehandle, &info);
>> +				pctx->blk = lblk;
>> +				pctx->blk2 = extent.e_lblk;
>> +				pctx->num = info.curr_level - 1;
>> +				problem = PR_1_EXTENT_INDEX_START_INVALID;
>> +				if (fix_problem(ctx, problem, pctx))
>> +					ext2fs_extent_fix_parents(ehandle);
>> +			}
>> 			scan_extent_node(ctx, pctx, pb, extent.e_lblk, ehandle);
>> 			if (pctx->errcode)
>> 				return;
>> diff --git a/e2fsck/problem.c b/e2fsck/problem.c
>> index 78b05bb..9d4cd5f 100644
>> --- a/e2fsck/problem.c
>> +++ b/e2fsck/problem.c
>> @@ -1000,6 +1000,14 @@ static struct e2fsck_problem problem_table[] = {
>> 	     "@i %i does not match.  "),
>> 	  PROMPT_FIX, 0 },
>>
>> +	/*
>> +	 * Interior extent node logical offset doesn't match first node below it
>> +	 */
>> +	{ PR_1_EXTENT_INDEX_START_INVALID,
>> +	  N_("Interior @x node level %N of @i %i:\n"
>> +	     "Logical start %b does not match logical start %c at next level.  "),
>> +	  PROMPT_FIX, 0 },
>> +
>> 	/* Pass 1b errors */
>>
>> 	/* Pass 1B: Rescan for duplicate/bad blocks */
>> diff --git a/e2fsck/problem.h b/e2fsck/problem.h
>> index 5caade4..a57b104 100644
>> --- a/e2fsck/problem.h
>> +++ b/e2fsck/problem.h
>> @@ -586,6 +586,8 @@ struct problem_context {
>> /* ea block passes checks, but checksum invalid */
>> #define PR_1_EA_BLOCK_ONLY_CSUM_INVALID        0x01006C
>>
>> +/* Index start doesn't match start of next extent down */
>> +#define PR_1_EXTENT_INDEX_START_INVALID	0x01006D
>>
>> /*
>> * Pass 1b errors
>> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
>> index 9148d4e..ccd0936 100644
>> --- a/lib/ext2fs/ext2fs.h
>> +++ b/lib/ext2fs/ext2fs.h
>> @@ -1157,6 +1157,7 @@ extern errcode_t ext2fs_extent_get_info(ext2_extent_handle_t handle,
>> 					struct ext2_extent_info *info);
>> extern errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle,
>> 				    blk64_t blk);
>> +extern errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle);
>>
>> /* fileio.c */
>> extern errcode_t ext2fs_file_open2(ext2_filsys fs, ext2_ino_t ino,
>> diff --git a/lib/ext2fs/extent.c b/lib/ext2fs/extent.c
>> index da82a2a..8c3b7b9 100644
>> --- a/lib/ext2fs/extent.c
>> +++ b/lib/ext2fs/extent.c
>> @@ -722,7 +722,7 @@ errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle,
>> * Safe to call for any position in node; if not at the first entry,
>> * will  simply return.
>> */
>> -static errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle)
>> +errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle)
>> {
>> 	int				retval = 0;
>> 	blk64_t				start;
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
> Cheers, Andreas
> 
> 
> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Nov. 29, 2012, 4:52 a.m. UTC | #3
On Wed, Nov 28, 2012 at 11:46:10PM -0500, Theodore Ts'o wrote:
> 
> And I did come up with a test case...
> 

Wow, that was unnecessarily large.  Here's a smaller one...

     	      		    	    	     - Ted
Eric Sandeen Nov. 29, 2012, 5:49 a.m. UTC | #4
On 11/28/12 10:52 PM, Theodore Ts'o wrote:
> On Wed, Nov 28, 2012 at 11:46:10PM -0500, Theodore Ts'o wrote:
>>
>> And I did come up with a test case...
>>
> 
> Wow, that was unnecessarily large.  Here's a smaller one...
> 
>      	      		    	    	     - Ted
> 

Was that by hand-hackery or did you figure out how to hit
it at runtime?

-Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Nov. 29, 2012, 1:22 p.m. UTC | #5
On Wed, Nov 28, 2012 at 11:49:16PM -0600, Eric Sandeen wrote:
> > 
> > Wow, that was unnecessarily large.  Here's a smaller one...
> > 
>
> Was that by hand-hackery or did you figure out how to hit
> it at runtime?

Via hand-hackery.  The tst_extents command allow you to do things like
this:

% ./build/lib/ext2fs/tst_extents  -w /tmp/test_case.img
tst_extents 1.42.6 (21-Sep-2012)
tst_extents:  inode <16>
header: magic=f30a entries=3 max=4 depth=1 generation=0
Loaded inode 16
tst_extents:  root
extent: lblk 0--83, len 84, pblk 1731, flags: (none)
tst_extents:  next_sibling
extent: lblk 84--4021, len 3938, pblk 1899, flags: (none)
tst_extents:  replace_node 80 0 1899
extent replace: 16 extent: lblk 80--79, len 0, pblk 1899, flags: (none)
extent: lblk 80--4021, len 3942, pblk 1899, flags: (none)
tst_extents:  prev_sibling
extent: lblk 0--79, len 80, pblk 1731, flags: 2ND_VISIT 
tst_extents:  q

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index a4bd956..7ff4021 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -1901,7 +1901,7 @@  static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
 		}
 
 		if (problem) {
-		report_problem:
+report_problem:
 			pctx->blk = extent.e_pblk;
 			pctx->blk2 = extent.e_lblk;
 			pctx->num = extent.e_len;
@@ -1927,7 +1927,10 @@  fix_problem_now:
 		}
 
 		if (!is_leaf) {
+			blk64_t lblk;
+
 			blk = extent.e_pblk;
+			lblk = extent.e_lblk;
 			pctx->errcode = ext2fs_extent_get(ehandle,
 						  EXT2_EXTENT_DOWN, &extent);
 			if (pctx->errcode) {
@@ -1940,6 +1943,18 @@  fix_problem_now:
 					goto report_problem;
 				return;
 			}
+			/* The next extent should match this index's logical start */
+			if (extent.e_lblk != lblk) {
+				struct ext2_extent_info info;
+
+				ext2fs_extent_get_info(ehandle, &info);
+				pctx->blk = lblk;
+				pctx->blk2 = extent.e_lblk;
+				pctx->num = info.curr_level - 1;
+				problem = PR_1_EXTENT_INDEX_START_INVALID;
+				if (fix_problem(ctx, problem, pctx))
+					ext2fs_extent_fix_parents(ehandle);
+			}
 			scan_extent_node(ctx, pctx, pb, extent.e_lblk, ehandle);
 			if (pctx->errcode)
 				return;
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index 78b05bb..9d4cd5f 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -1000,6 +1000,14 @@  static struct e2fsck_problem problem_table[] = {
 	     "@i %i does not match.  "),
 	  PROMPT_FIX, 0 },
 
+	/*
+	 * Interior extent node logical offset doesn't match first node below it
+	 */
+	{ PR_1_EXTENT_INDEX_START_INVALID,
+	  N_("Interior @x node level %N of @i %i:\n"
+	     "Logical start %b does not match logical start %c at next level.  "),
+	  PROMPT_FIX, 0 },
+
 	/* Pass 1b errors */
 
 	/* Pass 1B: Rescan for duplicate/bad blocks */
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index 5caade4..a57b104 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -586,6 +586,8 @@  struct problem_context {
 /* ea block passes checks, but checksum invalid */
 #define PR_1_EA_BLOCK_ONLY_CSUM_INVALID        0x01006C
 
+/* Index start doesn't match start of next extent down */
+#define PR_1_EXTENT_INDEX_START_INVALID	0x01006D
 
 /*
  * Pass 1b errors
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 9148d4e..ccd0936 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1157,6 +1157,7 @@  extern errcode_t ext2fs_extent_get_info(ext2_extent_handle_t handle,
 					struct ext2_extent_info *info);
 extern errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle,
 				    blk64_t blk);
+extern errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle);
 
 /* fileio.c */
 extern errcode_t ext2fs_file_open2(ext2_filsys fs, ext2_ino_t ino,
diff --git a/lib/ext2fs/extent.c b/lib/ext2fs/extent.c
index da82a2a..8c3b7b9 100644
--- a/lib/ext2fs/extent.c
+++ b/lib/ext2fs/extent.c
@@ -722,7 +722,7 @@  errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle,
  * Safe to call for any position in node; if not at the first entry,
  * will  simply return.
  */
-static errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle)
+errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle)
 {
 	int				retval = 0;
 	blk64_t				start;