diff mbox

[1/3] e2fsck: fix incorrect interior node logical start values

Message ID 1356047023-28367-1-git-send-email-tytso@mit.edu
State Accepted, archived
Headers show

Commit Message

Theodore Ts'o Dec. 20, 2012, 11:43 p.m. UTC
From: Eric Sandeen <sandeen@redhat.com>

An index node's logical start (ei_block) should
match the logical start of the first node (index
or leaf) below it.  If we find a node whose start
does not match its parent, fix all of its parents
accordingly.

If it finds such a problem, we'll see:

Pass 1: Checking inodes, blocks, and sizes
Interior extent node level 0 of inode 274258:
Logical start 3666 does not match logical start 4093 at next level.  Fix<y>?

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 e2fsck/pass1.c      | 17 ++++++++++++++++-
 e2fsck/problem.c    |  8 ++++++++
 e2fsck/problem.h    |  4 +++-
 lib/ext2fs/ext2fs.h |  1 +
 lib/ext2fs/extent.c |  2 +-
 5 files changed, 29 insertions(+), 3 deletions(-)

Comments

Eric Sandeen Dec. 21, 2012, 8:47 p.m. UTC | #1
On 12/20/12 5:43 PM, Theodore Ts'o wrote:
> From: Eric Sandeen <sandeen@redhat.com>
> 
> An index node's logical start (ei_block) should
> match the logical start of the first node (index
> or leaf) below it.  If we find a node whose start
> does not match its parent, fix all of its parents
> accordingly.
> 
> If it finds such a problem, we'll see:
> 
> Pass 1: Checking inodes, blocks, and sizes
> Interior extent node level 0 of inode 274258:
> Logical start 3666 does not match logical start 4093 at next level.  Fix<y>?

Hm, this situation might still need more work in some cases.

Looking at a "bad" extent tree reported to me:

 1/ 2  25/ 29 57524 - 59011 15538183              1488
 2/ 2   1/  2 57524 - 59011 15556788 - 15558275   1488
 2/ 2   2/  2 59012 - 65535 15558276 - 15564799   6524 Uninit <- what's this extent?

 1/ 2  26/ 29 59012 - 60671 15538184              1660
 2/ 2   1/  2 59012 - 60671    25638 -    27297   1660
 2/ 2   2/  2 60672 - 60689    27298 -    27315     18 Uninit

 1/ 2  27/ 29 60672 - 61023 15538185               352 <- bad logical start
 2/ 2   1/ 19 60690 - 60690    27316 -    27316      1 Uninit


e2fsck with my patch finds & fixes the parent issues:

Pass 1: Checking inodes, blocks, and sizes
Interior extent node level 1 of inode 8126473:
Logical start 60672 does not match logical start 60690 at next level.  Fix? yes

Interior extent node level 1 of inode 8126473:
Logical start 63157 does not match logical start 63159 at next level.  Fix? yes

and after that the extents look like:

 1/ 2  25/ 29 57524 - 59011 15538183              1488
 2/ 2   1/  2 57524 - 59011 15556788 - 15558275   1488
 2/ 2   2/  2 59012 - 65535 15558276 - 15564799   6524 Uninit <--- ???

 1/ 2  26/ 29 59012 - 60689 15538184              1678
 2/ 2   1/  2 59012 - 60671    25638 -    27297   1660
 2/ 2   2/  2 60672 - 60689    27298 -    27315     18 Uninit

 1/ 2  27/ 29 60690 - 61023 15538185               334 <-- only this got fixed
 2/ 2   1/ 19 60690 - 60690    27316 -    27316      1 Uninit

but in this case, it seems that the length of the range covered by the previous interior nodes is still incorrect.  :(

-Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Dec. 24, 2012, 2:57 p.m. UTC | #2
On Fri, Dec 21, 2012 at 02:47:58PM -0600, Eric Sandeen wrote:
> 
> Hm, this situation might still need more work in some cases.
> 
> but in this case, it seems that the length of the range covered by
> the previous interior nodes is still incorrect.  :(

Yep, we've got a problem which e2fsck is not detecting.  Here's a
simple test case which shows the problem:

debugfs:  extents <12>
Level Entries       Logical      Physical Length Flags
 0/ 2   1/  2     0 -     6    23              7
 1/ 2   1/  2     0 -     3    18              4
 2/ 2   1/  2     0 -     0    37 -    37      1 Uninit
 2/ 2   2/  2     2 -    21    50 -    69     20 Uninit <----- this conflicts
 1/ 2   2/  2     4 -     6    21              3        <----- with this
 2/ 2   1/  2     4 -     4    39 -    39      1 Uninit
 2/ 2   2/  2     6 -     6    40 -    40      1 Uninit
 0/ 2   2/  2     7 -    10    24              4
 1/ 2   1/  2     7 -     8    20              2
 2/ 2   1/  2     7 -     7    41 -    41      1 Uninit
 2/ 2   2/  2     8 -     8    42 -    42      1 Uninit
 1/ 2   2/  2     9 -    10    22              2
 2/ 2   1/  2     9 -     9    43 -    43      1 Uninit
 2/ 2   2/  2    10 -    10    44 -    44      1 Uninit

In this case it's not obvious what is the right fix, since we have two
physical blocks which claim to map to the same logical block.  There
are some places already in e2fsck where we today just throw up our
hands and offer to zap the entire inode, instead of trying to let the
user decide which is the correct physical blocks to use, or do some
way of saving out the conflicting inodes.  It may be that's the best
way to fix this, since at the very least should be detecting that
there is a problem, and fixing it somehow.  We can always try to come
up with a better way of repairing the corruption later.

Attached is a test case file system image with the above extent tree.

   	  	     		      - Ted
diff mbox

Patch

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index cc00e0f..2acbb53 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -1797,7 +1797,7 @@  static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
 			problem = PR_1_EXTENT_ENDS_BEYOND;
 
 		if (problem) {
-		report_problem:
+report_problem:
 			pctx->blk = extent.e_pblk;
 			pctx->blk2 = extent.e_lblk;
 			pctx->num = extent.e_len;
@@ -1822,7 +1822,10 @@  static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
 		}
 
 		if (!is_leaf) {
+			blk64_t lblk;
+
 			blk = extent.e_pblk;
+			lblk = extent.e_lblk;
 			pctx->errcode = ext2fs_extent_get(ehandle,
 						  EXT2_EXTENT_DOWN, &extent);
 			if (pctx->errcode) {
@@ -1832,6 +1835,18 @@  static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
 					goto report_problem;
 				return;
 			}
+			/* The next extent should match this index's logical start */
+			if (extent.e_lblk != lblk) {
+				struct ext2_extent_info info;
+
+				ext2fs_extent_get_info(ehandle, &info);
+				pctx->blk = lblk;
+				pctx->blk2 = extent.e_lblk;
+				pctx->num = info.curr_level - 1;
+				problem = PR_1_EXTENT_INDEX_START_INVALID;
+				if (fix_problem(ctx, problem, pctx))
+					ext2fs_extent_fix_parents(ehandle);
+			}
 			scan_extent_node(ctx, pctx, pb, extent.e_lblk, ehandle);
 			if (pctx->errcode)
 				return;
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index 977a4c8..ab67cff 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -946,6 +946,14 @@  static struct e2fsck_problem problem_table[] = {
 	  N_("@i %i has zero length extent\n\t(@n logical @b %c, physical @b %b)\n"),
 	  PROMPT_CLEAR, 0 },
 
+	/*
+	 * Interior extent node logical offset doesn't match first node below it
+	 */
+	{ PR_1_EXTENT_INDEX_START_INVALID,
+	  N_("Interior @x node level %N of @i %i:\n"
+	     "Logical start %b does not match logical start %c at next level.  "),
+	  PROMPT_FIX, 0 },
+
 	/* Pass 1b errors */
 
 	/* Pass 1B: Rescan for duplicate/bad blocks */
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index 1b5815b..aed524d 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -558,6 +558,9 @@  struct problem_context {
 /* Extent has zero length */
 #define PR_1_EXTENT_LENGTH_ZERO		0x010066
 
+/* Index start doesn't match start of next extent down */
+#define PR_1_EXTENT_INDEX_START_INVALID	0x01006D
+
 /*
  * Pass 1b errors
  */
@@ -586,7 +589,6 @@  struct problem_context {
 /* Error adjusting EA refcount */
 #define PR_1B_ADJ_EA_REFCOUNT	0x011007
 
-
 /* Pass 1C: Scan directories for inodes with dup blocks. */
 #define PR_1C_PASS_HEADER	0x012000
 
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 7a14f40..1b35ff7 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1082,6 +1082,7 @@  extern errcode_t ext2fs_extent_get_info(ext2_extent_handle_t handle,
 					struct ext2_extent_info *info);
 extern errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle,
 				    blk64_t blk);
+extern errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle);
 
 /* fileio.c */
 extern errcode_t ext2fs_file_open2(ext2_filsys fs, ext2_ino_t ino,
diff --git a/lib/ext2fs/extent.c b/lib/ext2fs/extent.c
index 8828764..95a0a86 100644
--- a/lib/ext2fs/extent.c
+++ b/lib/ext2fs/extent.c
@@ -706,7 +706,7 @@  errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle,
  * Safe to call for any position in node; if not at the first entry,
  * will  simply return.
  */
-static errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle)
+errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle)
 {
 	int				retval = 0;
 	blk64_t				start;