Patchwork [02/54] libext2fs: Change ext4 on-disk layout to support metadata checksumming

login
register
mail settings
Submitter Darrick J. Wong
Date March 6, 2012, 11:57 p.m.
Message ID <20120306235734.11945.46038.stgit@elm3b70.beaverton.ibm.com>
Download mbox | patch
Permalink /patch/145061/
State Deferred
Delegated to: Theodore Ts'o
Headers show

Comments

Darrick J. Wong - March 6, 2012, 11:57 p.m.
Define flags and extend ext4 structure definitions to support metadata
checksumming.  Ted T'so covered many of these fields in an earlier patch, but
there are more required changes to the disk layout.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
---
 lib/blkid/probe.h          |    1 +
 lib/ext2fs/ext2_ext_attr.h |    4 +++-
 lib/ext2fs/ext2_fs.h       |   40 ++++++++++++++++++++++++++++++++++++++--
 lib/ext2fs/ext2fs.h        |    1 +
 lib/ext2fs/ext3_extents.h  |   11 +++++++++++
 5 files changed, 54 insertions(+), 3 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o - March 9, 2012, 10:04 p.m.
On Tue, Mar 06, 2012 at 03:57:34PM -0800, Darrick J. Wong wrote:
> Define flags and extend ext4 structure definitions to support metadata
> checksumming.  Ted T'so covered many of these fields in an earlier patch, but
> there are more required changes to the disk layout.
> 
> Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>

> @@ -619,7 +630,7 @@ struct ext2_super_block {
>  	__u64   s_mmp_block;            /* Block for multi-mount protection */
>  	__u32   s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
>  	__u8	s_log_groups_per_flex;	/* FLEX_BG group size */
> -	__u8    s_reserved_char_pad;
> +	__u8    s_checksum_type;	/* metadata checksum algorithm */
>  	__u16	s_reserved_pad;		/* Padding to next 32bits */
>  	__u64	s_kbytes_written;	/* nr of lifetime kilobytes written */
>  	__u32	s_snapshot_inum;	/* Inode number of active snapshot */

Note: when you change the superblock definition, you *must* also
update the test in lib/ext2fs/tst_super_size.c.

If you don't then running "make check" from the top-level of the build
tree will likely bomb out.  And you will also reveal to me that you
haven't run the full set of regression tests before submitting your
patch series.  :-)

					- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong - March 12, 2012, 10:24 p.m.
On Fri, Mar 09, 2012 at 05:04:35PM -0500, Ted Ts'o wrote:
> On Tue, Mar 06, 2012 at 03:57:34PM -0800, Darrick J. Wong wrote:
> > Define flags and extend ext4 structure definitions to support metadata
> > checksumming.  Ted T'so covered many of these fields in an earlier patch, but
> > there are more required changes to the disk layout.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
> 
> > @@ -619,7 +630,7 @@ struct ext2_super_block {
> >  	__u64   s_mmp_block;            /* Block for multi-mount protection */
> >  	__u32   s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
> >  	__u8	s_log_groups_per_flex;	/* FLEX_BG group size */
> > -	__u8    s_reserved_char_pad;
> > +	__u8    s_checksum_type;	/* metadata checksum algorithm */
> >  	__u16	s_reserved_pad;		/* Padding to next 32bits */
> >  	__u64	s_kbytes_written;	/* nr of lifetime kilobytes written */
> >  	__u32	s_snapshot_inum;	/* Inode number of active snapshot */
> 
> Note: when you change the superblock definition, you *must* also
> update the test in lib/ext2fs/tst_super_size.c.
> 
> If you don't then running "make check" from the top-level of the build
> tree will likely bomb out.  And you will also reveal to me that you
> haven't run the full set of regression tests before submitting your
> patch series.  :-)

Ok, thank you for catching (and fixing) this.

--D
> 
> 					- Ted
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/lib/blkid/probe.h b/lib/blkid/probe.h
index 37e80ef..d6809e1 100644
--- a/lib/blkid/probe.h
+++ b/lib/blkid/probe.h
@@ -110,6 +110,7 @@  struct ext2_super_block {
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
 #define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
+#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM	0x0400
 
 /* for s_feature_incompat */
 #define EXT2_FEATURE_INCOMPAT_FILETYPE		0x0002
diff --git a/lib/ext2fs/ext2_ext_attr.h b/lib/ext2fs/ext2_ext_attr.h
index ed548d1..bbb0aaa 100644
--- a/lib/ext2fs/ext2_ext_attr.h
+++ b/lib/ext2fs/ext2_ext_attr.h
@@ -20,7 +20,9 @@  struct ext2_ext_attr_header {
 	__u32	h_refcount;	/* reference count */
 	__u32	h_blocks;	/* number of disk blocks used */
 	__u32	h_hash;		/* hash value of all attributes */
-	__u32	h_reserved[4];	/* zero right now */
+	__u32	h_checksum;	/* crc32c(uuid+id+xattrs) */
+				/* id = inum if refcount = 1, else blknum */
+	__u32	h_reserved[3];	/* zero right now */
 };
 
 struct ext2_ext_attr_entry {
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 3b01285..b0b798b 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -235,6 +235,13 @@  struct ext2_dx_countlimit {
 	__u16 count;
 };
 
+/*
+ * This goes at the end of each htree block.
+ */
+struct ext2_dx_tail {
+	__u32 dt_reserved;
+	__u32 dt_checksum;	/* crc32c(uuid+inum+dxblock) */
+};
 
 /*
  * Macro-instructions used to manage group descriptors
@@ -463,6 +470,7 @@  struct ext2_inode_large {
 #define i_gid_low	i_gid
 #define i_uid_high	osd2.linux2.l_i_uid_high
 #define i_gid_high	osd2.linux2.l_i_gid_high
+#define i_checksum_lo	osd2.linux2.l_i_checksum_lo
 #else
 #if defined(__GNU__)
 
@@ -534,6 +542,9 @@  struct ext2_inode_large {
 #define ext4_offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif
 
+/* Metadata checksum algorithms */
+#define EXT2_CRC32C_CHKSUM		1
+
 /*
  * Structure of the super block
  */
@@ -619,7 +630,7 @@  struct ext2_super_block {
 	__u64   s_mmp_block;            /* Block for multi-mount protection */
 	__u32   s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
 	__u8	s_log_groups_per_flex;	/* FLEX_BG group size */
-	__u8    s_reserved_char_pad;
+	__u8    s_checksum_type;	/* metadata checksum algorithm */
 	__u16	s_reserved_pad;		/* Padding to next 32bits */
 	__u64	s_kbytes_written;	/* nr of lifetime kilobytes written */
 	__u32	s_snapshot_inum;	/* Inode number of active snapshot */
@@ -707,6 +718,11 @@  struct ext2_super_block {
 #define EXT4_FEATURE_RO_COMPAT_HAS_SNAPSHOT	0x0080
 #define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC		0x0200
+/*
+ * METADATA_CSUM implies GDT_CSUM.  When METADATA_CSUM is set, group
+ * descriptor checksums use the same algorithm as all other data
+ * structures' checksums.
+ */
 #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM	0x0400
 #define EXT4_FEATURE_RO_COMPAT_REPLICA		0x0800
 
@@ -780,6 +796,17 @@  struct ext2_dir_entry_2 {
 };
 
 /*
+ * This is a bogus directory entry at the end of each leaf block that
+ * records checksums.
+ */
+struct ext2_dir_entry_tail {
+	__u32	det_reserved_zero1;	/* Pretend to be unused */
+	__u16	det_rec_len;		/* 12 */
+	__u16	det_reserved_name_len;	/* 0xDE00, fake namelen/filetype */
+	__u32	det_checksum;		/* crc32c(uuid+inode+dirent) */
+};
+
+/*
  * Ext2 directory file types.  Only the low 3 bits are used.  The
  * other bits are reserved for now.
  */
@@ -795,6 +822,14 @@  struct ext2_dir_entry_2 {
 #define EXT2_FT_MAX		8
 
 /*
+ * Annoyingly, e2fsprogs always swab16s ext2_dir_entry.name_len, so we
+ * have to build ext2_dir_entry_tail with that assumption too.  This
+ * constant helps to build the dir_entry_tail to look like it has an
+ * "invalid" file type.
+ */
+#define EXT2_DIR_NAME_LEN_CSUM	0xDE00
+
+/*
  * EXT2_DIR_PAD defines the directory entries boundaries
  *
  * NOTE: It must be a multiple of 4
@@ -835,7 +870,8 @@  struct mmp_struct {
 	char	mmp_bdevname[32];	/* Bdev which last updated MMP block */
 	__u16	mmp_check_interval;	/* Changed mmp_check_interval */
 	__u16	mmp_pad1;
-	__u32	mmp_pad2[227];
+	__u32	mmp_pad2[226];
+	__u32	mmp_checksum;		/* crc32c(uuid+mmp_block) */
 };
 
 /*
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 682f0cd..bde22b6 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -199,6 +199,7 @@  typedef struct ext2_file *ext2_file_t;
 #define EXT2_FLAG_PRINT_PROGRESS	0x40000
 #define EXT2_FLAG_DIRECT_IO		0x80000
 #define EXT2_FLAG_SKIP_MMP		0x100000
+#define EXT2_FLAG_IGNORE_CSUM_ERRORS	0x200000
 
 /*
  * Special flag in the ext2 inode i_flag field that means that this is
diff --git a/lib/ext2fs/ext3_extents.h b/lib/ext2fs/ext3_extents.h
index 88fabc9..4163436 100644
--- a/lib/ext2fs/ext3_extents.h
+++ b/lib/ext2fs/ext3_extents.h
@@ -19,6 +19,17 @@ 
  */
 
 /*
+ * This is extent tail on-disk structure.
+ * All other extent structures are 12 bytes long.  It turns out that
+ * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which
+ * covers all valid ext4 block sizes.  Therefore, this tail structure can be
+ * crammed into the end of the block without having to rebalance the tree.
+ */
+struct ext3_extent_tail {
+	__u32	et_checksum;	/* crc32c(uuid+inum+extent_block) */
+};
+
+/*
  * this is extent on-disk structure
  * it's used at the bottom of the tree
  */