diff mbox

[1/2] libext2fs: add metadata checksum and snapshot feature flags

Message ID 1316127052-1890-2-git-send-email-tytso@mit.edu
State Accepted, archived
Headers show

Commit Message

Theodore Ts'o Sept. 15, 2011, 10:50 p.m. UTC
Reserve EXT4_FEATURE_RO_COMPAT_METADATA_CSUM and
EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP.  Also reserve fields in the
superblock and the inode for the checksums.  In the block group
descriptor, reserve the exclude bitmap field for the snapshot feature,
and checksums for the inode and block allocation bitmaps.

With this commit, the metadata checksum and exclude bitmap features
should have reserved all of the fields they need in ext4's on-disk
format.

This commit also fixes an a missing byte swap for s_overhead_blocks.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Darrick J. Wong <djwong@us.ibm.com>
Cc: Amir Goldstein <amir73il@gmail.com>
---
 debugfs/set_fields.c        |    3 ++-
 e2fsck/pass1.c              |    4 ++--
 lib/e2p/feature.c           |    4 ++++
 lib/e2p/ls.c                |    4 ++++
 lib/ext2fs/ext2_fs.h        |   31 ++++++++++++++++++++++---------
 lib/ext2fs/swapfs.c         |   16 ++++++++++++++--
 lib/ext2fs/tst_inode_size.c |    3 ++-
 lib/ext2fs/tst_super_size.c |    1 +
 8 files changed, 51 insertions(+), 15 deletions(-)

Comments

Andreas Dilger Sept. 15, 2011, 11:09 p.m. UTC | #1
On 2011-09-15, at 4:50 PM, Theodore Ts'o wrote:
> Reserve EXT4_FEATURE_RO_COMPAT_METADATA_CSUM and
> EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP.  Also reserve fields in the
> superblock and the inode for the checksums.  In the block group
> descriptor, reserve the exclude bitmap field for the snapshot feature,
> and checksums for the inode and block allocation bitmaps.
> 
> With this commit, the metadata checksum and exclude bitmap features
> should have reserved all of the fields they need in ext4's on-disk
> format.
> 
> #define EXT2_BG_INODE_UNINIT	0x0001 /* Inode table/bitmap not initialized */
> @@ -363,7 +370,8 @@ struct ext2_inode {
> 			__u16	l_i_file_acl_high;
> 			__u16	l_i_uid_high;	/* these 2 fields    */
> 			__u16	l_i_gid_high;	/* were reserved2[0] */
> -			__u32	l_i_reserved2;
> +			__u16	l_i_checksum_lo;/* crc32c(uuid+inum+inode) */
> +			__u16	l_i_reserved;	/* crc32c(uuid+inum+inode) */
> 		} linux2;

The comment for l_i_reserved is incorrect, and the comment should include "LSB"? 
Also, if the order of these two fields was swapped it would avoid an extra
call to the CRC function to checksum the last 2 bytes:

			__u16	l_i_uid_high;	/* these 2 fields    */
			__u16	l_i_gid_high;	/* were reserved2[0] */
-			__u32	l_i_reserved2;
+			__u16	l_i_reserved;
+			__u16	l_i_checksum_lo;/* crc32c(uuid+inum+inode) LSB*/
		} linux2;

> @@ -422,7 +431,7 @@ struct ext2_inode_large {
> 		} hurd2;
> 	} osd2;				/* OS dependent 2 */
> 	__u16	i_extra_isize;
> -	__u16	i_pad1;
> +	__u16	i_checksum_hi;	/* crc32c(uuid+inum+inode) */

This comment should include "MSB".

> @@ -623,7 +632,8 @@ struct ext2_super_block {
> 	__u32	s_usr_quota_inum;	/* inode number of user quota file */
> 	__u32	s_grp_quota_inum;	/* inode number of group quota file */
> 	__u32	s_overhead_blocks;	/* overhead blocks/clusters in fs */
> -	__u32   s_reserved[109];        /* Padding to the end of the block */
> +	__u32	s_checksum;		/* crc32c(superblock) */
> +	__u32   s_reserved[108];        /* Padding to the end of the block */
> };


I thought it would be better to move s_checksum to be the last field in the
superblock to avoid multiple calls to the CRC function?  So:

	__u32	s_grp_quota_inum;	/* inode number of group quota file */
	__u32	s_overhead_blocks;	/* overhead blocks/clusters in fs */
-	__u32   s_reserved[109];        /* Padding to the end of the block */
+	__u32   s_reserved[108];        /* Padding to the end of the block */
+	__u32	s_checksum;		/* crc32c(superblock) */
};


> diff --git a/lib/ext2fs/tst_inode_size.c b/lib/ext2fs/tst_inode_size.c
> index 962f1cd..a4f6247 100644
> --- a/lib/ext2fs/tst_inode_size.c
> +++ b/lib/ext2fs/tst_inode_size.c
> @@ -61,7 +61,8 @@ void check_structure_fields()
> 	check_field(osd2.linux2.l_i_file_acl_high);
> 	check_field(osd2.linux2.l_i_uid_high);
> 	check_field(osd2.linux2.l_i_gid_high);
> -	check_field(osd2.linux2.l_i_reserved2);
> +	check_field(osd2.linux2.l_i_checksum_lo);
> +	check_field(osd2.linux2.l_i_reserved);
> 	printf("Ending offset is %d\n\n", cur_offset);
> #endif
> }
> diff --git a/lib/ext2fs/tst_super_size.c b/lib/ext2fs/tst_super_size.c
> index 1e5a524..75659ae 100644
> --- a/lib/ext2fs/tst_super_size.c
> +++ b/lib/ext2fs/tst_super_size.c
> @@ -126,6 +126,7 @@ void check_superblock_fields()
> 	check_field(s_usr_quota_inum);
> 	check_field(s_grp_quota_inum);
> 	check_field(s_overhead_blocks);
> +	check_field(s_checksum);
> 	check_field(s_reserved);
> 	printf("Ending offset is %d\n\n", cur_offset);
> #endif

These two checks would need to be reversed to match the above changes.

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Sept. 15, 2011, 11:11 p.m. UTC | #2
On Thu, Sep 15, 2011 at 05:09:13PM -0600, Andreas Dilger wrote:
> 
> I thought it would be better to move s_checksum to be the last field in the
> superblock to avoid multiple calls to the CRC function? 

Did you see my comment about just zero'ing the checksum field before
running the CRC?  We're going to have to do that for other data
structures, such as the inode structure, and it's what we do with the
block group descriptor checksum.  Might as well do that everywhere for
consistency's sake.

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Dilger Sept. 15, 2011, 11:34 p.m. UTC | #3
On 2011-09-15, at 5:11 PM, Ted Ts'o wrote:
> On Thu, Sep 15, 2011 at 05:09:13PM -0600, Andreas Dilger wrote:
>> 
>> I thought it would be better to move s_checksum to be the last field in the
>> superblock to avoid multiple calls to the CRC function? 
> 
> Did you see my comment about just zero'ing the checksum field before
> running the CRC?  We're going to have to do that for other data
> structures, such as the inode structure, and it's what we do with the
> block group descriptor checksum.

That isn't correct.  The group descriptor checksum is computed in chunks:

__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
                            struct ext4_group_desc *gdp)
{
                int offset = offsetof(struct ext4_group_desc, bg_checksum);
                __le32 le_group = cpu_to_le32(block_group);

                crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
                crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
                crc = crc16(crc, (__u8 *)gdp, offset);
                offset += sizeof(gdp->bg_checksum); /* skip checksum */
****HERE****
                /* for checksum of struct ext4_group_desc do the rest...*/
                if ((sbi->s_es->s_feature_incompat &
                     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
                    offset < le16_to_cpu(sbi->s_es->s_desc_size))
                        crc = crc16(crc, (__u8 *)gdp + offset,
                                    le16_to_cpu(sbi->s_es->s_desc_size) -
                                        offset);
 }

Darrick and I discussed zeroing the checksum fields, but then there is a
race with other threads accessing the same structure.

If we went to a crc32c LSB for filesystems with RO_COMPAT_CSUM it would
be possible to change how it is computed.  Since we have freedom to move
the checksum field now, why have the added complexity to do zeroing of
the field or two chunks?

Since we naturally have to break the checksum calculation for 128-byte
inodes and 32-byte descriptors, due to old versions of those structs,
there is little overhead in just skipping the field, and no races.

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Sept. 15, 2011, 11:41 p.m. UTC | #4
On Thu, Sep 15, 2011 at 05:34:41PM -0600, Andreas Dilger wrote:
> 
> Darrick and I discussed zeroing the checksum fields, but then there is a
> race with other threads accessing the same structure.

What race are you worried about?  The moment you modify some part of
the data structure, the checksum is going to be wrong.  This is true
whether you zero out the checksum field before you do the calculations
or not.

> If we went to a crc32c LSB for filesystems with RO_COMPAT_CSUM it would
> be possible to change how it is computed.  Since we have freedom to move
> the checksum field now, why have the added complexity to do zeroing of
> the field or two chunks?

Why is zero'ing out the field complex?  It's a single line of code....
It's certainly easier than doing it in two chunks, and there will be
some data structures (the block group descriptors at the very least)
where zero'ing the checksum is definitely going to be the easier way
to go.

   	      	       	      	     	    - Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Dilger Sept. 16, 2011, 12:09 a.m. UTC | #5
On 2011-09-15, at 5:41 PM, Ted Ts'o wrote:
> On Thu, Sep 15, 2011 at 05:34:41PM -0600, Andreas Dilger wrote:
>> 
>> Darrick and I discussed zeroing the checksum fields, but then there is a
>> race with other threads accessing the same structure.
> 
> What race are you worried about?  The moment you modify some part of
> the data structure, the checksum is going to be wrong.  This is true
> whether you zero out the checksum field before you do the calculations
> or not.

If you are reading the structure (not modifying it) and trying to verify
the checksum, it would be necessary to read-lock the structure, zero
the field, compute the checksum, reset the field, unlock, and then
compare checksums.  Alternately, one would have to make a copy of the
struct to zero out the field and compute the checksum on the copy.  Both
are more complex than just doing the checksum on two separate chunks.

>> If we went to a crc32c LSB for filesystems with RO_COMPAT_CSUM it would
>> be possible to change how it is computed.  Since we have freedom to move
>> the checksum field now, why have the added complexity to do zeroing of
>> the field or two chunks?
> 
> Why is zero'ing out the field complex?  It's a single line of code....
> It's certainly easier than doing it in two chunks, and there will be
> some data structures (the block group descriptors at the very least)
> where zero'ing the checksum is definitely going to be the easier way
> to go.

No, because for group descriptors, the size is conditional on whether
RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
first 32-byte crc32 (excluding the bg_checksum field) and then only
conditionally compute the second 32-byte checksum.  The same is true
of the inode checksum and s_inode_size, if the checksum is at the last
field of struct ext2_inode.

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Sept. 16, 2011, 12:56 a.m. UTC | #6
On Thu, Sep 15, 2011 at 06:09:04PM -0600, Andreas Dilger wrote:
> On 2011-09-15, at 5:41 PM, Ted Ts'o wrote:
> > On Thu, Sep 15, 2011 at 05:34:41PM -0600, Andreas Dilger wrote:
> >> 
> >> Darrick and I discussed zeroing the checksum fields, but then there is a
> >> race with other threads accessing the same structure.
> > 
> > What race are you worried about?  The moment you modify some part of
> > the data structure, the checksum is going to be wrong.  This is true
> > whether you zero out the checksum field before you do the calculations
> > or not.
> 
> If you are reading the structure (not modifying it) and trying to verify
> the checksum, it would be necessary to read-lock the structure, zero
> the field, compute the checksum, reset the field, unlock, and then
> compare checksums.  Alternately, one would have to make a copy of the
> struct to zero out the field and compute the checksum on the copy.  Both
> are more complex than just doing the checksum on two separate chunks.

I think a lot of the metadata update code already has locking to prevent
multiple writers.  I'm not as sure about the locking around the read calls,
though I suspect that we take some of the same locks before starting the read.
However, I've yet to audit all those code paths.  Point is, we might already be
doing most of the necessary locking.  Too bad I'm not sure.
> 
> >> If we went to a crc32c LSB for filesystems with RO_COMPAT_CSUM it would
> >> be possible to change how it is computed.  Since we have freedom to move
> >> the checksum field now, why have the added complexity to do zeroing of
> >> the field or two chunks?
> > 
> > Why is zero'ing out the field complex?  It's a single line of code....
> > It's certainly easier than doing it in two chunks, and there will be
> > some data structures (the block group descriptors at the very least)
> > where zero'ing the checksum is definitely going to be the easier way
> > to go.
> 
> No, because for group descriptors, the size is conditional on whether
> RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
> first 32-byte crc32 (excluding the bg_checksum field) and then only
> conditionally compute the second 32-byte checksum.  The same is true
> of the inode checksum and s_inode_size, if the checksum is at the last
> field of struct ext2_inode.

Um.... it looks to me like the size is conditional on
EXT4_FEATURE_INCOMPAT_64BIT being set and then sb.s_desc_size.  Are you
suggesting that we make the group descriptor checksum a full 32 bits and just
stuff the upper 16 bits somewhere near the end of the structure?  I suppose
that would leave us with very little space in the group descriptor.  On the
other hand the 32-bit format is totally full already and any size increase
depends on 64bit, which means we can make it even bigger with little pain if we
need to.

Incidentally, I have cached versions of the ext4 disk layout and metadata
checksumming wiki pages mirrored here:
http://djwong.org/docs/ext4_metadata_checksums.html
http://djwong.org/docs/ext4_disk_layout.pdf

--D

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Sept. 16, 2011, 1:06 a.m. UTC | #7
On Thu, Sep 15, 2011 at 06:09:04PM -0600, Andreas Dilger wrote:
> 
> If you are reading the structure (not modifying it) and trying to verify
> the checksum, it would be necessary to read-lock the structure, zero
> the field, compute the checksum, reset the field, unlock, and then
> compare checksums.  Alternately, one would have to make a copy of the
> struct to zero out the field and compute the checksum on the copy.  Both
> are more complex than just doing the checksum on two separate chunks.

You have to read-lock the structure before calculating the checksum in
any case, since otherwise when someone else is modifying the
structure, before they have a chance to update the checksum, you'll
calculate the checksum and discover that it is incorrect.

In practice we would probably be calculating the checksum when the
inode if first read into memory, before it it is visible to the rest
of the system, so this shouldn't be an issue.  But if it is visible to
the rest of the system, even you put the checksum at the end, if
someone else can modify the data structure while you are calculating
it, the checksum will be wrong.

> No, because for group descriptors, the size is conditional on whether
> RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
> first 32-byte crc32 (excluding the bg_checksum field) and then only
> conditionally compute the second 32-byte checksum.  The same is true
> of the inode checksum and s_inode_size, if the checksum is at the last
> field of struct ext2_inode.

But wouldn't it be faster to zero out the two fields, and then
caluclate a single 64-byte checksum?  That way we avoid the setup
costs of the crc32, especially in the case of the crc32c-sby8-[lb]e
implementation.

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Dilger Sept. 16, 2011, 3:57 a.m. UTC | #8
On 2011-09-15, at 7:06 PM, Ted Ts'o wrote:
> On Thu, Sep 15, 2011 at 06:09:04PM -0600, Andreas Dilger wrote:
>> 
>> If you are reading the structure (not modifying it) and trying to verify
>> the checksum, it would be necessary to read-lock the structure, zero
>> the field, compute the checksum, reset the field, unlock, and then
>> compare checksums.  Alternately, one would have to make a copy of the
>> struct to zero out the field and compute the checksum on the copy.  Both
>> are more complex than just doing the checksum on two separate chunks.
> 
> You have to read-lock the structure before calculating the checksum in
> any case, since otherwise when someone else is modifying the
> structure, before they have a chance to update the checksum, you'll
> calculate the checksum and discover that it is incorrect.

Sure, but if the structure is read-locked it shouldn't be modified...

> In practice we would probably be calculating the checksum when the
> inode if first read into memory, before it it is visible to the rest
> of the system, so this shouldn't be an issue.  But if it is visible to
> the rest of the system, even you put the checksum at the end, if
> someone else can modify the data structure while you are calculating
> it, the checksum will be wrong.

True, but at the same time is there a reason _not_ to put the checksum
at the end?  For the superblock in particular it seems easy to do and
simplifies the code either way.

>> No, because for group descriptors, the size is conditional on whether
>> RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
>> first 32-byte crc32 (excluding the bg_checksum field) and then only
>> conditionally compute the second 32-byte checksum.  The same is true
>> of the inode checksum and s_inode_size, if the checksum is at the last
>> field of struct ext2_inode.
> 
> But wouldn't it be faster to zero out the two fields, and then
> caluclate a single 64-byte checksum?  That way we avoid the setup
> costs of the crc32, especially in the case of the crc32c-sby8-[lb]e
> implementation.

Looking at Darrick's performance tests for the checksums, at 32 bytes
the performance is 75%/95% (crc32-sby8-le/crc32-intel) of peak, and
91%/97% of peak for 128 bytes, so the overhead of computing the checksum
in two parts is probably not significant.

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Dilger Sept. 16, 2011, 4:03 a.m. UTC | #9
On 2011-09-15, at 6:56 PM, Darrick J. Wong wrote:
> On Thu, Sep 15, 2011 at 06:09:04PM -0600, Andreas Dilger wrote:
>> If you are reading the structure (not modifying it) and trying to verify
>> the checksum, it would be necessary to read-lock the structure, zero
>> the field, compute the checksum, reset the field, unlock, and then
>> compare checksums.  Alternately, one would have to make a copy of the
>> struct to zero out the field and compute the checksum on the copy.  Both
>> are more complex than just doing the checksum on two separate chunks.
> 
> I think a lot of the metadata update code already has locking to prevent
> multiple writers.  I'm not as sure about the locking around the read calls,
> though I suspect that we take some of the same locks before starting the read.
> However, I've yet to audit all those code paths.  Point is, we might already be
> doing most of the necessary locking.  Too bad I'm not sure.
> 
>>>> If we went to a crc32c LSB for filesystems with RO_COMPAT_METADATA_CSUM
>>>> it would be possible to change how it is computed.  Since we have freedom
>>>> to move the checksum field now, why have the added complexity to do
>>>> zeroing of the field or two chunks?
>>> 
>>> Why is zero'ing out the field complex?  It's a single line of code....
>>> It's certainly easier than doing it in two chunks, and there will be
>>> some data structures (the block group descriptors at the very least)
>>> where zero'ing the checksum is definitely going to be the easier way
>>> to go.
>> 
>> No, because for group descriptors, the size is conditional on whether
>> RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
>> first 32-byte crc32 (excluding the bg_checksum field) and then only
>> conditionally compute the second 32-byte checksum.  The same is true
>> of the inode checksum and s_inode_size, if the checksum is at the last
>> field of struct ext2_inode.
> 
> Um.... it looks to me like the size is conditional on
> EXT4_FEATURE_INCOMPAT_64BIT being set and then sb.s_desc_size.

My bad, I was looking at ext4_group_desc_csum() for the flag name, and my
eyes caught the wrong one...

>  Are you
> suggesting that we make the group descriptor checksum a full 32 bits and just
> stuff the upper 16 bits somewhere near the end of the structure?

No, I don't think we need a 32-bit checksum for the group descriptor.  Rather
that the crc32c is so much faster than crc16 that it might make sense to use
the low 16 bits of crc32c for the group descriptor if RO_COMPAT_METADATA_CSUM
is set.

> Incidentally, I have cached versions of the ext4 disk layout and metadata
> checksumming wiki pages mirrored here:
> http://djwong.org/docs/ext4_metadata_checksums.html
> http://djwong.org/docs/ext4_disk_layout.pdf


Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Sept. 16, 2011, 2:28 p.m. UTC | #10
On Thu, Sep 15, 2011 at 09:57:33PM -0600, Andreas Dilger wrote:
> True, but at the same time is there a reason _not_ to put the checksum
> at the end?  For the superblock in particular it seems easy to do and
> simplifies the code either way.

For the superblock, OK.  I'll buy that and I'll make the change so
that tail end of the superblock looks like this:

     __u32    s_overhead_blocks;      /* overhead blocks/clusters in fs */
     __u32   s_reserved[108];        /* Padding to the end of the block */
     __u32   s_checksum;	     	/* crc32c(superblock) */
};

							- Ted


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Sept. 19, 2011, 6:58 p.m. UTC | #11
On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
<snip>
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 4fec5db..1c86cb2 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -363,7 +370,8 @@ struct ext2_inode {
>  			__u16	l_i_file_acl_high;
>  			__u16	l_i_uid_high;	/* these 2 fields    */
>  			__u16	l_i_gid_high;	/* were reserved2[0] */
> -			__u32	l_i_reserved2;
> +			__u16	l_i_checksum_lo;	/* crc32c(uuid+inum+inode) */
> +			__u16	l_i_reserved;	/* crc32c(uuid+inum+inode) */
>  		} linux2;
>  		struct {
>  			__u8	h_i_frag;	/* Fragment number */
> @@ -410,7 +418,8 @@ struct ext2_inode_large {
>  			__u16	l_i_file_acl_high;
>  			__u16	l_i_uid_high;	/* these 2 fields    */
>  			__u16	l_i_gid_high;	/* were reserved2[0] */
> -			__u32	l_i_reserved2;
> +			__u16	l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
> +			__u16	l_i_reserved;
>  		} linux2;
>  		struct {
>  			__u8	h_i_frag;	/* Fragment number */
> @@ -422,7 +431,7 @@ struct ext2_inode_large {
>  		} hurd2;
>  	} osd2;				/* OS dependent 2 */
>  	__u16	i_extra_isize;
> -	__u16	i_pad1;
> +	__u16	i_checksum_hi;	/* crc32c(uuid+inum+inode) */
>  	__u32	i_ctime_extra;	/* extra Change time (nsec << 2 | epoch) */
>  	__u32	i_mtime_extra;	/* extra Modification time (nsec << 2 | epoch) */
>  	__u32	i_atime_extra;	/* extra Access time (nsec << 2 | epoch) */
> @@ -441,7 +450,7 @@ struct ext2_inode_large {
>  #define i_gid_low	i_gid
>  #define i_uid_high	osd2.linux2.l_i_uid_high
>  #define i_gid_high	osd2.linux2.l_i_gid_high
> -#define i_reserved2	osd2.linux2.l_i_reserved2
> +#define i_checksum	osd2.linux2.l_i_checksum

s/i_checksum/i_checksum_lo/, I think.

--D
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Sept. 20, 2011, 6:56 p.m. UTC | #12
On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
> Reserve EXT4_FEATURE_RO_COMPAT_METADATA_CSUM and
> EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP.  Also reserve fields in the
> superblock and the inode for the checksums.  In the block group
> descriptor, reserve the exclude bitmap field for the snapshot feature,
> and checksums for the inode and block allocation bitmaps.
> 
> With this commit, the metadata checksum and exclude bitmap features
> should have reserved all of the fields they need in ext4's on-disk
> format.
> 
> This commit also fixes an a missing byte swap for s_overhead_blocks.
> 
> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
> Cc: Darrick J. Wong <djwong@us.ibm.com>
> Cc: Amir Goldstein <amir73il@gmail.com>
> ---
>  debugfs/set_fields.c        |    3 ++-
>  e2fsck/pass1.c              |    4 ++--
>  lib/e2p/feature.c           |    4 ++++
>  lib/e2p/ls.c                |    4 ++++
>  lib/ext2fs/ext2_fs.h        |   31 ++++++++++++++++++++++---------
>  lib/ext2fs/swapfs.c         |   16 ++++++++++++++--
>  lib/ext2fs/tst_inode_size.c |    3 ++-
>  lib/ext2fs/tst_super_size.c |    1 +
>  8 files changed, 51 insertions(+), 15 deletions(-)
> 
> diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
> index ac6bc25..1a5ec7f 100644
> --- a/debugfs/set_fields.c
> +++ b/debugfs/set_fields.c
> @@ -144,6 +144,7 @@ static struct field_set_info super_fields[] = {
>  	{ "usr_quota_inum", &set_sb.s_usr_quota_inum, 4, parse_uint },
>  	{ "grp_quota_inum", &set_sb.s_grp_quota_inum, 4, parse_uint },
>  	{ "overhead_blocks", &set_sb.s_overhead_blocks, 4, parse_uint },
> +	{ "checksum", &set_sb.s_checksum, 4, parse_uint },
>  	{ 0, 0, 0, 0 }
>  };
> 
> @@ -179,6 +180,7 @@ static struct field_set_info inode_fields[] = {
>  	{ "fsize", &set_inode.osd2.hurd2.h_i_fsize, 1, parse_uint },
>  	{ "uid_high", &set_inode.osd2.linux2.l_i_uid_high, 2, parse_uint },
>  	{ "gid_high", &set_inode.osd2.linux2.l_i_gid_high, 2, parse_uint },
> +	{ "checksum", &set_inode.osd2.linux2.l_i_checksum_lo, 2, parse_uint },
>  	{ "author", &set_inode.osd2.hurd2.h_i_author, 4, parse_uint },
>  	{ "bmap", NULL, 4, parse_bmap, FLAG_ARRAY },
>  	{ 0, 0, 0, 0 }
> @@ -192,7 +194,6 @@ static struct field_set_info ext2_bg_fields[] = {
>  	{ "free_inodes_count", &set_gd.bg_free_inodes_count, 2, parse_uint },
>  	{ "used_dirs_count", &set_gd.bg_used_dirs_count, 2, parse_uint },
>  	{ "flags", &set_gd.bg_flags, 2, parse_uint },
> -	{ "reserved", &set_gd.bg_reserved, 2, parse_uint, FLAG_ARRAY, 2 },
>  	{ "itable_unused", &set_gd.bg_itable_unused, 2, parse_uint },
>  	{ "checksum", &set_gd.bg_checksum, 2, parse_gd_csum },
>  	{ 0, 0, 0, 0 }
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index dd18ade..16ddcda 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -364,8 +364,8 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
>  	printf("inode #%u, i_extra_size %d\n", pctx->ino,
>  			inode->i_extra_isize);
>  #endif
> -	/* i_extra_isize must cover i_extra_isize + i_pad1 at least */
> -	min = sizeof(inode->i_extra_isize) + sizeof(inode->i_pad1);
> +	/* i_extra_isize must cover i_extra_isize + i_checksum_hi at least */
> +	min = sizeof(inode->i_extra_isize) + sizeof(inode->i_checksum_hi);
>  	max = EXT2_INODE_SIZE(sb) - EXT2_GOOD_OLD_INODE_SIZE;
>  	/*
>  	 * For now we will allow i_extra_isize to be 0, but really
> diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
> index 16fba53..965fc16 100644
> --- a/lib/e2p/feature.c
> +++ b/lib/e2p/feature.c
> @@ -40,6 +40,8 @@ static struct feature feature_list[] = {
>  			"resize_inode" },
>  	{	E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_LAZY_BG,
>  			"lazy_bg" },
> +	{	E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP,
> +			"snapshot" },
> 
>  	{	E2P_FEATURE_RO_INCOMPAT, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER,
>  			"sparse_super" },
> @@ -59,6 +61,8 @@ static struct feature feature_list[] = {
>  			"quota" },
>  	{	E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_BIGALLOC,
>  			"bigalloc"},
> +	{	E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM,
> +			"metadata_csum"},
> 
>  	{	E2P_FEATURE_INCOMPAT, EXT2_FEATURE_INCOMPAT_COMPRESSION,
>  			"compression" },
> diff --git a/lib/e2p/ls.c b/lib/e2p/ls.c
> index 0f36f40..aaacdaa 100644
> --- a/lib/e2p/ls.c
> +++ b/lib/e2p/ls.c
> @@ -413,6 +413,10 @@ void list_super2(struct ext2_super_block * sb, FILE *f)
>  	if (sb->s_grp_quota_inum)
>  		fprintf(f, "Group quota inode:        %u\n",
>  			sb->s_grp_quota_inum);
> +
> +	if (sb->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
> +		fprintf(f, "Checksum:                 0x%08x\n",
> +			sb->s_checksum);
>  }
> 
>  void list_super (struct ext2_super_block * s)
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 4fec5db..1c86cb2 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -142,7 +142,9 @@ struct ext2_group_desc
>  	__u16	bg_free_inodes_count;	/* Free inodes count */
>  	__u16	bg_used_dirs_count;	/* Directories count */
>  	__u16	bg_flags;
> -	__u32	bg_reserved[2];
> +	__u32	bg_exclude_bitmap_lo;	/* Exclude bitmap for snapshots */
> +	__u16	bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> +	__u16	bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>  	__u16	bg_itable_unused;	/* Unused inodes count */
>  	__u16	bg_checksum;		/* crc16(s_uuid+grouo_num+group_desc)*/
>  };
> @@ -159,7 +161,9 @@ struct ext4_group_desc
>  	__u16	bg_free_inodes_count;	/* Free inodes count */
>  	__u16	bg_used_dirs_count;	/* Directories count */
>  	__u16	bg_flags;		/* EXT4_BG_flags (INODE_UNINIT, etc) */
> -	__u32	bg_reserved[2];		/* Likely block/inode bitmap checksum */
> +	__u32	bg_exclude_bitmap_lo;	/* Exclude bitmap for snapshots */
> +	__u16	bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> +	__u16	bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>  	__u16	bg_itable_unused;	/* Unused inodes count */
>  	__u16	bg_checksum;		/* crc16(sb_uuid+group+desc) */
>  	__u32	bg_block_bitmap_hi;	/* Blocks bitmap block MSB */
> @@ -169,7 +173,10 @@ struct ext4_group_desc
>  	__u16	bg_free_inodes_count_hi;/* Free inodes count MSB */
>  	__u16	bg_used_dirs_count_hi;	/* Directories count MSB */
>  	__u16	bg_itable_unused_hi;	/* Unused inodes count MSB */
> -	__u32	bg_reserved2[3];
> +	__u32	bg_exclude_bitmap_hi;	/* Exclude bitmap block MSB */
> +	__u16	bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> +	__u16	bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> +	__u32	bg_reserved;

Should we attach a checksum to the exclude bitmap?  That would, unfortunately,
put the 64-byte block group pretty close to full, unless we're ok with making
the bg even bigger...

--D
>  };
> 
>  #define EXT2_BG_INODE_UNINIT	0x0001 /* Inode table/bitmap not initialized */
> @@ -363,7 +370,8 @@ struct ext2_inode {
>  			__u16	l_i_file_acl_high;
>  			__u16	l_i_uid_high;	/* these 2 fields    */
>  			__u16	l_i_gid_high;	/* were reserved2[0] */
> -			__u32	l_i_reserved2;
> +			__u16	l_i_checksum_lo;	/* crc32c(uuid+inum+inode) */
> +			__u16	l_i_reserved;	/* crc32c(uuid+inum+inode) */
>  		} linux2;
>  		struct {
>  			__u8	h_i_frag;	/* Fragment number */
> @@ -410,7 +418,8 @@ struct ext2_inode_large {
>  			__u16	l_i_file_acl_high;
>  			__u16	l_i_uid_high;	/* these 2 fields    */
>  			__u16	l_i_gid_high;	/* were reserved2[0] */
> -			__u32	l_i_reserved2;
> +			__u16	l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
> +			__u16	l_i_reserved;
>  		} linux2;
>  		struct {
>  			__u8	h_i_frag;	/* Fragment number */
> @@ -422,7 +431,7 @@ struct ext2_inode_large {
>  		} hurd2;
>  	} osd2;				/* OS dependent 2 */
>  	__u16	i_extra_isize;
> -	__u16	i_pad1;
> +	__u16	i_checksum_hi;	/* crc32c(uuid+inum+inode) */
>  	__u32	i_ctime_extra;	/* extra Change time (nsec << 2 | epoch) */
>  	__u32	i_mtime_extra;	/* extra Modification time (nsec << 2 | epoch) */
>  	__u32	i_atime_extra;	/* extra Access time (nsec << 2 | epoch) */
> @@ -441,7 +450,7 @@ struct ext2_inode_large {
>  #define i_gid_low	i_gid
>  #define i_uid_high	osd2.linux2.l_i_uid_high
>  #define i_gid_high	osd2.linux2.l_i_gid_high
> -#define i_reserved2	osd2.linux2.l_i_reserved2
> +#define i_checksum	osd2.linux2.l_i_checksum
>  #else
>  #if defined(__GNU__)
> 
> @@ -623,7 +632,8 @@ struct ext2_super_block {
>  	__u32	s_usr_quota_inum;	/* inode number of user quota file */
>  	__u32	s_grp_quota_inum;	/* inode number of group quota file */
>  	__u32	s_overhead_blocks;	/* overhead blocks/clusters in fs */
> -	__u32   s_reserved[109];        /* Padding to the end of the block */
> +	__u32	s_checksum;		/* crc32c(superblock) */
> +	__u32   s_reserved[108];        /* Padding to the end of the block */
>  };
> 
>  #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
> @@ -671,7 +681,9 @@ struct ext2_super_block {
>  #define EXT2_FEATURE_COMPAT_RESIZE_INODE	0x0010
>  #define EXT2_FEATURE_COMPAT_DIR_INDEX		0x0020
>  #define EXT2_FEATURE_COMPAT_LAZY_BG		0x0040
> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE	0x0080
> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE	0x0080 not used, legacy */
> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP	0x0100
> +
> 
>  #define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
>  #define EXT2_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
> @@ -683,6 +695,7 @@ struct ext2_super_block {
>  #define EXT4_FEATURE_RO_COMPAT_HAS_SNAPSHOT	0x0080
>  #define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
>  #define EXT4_FEATURE_RO_COMPAT_BIGALLOC		0x0200
> +#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM	0x0400
> 
>  #define EXT2_FEATURE_INCOMPAT_COMPRESSION	0x0001
>  #define EXT2_FEATURE_INCOMPAT_FILETYPE		0x0002
> diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
> index 87b1a2e..d1c4a56 100644
> --- a/lib/ext2fs/swapfs.c
> +++ b/lib/ext2fs/swapfs.c
> @@ -78,6 +78,8 @@ void ext2fs_swap_super(struct ext2_super_block * sb)
>  	sb->s_snapshot_list = ext2fs_swab32(sb->s_snapshot_list);
>  	sb->s_usr_quota_inum = ext2fs_swab32(sb->s_usr_quota_inum);
>  	sb->s_grp_quota_inum = ext2fs_swab32(sb->s_grp_quota_inum);
> +	sb->s_overhead_blocks = ext2fs_swab32(sb->s_overhead_blocks);
> +	sb->s_checksum = ext2fs_swab32(sb->s_checksum);
> 
>  	for (i=0; i < 4; i++)
>  		sb->s_hash_seed[i] = ext2fs_swab32(sb->s_hash_seed[i]);
> @@ -106,6 +108,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
>  	gdp->bg_free_inodes_count = ext2fs_swab16(gdp->bg_free_inodes_count);
>  	gdp->bg_used_dirs_count = ext2fs_swab16(gdp->bg_used_dirs_count);
>  	gdp->bg_flags = ext2fs_swab16(gdp->bg_flags);
> +	gdp->bg_exclude_bitmap_lo = ext2fs_swab32(gdp->bg_exclude_bitmap_lo);
> +	gdp->bg_block_bitmap_csum_lo =
> +		ext2fs_swab16(gdp->bg_block_bitmap_csum_lo);
> +	gdp->bg_inode_bitmap_csum_lo =
> +		ext2fs_swab16(gdp->bg_inode_bitmap_csum_lo);
>  	gdp->bg_itable_unused = ext2fs_swab16(gdp->bg_itable_unused);
>  	gdp->bg_checksum = ext2fs_swab16(gdp->bg_checksum);
>  	/* If we're 32-bit, we're done */
> @@ -125,6 +132,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
>  	gdp4->bg_used_dirs_count_hi =
>  		ext2fs_swab16(gdp4->bg_used_dirs_count_hi);
>  	gdp4->bg_itable_unused_hi = ext2fs_swab16(gdp4->bg_itable_unused_hi);
> +	gdp->bg_exclude_bitmap_hi = ext2fs_swab16(gdp->bg_exclude_bitmap_hi);
> +	gdp->bg_block_bitmap_csum_hi =
> +		ext2fs_swab16(gdp->bg_block_bitmap_csum_hi);
> +	gdp->bg_inode_bitmap_csum_hi =
> +		ext2fs_swab16(gdp->bg_inode_bitmap_csum_hi);
>  }
> 
>  void ext2fs_swap_group_desc(struct ext2_group_desc *gdp)
> @@ -244,8 +256,8 @@ void ext2fs_swap_inode_full(ext2_filsys fs, struct ext2_inode_large *t,
>  		  ext2fs_swab16 (f->osd2.linux2.l_i_uid_high);
>  		t->osd2.linux2.l_i_gid_high =
>  		  ext2fs_swab16 (f->osd2.linux2.l_i_gid_high);
> -		t->osd2.linux2.l_i_reserved2 =
> -			ext2fs_swab32(f->osd2.linux2.l_i_reserved2);
> +		t->osd2.linux2.l_i_checksum =
> +			ext2fs_swab32(f->osd2.linux2.checksum);
>  		break;
>  	case EXT2_OS_HURD:
>  		t->osd1.hurd1.h_i_translator =
> diff --git a/lib/ext2fs/tst_inode_size.c b/lib/ext2fs/tst_inode_size.c
> index 962f1cd..a4f6247 100644
> --- a/lib/ext2fs/tst_inode_size.c
> +++ b/lib/ext2fs/tst_inode_size.c
> @@ -61,7 +61,8 @@ void check_structure_fields()
>  	check_field(osd2.linux2.l_i_file_acl_high);
>  	check_field(osd2.linux2.l_i_uid_high);
>  	check_field(osd2.linux2.l_i_gid_high);
> -	check_field(osd2.linux2.l_i_reserved2);
> +	check_field(osd2.linux2.l_i_checksum_lo);
> +	check_field(osd2.linux2.l_i_reserved);
>  	printf("Ending offset is %d\n\n", cur_offset);
>  #endif
>  }
> diff --git a/lib/ext2fs/tst_super_size.c b/lib/ext2fs/tst_super_size.c
> index 1e5a524..75659ae 100644
> --- a/lib/ext2fs/tst_super_size.c
> +++ b/lib/ext2fs/tst_super_size.c
> @@ -126,6 +126,7 @@ void check_superblock_fields()
>  	check_field(s_usr_quota_inum);
>  	check_field(s_grp_quota_inum);
>  	check_field(s_overhead_blocks);
> +	check_field(s_checksum);
>  	check_field(s_reserved);
>  	printf("Ending offset is %d\n\n", cur_offset);
>  #endif
> -- 
> 1.7.4.1.22.gec8e1.dirty
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein Sept. 21, 2011, 8:03 a.m. UTC | #13
On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <djwong@us.ibm.com> wrote:
> On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
>> Reserve EXT4_FEATURE_RO_COMPAT_METADATA_CSUM and
>> EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP.  Also reserve fields in the
>> superblock and the inode for the checksums.  In the block group
>> descriptor, reserve the exclude bitmap field for the snapshot feature,
>> and checksums for the inode and block allocation bitmaps.
>>
>> With this commit, the metadata checksum and exclude bitmap features
>> should have reserved all of the fields they need in ext4's on-disk
>> format.
>>
>> This commit also fixes an a missing byte swap for s_overhead_blocks.
>>
>> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
>> Cc: Darrick J. Wong <djwong@us.ibm.com>
>> Cc: Amir Goldstein <amir73il@gmail.com>
>> ---
>>  debugfs/set_fields.c        |    3 ++-
>>  e2fsck/pass1.c              |    4 ++--
>>  lib/e2p/feature.c           |    4 ++++
>>  lib/e2p/ls.c                |    4 ++++
>>  lib/ext2fs/ext2_fs.h        |   31 ++++++++++++++++++++++---------
>>  lib/ext2fs/swapfs.c         |   16 ++++++++++++++--
>>  lib/ext2fs/tst_inode_size.c |    3 ++-
>>  lib/ext2fs/tst_super_size.c |    1 +
>>  8 files changed, 51 insertions(+), 15 deletions(-)
>>
>> diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
>> index ac6bc25..1a5ec7f 100644
>> --- a/debugfs/set_fields.c
>> +++ b/debugfs/set_fields.c
>> @@ -144,6 +144,7 @@ static struct field_set_info super_fields[] = {
>>       { "usr_quota_inum", &set_sb.s_usr_quota_inum, 4, parse_uint },
>>       { "grp_quota_inum", &set_sb.s_grp_quota_inum, 4, parse_uint },
>>       { "overhead_blocks", &set_sb.s_overhead_blocks, 4, parse_uint },
>> +     { "checksum", &set_sb.s_checksum, 4, parse_uint },
>>       { 0, 0, 0, 0 }
>>  };
>>
>> @@ -179,6 +180,7 @@ static struct field_set_info inode_fields[] = {
>>       { "fsize", &set_inode.osd2.hurd2.h_i_fsize, 1, parse_uint },
>>       { "uid_high", &set_inode.osd2.linux2.l_i_uid_high, 2, parse_uint },
>>       { "gid_high", &set_inode.osd2.linux2.l_i_gid_high, 2, parse_uint },
>> +     { "checksum", &set_inode.osd2.linux2.l_i_checksum_lo, 2, parse_uint },
>>       { "author", &set_inode.osd2.hurd2.h_i_author, 4, parse_uint },
>>       { "bmap", NULL, 4, parse_bmap, FLAG_ARRAY },
>>       { 0, 0, 0, 0 }
>> @@ -192,7 +194,6 @@ static struct field_set_info ext2_bg_fields[] = {
>>       { "free_inodes_count", &set_gd.bg_free_inodes_count, 2, parse_uint },
>>       { "used_dirs_count", &set_gd.bg_used_dirs_count, 2, parse_uint },
>>       { "flags", &set_gd.bg_flags, 2, parse_uint },
>> -     { "reserved", &set_gd.bg_reserved, 2, parse_uint, FLAG_ARRAY, 2 },
>>       { "itable_unused", &set_gd.bg_itable_unused, 2, parse_uint },
>>       { "checksum", &set_gd.bg_checksum, 2, parse_gd_csum },
>>       { 0, 0, 0, 0 }
>> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
>> index dd18ade..16ddcda 100644
>> --- a/e2fsck/pass1.c
>> +++ b/e2fsck/pass1.c
>> @@ -364,8 +364,8 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
>>       printf("inode #%u, i_extra_size %d\n", pctx->ino,
>>                       inode->i_extra_isize);
>>  #endif
>> -     /* i_extra_isize must cover i_extra_isize + i_pad1 at least */
>> -     min = sizeof(inode->i_extra_isize) + sizeof(inode->i_pad1);
>> +     /* i_extra_isize must cover i_extra_isize + i_checksum_hi at least */
>> +     min = sizeof(inode->i_extra_isize) + sizeof(inode->i_checksum_hi);
>>       max = EXT2_INODE_SIZE(sb) - EXT2_GOOD_OLD_INODE_SIZE;
>>       /*
>>        * For now we will allow i_extra_isize to be 0, but really
>> diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
>> index 16fba53..965fc16 100644
>> --- a/lib/e2p/feature.c
>> +++ b/lib/e2p/feature.c
>> @@ -40,6 +40,8 @@ static struct feature feature_list[] = {
>>                       "resize_inode" },
>>       {       E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_LAZY_BG,
>>                       "lazy_bg" },
>> +     {       E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP,
>> +                     "snapshot" },
>>
>>       {       E2P_FEATURE_RO_INCOMPAT, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER,
>>                       "sparse_super" },
>> @@ -59,6 +61,8 @@ static struct feature feature_list[] = {
>>                       "quota" },
>>       {       E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_BIGALLOC,
>>                       "bigalloc"},
>> +     {       E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM,
>> +                     "metadata_csum"},
>>
>>       {       E2P_FEATURE_INCOMPAT, EXT2_FEATURE_INCOMPAT_COMPRESSION,
>>                       "compression" },
>> diff --git a/lib/e2p/ls.c b/lib/e2p/ls.c
>> index 0f36f40..aaacdaa 100644
>> --- a/lib/e2p/ls.c
>> +++ b/lib/e2p/ls.c
>> @@ -413,6 +413,10 @@ void list_super2(struct ext2_super_block * sb, FILE *f)
>>       if (sb->s_grp_quota_inum)
>>               fprintf(f, "Group quota inode:        %u\n",
>>                       sb->s_grp_quota_inum);
>> +
>> +     if (sb->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
>> +             fprintf(f, "Checksum:                 0x%08x\n",
>> +                     sb->s_checksum);
>>  }
>>
>>  void list_super (struct ext2_super_block * s)
>> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
>> index 4fec5db..1c86cb2 100644
>> --- a/lib/ext2fs/ext2_fs.h
>> +++ b/lib/ext2fs/ext2_fs.h
>> @@ -142,7 +142,9 @@ struct ext2_group_desc
>>       __u16   bg_free_inodes_count;   /* Free inodes count */
>>       __u16   bg_used_dirs_count;     /* Directories count */
>>       __u16   bg_flags;
>> -     __u32   bg_reserved[2];
>> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
>> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>>       __u16   bg_itable_unused;       /* Unused inodes count */
>>       __u16   bg_checksum;            /* crc16(s_uuid+grouo_num+group_desc)*/
>>  };
>> @@ -159,7 +161,9 @@ struct ext4_group_desc
>>       __u16   bg_free_inodes_count;   /* Free inodes count */
>>       __u16   bg_used_dirs_count;     /* Directories count */
>>       __u16   bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
>> -     __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
>> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
>> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>>       __u16   bg_itable_unused;       /* Unused inodes count */
>>       __u16   bg_checksum;            /* crc16(sb_uuid+group+desc) */
>>       __u32   bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
>> @@ -169,7 +173,10 @@ struct ext4_group_desc
>>       __u16   bg_free_inodes_count_hi;/* Free inodes count MSB */
>>       __u16   bg_used_dirs_count_hi;  /* Directories count MSB */
>>       __u16   bg_itable_unused_hi;    /* Unused inodes count MSB */
>> -     __u32   bg_reserved2[3];
>> +     __u32   bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
>> +     __u16   bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> +     __u16   bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> +     __u32   bg_reserved;
>
> Should we attach a checksum to the exclude bitmap?  That would, unfortunately,
> put the 64-byte block group pretty close to full, unless we're ok with making
> the bg even bigger...

Good point.
My initial approach (which I probably expressed somewhere) was that when
exclude_bitmap feature is set, the block bitmap checksum should cover both
block bitmap and exclude bitmap.
The reason being that exclude bitmap adds little entropy over block bitmap:
- it should always be a sub set of block bitmap bits
- clearing exclude bit is done together with clearing the block used bit
- the only case of modifying exclude bitmap only is when a used block becomes
  excluded (a.k.a moved to snapshot)

so how do you feel about compressing 2 blocks into 16bits of checksum :-/?

>
> --D
>>  };
>>
>>  #define EXT2_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not initialized */
>> @@ -363,7 +370,8 @@ struct ext2_inode {
>>                       __u16   l_i_file_acl_high;
>>                       __u16   l_i_uid_high;   /* these 2 fields    */
>>                       __u16   l_i_gid_high;   /* were reserved2[0] */
>> -                     __u32   l_i_reserved2;
>> +                     __u16   l_i_checksum_lo;        /* crc32c(uuid+inum+inode) */
>> +                     __u16   l_i_reserved;   /* crc32c(uuid+inum+inode) */
>>               } linux2;
>>               struct {
>>                       __u8    h_i_frag;       /* Fragment number */
>> @@ -410,7 +418,8 @@ struct ext2_inode_large {
>>                       __u16   l_i_file_acl_high;
>>                       __u16   l_i_uid_high;   /* these 2 fields    */
>>                       __u16   l_i_gid_high;   /* were reserved2[0] */
>> -                     __u32   l_i_reserved2;
>> +                     __u16   l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
>> +                     __u16   l_i_reserved;
>>               } linux2;
>>               struct {
>>                       __u8    h_i_frag;       /* Fragment number */
>> @@ -422,7 +431,7 @@ struct ext2_inode_large {
>>               } hurd2;
>>       } osd2;                         /* OS dependent 2 */
>>       __u16   i_extra_isize;
>> -     __u16   i_pad1;
>> +     __u16   i_checksum_hi;  /* crc32c(uuid+inum+inode) */
>>       __u32   i_ctime_extra;  /* extra Change time (nsec << 2 | epoch) */
>>       __u32   i_mtime_extra;  /* extra Modification time (nsec << 2 | epoch) */
>>       __u32   i_atime_extra;  /* extra Access time (nsec << 2 | epoch) */
>> @@ -441,7 +450,7 @@ struct ext2_inode_large {
>>  #define i_gid_low    i_gid
>>  #define i_uid_high   osd2.linux2.l_i_uid_high
>>  #define i_gid_high   osd2.linux2.l_i_gid_high
>> -#define i_reserved2  osd2.linux2.l_i_reserved2
>> +#define i_checksum   osd2.linux2.l_i_checksum
>>  #else
>>  #if defined(__GNU__)
>>
>> @@ -623,7 +632,8 @@ struct ext2_super_block {
>>       __u32   s_usr_quota_inum;       /* inode number of user quota file */
>>       __u32   s_grp_quota_inum;       /* inode number of group quota file */
>>       __u32   s_overhead_blocks;      /* overhead blocks/clusters in fs */
>> -     __u32   s_reserved[109];        /* Padding to the end of the block */
>> +     __u32   s_checksum;             /* crc32c(superblock) */
>> +     __u32   s_reserved[108];        /* Padding to the end of the block */
>>  };
>>
>>  #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
>> @@ -671,7 +681,9 @@ struct ext2_super_block {
>>  #define EXT2_FEATURE_COMPAT_RESIZE_INODE     0x0010
>>  #define EXT2_FEATURE_COMPAT_DIR_INDEX                0x0020
>>  #define EXT2_FEATURE_COMPAT_LAZY_BG          0x0040
>> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE    0x0080
>> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE 0x0080 not used, legacy */
>> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP   0x0100
>> +
>>
>>  #define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER  0x0001
>>  #define EXT2_FEATURE_RO_COMPAT_LARGE_FILE    0x0002
>> @@ -683,6 +695,7 @@ struct ext2_super_block {
>>  #define EXT4_FEATURE_RO_COMPAT_HAS_SNAPSHOT  0x0080
>>  #define EXT4_FEATURE_RO_COMPAT_QUOTA         0x0100
>>  #define EXT4_FEATURE_RO_COMPAT_BIGALLOC              0x0200
>> +#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
>>
>>  #define EXT2_FEATURE_INCOMPAT_COMPRESSION    0x0001
>>  #define EXT2_FEATURE_INCOMPAT_FILETYPE               0x0002
>> diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
>> index 87b1a2e..d1c4a56 100644
>> --- a/lib/ext2fs/swapfs.c
>> +++ b/lib/ext2fs/swapfs.c
>> @@ -78,6 +78,8 @@ void ext2fs_swap_super(struct ext2_super_block * sb)
>>       sb->s_snapshot_list = ext2fs_swab32(sb->s_snapshot_list);
>>       sb->s_usr_quota_inum = ext2fs_swab32(sb->s_usr_quota_inum);
>>       sb->s_grp_quota_inum = ext2fs_swab32(sb->s_grp_quota_inum);
>> +     sb->s_overhead_blocks = ext2fs_swab32(sb->s_overhead_blocks);
>> +     sb->s_checksum = ext2fs_swab32(sb->s_checksum);
>>
>>       for (i=0; i < 4; i++)
>>               sb->s_hash_seed[i] = ext2fs_swab32(sb->s_hash_seed[i]);
>> @@ -106,6 +108,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
>>       gdp->bg_free_inodes_count = ext2fs_swab16(gdp->bg_free_inodes_count);
>>       gdp->bg_used_dirs_count = ext2fs_swab16(gdp->bg_used_dirs_count);
>>       gdp->bg_flags = ext2fs_swab16(gdp->bg_flags);
>> +     gdp->bg_exclude_bitmap_lo = ext2fs_swab32(gdp->bg_exclude_bitmap_lo);
>> +     gdp->bg_block_bitmap_csum_lo =
>> +             ext2fs_swab16(gdp->bg_block_bitmap_csum_lo);
>> +     gdp->bg_inode_bitmap_csum_lo =
>> +             ext2fs_swab16(gdp->bg_inode_bitmap_csum_lo);
>>       gdp->bg_itable_unused = ext2fs_swab16(gdp->bg_itable_unused);
>>       gdp->bg_checksum = ext2fs_swab16(gdp->bg_checksum);
>>       /* If we're 32-bit, we're done */
>> @@ -125,6 +132,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
>>       gdp4->bg_used_dirs_count_hi =
>>               ext2fs_swab16(gdp4->bg_used_dirs_count_hi);
>>       gdp4->bg_itable_unused_hi = ext2fs_swab16(gdp4->bg_itable_unused_hi);
>> +     gdp->bg_exclude_bitmap_hi = ext2fs_swab16(gdp->bg_exclude_bitmap_hi);
>> +     gdp->bg_block_bitmap_csum_hi =
>> +             ext2fs_swab16(gdp->bg_block_bitmap_csum_hi);
>> +     gdp->bg_inode_bitmap_csum_hi =
>> +             ext2fs_swab16(gdp->bg_inode_bitmap_csum_hi);
>>  }
>>
>>  void ext2fs_swap_group_desc(struct ext2_group_desc *gdp)
>> @@ -244,8 +256,8 @@ void ext2fs_swap_inode_full(ext2_filsys fs, struct ext2_inode_large *t,
>>                 ext2fs_swab16 (f->osd2.linux2.l_i_uid_high);
>>               t->osd2.linux2.l_i_gid_high =
>>                 ext2fs_swab16 (f->osd2.linux2.l_i_gid_high);
>> -             t->osd2.linux2.l_i_reserved2 =
>> -                     ext2fs_swab32(f->osd2.linux2.l_i_reserved2);
>> +             t->osd2.linux2.l_i_checksum =
>> +                     ext2fs_swab32(f->osd2.linux2.checksum);
>>               break;
>>       case EXT2_OS_HURD:
>>               t->osd1.hurd1.h_i_translator =
>> diff --git a/lib/ext2fs/tst_inode_size.c b/lib/ext2fs/tst_inode_size.c
>> index 962f1cd..a4f6247 100644
>> --- a/lib/ext2fs/tst_inode_size.c
>> +++ b/lib/ext2fs/tst_inode_size.c
>> @@ -61,7 +61,8 @@ void check_structure_fields()
>>       check_field(osd2.linux2.l_i_file_acl_high);
>>       check_field(osd2.linux2.l_i_uid_high);
>>       check_field(osd2.linux2.l_i_gid_high);
>> -     check_field(osd2.linux2.l_i_reserved2);
>> +     check_field(osd2.linux2.l_i_checksum_lo);
>> +     check_field(osd2.linux2.l_i_reserved);
>>       printf("Ending offset is %d\n\n", cur_offset);
>>  #endif
>>  }
>> diff --git a/lib/ext2fs/tst_super_size.c b/lib/ext2fs/tst_super_size.c
>> index 1e5a524..75659ae 100644
>> --- a/lib/ext2fs/tst_super_size.c
>> +++ b/lib/ext2fs/tst_super_size.c
>> @@ -126,6 +126,7 @@ void check_superblock_fields()
>>       check_field(s_usr_quota_inum);
>>       check_field(s_grp_quota_inum);
>>       check_field(s_overhead_blocks);
>> +     check_field(s_checksum);
>>       check_field(s_reserved);
>>       printf("Ending offset is %d\n\n", cur_offset);
>>  #endif
>> --
>> 1.7.4.1.22.gec8e1.dirty
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Sept. 21, 2011, 8:40 p.m. UTC | #14
On Wed, Sep 21, 2011 at 11:03:57AM +0300, Amir Goldstein wrote:
> On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <djwong@us.ibm.com> wrote:
> > On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
<snip>
> >> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> >> index 4fec5db..1c86cb2 100644
> >> --- a/lib/ext2fs/ext2_fs.h
> >> +++ b/lib/ext2fs/ext2_fs.h
> >> @@ -142,7 +142,9 @@ struct ext2_group_desc
> >>       __u16   bg_free_inodes_count;   /* Free inodes count */
> >>       __u16   bg_used_dirs_count;     /* Directories count */
> >>       __u16   bg_flags;
> >> -     __u32   bg_reserved[2];
> >> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
> >> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >>       __u16   bg_itable_unused;       /* Unused inodes count */
> >>       __u16   bg_checksum;            /* crc16(s_uuid+grouo_num+group_desc)*/
> >>  };
> >> @@ -159,7 +161,9 @@ struct ext4_group_desc
> >>       __u16   bg_free_inodes_count;   /* Free inodes count */
> >>       __u16   bg_used_dirs_count;     /* Directories count */
> >>       __u16   bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
> >> -     __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
> >> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
> >> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >>       __u16   bg_itable_unused;       /* Unused inodes count */
> >>       __u16   bg_checksum;            /* crc16(sb_uuid+group+desc) */
> >>       __u32   bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
> >> @@ -169,7 +173,10 @@ struct ext4_group_desc
> >>       __u16   bg_free_inodes_count_hi;/* Free inodes count MSB */
> >>       __u16   bg_used_dirs_count_hi;  /* Directories count MSB */
> >>       __u16   bg_itable_unused_hi;    /* Unused inodes count MSB */
> >> -     __u32   bg_reserved2[3];
> >> +     __u32   bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
> >> +     __u16   bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> >> +     __u16   bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> >> +     __u32   bg_reserved;
> >
> > Should we attach a checksum to the exclude bitmap?  That would, unfortunately,
> > put the 64-byte block group pretty close to full, unless we're ok with making
> > the bg even bigger...
> 
> Good point.
> My initial approach (which I probably expressed somewhere) was that when
> exclude_bitmap feature is set, the block bitmap checksum should cover both
> block bitmap and exclude bitmap.
> The reason being that exclude bitmap adds little entropy over block bitmap:
> - it should always be a sub set of block bitmap bits
> - clearing exclude bit is done together with clearing the block used bit
> - the only case of modifying exclude bitmap only is when a used block becomes
>   excluded (a.k.a moved to snapshot)
> 
> so how do you feel about compressing 2 blocks into 16bits of checksum :-/?

Hrm.... is it generally the case that both block and exclude bitmaps are loaded
at the same time?  I don't think it'd be difficult to checksum both blocks
unless it's common that one of the two are not in memory.

--D

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein Sept. 22, 2011, 2:12 a.m. UTC | #15
On Wed, Sep 21, 2011 at 11:40 PM, Darrick J. Wong <djwong@us.ibm.com> wrote:
> On Wed, Sep 21, 2011 at 11:03:57AM +0300, Amir Goldstein wrote:
>> On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <djwong@us.ibm.com> wrote:
>> > On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
> <snip>
>> >> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
>> >> index 4fec5db..1c86cb2 100644
>> >> --- a/lib/ext2fs/ext2_fs.h
>> >> +++ b/lib/ext2fs/ext2_fs.h
>> >> @@ -142,7 +142,9 @@ struct ext2_group_desc
>> >>       __u16   bg_free_inodes_count;   /* Free inodes count */
>> >>       __u16   bg_used_dirs_count;     /* Directories count */
>> >>       __u16   bg_flags;
>> >> -     __u32   bg_reserved[2];
>> >> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
>> >> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >>       __u16   bg_itable_unused;       /* Unused inodes count */
>> >>       __u16   bg_checksum;            /* crc16(s_uuid+grouo_num+group_desc)*/
>> >>  };
>> >> @@ -159,7 +161,9 @@ struct ext4_group_desc
>> >>       __u16   bg_free_inodes_count;   /* Free inodes count */
>> >>       __u16   bg_used_dirs_count;     /* Directories count */
>> >>       __u16   bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
>> >> -     __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
>> >> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
>> >> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >>       __u16   bg_itable_unused;       /* Unused inodes count */
>> >>       __u16   bg_checksum;            /* crc16(sb_uuid+group+desc) */
>> >>       __u32   bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
>> >> @@ -169,7 +173,10 @@ struct ext4_group_desc
>> >>       __u16   bg_free_inodes_count_hi;/* Free inodes count MSB */
>> >>       __u16   bg_used_dirs_count_hi;  /* Directories count MSB */
>> >>       __u16   bg_itable_unused_hi;    /* Unused inodes count MSB */
>> >> -     __u32   bg_reserved2[3];
>> >> +     __u32   bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
>> >> +     __u16   bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> >> +     __u16   bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> >> +     __u32   bg_reserved;
>> >
>> > Should we attach a checksum to the exclude bitmap?  That would, unfortunately,
>> > put the 64-byte block group pretty close to full, unless we're ok with making
>> > the bg even bigger...
>>
>> Good point.
>> My initial approach (which I probably expressed somewhere) was that when
>> exclude_bitmap feature is set, the block bitmap checksum should cover both
>> block bitmap and exclude bitmap.
>> The reason being that exclude bitmap adds little entropy over block bitmap:
>> - it should always be a sub set of block bitmap bits
>> - clearing exclude bit is done together with clearing the block used bit
>> - the only case of modifying exclude bitmap only is when a used block becomes
>>   excluded (a.k.a moved to snapshot)
>>
>> so how do you feel about compressing 2 blocks into 16bits of checksum :-/?
>
> Hrm.... is it generally the case that both block and exclude bitmaps are loaded
> at the same time?  I don't think it'd be difficult to checksum both blocks
> unless it's common that one of the two are not in memory.
>

Hrm indeed. no. exclude bitmap is currently not loaded always together with
block bitmap, so unless this is changed, we'll have to think of something else.
However that brings me to wonder:
block/inode/exclude bitmap seldom change more than a handful of bits/words
at a time.
Does it make sense to diff update the CRC of the bitmaps instead of
recomputing it?

Amir.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Sept. 22, 2011, 7:18 p.m. UTC | #16
On Thu, Sep 22, 2011 at 05:12:32AM +0300, Amir Goldstein wrote:
> On Wed, Sep 21, 2011 at 11:40 PM, Darrick J. Wong <djwong@us.ibm.com> wrote:
> > On Wed, Sep 21, 2011 at 11:03:57AM +0300, Amir Goldstein wrote:
> >> On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <djwong@us.ibm.com> wrote:
> >> > On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
> > <snip>
> >> >> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> >> >> index 4fec5db..1c86cb2 100644
> >> >> --- a/lib/ext2fs/ext2_fs.h
> >> >> +++ b/lib/ext2fs/ext2_fs.h
> >> >> @@ -142,7 +142,9 @@ struct ext2_group_desc
> >> >>       __u16   bg_free_inodes_count;   /* Free inodes count */
> >> >>       __u16   bg_used_dirs_count;     /* Directories count */
> >> >>       __u16   bg_flags;
> >> >> -     __u32   bg_reserved[2];
> >> >> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
> >> >> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> >> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> >>       __u16   bg_itable_unused;       /* Unused inodes count */
> >> >>       __u16   bg_checksum;            /* crc16(s_uuid+grouo_num+group_desc)*/
> >> >>  };
> >> >> @@ -159,7 +161,9 @@ struct ext4_group_desc
> >> >>       __u16   bg_free_inodes_count;   /* Free inodes count */
> >> >>       __u16   bg_used_dirs_count;     /* Directories count */
> >> >>       __u16   bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
> >> >> -     __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
> >> >> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
> >> >> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> >> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> >>       __u16   bg_itable_unused;       /* Unused inodes count */
> >> >>       __u16   bg_checksum;            /* crc16(sb_uuid+group+desc) */
> >> >>       __u32   bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
> >> >> @@ -169,7 +173,10 @@ struct ext4_group_desc
> >> >>       __u16   bg_free_inodes_count_hi;/* Free inodes count MSB */
> >> >>       __u16   bg_used_dirs_count_hi;  /* Directories count MSB */
> >> >>       __u16   bg_itable_unused_hi;    /* Unused inodes count MSB */
> >> >> -     __u32   bg_reserved2[3];
> >> >> +     __u32   bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
> >> >> +     __u16   bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> >> >> +     __u16   bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> >> >> +     __u32   bg_reserved;
> >> >
> >> > Should we attach a checksum to the exclude bitmap?  That would, unfortunately,
> >> > put the 64-byte block group pretty close to full, unless we're ok with making
> >> > the bg even bigger...
> >>
> >> Good point.
> >> My initial approach (which I probably expressed somewhere) was that when
> >> exclude_bitmap feature is set, the block bitmap checksum should cover both
> >> block bitmap and exclude bitmap.
> >> The reason being that exclude bitmap adds little entropy over block bitmap:
> >> - it should always be a sub set of block bitmap bits
> >> - clearing exclude bit is done together with clearing the block used bit
> >> - the only case of modifying exclude bitmap only is when a used block becomes
> >>   excluded (a.k.a moved to snapshot)
> >>
> >> so how do you feel about compressing 2 blocks into 16bits of checksum :-/?
> >
> > Hrm.... is it generally the case that both block and exclude bitmaps are loaded
> > at the same time?  I don't think it'd be difficult to checksum both blocks
> > unless it's common that one of the two are not in memory.
> >
> 
> Hrm indeed. no. exclude bitmap is currently not loaded always together with
> block bitmap, so unless this is changed, we'll have to think of something else.

Or... does it make sense to preload the exclude bitmap at the same time as the
block bitmap?  If I'm reading you correctly, (block_bitmap | exclude_bitmap)
yields a bitmap of all blocks that are not available, correct?  So in the case
that exclude bitmaps are enabled, you'd need both to be in memory anyway.

> However that brings me to wonder:
> block/inode/exclude bitmap seldom change more than a handful of bits/words
> at a time.
> Does it make sense to diff update the CRC of the bitmaps instead of
> recomputing it?

Yes, that would be a good (later) optimization at least to consider.

--D
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein Sept. 23, 2011, 5:57 a.m. UTC | #17
On Thu, Sep 22, 2011 at 10:18 PM, Darrick J. Wong <djwong@us.ibm.com> wrote:
> On Thu, Sep 22, 2011 at 05:12:32AM +0300, Amir Goldstein wrote:
>> On Wed, Sep 21, 2011 at 11:40 PM, Darrick J. Wong <djwong@us.ibm.com> wrote:
>> > On Wed, Sep 21, 2011 at 11:03:57AM +0300, Amir Goldstein wrote:
>> >> On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <djwong@us.ibm.com> wrote:
>> >> > On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
>> > <snip>
>> >> >> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
>> >> >> index 4fec5db..1c86cb2 100644
>> >> >> --- a/lib/ext2fs/ext2_fs.h
>> >> >> +++ b/lib/ext2fs/ext2_fs.h
>> >> >> @@ -142,7 +142,9 @@ struct ext2_group_desc
>> >> >>       __u16   bg_free_inodes_count;   /* Free inodes count */
>> >> >>       __u16   bg_used_dirs_count;     /* Directories count */
>> >> >>       __u16   bg_flags;
>> >> >> -     __u32   bg_reserved[2];
>> >> >> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
>> >> >> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> >> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> >>       __u16   bg_itable_unused;       /* Unused inodes count */
>> >> >>       __u16   bg_checksum;            /* crc16(s_uuid+grouo_num+group_desc)*/
>> >> >>  };
>> >> >> @@ -159,7 +161,9 @@ struct ext4_group_desc
>> >> >>       __u16   bg_free_inodes_count;   /* Free inodes count */
>> >> >>       __u16   bg_used_dirs_count;     /* Directories count */
>> >> >>       __u16   bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
>> >> >> -     __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
>> >> >> +     __u32   bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
>> >> >> +     __u16   bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> >> +     __u16   bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> >>       __u16   bg_itable_unused;       /* Unused inodes count */
>> >> >>       __u16   bg_checksum;            /* crc16(sb_uuid+group+desc) */
>> >> >>       __u32   bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
>> >> >> @@ -169,7 +173,10 @@ struct ext4_group_desc
>> >> >>       __u16   bg_free_inodes_count_hi;/* Free inodes count MSB */
>> >> >>       __u16   bg_used_dirs_count_hi;  /* Directories count MSB */
>> >> >>       __u16   bg_itable_unused_hi;    /* Unused inodes count MSB */
>> >> >> -     __u32   bg_reserved2[3];
>> >> >> +     __u32   bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
>> >> >> +     __u16   bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> >> >> +     __u16   bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> >> >> +     __u32   bg_reserved;
>> >> >
>> >> > Should we attach a checksum to the exclude bitmap?  That would, unfortunately,
>> >> > put the 64-byte block group pretty close to full, unless we're ok with making
>> >> > the bg even bigger...
>> >>
>> >> Good point.
>> >> My initial approach (which I probably expressed somewhere) was that when
>> >> exclude_bitmap feature is set, the block bitmap checksum should cover both
>> >> block bitmap and exclude bitmap.
>> >> The reason being that exclude bitmap adds little entropy over block bitmap:
>> >> - it should always be a sub set of block bitmap bits
>> >> - clearing exclude bit is done together with clearing the block used bit
>> >> - the only case of modifying exclude bitmap only is when a used block becomes
>> >>   excluded (a.k.a moved to snapshot)
>> >>
>> >> so how do you feel about compressing 2 blocks into 16bits of checksum :-/?
>> >
>> > Hrm.... is it generally the case that both block and exclude bitmaps are loaded
>> > at the same time?  I don't think it'd be difficult to checksum both blocks
>> > unless it's common that one of the two are not in memory.
>> >
>>
>> Hrm indeed. no. exclude bitmap is currently not loaded always together with
>> block bitmap, so unless this is changed, we'll have to think of something else.
>
> Or... does it make sense to preload the exclude bitmap at the same time as the
> block bitmap?

it wouldn't be terrible to do that, since exclude bitmap will normally
be located
right next to block bitmap, but if memory pressure is high, that would
lead to half
the number of block bitmaps in memory at any given time.

> If I'm reading you correctly, (block_bitmap | exclude_bitmap)
> yields a bitmap of all blocks that are not available, correct?

not exactly. if all is sane, then
(block_bitmap | exclude_bitmap) == block_bitmap
(block_bitmap & exclude_bitmap) == exclude_bitmap

> So in the case
> that exclude bitmaps are enabled, you'd need both to be in memory anyway.
>

exclude_bitmap is loaded into memory in the following cases:
1. allocating snapshot inode blocks (during COW)
2. deleting snapshot inode blocks (snapshot delete)
3. moving "deleted" blocks to snapshot (MOW)

specifically, exclude_bitmap is NOT loaded into memory in the following cases:
1. allocating regular inode blocks
2. deleting regular inode blocks, which were allocated after last snapshot

For example, in kernel compilation benchmark, all temp files
are created and deleted without modifying exclude bitmap (or snapshots).

>> However that brings me to wonder:
>> block/inode/exclude bitmap seldom change more than a handful of bits/words
>> at a time.
>> Does it make sense to diff update the CRC of the bitmaps instead of
>> recomputing it?
>
> Yes, that would be a good (later) optimization at least to consider.
>
> --D
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
index ac6bc25..1a5ec7f 100644
--- a/debugfs/set_fields.c
+++ b/debugfs/set_fields.c
@@ -144,6 +144,7 @@  static struct field_set_info super_fields[] = {
 	{ "usr_quota_inum", &set_sb.s_usr_quota_inum, 4, parse_uint },
 	{ "grp_quota_inum", &set_sb.s_grp_quota_inum, 4, parse_uint },
 	{ "overhead_blocks", &set_sb.s_overhead_blocks, 4, parse_uint },
+	{ "checksum", &set_sb.s_checksum, 4, parse_uint },
 	{ 0, 0, 0, 0 }
 };
 
@@ -179,6 +180,7 @@  static struct field_set_info inode_fields[] = {
 	{ "fsize", &set_inode.osd2.hurd2.h_i_fsize, 1, parse_uint },
 	{ "uid_high", &set_inode.osd2.linux2.l_i_uid_high, 2, parse_uint },
 	{ "gid_high", &set_inode.osd2.linux2.l_i_gid_high, 2, parse_uint },
+	{ "checksum", &set_inode.osd2.linux2.l_i_checksum_lo, 2, parse_uint },
 	{ "author", &set_inode.osd2.hurd2.h_i_author, 4, parse_uint },
 	{ "bmap", NULL, 4, parse_bmap, FLAG_ARRAY },
 	{ 0, 0, 0, 0 }
@@ -192,7 +194,6 @@  static struct field_set_info ext2_bg_fields[] = {
 	{ "free_inodes_count", &set_gd.bg_free_inodes_count, 2, parse_uint },
 	{ "used_dirs_count", &set_gd.bg_used_dirs_count, 2, parse_uint },
 	{ "flags", &set_gd.bg_flags, 2, parse_uint },
-	{ "reserved", &set_gd.bg_reserved, 2, parse_uint, FLAG_ARRAY, 2 },
 	{ "itable_unused", &set_gd.bg_itable_unused, 2, parse_uint },
 	{ "checksum", &set_gd.bg_checksum, 2, parse_gd_csum },
 	{ 0, 0, 0, 0 }
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index dd18ade..16ddcda 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -364,8 +364,8 @@  static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
 	printf("inode #%u, i_extra_size %d\n", pctx->ino,
 			inode->i_extra_isize);
 #endif
-	/* i_extra_isize must cover i_extra_isize + i_pad1 at least */
-	min = sizeof(inode->i_extra_isize) + sizeof(inode->i_pad1);
+	/* i_extra_isize must cover i_extra_isize + i_checksum_hi at least */
+	min = sizeof(inode->i_extra_isize) + sizeof(inode->i_checksum_hi);
 	max = EXT2_INODE_SIZE(sb) - EXT2_GOOD_OLD_INODE_SIZE;
 	/*
 	 * For now we will allow i_extra_isize to be 0, but really
diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
index 16fba53..965fc16 100644
--- a/lib/e2p/feature.c
+++ b/lib/e2p/feature.c
@@ -40,6 +40,8 @@  static struct feature feature_list[] = {
 			"resize_inode" },
 	{	E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_LAZY_BG,
 			"lazy_bg" },
+	{	E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP,
+			"snapshot" },
 
 	{	E2P_FEATURE_RO_INCOMPAT, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER,
 			"sparse_super" },
@@ -59,6 +61,8 @@  static struct feature feature_list[] = {
 			"quota" },
 	{	E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_BIGALLOC,
 			"bigalloc"},
+	{	E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM,
+			"metadata_csum"},
 
 	{	E2P_FEATURE_INCOMPAT, EXT2_FEATURE_INCOMPAT_COMPRESSION,
 			"compression" },
diff --git a/lib/e2p/ls.c b/lib/e2p/ls.c
index 0f36f40..aaacdaa 100644
--- a/lib/e2p/ls.c
+++ b/lib/e2p/ls.c
@@ -413,6 +413,10 @@  void list_super2(struct ext2_super_block * sb, FILE *f)
 	if (sb->s_grp_quota_inum)
 		fprintf(f, "Group quota inode:        %u\n",
 			sb->s_grp_quota_inum);
+
+	if (sb->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
+		fprintf(f, "Checksum:                 0x%08x\n",
+			sb->s_checksum);
 }
 
 void list_super (struct ext2_super_block * s)
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 4fec5db..1c86cb2 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -142,7 +142,9 @@  struct ext2_group_desc
 	__u16	bg_free_inodes_count;	/* Free inodes count */
 	__u16	bg_used_dirs_count;	/* Directories count */
 	__u16	bg_flags;
-	__u32	bg_reserved[2];
+	__u32	bg_exclude_bitmap_lo;	/* Exclude bitmap for snapshots */
+	__u16	bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
+	__u16	bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
 	__u16	bg_itable_unused;	/* Unused inodes count */
 	__u16	bg_checksum;		/* crc16(s_uuid+grouo_num+group_desc)*/
 };
@@ -159,7 +161,9 @@  struct ext4_group_desc
 	__u16	bg_free_inodes_count;	/* Free inodes count */
 	__u16	bg_used_dirs_count;	/* Directories count */
 	__u16	bg_flags;		/* EXT4_BG_flags (INODE_UNINIT, etc) */
-	__u32	bg_reserved[2];		/* Likely block/inode bitmap checksum */
+	__u32	bg_exclude_bitmap_lo;	/* Exclude bitmap for snapshots */
+	__u16	bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
+	__u16	bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
 	__u16	bg_itable_unused;	/* Unused inodes count */
 	__u16	bg_checksum;		/* crc16(sb_uuid+group+desc) */
 	__u32	bg_block_bitmap_hi;	/* Blocks bitmap block MSB */
@@ -169,7 +173,10 @@  struct ext4_group_desc
 	__u16	bg_free_inodes_count_hi;/* Free inodes count MSB */
 	__u16	bg_used_dirs_count_hi;	/* Directories count MSB */
 	__u16	bg_itable_unused_hi;	/* Unused inodes count MSB */
-	__u32	bg_reserved2[3];
+	__u32	bg_exclude_bitmap_hi;	/* Exclude bitmap block MSB */
+	__u16	bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
+	__u16	bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
+	__u32	bg_reserved;
 };
 
 #define EXT2_BG_INODE_UNINIT	0x0001 /* Inode table/bitmap not initialized */
@@ -363,7 +370,8 @@  struct ext2_inode {
 			__u16	l_i_file_acl_high;
 			__u16	l_i_uid_high;	/* these 2 fields    */
 			__u16	l_i_gid_high;	/* were reserved2[0] */
-			__u32	l_i_reserved2;
+			__u16	l_i_checksum_lo;	/* crc32c(uuid+inum+inode) */
+			__u16	l_i_reserved;	/* crc32c(uuid+inum+inode) */
 		} linux2;
 		struct {
 			__u8	h_i_frag;	/* Fragment number */
@@ -410,7 +418,8 @@  struct ext2_inode_large {
 			__u16	l_i_file_acl_high;
 			__u16	l_i_uid_high;	/* these 2 fields    */
 			__u16	l_i_gid_high;	/* were reserved2[0] */
-			__u32	l_i_reserved2;
+			__u16	l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
+			__u16	l_i_reserved;
 		} linux2;
 		struct {
 			__u8	h_i_frag;	/* Fragment number */
@@ -422,7 +431,7 @@  struct ext2_inode_large {
 		} hurd2;
 	} osd2;				/* OS dependent 2 */
 	__u16	i_extra_isize;
-	__u16	i_pad1;
+	__u16	i_checksum_hi;	/* crc32c(uuid+inum+inode) */
 	__u32	i_ctime_extra;	/* extra Change time (nsec << 2 | epoch) */
 	__u32	i_mtime_extra;	/* extra Modification time (nsec << 2 | epoch) */
 	__u32	i_atime_extra;	/* extra Access time (nsec << 2 | epoch) */
@@ -441,7 +450,7 @@  struct ext2_inode_large {
 #define i_gid_low	i_gid
 #define i_uid_high	osd2.linux2.l_i_uid_high
 #define i_gid_high	osd2.linux2.l_i_gid_high
-#define i_reserved2	osd2.linux2.l_i_reserved2
+#define i_checksum	osd2.linux2.l_i_checksum
 #else
 #if defined(__GNU__)
 
@@ -623,7 +632,8 @@  struct ext2_super_block {
 	__u32	s_usr_quota_inum;	/* inode number of user quota file */
 	__u32	s_grp_quota_inum;	/* inode number of group quota file */
 	__u32	s_overhead_blocks;	/* overhead blocks/clusters in fs */
-	__u32   s_reserved[109];        /* Padding to the end of the block */
+	__u32	s_checksum;		/* crc32c(superblock) */
+	__u32   s_reserved[108];        /* Padding to the end of the block */
 };
 
 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -671,7 +681,9 @@  struct ext2_super_block {
 #define EXT2_FEATURE_COMPAT_RESIZE_INODE	0x0010
 #define EXT2_FEATURE_COMPAT_DIR_INDEX		0x0020
 #define EXT2_FEATURE_COMPAT_LAZY_BG		0x0040
-#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE	0x0080
+/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE	0x0080 not used, legacy */
+#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP	0x0100
+
 
 #define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
 #define EXT2_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
@@ -683,6 +695,7 @@  struct ext2_super_block {
 #define EXT4_FEATURE_RO_COMPAT_HAS_SNAPSHOT	0x0080
 #define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC		0x0200
+#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM	0x0400
 
 #define EXT2_FEATURE_INCOMPAT_COMPRESSION	0x0001
 #define EXT2_FEATURE_INCOMPAT_FILETYPE		0x0002
diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
index 87b1a2e..d1c4a56 100644
--- a/lib/ext2fs/swapfs.c
+++ b/lib/ext2fs/swapfs.c
@@ -78,6 +78,8 @@  void ext2fs_swap_super(struct ext2_super_block * sb)
 	sb->s_snapshot_list = ext2fs_swab32(sb->s_snapshot_list);
 	sb->s_usr_quota_inum = ext2fs_swab32(sb->s_usr_quota_inum);
 	sb->s_grp_quota_inum = ext2fs_swab32(sb->s_grp_quota_inum);
+	sb->s_overhead_blocks = ext2fs_swab32(sb->s_overhead_blocks);
+	sb->s_checksum = ext2fs_swab32(sb->s_checksum);
 
 	for (i=0; i < 4; i++)
 		sb->s_hash_seed[i] = ext2fs_swab32(sb->s_hash_seed[i]);
@@ -106,6 +108,11 @@  void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
 	gdp->bg_free_inodes_count = ext2fs_swab16(gdp->bg_free_inodes_count);
 	gdp->bg_used_dirs_count = ext2fs_swab16(gdp->bg_used_dirs_count);
 	gdp->bg_flags = ext2fs_swab16(gdp->bg_flags);
+	gdp->bg_exclude_bitmap_lo = ext2fs_swab32(gdp->bg_exclude_bitmap_lo);
+	gdp->bg_block_bitmap_csum_lo =
+		ext2fs_swab16(gdp->bg_block_bitmap_csum_lo);
+	gdp->bg_inode_bitmap_csum_lo =
+		ext2fs_swab16(gdp->bg_inode_bitmap_csum_lo);
 	gdp->bg_itable_unused = ext2fs_swab16(gdp->bg_itable_unused);
 	gdp->bg_checksum = ext2fs_swab16(gdp->bg_checksum);
 	/* If we're 32-bit, we're done */
@@ -125,6 +132,11 @@  void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
 	gdp4->bg_used_dirs_count_hi =
 		ext2fs_swab16(gdp4->bg_used_dirs_count_hi);
 	gdp4->bg_itable_unused_hi = ext2fs_swab16(gdp4->bg_itable_unused_hi);
+	gdp->bg_exclude_bitmap_hi = ext2fs_swab16(gdp->bg_exclude_bitmap_hi);
+	gdp->bg_block_bitmap_csum_hi =
+		ext2fs_swab16(gdp->bg_block_bitmap_csum_hi);
+	gdp->bg_inode_bitmap_csum_hi =
+		ext2fs_swab16(gdp->bg_inode_bitmap_csum_hi);
 }
 
 void ext2fs_swap_group_desc(struct ext2_group_desc *gdp)
@@ -244,8 +256,8 @@  void ext2fs_swap_inode_full(ext2_filsys fs, struct ext2_inode_large *t,
 		  ext2fs_swab16 (f->osd2.linux2.l_i_uid_high);
 		t->osd2.linux2.l_i_gid_high =
 		  ext2fs_swab16 (f->osd2.linux2.l_i_gid_high);
-		t->osd2.linux2.l_i_reserved2 =
-			ext2fs_swab32(f->osd2.linux2.l_i_reserved2);
+		t->osd2.linux2.l_i_checksum =
+			ext2fs_swab32(f->osd2.linux2.checksum);
 		break;
 	case EXT2_OS_HURD:
 		t->osd1.hurd1.h_i_translator =
diff --git a/lib/ext2fs/tst_inode_size.c b/lib/ext2fs/tst_inode_size.c
index 962f1cd..a4f6247 100644
--- a/lib/ext2fs/tst_inode_size.c
+++ b/lib/ext2fs/tst_inode_size.c
@@ -61,7 +61,8 @@  void check_structure_fields()
 	check_field(osd2.linux2.l_i_file_acl_high);
 	check_field(osd2.linux2.l_i_uid_high);
 	check_field(osd2.linux2.l_i_gid_high);
-	check_field(osd2.linux2.l_i_reserved2);
+	check_field(osd2.linux2.l_i_checksum_lo);
+	check_field(osd2.linux2.l_i_reserved);
 	printf("Ending offset is %d\n\n", cur_offset);
 #endif
 }
diff --git a/lib/ext2fs/tst_super_size.c b/lib/ext2fs/tst_super_size.c
index 1e5a524..75659ae 100644
--- a/lib/ext2fs/tst_super_size.c
+++ b/lib/ext2fs/tst_super_size.c
@@ -126,6 +126,7 @@  void check_superblock_fields()
 	check_field(s_usr_quota_inum);
 	check_field(s_grp_quota_inum);
 	check_field(s_overhead_blocks);
+	check_field(s_checksum);
 	check_field(s_reserved);
 	printf("Ending offset is %d\n\n", cur_offset);
 #endif