diff mbox

libext2fs: reserve exclude bitmap fields in group descriptor

Message ID CAOQ4uxjydz_6Upm5AUErHct2J0VKSN3-d-XT+Jn=q9BJFWQtaw@mail.gmail.com
State Rejected, archived
Headers show

Commit Message

Amir Goldstein Sept. 15, 2011, 6:50 a.m. UTC
On Thu, Sep 15, 2011 at 1:14 AM, Ted Ts'o <tytso@mit.edu> wrote:
> On Wed, Sep 14, 2011 at 01:59:06PM -0600, Andreas Dilger wrote:
>>
>> There is the field that you told Amir he could use for the exception
>> bitmap for snapshots, which is using one of the two reserved fields in
>> ext2_group_desc, and also one of the 3 reserved fields in ext4_group_desc
>> for 64-bit block numbers.  That leaves one __u32 in ext2_group_desc, and
>> two __u32 in ext4_group_desc for checksums.
>
> Right, that's what I was forgetting.  Thanks for reminding me!
>

Thanks Andreas for reminding!
Here is a patch to reserve the exclude bitmap fields, so we won't
forget them again...
The full patch series for adding exclude bitmap can be found here:
https://github.com/amir73il/e2fsprogs-snapshots-patch-queue

---
From: Amir Goldstein <amir73il@users.sf.net>
Date: Thu, 15 Sep 2011 09:42:38 +0300

Ext4 snapshots require the compatible feature 'exclude_bitmap', meaning that
the exclude bitmap was allocated.
The allocated exclude bitmap blocks are stored in the block group descriptors.

To allow easy migration of existing Next3 installations, I took the liberty
of 'burning' the old 'exclude_inode' compatible flag and using a new
'exclude_bitmap' compatible flag, to state the exclude bitmap blocks are stored
in the group descriptors.

Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
---
 lib/ext2fs/ext2_fs.h |   13 +++++++++----
 1 files changed, 9 insertions(+), 4 deletions(-)


@@ -169,12 +171,14 @@ struct ext4_group_desc
        __u16   bg_free_inodes_count_hi;/* Free inodes count MSB */
        __u16   bg_used_dirs_count_hi;  /* Directories count MSB */
        __u16   bg_itable_unused_hi;    /* Unused inodes count MSB */
-       __u32   bg_reserved2[3];
+       __u32   bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
+       __u32   bg_reserved2[2];
 };

 #define EXT2_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not initialized */
 #define EXT2_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not initialized */
 #define EXT2_BG_INODE_ZEROED   0x0004 /* On-disk itable initialized to zero */
+#define EXT2_BG_EXCLUDE_UNINIT 0x0008 /* Exclude bitmap not initialized */

 /*
  * Data structures used by the directory indexing feature
@@ -670,7 +674,8 @@ struct ext2_super_block {
 #define EXT2_FEATURE_COMPAT_RESIZE_INODE       0x0010
 #define EXT2_FEATURE_COMPAT_DIR_INDEX          0x0020
 #define EXT2_FEATURE_COMPAT_LAZY_BG            0x0040
-#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE      0x0080
+/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE   0x0080 not used */
+#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP     0x0100

 #define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER    0x0001
 #define EXT2_FEATURE_RO_COMPAT_LARGE_FILE      0x0002
--
1.7.0.4
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Andreas Dilger Sept. 15, 2011, 10:06 a.m. UTC | #1
On 2011-09-15, at 12:50 AM, Amir Goldstein wrote:
> Here is a patch to reserve the exclude bitmap field, so we won't
> forget them again...
> The full patch series for adding exclude bitmap can be found here:
> https://github.com/amir73il/e2fsprogs-snapshots-patch-queue
> 
> ---
> From: Amir Goldstein <amir73il@users.sf.net>
> Date: Thu, 15 Sep 2011 09:42:38 +0300
> 
> Ext4 snapshots require the compatible feature 'exclude_bitmap', meaning that
> the exclude bitmap was allocated.
> The allocated exclude bitmap blocks are stored in the block group descriptors.
> 
> To allow easy migration of existing Next3 installations, I took the liberty
> of 'burning' the old 'exclude_inode' compatible flag and using a new
> 'exclude_bitmap' comaptible flag, to state the exclude bitmap blocks are stored
> in the group descriptors.
> 
> Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
> ---
> lib/ext2fs/ext2_fs.h |   13 +++++++++----
> 1 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 54cb3d4..f1f0e19 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -142,7 +142,8 @@ struct ext2_group_desc
>        __u16   bg_free_inodes_count;   /* Free inodes count */
>        __u16   bg_used_dirs_count;     /* Directories count */
>        __u16   bg_flags;
> -       __u32   bg_reserved[2];
> +       __u32   bg_exclude_bitmap;      /* Exclude bitmap block */

I'd prefer if these comments were "Snapshot exclude bitmap block" so
that it is more clear what an exclude bitmap is used for.

For consistency, it would also be good to name these "_lo" to match the
kernel fields.

> +       __u32   bg_reserved[1];
>        __u16   bg_itable_unused;       /* Unused inodes count */
>        __u16   bg_checksum;            /* crc16(s_uuid+grouo_num+group_desc)*/
> };
> @@ -159,7 +160,8 @@ struct ext4_group_desc
>        __u16   bg_free_inodes_count;   /* Free inodes count */
>        __u16   bg_used_dirs_count;     /* Directories count */
>        __u16   bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
> -       __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
> +       __u32   bg_exclude_bitmap;      /* Exclude bitmap block */
> +       __u32   bg_reserved[1];         /* Likely block/inode bitmap checksum */
>        __u16   bg_itable_unused;       /* Unused inodes count */
>        __u16   bg_checksum;            /* crc16(sb_uuid+group+desc) */
>        __u32   bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
> 
> @@ -169,12 +171,14 @@ struct ext4_group_desc
>        __u16   bg_free_inodes_count_hi;/* Free inodes count MSB */
>        __u16   bg_used_dirs_count_hi;  /* Directories count MSB */
>        __u16   bg_itable_unused_hi;    /* Unused inodes count MSB */
> -       __u32   bg_reserved2[3];
> +       __u32   bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
> +       __u32   bg_reserved2[2];
> };
> 
> #define EXT2_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not initialized */
> #define EXT2_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not initialized */
> #define EXT2_BG_INODE_ZEROED   0x0004 /* On-disk itable initialized to zero */
> +#define EXT2_BG_EXCLUDE_UNINIT 0x0008 /* Exclude bitmap not initialized */
> 
> /*
>  * Data structures used by the directory indexing feature
> @@ -670,7 +674,8 @@ struct ext2_super_block {
> #define EXT2_FEATURE_COMPAT_RESIZE_INODE       0x0010
> #define EXT2_FEATURE_COMPAT_DIR_INDEX          0x0020
> #define EXT2_FEATURE_COMPAT_LAZY_BG            0x0040
> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE      0x0080
> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE   0x0080 not used */
> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP     0x0100
> 
> #define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER    0x0001
> #define EXT2_FEATURE_RO_COMPAT_LARGE_FILE      0x0002
> --
> 1.7.0.4


Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Sept. 15, 2011, 1:16 p.m. UTC | #2
On Thu, Sep 15, 2011 at 09:50:20AM +0300, Amir Goldstein wrote:
> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE      0x0080
> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE   0x0080 not used */
> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP     0x0100

Why this change?  Is it because you're already using 0x0100 in
shipping systems?

						- Ted

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein Sept. 15, 2011, 1:47 p.m. UTC | #3
On Thu, Sep 15, 2011 at 4:16 PM, Ted Ts'o <tytso@mit.edu> wrote:
> On Thu, Sep 15, 2011 at 09:50:20AM +0300, Amir Goldstein wrote:
>> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE      0x0080
>> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE   0x0080 not used */
>> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP     0x0100
>
> Why this change?  Is it because you're already using 0x0100 in
> shipping systems?
>

I am using 0x80 in shipping systems and it signifies something a bit
different than the proposed 0x100.

EXCLUDE_INODE means that special inode 9 is used to reference exclude
bitmap blocks.
EXCLUDE_BITMAP means that exclude bitmap blocks are referenced from
group descriptors.
With this distinction it will be easier for me to make the migration.

Thanks,
Amir.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein Sept. 15, 2011, 2:03 p.m. UTC | #4
On Thu, Sep 15, 2011 at 1:06 PM, Andreas Dilger
<adilger.kernel@dilger.ca> wrote:
> On 2011-09-15, at 12:50 AM, Amir Goldstein wrote:
>> Here is a patch to reserve the exclude bitmap field, so we won't
>> forget them again...
>> The full patch series for adding exclude bitmap can be found here:
>> https://github.com/amir73il/e2fsprogs-snapshots-patch-queue
>>
>> ---
>> From: Amir Goldstein <amir73il@users.sf.net>
>> Date: Thu, 15 Sep 2011 09:42:38 +0300
>>
>> Ext4 snapshots require the compatible feature 'exclude_bitmap', meaning that
>> the exclude bitmap was allocated.
>> The allocated exclude bitmap blocks are stored in the block group descriptors.
>>
>> To allow easy migration of existing Next3 installations, I took the liberty
>> of 'burning' the old 'exclude_inode' compatible flag and using a new
>> 'exclude_bitmap' comaptible flag, to state the exclude bitmap blocks are stored
>> in the group descriptors.
>>
>> Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
>> ---
>> lib/ext2fs/ext2_fs.h |   13 +++++++++----
>> 1 files changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
>> index 54cb3d4..f1f0e19 100644
>> --- a/lib/ext2fs/ext2_fs.h
>> +++ b/lib/ext2fs/ext2_fs.h
>> @@ -142,7 +142,8 @@ struct ext2_group_desc
>>        __u16   bg_free_inodes_count;   /* Free inodes count */
>>        __u16   bg_used_dirs_count;     /* Directories count */
>>        __u16   bg_flags;
>> -       __u32   bg_reserved[2];
>> +       __u32   bg_exclude_bitmap;      /* Exclude bitmap block */
>
> I'd prefer if these comments were "Snapshot exclude bitmap block" so
> that it is more clear what an exclude bitmap is used for.

I'll change that.

>
> For consistency, it would also be good to name these "_lo" to match the
> kernel fields.
>

That's a major cleanup.
Forgive me for not picking up that glove...

>> +       __u32   bg_reserved[1];
>>        __u16   bg_itable_unused;       /* Unused inodes count */
>>        __u16   bg_checksum;            /* crc16(s_uuid+grouo_num+group_desc)*/
>> };
>> @@ -159,7 +160,8 @@ struct ext4_group_desc
>>        __u16   bg_free_inodes_count;   /* Free inodes count */
>>        __u16   bg_used_dirs_count;     /* Directories count */
>>        __u16   bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
>> -       __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
>> +       __u32   bg_exclude_bitmap;      /* Exclude bitmap block */
>> +       __u32   bg_reserved[1];         /* Likely block/inode bitmap checksum */
>>        __u16   bg_itable_unused;       /* Unused inodes count */
>>        __u16   bg_checksum;            /* crc16(sb_uuid+group+desc) */
>>        __u32   bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
>>
>> @@ -169,12 +171,14 @@ struct ext4_group_desc
>>        __u16   bg_free_inodes_count_hi;/* Free inodes count MSB */
>>        __u16   bg_used_dirs_count_hi;  /* Directories count MSB */
>>        __u16   bg_itable_unused_hi;    /* Unused inodes count MSB */
>> -       __u32   bg_reserved2[3];
>> +       __u32   bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
>> +       __u32   bg_reserved2[2];
>> };
>>
>> #define EXT2_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not initialized */
>> #define EXT2_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not initialized */
>> #define EXT2_BG_INODE_ZEROED   0x0004 /* On-disk itable initialized to zero */
>> +#define EXT2_BG_EXCLUDE_UNINIT 0x0008 /* Exclude bitmap not initialized */
>>
>> /*
>>  * Data structures used by the directory indexing feature
>> @@ -670,7 +674,8 @@ struct ext2_super_block {
>> #define EXT2_FEATURE_COMPAT_RESIZE_INODE       0x0010
>> #define EXT2_FEATURE_COMPAT_DIR_INDEX          0x0020
>> #define EXT2_FEATURE_COMPAT_LAZY_BG            0x0040
>> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE      0x0080
>> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE   0x0080 not used */
>> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP     0x0100
>>
>> #define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER    0x0001
>> #define EXT2_FEATURE_RO_COMPAT_LARGE_FILE      0x0002
>> --
>> 1.7.0.4
>
>
> Cheers, Andreas
>
>
>
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Sept. 15, 2011, 2:21 p.m. UTC | #5
On Thu, Sep 15, 2011 at 05:03:37PM +0300, Amir Goldstein wrote:
> > I'd prefer if these comments were "Snapshot exclude bitmap block" so
> > that it is more clear what an exclude bitmap is used for.
> 
> I'll change that.

I'll be sending out a combined patch very shortly which adds the
necessary fields for the snapshot and metadata checksum fields.  I
just want to make sure we're all on the same page about reserved
fields as far as the on-disk format is concerned.

> >
> > For consistency, it would also be good to name these "_lo" to match the
> > kernel fields.
> >
> 
> That's a major cleanup.
> Forgive me for not picking up that glove...

One way of handling these sorts of cleanups is to make the change in
the header file, and then do something like this:

#define bg_exclude_bitmap   bg_exclude_bitmap_lo

Then once you add the accessor functions to set/get the
exclude_bitmap, you can delete the #define to make sure you fixed up
all of your code to actually use the accessor functions.

       	    	    	     	     	      - Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Dilger Sept. 15, 2011, 7:08 p.m. UTC | #6
On 2011-09-15, at 7:47 AM, Amir Goldstein wrote:
> On Thu, Sep 15, 2011 at 4:16 PM, Ted Ts'o <tytso@mit.edu> wrote:
>> On Thu, Sep 15, 2011 at 09:50:20AM +0300, Amir Goldstein wrote:
>>> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE      0x0080
>>> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE   0x0080 not used */
>>> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP     0x0100
>> 
>> Why this change?  Is it because you're already using 0x0100 in
>> shipping systems?
> 
> I am using 0x80 in shipping systems and it signifies something a bit
> different then the proposed 0x100.
> 
> EXCLUDE_INODE means that special inode 9 is used to reference exclude
> bitmap blocks.  EXCLUDE_BITMAP means that exclude bitmap blocks are
> referenced from group descriptors.
> With this distinction it will be easier for me to make the migration.

In that light, why not continue to use an inode to map the exclude bitmap
blocks, where the bitmap offset is (group * blocksize), instead of
explicitly listing all of the blocks in the group descriptor?  This is
how the buddy bitmap works in memory only, but it could be done for the
exclude bitmap on disk.

The advantage of this is that it would allow the 32-bit bitmap checksums
to both fit into the group descriptor.  The disadvantage is that there
is a chance this inode would become corrupted and the location of the
exclude bitmaps is lost.  I don't know how serious that is (e.g. if e2fsck
could fix it by regenerating the bitmaps, or just deleting the snapshot).

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Sept. 15, 2011, 9:57 p.m. UTC | #7
On Thu, Sep 15, 2011 at 01:08:34PM -0600, Andreas Dilger wrote:
> In that light, why not continue to use an inode to map the exclude bitmap
> blocks, where the bitmap offset is (group * blocksize), instead of
> explicitly listing all of the blocks in the group descriptor?  This is
> how the buddy bitmap works in memory only, but it could be done for the
> exclude bitmap on disk.

I seem to recall the use of an inode to map the exclude bitmap added a
huge amount of complexity to the snapshot patches.  Amir, am I
remembering this correctly?

							- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein Sept. 16, 2011, 6:45 a.m. UTC | #8
On Fri, Sep 16, 2011 at 12:57 AM, Ted Ts'o <tytso@mit.edu> wrote:
> On Thu, Sep 15, 2011 at 01:08:34PM -0600, Andreas Dilger wrote:
>> In that light, why not continue to use an inode to map the exclude bitmap
>> blocks, where the bitmap offset is (group * blocksize), instead of
>> explicitly listing all of the blocks in the group descriptor?  This is
>> how the buddy bitmap works in memory only, but it could be done for the
>> exclude bitmap on disk.
>

And this is exactly how the exclude inode works, except only the DIND
block is used for mapping, just like the resize inode.

> I seem to recall the use of an inode to map the exclude bitmap added a
> huge amount of complexity to the snapshot patches.  Amir, am I
> remembering this correctly?
>

No, I am not sure this is accurate.
I think after we reviewed the e2fsprogs snapshots patch set, you
had 3 observations:
1. the largest part (in lines of code) of the e2fsprogs snapshot patch set
    is related to the exclude inode/bitmap code.
2. it reminded you of resize inode too much and you didn't like that
3. There was also the issue of whether or not to allow the removal of
the exclude inode/bitmap
    and then re-allocation would not be in optimal layout

In retrospect, after Yongqiang has implemented the alternative exclude
bitmap patch set
for e2fsprogs, I can say:
1. The alternative patch set is not smaller
2. It is a lot more elegant and deals with correct layout of exclude
bitmap (next to block bitmap)
3. It will be able to deal with 64bit fs (unlike exclude/resize inode)
and 64bit resize

Yongqiang, do you have anything else to add to the exclude inode vs.
group desc issue?

Amir.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein Sept. 16, 2011, 6:55 a.m. UTC | #9
On Thu, Sep 15, 2011 at 10:08 PM, Andreas Dilger
<adilger.kernel@dilger.ca> wrote:
> On 2011-09-15, at 7:47 AM, Amir Goldstein wrote:
>> On Thu, Sep 15, 2011 at 4:16 PM, Ted Ts'o <tytso@mit.edu> wrote:
>>> On Thu, Sep 15, 2011 at 09:50:20AM +0300, Amir Goldstein wrote:
>>>> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE      0x0080
>>>> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE   0x0080 not used */
>>>> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP     0x0100
>>>
>>> Why this change?  Is it because you're already using 0x0100 in
>>> shipping systems?
>>
>> I am using 0x80 in shipping systems and it signifies something a bit
>> different then the proposed 0x100.
>>
>> EXCLUDE_INODE means that special inode 9 is used to reference exclude
>> bitmap blocks.  EXCLUDE_BITMAP means that exclude bitmap blocks are
>> referenced from group descriptors.
>> With this distinction it will be easier for me to make the migration.
>
> In that light, why not continue to use an inode to map the exclude bitmap
> blocks, where the bitmap offset is (group * blocksize), instead of
> explicitly listing all of the blocks in the group descriptor?  This is
> how the buddy bitmap works in memory only, but it could be done for the
> exclude bitmap on disk.
>
> The advantage of this is that it would allow the 32-bit bitmap checksums
> to both fit into the group descriptor.  The disadvantage is that there
> is a chance this inode would become corrupted and the location of the
> exclude bitmaps is lost.  I don't know how serious that is (e.g. if e2fsck
> could fix it by regenerating the bitmaps, or just deleting the snapshot).
>
> Cheers, Andreas
>

fsck can fix it. It just marks all blocks used by snapshot inodes.

Amir.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yongqiang Yang Sept. 16, 2011, 11:43 a.m. UTC | #10
On Fri, Sep 16, 2011 at 2:45 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Fri, Sep 16, 2011 at 12:57 AM, Ted Ts'o <tytso@mit.edu> wrote:
>> On Thu, Sep 15, 2011 at 01:08:34PM -0600, Andreas Dilger wrote:
>>> In that light, why not continue to use an inode to map the exclude bitmap
>>> blocks, where the bitmap offset is (group * blocksize), instead of
>>> explicitly listing all of the blocks in the group descriptor?  This is
>>> how the buddy bitmap works in memory only, but it could be done for the
>>> exclude bitmap on disk.
>>
>
> And this exactly is how the exclude inode works, except only the DIND
> block is used
> for mapping, just like the resize inode.
>
>> I seem to recall the use of an inode to map the exclude bitmap added a
>> huge amount of complexity to the snapshot patches.  Amir, am I
>> remembering this correctly?
>>
>
> No, I am not sure this is accurate.
> I think after we over viewed the e2fsprogs snapshots patch set, you
> has 2 observations:
> 1. the largest part (in lines of code) of the e2fsprogs snapshot patch set
>    is related to the exclude inode/bitmap code.
> 2. it reminded you of resize inode too much and you didn't like that
> 3. There was also the issue of whether or not to allow the removal of
> the exclude inode/bitmap
>    and then re-allocation would not be in optimal layout
>
> In retrospect, after Yongqiang has implemented the alternative exclude
> bitmap patch set
> for e2fsprogs, I can say:
> 1. The alternative patch set is not smaller
> 2. It is a lot more elegant and deals with correct layout of exclude
> bitmap (next to block bitmap)
> 3. It will be able to deal with 64bit fs (unlike exclude/resize inode)
> and 64bit resize
>
> Yongqiang, do you have anything else to add to the exclude inode vs.
> group desc issue?
Nope. Regarding resize, group desc is better than the exclude inode.  For
meta_bg, group desc is much more welcome.


Yongqiang.
>
> Amir.
>
Andreas Dilger Sept. 16, 2011, 8:22 p.m. UTC | #11
On 2011-09-16, at 5:43 AM, Yongqiang Yang wrote:
> On Fri, Sep 16, 2011 at 2:45 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>> No, I am not sure this is accurate.
>> I think after we over viewed the e2fsprogs snapshots patch set, you
>> has 2 observations:
>> 1. the largest part (in lines of code) of the e2fsprogs snapshot patch set
>>    is related to the exclude inode/bitmap code.
>> 2. it reminded you of resize inode too much and you didn't like that
>> 3. There was also the issue of whether or not to allow the removal of
>> the exclude inode/bitmap
>>    and then re-allocation would not be in optimal layout
>> 
>> In retrospect, after Yongqiang has implemented the alternative exclude
>> bitmap patch set
>> for e2fsprogs, I can say:
>> 1. The alternative patch set is not smaller
>> 2. It is a lot more elegant and deals with correct layout of exclude
>> bitmap (next to block bitmap)
>> 3. It will be able to deal with 64bit fs (unlike exclude/resize inode)
>> and 64bit resize
>> 
>> Yongqiang, do you have anything else to add to the exclude inode vs.
>> group desc issue?
> 
> Nope, regarding resize group desc is better than exclude inode.  For
> meta_bg, group desc is much more welcome.

I'm not dead-set on using the exclude inode.  I was just wondering if there
was a clear benefit to doing so.  I'm OK with having 16-bit checksums for
the inode bitmaps for now, and possibly changing mke2fs to always create
ext4 filesystems with the 64bit feature.  Always specifying 64bit for ext4
filesystems will allow larger checksums, and has the added benefit of
facilitating resize from below 2^32 blocks to over 2^32 blocks as well.

One minor drawback is that the s_group_desc array doubles in size, so
the allocation needs to be larger (handled in newer kernels by ext4_kvmalloc())
and it needs more space on disk.  At 16TB the group descriptor table would
use 8MB instead of 4MB, and at 256TB the group descriptor table would fill
the whole 128MB of the first group, where we HAVE to use META_BG because the
second group is also filled with the backup GDT.

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 54cb3d4..f1f0e19 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -142,7 +142,8 @@  struct ext2_group_desc
        __u16   bg_free_inodes_count;   /* Free inodes count */
        __u16   bg_used_dirs_count;     /* Directories count */
        __u16   bg_flags;
-       __u32   bg_reserved[2];
+       __u32   bg_exclude_bitmap;      /* Exclude bitmap block */
+       __u32   bg_reserved[1];
        __u16   bg_itable_unused;       /* Unused inodes count */
        __u16   bg_checksum;            /* crc16(s_uuid+grouo_num+group_desc)*/
 };
@@ -159,7 +160,8 @@  struct ext4_group_desc
        __u16   bg_free_inodes_count;   /* Free inodes count */
        __u16   bg_used_dirs_count;     /* Directories count */
        __u16   bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
-       __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
+       __u32   bg_exclude_bitmap;      /* Exclude bitmap block */
+       __u32   bg_reserved[1];         /* Likely block/inode bitmap checksum */
        __u16   bg_itable_unused;       /* Unused inodes count */
        __u16   bg_checksum;            /* crc16(sb_uuid+group+desc) */
        __u32   bg_block_bitmap_hi;     /* Blocks bitmap block MSB */