From patchwork Mon Sep 3 16:33:51 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Theodore Ts'o X-Patchwork-Id: 181389 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 346DD2C008D for ; Tue, 4 Sep 2012 02:34:00 +1000 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756508Ab2ICQd6 (ORCPT ); Mon, 3 Sep 2012 12:33:58 -0400 Received: from li9-11.members.linode.com ([67.18.176.11]:48620 "EHLO imap.thunk.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756418Ab2ICQd5 (ORCPT ); Mon, 3 Sep 2012 12:33:57 -0400 Received: from root (helo=closure.thunk.org) by imap.thunk.org with local-esmtp (Exim 4.72) (envelope-from ) id 1T8Zav-0000MH-8y; Mon, 03 Sep 2012 16:33:49 +0000 Received: by closure.thunk.org (Postfix, from userid 15806) id CC1C6241260; Mon, 3 Sep 2012 12:33:51 -0400 (EDT) From: Theodore Ts'o To: Ext4 Developers List Cc: Yongqiang Yang , "Theodore Ts'o" Subject: [PATCH 5/5 -v2] ext4: add online resizing support for meta_bg and 64-bit file systems Date: Mon, 3 Sep 2012 12:33:51 -0400 Message-Id: <1346690031-20565-1-git-send-email-tytso@mit.edu> X-Mailer: git-send-email 1.7.12.rc0.22.gcdd159b In-Reply-To: <1346561485-6413-6-git-send-email-tytso@mit.edu> References: <1346561485-6413-6-git-send-email-tytso@mit.edu> X-SA-Exim-Connect-IP: X-SA-Exim-Mail-From: tytso@thunk.org X-SA-Exim-Scanned: No (on imap.thunk.org); SAEximRunCond expanded to false Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Yongqiang Yang This patch adds support for resizing file systems with the meta_bg and 64bit features. [ Added a fix by tytso to fix a divide by zero when resizing a filesystem from 14 TB to 18TB. Also fixed overhead accounting for meta_bg file systems.] Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 15 ---- fs/ext4/resize.c | 215 ++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 165 insertions(+), 65 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7f7dad7..8b84fe2 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -365,26 +365,11 @@ group_add_out: return -EOPNOTSUPP; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_META_BG)) { - ext4_msg(sb, KERN_ERR, - "Online resizing not (yet) supported with meta_bg"); - return -EOPNOTSUPP; - } - if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, sizeof(__u64))) { return -EFAULT; } - if (n_blocks_count > MAX_32_NUM && - !EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_64BIT)) { - ext4_msg(sb, KERN_ERR, - "File system only supports 32-bit block numbers"); - return -EOPNOTSUPP; - } - err = ext4_resize_begin(sb); if (err) return err; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 39ff8ee..767a3e8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -45,6 +45,28 @@ void ext4_resize_end(struct super_block *sb) smp_mb__after_clear_bit(); } +static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, + ext4_group_t group) { + return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) << + EXT4_DESC_PER_BLOCK_BITS(sb); +} + +static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb, + ext4_group_t group) { + group = ext4_meta_bg_first_group(sb, group); + return ext4_group_first_block_no(sb, group); +} + +static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb, + ext4_group_t group) { + ext4_grpblk_t overhead; + overhead = ext4_bg_num_gdb(sb, group); + if (ext4_bg_has_super(sb, group)) + overhead += 1 + + le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); + return overhead; +} + #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -57,9 +79,7 @@ static int verify_group_input(struct super_block *sb, ext4_fsblk_t end = start + input->blocks_count; ext4_group_t group = input->group; ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; - unsigned overhead = ext4_bg_has_super(sb, group) ? - (1 + ext4_bg_num_gdb(sb, group) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + unsigned overhead = ext4_group_overhead_blocks(sb, group); ext4_fsblk_t metaend = start + overhead; struct buffer_head *bh = NULL; ext4_grpblk_t free_blocks_count, offset; @@ -206,7 +226,6 @@ static void ext4_alloc_group_tables(struct super_block *sb, int flexbg_size) { struct ext4_new_group_data *group_data = flex_gd->groups; - struct ext4_super_block *es = EXT4_SB(sb)->s_es; ext4_fsblk_t start_blk; ext4_fsblk_t last_blk; ext4_group_t src_group; @@ -229,20 +248,20 @@ next_group: start_blk = ext4_group_first_block_no(sb, src_group); last_blk = start_blk + group_data[src_group - group].blocks_count; - overhead = ext4_bg_has_super(sb, src_group) ? - (1 + ext4_bg_num_gdb(sb, src_group) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + overhead = ext4_group_overhead_blocks(sb, src_group); start_blk += overhead; BUG_ON(src_group >= group_data[0].group + flex_gd->count); /* We collect contiguous blocks as much as possible. */ src_group++; - for (; src_group <= last_group; src_group++) - if (!ext4_bg_has_super(sb, src_group)) + for (; src_group <= last_group; src_group++) { + overhead = ext4_group_overhead_blocks(sb, src_group); + if (overhead != 0) last_blk += group_data[src_group - group].blocks_count; else break; + } /* Allocate block bitmaps */ for (; bb_index < flex_gd->count; bb_index++) { @@ -433,11 +452,13 @@ static int setup_new_flex_group_blocks(struct super_block *sb, ext4_group_t group, count; struct buffer_head *bh = NULL; int reserved_gdb, i, j, err = 0, err2; + int meta_bg; BUG_ON(!flex_gd->count || !group_data || group_data[0].group != sbi->s_groups_count); reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); + meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); /* This transaction may be extended/restarted along the way */ handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); @@ -447,15 +468,25 @@ static int setup_new_flex_group_blocks(struct super_block *sb, group = group_data[0].group; for (i = 0; i < flex_gd->count; i++, group++) { unsigned long gdblocks; + ext4_grpblk_t overhead; gdblocks = ext4_bg_num_gdb(sb, group); start = ext4_group_first_block_no(sb, group); - if (!ext4_bg_has_super(sb, group)) + if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) goto handle_itb; + if (meta_bg == 1) { + ext4_group_t first_group; + first_group = ext4_meta_bg_first_group(sb, group); + if (first_group != group + 1 && + first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1) + goto handle_itb; + } + + block = start + ext4_bg_has_super(sb, group); /* Copy all of the GDT blocks into the backup in this group */ - for (j = 0, block = start + 1; j < gdblocks; j++, block++) { + for (j = 0; j < gdblocks; j++, block++) { struct buffer_head *gdb; ext4_debug("update backup group %#04llx\n", block); @@ -525,11 +556,11 @@ handle_bb: err = PTR_ERR(bh); goto out; } - if (ext4_bg_has_super(sb, group)) { + overhead = ext4_group_overhead_blocks(sb, group); + if (overhead != 0) { ext4_debug("mark backup superblock %#04llx (+0)\n", start); - ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + - 1); + ext4_set_bits(bh->b_data, 0, overhead); } ext4_mark_bitmap_end(group_data[i].blocks_count, sb->s_blocksize * 8, bh->b_data); @@ -826,6 +857,45 @@ exit_bh: } /* + * add_new_gdb_meta_bg is the sister of add_new_gdb. + */ +static int add_new_gdb_meta_bg(struct super_block *sb, + handle_t *handle, ext4_group_t group) { + ext4_fsblk_t gdblock; + struct buffer_head *gdb_bh; + struct buffer_head **o_group_desc, **n_group_desc; + unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); + int err; + + gdblock = ext4_meta_bg_first_block_no(sb, group) + + ext4_bg_has_super(sb, group); + gdb_bh = sb_bread(sb, gdblock); + if (!gdb_bh) + return -EIO; + n_group_desc = ext4_kvmalloc((gdb_num + 1) * + sizeof(struct buffer_head *), + GFP_NOFS); + if (!n_group_desc) { + err = -ENOMEM; + ext4_warning(sb, "not enough memory for %lu groups", + gdb_num + 1); + return err; + } + + o_group_desc = EXT4_SB(sb)->s_group_desc; + memcpy(n_group_desc, o_group_desc, + EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + n_group_desc[gdb_num] = gdb_bh; + EXT4_SB(sb)->s_group_desc = n_group_desc; + EXT4_SB(sb)->s_gdb_count++; + ext4_kvfree(o_group_desc); + err = ext4_journal_get_write_access(handle, gdb_bh); + if (unlikely(err)) + brelse(gdb_bh); + return err; +} + +/* * Called when we are adding a new group which has a backup copy of each of * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. * We need to add these reserved backup GDT blocks to the resize inode, so @@ -953,16 +1023,16 @@ exit_free: * do not copy the full number of backups at this time. The resize * which changed s_groups_count will backup again. */ -static void update_backups(struct super_block *sb, - int blk_off, char *data, int size) +static void update_backups(struct super_block *sb, int blk_off, char *data, + int size, int meta_bg) { struct ext4_sb_info *sbi = EXT4_SB(sb); - const ext4_group_t last = sbi->s_groups_count; + ext4_group_t last; const int bpg = EXT4_BLOCKS_PER_GROUP(sb); unsigned three = 1; unsigned five = 5; unsigned seven = 7; - ext4_group_t group; + ext4_group_t group = 0; int rest = sb->s_blocksize - size; handle_t *handle; int err = 0, err2; @@ -976,8 +1046,17 @@ static void update_backups(struct super_block *sb, ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); - while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { + if (meta_bg == 0) { + group = ext4_list_backups(sb, &three, &five, &seven); + last = sbi->s_groups_count; + } else { + group = ext4_meta_bg_first_group(sb, group) + 1; + last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2); + } + + while (group < sbi->s_groups_count) { struct buffer_head *bh; + ext4_fsblk_t backup_block; /* Out of journal space, and can't get more - abort - so sad */ if (ext4_handle_valid(handle) && @@ -986,13 +1065,20 @@ static void update_backups(struct super_block *sb, (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) break; - bh = sb_getblk(sb, group * bpg + blk_off); + if (meta_bg == 0) + backup_block = group * bpg + blk_off; + else + backup_block = (ext4_group_first_block_no(sb, group) + + ext4_bg_has_super(sb, group)); + + bh = sb_getblk(sb, backup_block); if (!bh) { err = -EIO; break; } - ext4_debug("update metadata backup %#04lx\n", - (unsigned long)bh->b_blocknr); + ext4_debug("update metadata backup %llu(+%llu)\n", + backup_block, backup_block - + ext4_group_first_block_no(sb, group)); if ((err = ext4_journal_get_write_access(handle, bh))) break; lock_buffer(bh); @@ -1005,6 +1091,13 @@ static void update_backups(struct super_block *sb, if (unlikely(err)) ext4_std_error(sb, err); brelse(bh); + + if (meta_bg == 0) + group = ext4_list_backups(sb, &three, &five, &seven); + else if (group == last) + break; + else + group = last; } if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; @@ -1047,7 +1140,9 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, struct ext4_super_block *es = sbi->s_es; struct buffer_head *gdb_bh; int i, gdb_off, gdb_num, err = 0; + int meta_bg; + meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); for (i = 0; i < count; i++, group++) { int reserved_gdb = ext4_bg_has_super(sb, group) ? le16_to_cpu(es->s_reserved_gdt_blocks) : 0; @@ -1067,8 +1162,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) err = reserve_backup_gdb(handle, resize_inode, group); - } else + } else if (meta_bg != 0) { + err = add_new_gdb_meta_bg(sb, handle, group); + } else { err = add_new_gdb(handle, resize_inode, group); + } if (err) break; } @@ -1220,7 +1318,7 @@ static void ext4_update_super(struct super_block *sb, } reserved_blocks = ext4_r_blocks_count(es) * 100; - do_div(reserved_blocks, ext4_blocks_count(es)); + reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es)); reserved_blocks *= blocks_count; do_div(reserved_blocks, 100); @@ -1231,6 +1329,7 @@ static void ext4_update_super(struct super_block *sb, le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); + ext4_debug("free blocks count %llu", ext4_free_blocks_count(es)); /* * We need to protect s_groups_count against other CPUs seeing * inconsistent state in the superblock. @@ -1265,6 +1364,8 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_add(&sbi->s_freeinodes_counter, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); + ext4_debug("free blocks count %llu", + percpu_counter_read(&sbi->s_freeclusters_counter)); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && sbi->s_log_groups_per_flex) { @@ -1356,15 +1457,17 @@ exit_journal: int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); + int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_META_BG); update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); + sizeof(struct ext4_super_block), 0); for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; gdb_bh = sbi->s_group_desc[gdb_num]; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, - gdb_bh->b_size); + gdb_bh->b_size, meta_bg); } } exit: @@ -1408,9 +1511,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, group_data[i].group = group + i; group_data[i].blocks_count = blocks_per_group; - overhead = ext4_bg_has_super(sb, group + i) ? - (1 + ext4_bg_num_gdb(sb, group + i) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + overhead = ext4_group_overhead_blocks(sb, group + i); group_data[i].free_blocks_count = blocks_per_group - overhead; if (ext4_has_group_desc_csum(sb)) flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | @@ -1558,11 +1659,13 @@ errout: err = err2; if (!err) { + ext4_fsblk_t first_block; + first_block = ext4_group_first_block_no(sb, 0); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); + update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, + (char *)es, sizeof(struct ext4_super_block), 0); } return err; } @@ -1657,15 +1760,16 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head *bh; - struct inode *resize_inode; - ext4_fsblk_t o_blocks_count; - ext4_group_t o_group; - ext4_group_t n_group; - ext4_grpblk_t offset, add; + struct inode *resize_inode = NULL; + ext4_grpblk_t add, offset; unsigned long n_desc_blocks; unsigned long o_desc_blocks; unsigned long desc_blocks; + ext4_group_t o_group; + ext4_group_t n_group; + ext4_fsblk_t o_blocks_count; int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; + int meta_bg; o_blocks_count = ext4_blocks_count(es); @@ -1687,22 +1791,33 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / - EXT4_DESC_PER_BLOCK(sb); + EXT4_DESC_PER_BLOCK(sb); o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / - EXT4_DESC_PER_BLOCK(sb); + EXT4_DESC_PER_BLOCK(sb); desc_blocks = n_desc_blocks - o_desc_blocks; - if (desc_blocks && - (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || - le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) { - ext4_warning(sb, "No reserved GDT blocks, can't resize"); - return -EPERM; - } + meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); - resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); - if (IS_ERR(resize_inode)) { - ext4_warning(sb, "Error opening resize inode"); - return PTR_ERR(resize_inode); + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (meta_bg) { + ext4_error(sb, "resize_inode and meta_bg enabled " + "simultaneously"); + return -EINVAL; + } + if (le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks) { + ext4_warning(sb, + "No reserved GDT blocks, can't resize"); + return -EPERM; + } + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); + if (IS_ERR(resize_inode)) { + ext4_warning(sb, "Error opening resize inode"); + return PTR_ERR(resize_inode); + } + } else if (!meta_bg) { + ext4_warning(sb, "File system features do not permit " + "online resize"); + return -EPERM; } /* See if the device is actually as big as what was requested */ @@ -1756,8 +1871,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) out: if (flex_gd) free_flex_gd(flex_gd); - - iput(resize_inode); + if (resize_inode != NULL) + iput(resize_inode); if (test_opt(sb, DEBUG)) ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " "upto %llu blocks", o_blocks_count, n_blocks_count);