Patchwork [RFC,7/9,v1] ext4: add a new convert function to convert an unwritten extent in extent status tree

login
register
mail settings
Submitter Zheng Liu
Date Dec. 24, 2012, 7:55 a.m.
Message ID <1356335742-11793-8-git-send-email-wenqing.lz@taobao.com>
Download mbox | patch
Permalink /patch/208036/
State Superseded
Headers show

Comments

Zheng Liu - Dec. 24, 2012, 7:55 a.m.
From: Zheng Liu <wenqing.lz@taobao.com>

A new function called ext4_es_convert_unwritten_extents() is defined to convert
a range of unwritten extents to written in the extent status tree.

This function aims to improve unwritten extent conversion in DIO end_io.
Meanwhile, all lock operations are changed to their irqsave/irqrestore variants
because DIO end_io runs in irq context.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 fs/ext4/extents_status.c | 161 ++++++++++++++++++++++++++++++++++++++++++++---
 fs/ext4/extents_status.h |   2 +
 2 files changed, 155 insertions(+), 8 deletions(-)

Patch

diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index ccd940c..9db9e05 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -239,10 +239,11 @@  ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
 	struct extent_status *es1 = NULL;
 	struct rb_node *node;
 	ext4_lblk_t ret = EXT_MAX_BLOCKS;
+	unsigned long flags;
 
 	trace_ext4_es_find_extent_enter(inode, es->es_lblk);
 
-	read_lock(&EXT4_I(inode)->i_es_lock);
+	read_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
 	tree = &EXT4_I(inode)->i_es_tree;
 
 	/* find delay extent in cache firstly */
@@ -273,7 +274,7 @@  out:
 		}
 	}
 
-	read_unlock(&EXT4_I(inode)->i_es_lock);
+	read_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
 
 	trace_ext4_es_find_extent_exit(inode, es, ret);
 	return ret;
@@ -426,6 +427,7 @@  int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 	struct ext4_es_tree *tree;
 	struct extent_status newes;
 	ext4_lblk_t end = lblk + len - 1;
+	unsigned long flags;
 	int err = 0;
 
 	es_debug("add [%u/%u) %llu %d to extent status tree of inode %lu\n",
@@ -439,7 +441,7 @@  int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 	newes.es_status = status;
 	trace_ext4_es_insert_extent(inode, &newes);
 
-	write_lock(&EXT4_I(inode)->i_es_lock);
+	write_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
 	tree = &EXT4_I(inode)->i_es_tree;
 	err = __es_remove_extent(tree, lblk, end);
 	if (err != 0)
@@ -447,7 +449,7 @@  int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 	err = __es_insert_extent(tree, &newes);
 
 error:
-	write_unlock(&EXT4_I(inode)->i_es_lock);
+	write_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
 
 	ext4_es_print_tree(inode);
 
@@ -466,12 +468,13 @@  int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es)
 	struct ext4_es_tree *tree;
 	struct extent_status *es1;
 	struct rb_node *node;
+	unsigned long flags;
 	int found = 0;
 
 	es_debug("lookup extent in block %u\n", es->es_lblk);
 
 	tree = &EXT4_I(inode)->i_es_tree;
-	read_lock(&EXT4_I(inode)->i_es_lock);
+	read_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
 
 	/* find delay extent in cache firstly */
 	if (tree->cache_es) {
@@ -506,7 +509,7 @@  out:
 		es->es_status = es1->es_status;
 	}
 
-	read_unlock(&EXT4_I(inode)->i_es_lock);
+	read_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
 
 	return found;
 }
@@ -605,6 +608,7 @@  int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 {
 	struct ext4_es_tree *tree;
 	ext4_lblk_t end;
+	unsigned long flags;
 	int err = 0;
 
 	trace_ext4_es_remove_extent(inode, lblk, len);
@@ -616,9 +620,150 @@  int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 
 	tree = &EXT4_I(inode)->i_es_tree;
 
-	write_lock(&EXT4_I(inode)->i_es_lock);
+	write_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
 	err = __es_remove_extent(tree, lblk, end);
-	write_unlock(&EXT4_I(inode)->i_es_lock);
+	write_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
+	ext4_es_print_tree(inode);
+	return err;
+}
+
+int ext4_es_convert_unwritten_extents(struct inode *inode, loff_t offset,
+				      size_t size)
+{
+	struct ext4_es_tree *tree;
+	struct rb_node *node;
+	struct extent_status *es, orig_es, conv_es;
+	ext4_lblk_t end, len1, len2;
+	ext4_lblk_t lblk = 0, len = 0;
+	unsigned long flags;
+	unsigned int blkbits;
+	int err = 0;
+
+	/* add trace point and debug */
+	blkbits = inode->i_blkbits;
+	lblk = offset >> blkbits;
+	len = (EXT4_BLOCK_ALIGN(offset + size, blkbits) >> blkbits) - lblk;
+
+	end = lblk + len - 1;
+	BUG_ON(end < lblk);
+
+	tree = &EXT4_I(inode)->i_es_tree;
+
+	write_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
+
+	es = __es_tree_search(&tree->root, lblk);
+	if (!es)
+		goto out;
+	if (es->es_lblk > end)
+		goto out;
+
+	tree->cache_es = NULL;
+
+	orig_es.es_lblk = es->es_lblk;
+	orig_es.es_len = es->es_len;
+	orig_es.es_pblk = es->es_pblk;
+	orig_es.es_status = es->es_status;
+
+	len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
+	len2 = extent_status_end(es) > end ?
+	       extent_status_end(es) - end : 0;
+	if (len1 > 0)
+		es->es_len = len1;
+	if (len2 > 0) {
+		if (len1 > 0) {
+			struct extent_status newes;
+
+			newes.es_lblk = end + 1;
+			newes.es_len = len2;
+			newes.es_pblk = orig_es.es_pblk + orig_es.es_len - len2;
+			newes.es_status = orig_es.es_status;
+			/*BUG_ON(newes.es_status != EXTENT_STATUS_UNWRITTEN);*/
+			err = __es_insert_extent(tree, &newes);
+			if (err) {
+				es->es_lblk = orig_es.es_lblk;
+				es->es_len = orig_es.es_len;
+				goto out;
+			}
+
+			conv_es.es_lblk = orig_es.es_lblk + len1;
+			conv_es.es_len = orig_es.es_len - len1 - len2;
+			conv_es.es_pblk = orig_es.es_pblk + len1;
+			conv_es.es_status = EXTENT_STATUS_WRITTEN;
+			err = __es_insert_extent(tree, &conv_es);
+			if (err) {
+				int err2;
+				err2 = __es_remove_extent(tree, newes.es_lblk,
+						extent_status_end(&newes));
+				if (err2)
+					goto out;
+				es->es_lblk = orig_es.es_lblk;
+				es->es_len = orig_es.es_len;
+				goto out;
+			}
+		} else {
+			es->es_lblk = end + 1;
+			es->es_len = len2;
+			es->es_pblk = orig_es.es_pblk + orig_es.es_len - len2;
+			/*BUG_ON(newes.es_status != EXTENT_STATUS_UNWRITTEN);*/
+
+			conv_es.es_lblk = orig_es.es_lblk;
+			conv_es.es_len = orig_es.es_len - len2;
+			conv_es.es_pblk = orig_es.es_pblk;
+			conv_es.es_status = EXTENT_STATUS_WRITTEN;
+			err = __es_insert_extent(tree, &conv_es);
+			if (err) {
+				es->es_lblk = orig_es.es_lblk;
+				es->es_len = orig_es.es_len;
+				es->es_pblk = orig_es.es_pblk;
+			}
+		}
+
+		goto out;
+	}
+
+	if (len1 > 0) {
+		node = rb_next(&es->rb_node);
+		if (node)
+			es = rb_entry(node, struct extent_status, rb_node);
+		else
+			es = NULL;
+	}
+
+	while (es && extent_status_end(es) <= end) {
+		node = rb_next(&es->rb_node);
+		es->es_status = EXTENT_STATUS_WRITTEN;
+		if (!node) {
+			es = NULL;
+			break;
+		}
+		es = rb_entry(node, struct extent_status, rb_node);
+	}
+
+	if (es && es->es_lblk < end + 1) {
+		ext4_lblk_t orig_len = es->es_len;
+
+		/*
+		 * Fill in conv_es first, before es is modified below, so that
+		 * the value of es need not be copied to a temporary variable.
+		 */
+		len1 = extent_status_end(es) - end;
+		conv_es.es_lblk = es->es_lblk;
+		conv_es.es_len = es->es_len - len1;
+		conv_es.es_pblk = es->es_pblk;
+		conv_es.es_status = EXTENT_STATUS_WRITTEN;
+
+		es->es_lblk = end + 1;
+		es->es_len = len1;
+		es->es_pblk = es->es_pblk + orig_len - len1;
+
+		err = __es_insert_extent(tree, &conv_es);
+		if (err)
+			goto out;
+	}
+
+out:
+	write_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
+
 	ext4_es_print_tree(inode);
 	return err;
 }
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 1890f80..9069ecf 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -51,6 +51,8 @@  extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 extern ext4_lblk_t ext4_es_find_extent(struct inode *inode,
 				struct extent_status *es);
 extern int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es);
+extern int ext4_es_convert_unwritten_extents(struct inode *inode,
+					     loff_t offset, size_t size);
 
 static inline int ext4_es_is_written(struct extent_status *es)
 {