diff mbox

[3.11.y.z,extended,stable] Patch "bcache: Data corruption fix" has been added to staging queue

Message ID 1391606006-30925-1-git-send-email-luis.henriques@canonical.com
State New
Headers show

Commit Message

Luis Henriques Feb. 5, 2014, 1:13 p.m. UTC
This is a note to let you know that I have just added a patch titled

    bcache: Data corruption fix

to the linux-3.11.y-queue branch of the 3.11.y.z extended stable tree 
which can be found at:

 http://kernel.ubuntu.com/git?p=ubuntu/linux.git;a=shortlog;h=refs/heads/linux-3.11.y-queue

If you, or anyone else, feels it should not be added to this tree, please 
reply to this email.

For more information about the 3.11.y.z tree, see
https://wiki.ubuntu.com/Kernel/Dev/ExtendedStable

Thanks.
-Luis

------

From 5826eca8246738a5258ed82d1a4583c7360d2a90 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Tue, 17 Dec 2013 17:51:02 -0800
Subject: bcache: Data corruption fix

commit ef71ec00002d92a08eb27e9d036e3d48835b6597 upstream.

The code that handles overlapping extents that we've just read back in from disk
was depending on the behaviour of the code that handles overlapping extents as
we're inserting into a btree node in the case of an insert that forced an
existing extent to be split: on insert, if we had to split we'd also insert a
new extent to represent the top part of the old extent - and then that new
extent would get written out.

The code that read the extents back in thus not bother with splitting extents -
if it saw an extent that ovelapped in the middle of an older extent, it would
trim the old extent to only represent the bottom part, assuming that the
original insert would've inserted a new extent to represent the top part.

I still haven't figured out _how_ it can happen, but I'm now pretty convinced
(and testing has confirmed) that there's some kind of an obscure corner case
(probably involving extent merging, and multiple overwrites in different sets)
that breaks this. The fix is to change the mergesort fixup code to split extents
itself when required.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Signed-off-by: Luis Henriques <luis.henriques@canonical.com>
---
 drivers/md/bcache/bset.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

--
1.8.3.2
diff mbox

Patch

diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 22d1ae7..26fdcac 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -935,7 +935,7 @@  static void sort_key_next(struct btree_iter *iter,
 		*i = iter->data[--iter->used];
 }

-static void btree_sort_fixup(struct btree_iter *iter)
+static struct bkey *btree_sort_fixup(struct btree_iter *iter, struct bkey *tmp)
 {
 	while (iter->used > 1) {
 		struct btree_iter_set *top = iter->data, *i = top + 1;
@@ -963,9 +963,22 @@  static void btree_sort_fixup(struct btree_iter *iter)
 		} else {
 			/* can't happen because of comparison func */
 			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
-			bch_cut_back(&START_KEY(i->k), top->k);
+
+			if (bkey_cmp(i->k, top->k) < 0) {
+				bkey_copy(tmp, top->k);
+
+				bch_cut_back(&START_KEY(i->k), tmp);
+				bch_cut_front(i->k, top->k);
+				heap_sift(iter, 0, btree_iter_cmp);
+
+				return tmp;
+			} else {
+				bch_cut_back(&START_KEY(i->k), top->k);
+			}
 		}
 	}
+
+	return NULL;
 }

 static void btree_mergesort(struct btree *b, struct bset *out,
@@ -973,15 +986,20 @@  static void btree_mergesort(struct btree *b, struct bset *out,
 			    bool fixup, bool remove_stale)
 {
 	struct bkey *k, *last = NULL;
+	BKEY_PADDED(k) tmp;
 	bool (*bad)(struct btree *, const struct bkey *) = remove_stale
 		? bch_ptr_bad
 		: bch_ptr_invalid;

 	while (!btree_iter_end(iter)) {
 		if (fixup && !b->level)
-			btree_sort_fixup(iter);
+			k = btree_sort_fixup(iter, &tmp.k);
+		else
+			k = NULL;
+
+		if (!k)
+			k = bch_btree_iter_next(iter);

-		k = bch_btree_iter_next(iter);
 		if (bad(b, k))
 			continue;