diff mbox

[4.2.y-ckt,052/218] RAID5: revert e9e4c377e2f563 to fix a livelock

Message ID 1459455431-12687-53-git-send-email-kamal@canonical.com
State New
Headers show

Commit Message

Kamal Mostafa March 31, 2016, 8:14 p.m. UTC
4.2.8-ckt7 -stable review patch.  If anyone has any objections, please let me know.

---8<------------------------------------------------------------

From: Shaohua Li <shli@fb.com>

commit 6ab2a4b806ae21b6c3e47c5ff1285ec06d505325 upstream.

Revert commit
e9e4c377e2f563(md/raid5: per hash value and exclusive wait_for_stripe)

The problem is raid5_get_active_stripe waits on
conf->wait_for_stripe[hash]. Assume hash is 0. My test release stripes
in this order:
- release all stripes with hash 0
- raid5_get_active_stripe still sleeps since active_stripes >
  max_nr_stripes * 3 / 4
- release all stripes with hash other than 0. active_stripes becomes 0
- raid5_get_active_stripe still sleeps, since nobody wakes up
  wait_for_stripe[0]
The system live locks. The problem is active_stripes isn't a per-hash
count. Revert the patch makes the live lock go away.

Cc: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Cc: NeilBrown <neilb@suse.de>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
---
 drivers/md/raid5.c | 27 ++++++++-------------------
 drivers/md/raid5.h |  2 +-
 2 files changed, 9 insertions(+), 20 deletions(-)
diff mbox

Patch

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a55b6859..71d7cf7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -344,8 +344,7 @@  static void release_inactive_stripe_list(struct r5conf *conf,
 					 int hash)
 {
 	int size;
-	unsigned long do_wakeup = 0;
-	int i = 0;
+	bool do_wakeup = false;
 	unsigned long flags;
 
 	if (hash == NR_STRIPE_HASH_LOCKS) {
@@ -366,19 +365,15 @@  static void release_inactive_stripe_list(struct r5conf *conf,
 			    !list_empty(list))
 				atomic_dec(&conf->empty_inactive_list_nr);
 			list_splice_tail_init(list, conf->inactive_list + hash);
-			do_wakeup |= 1 << hash;
+			do_wakeup = true;
 			spin_unlock_irqrestore(conf->hash_locks + hash, flags);
 		}
 		size--;
 		hash--;
 	}
 
-	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-		if (do_wakeup & (1 << i))
-			wake_up(&conf->wait_for_stripe[i]);
-	}
-
 	if (do_wakeup) {
+		wake_up(&conf->wait_for_stripe);
 		if (atomic_read(&conf->active_stripes) == 0)
 			wake_up(&conf->wait_for_quiescent);
 		if (conf->retry_read_aligned)
@@ -691,15 +686,14 @@  get_active_stripe(struct r5conf *conf, sector_t sector,
 			if (!sh) {
 				set_bit(R5_INACTIVE_BLOCKED,
 					&conf->cache_state);
-				wait_event_exclusive_cmd(
-					conf->wait_for_stripe[hash],
+				wait_event_lock_irq(
+					conf->wait_for_stripe,
 					!list_empty(conf->inactive_list + hash) &&
 					(atomic_read(&conf->active_stripes)
 					 < (conf->max_nr_stripes * 3 / 4)
 					 || !test_bit(R5_INACTIVE_BLOCKED,
 						      &conf->cache_state)),
-					spin_unlock_irq(conf->hash_locks + hash),
-					spin_lock_irq(conf->hash_locks + hash));
+					*(conf->hash_locks + hash));
 				clear_bit(R5_INACTIVE_BLOCKED,
 					  &conf->cache_state);
 			} else {
@@ -724,9 +718,6 @@  get_active_stripe(struct r5conf *conf, sector_t sector,
 		}
 	} while (sh == NULL);
 
-	if (!list_empty(conf->inactive_list + hash))
-		wake_up(&conf->wait_for_stripe[hash]);
-
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
@@ -2204,7 +2195,7 @@  static int resize_stripes(struct r5conf *conf, int newsize)
 	cnt = 0;
 	list_for_each_entry(nsh, &newstripes, lru) {
 		lock_device_hash_lock(conf, hash);
-		wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
+		wait_event_cmd(conf->wait_for_stripe,
 				    !list_empty(conf->inactive_list + hash),
 				    unlock_device_hash_lock(conf, hash),
 				    lock_device_hash_lock(conf, hash));
@@ -6487,9 +6478,7 @@  static struct r5conf *setup_conf(struct mddev *mddev)
 	seqcount_init(&conf->gen_lock);
 	mutex_init(&conf->cache_size_mutex);
 	init_waitqueue_head(&conf->wait_for_quiescent);
-	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-		init_waitqueue_head(&conf->wait_for_stripe[i]);
-	}
+	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
 	INIT_LIST_HEAD(&conf->hold_list);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 65ecd49..a42a25d 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -515,7 +515,7 @@  struct r5conf {
 	atomic_t		empty_inactive_list_nr;
 	struct llist_head	released_stripes;
 	wait_queue_head_t	wait_for_quiescent;
-	wait_queue_head_t	wait_for_stripe[NR_STRIPE_HASH_LOCKS];
+	wait_queue_head_t	wait_for_stripe;
 	wait_queue_head_t	wait_for_overlap;
 	unsigned long		cache_state;
 #define R5_INACTIVE_BLOCKED	1	/* release of inactive stripes blocked,