diff mbox

mtd: nand: do FIFO processing in nand_get_device()

Message ID 20151125173543.GA17151@linutronix.de
State Superseded
Headers show

Commit Message

Sebastian Andrzej Siewior Nov. 25, 2015, 5:35 p.m. UTC
I have here a live lock in UBI doing
  ensure_wear_leveling() -> wear_leveling_worker() -> ubi_eba_copy_leb()
  MOVE_RETRY -> schedule_erase() -> ensure_wear_leveling()

on the same PEB over and over again. The reason for MOVE_RETRY is that
the LEB-Lock owner is stucked in nand_get_device() and does not get the
device lock. The PEB-lock owner is only scheduled on the CPU while the UBI
thread is idle during erase or read while (again) owning the device-lock
so the LEB-lock owner makes no progress.

To fix this live lock I ensure that there FIFO processing in
nand_get_device(). On release the first waiter is marked as the new
owner. If someone asks for the device and is not the waiter to which
nand device has been handed over then it will put itself on the
waitqueue.
The FIFO processing was suggested by Peter Zijlstra.

As a small optimization I use add_wait_queue_exclusive() instead
add_wait_queue() to make sure that only _one_ waiter is woken up and not
all of them.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Would a stable be considered as reasonable?

 drivers/mtd/nand/nand_base.c  | 39 ++++++++++++++++++++++++++++++++-------
 include/linux/mtd/flashchip.h |  1 +
 include/linux/mtd/nand.h      |  1 +
 3 files changed, 34 insertions(+), 7 deletions(-)

Comments

Peter Zijlstra Nov. 30, 2015, 4:15 p.m. UTC | #1
On Wed, Nov 25, 2015 at 06:35:43PM +0100, Sebastian Andrzej Siewior wrote:
> +++ b/drivers/mtd/nand/nand_base.c
> @@ -137,8 +137,25 @@ static void nand_release_device(struct mtd_info *mtd)
>  	/* Release the controller and the chip */
>  	spin_lock(&chip->controller->lock);
>  	chip->controller->active = NULL;
> -	chip->state = FL_READY;
> -	wake_up(&chip->controller->wq);
> +
> +	if (waitqueue_active(&chip->controller->wq)) {
> +		wait_queue_head_t *q;
> +		wait_queue_t *waiter;
> +		unsigned long flags;
> +
> +		q = &chip->controller->wq;
> +		chip->state = FL_HANDOVER;
> +
> +		spin_lock_irqsave(&q->lock, flags);

This lock is actually not required, as your add/remove_wait_queue calls
are also under chip->controller->lock.

> +		waiter = list_first_entry(&q->task_list, wait_queue_t,
> +					  task_list);
> +		spin_unlock_irqrestore(&q->lock, flags);

And the one read instruction under a spinlock is a tad pointless anyway.

> +
> +		chip->controller->handover_waiter = waiter;

You could consider using ->active for this; as it stands you never use
both at the same time. Its a tad icky, but it avoids adding that new
field.

> +		wake_up(q);
> +	} else {
> +		chip->state = FL_READY;
> +	}
>  	spin_unlock(&chip->controller->lock);
>  }
>  
> @@ -843,10 +860,18 @@ nand_get_device(struct mtd_info *mtd, int new_state)
>  	if (!chip->controller->active)
>  		chip->controller->active = chip;
>  
> -	if (chip->controller->active == chip && chip->state == FL_READY) {
> -		chip->state = new_state;
> -		spin_unlock(lock);
> -		return 0;
> +	if (chip->controller->active == chip) {
> +		if (chip->state == FL_READY) {
> +			chip->state = new_state;
> +			spin_unlock(lock);
> +			return 0;
> +		}
> +		if (chip->state == FL_HANDOVER &&
> +		    chip->controller->handover_waiter == &wait) {
> +			chip->state = new_state;
> +			spin_unlock(lock);
> +			return 0;
> +		}
>  	}

That means this would become:

	if (chip->state == FL_HANDOVER && chip->controller->active == &wait) {
		chip->controller->active = chip;
		chip->state = new_state;
	}

	... existing code ...
Brian Norris Dec. 2, 2015, 6:52 p.m. UTC | #2
On Wed, Nov 25, 2015 at 06:35:43PM +0100, Sebastian Andrzej Siewior wrote:
> I have here a live lock in UBI doing
>   ensure_wear_leveling() -> wear_leveling_worker() -> ubi_eba_copy_leb()
>   MOVE_RETRY -> schedule_erase() -> ensure_wear_leveling()
> 
> on the same PEB over and over again. The reason for MOVE_RETRY is that
> the LEB-Lock owner is stucked in nand_get_device() and does not get the
> device lock. The PEB-lock owner is only scheduled on the CPU while the UBI
> thread is idle during erase or read while (again) owning the device-lock
> so the LEB-lock owner makes no progress.
> 
> To fix this live lock I ensure that there FIFO processing in
> nand_get_device(). On release the first waiter is marked as the new
> owner. If someone asks for the device and is not the waiter to which
> nand device has been handed over then it will put itself on the
> waitqueue.
> The FIFO processing was suggested by Peter Zijlstra.
> 
> As a small optimization I use add_wait_queue_exclusive() instead
> add_wait_queue() to make sure that only _one_ waiter is woken up and not
> all of them.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
> Would a stable be considered as reasonable?

Your threading is a bit confusing. Is this patch still needed, or did
you fix everything by fixing UBI with these?

http://lists.infradead.org/pipermail/linux-mtd/2015-November/063745.html
http://lists.infradead.org/pipermail/linux-mtd/2015-November/063744.html
http://lists.infradead.org/pipermail/linux-mtd/2015-November/063746.html

Brian
Sebastian Andrzej Siewior Dec. 2, 2015, 8:41 p.m. UTC | #3
On 12/02/2015 07:52 PM, Brian Norris wrote:
> Your threading is a bit confusing. Is this patch still needed, or did
> you fix everything by fixing UBI with these?

The three UBI patches are just an optimization and I trigger the life
lock with them. This patch is still required. I hope that I can respond
to Peter's comments by the end of the week. If you any suggestions
please let me know.

> Brian

Sebastian
Sebastian Andrzej Siewior Dec. 6, 2015, 2:17 p.m. UTC | #4
* Peter Zijlstra | 2015-11-30 17:15:49 [+0100]:

>On Wed, Nov 25, 2015 at 06:35:43PM +0100, Sebastian Andrzej Siewior wrote:
>> +++ b/drivers/mtd/nand/nand_base.c
>> @@ -137,8 +137,25 @@ static void nand_release_device(struct mtd_info *mtd)
>>  	/* Release the controller and the chip */
>>  	spin_lock(&chip->controller->lock);
>>  	chip->controller->active = NULL;
>> -	chip->state = FL_READY;
>> -	wake_up(&chip->controller->wq);
>> +
>> +	if (waitqueue_active(&chip->controller->wq)) {
>> +		wait_queue_head_t *q;
>> +		wait_queue_t *waiter;
>> +		unsigned long flags;
>> +
>> +		q = &chip->controller->wq;
>> +		chip->state = FL_HANDOVER;
>> +
>> +		spin_lock_irqsave(&q->lock, flags);
>
>This lock is actually not required, as your add/remove_wait_queue calls
>are also under chip->controller->lock.

yeah. And there can be only one wakeup and only after this function here
made it happen. So yes, not required. dropped it.

>> +
>> +		chip->controller->handover_waiter = waiter;
>
>You could consider using ->active for this; as it stands you never use
>both at the same time. Its a tad icky, but it avoids adding that new
>field.

There is this FL_PM_SUSPENDED which derefences `active` even if its state
is not FL_READY. So I think that we could go boom here.
Also in order to share that member we need either an union or an
explicit cast because it is a different type. Puh.
My estimate here is 3 * pointer + 2 * sizeof spinlock. So on 32bit we
should always end up in kmalloc-32 (except with spinlock debugging which
inflates this struct anyway so this extra pointer should not matter
much).

Sebastian
diff mbox

Patch

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index ece544efccc3..e2896af488c0 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -137,8 +137,25 @@  static void nand_release_device(struct mtd_info *mtd)
 	/* Release the controller and the chip */
 	spin_lock(&chip->controller->lock);
 	chip->controller->active = NULL;
-	chip->state = FL_READY;
-	wake_up(&chip->controller->wq);
+
+	if (waitqueue_active(&chip->controller->wq)) {
+		wait_queue_head_t *q;
+		wait_queue_t *waiter;
+		unsigned long flags;
+
+		q = &chip->controller->wq;
+		chip->state = FL_HANDOVER;
+
+		spin_lock_irqsave(&q->lock, flags);
+		waiter = list_first_entry(&q->task_list, wait_queue_t,
+					  task_list);
+		spin_unlock_irqrestore(&q->lock, flags);
+
+		chip->controller->handover_waiter = waiter;
+		wake_up(q);
+	} else {
+		chip->state = FL_READY;
+	}
 	spin_unlock(&chip->controller->lock);
 }
 
@@ -843,10 +860,18 @@  nand_get_device(struct mtd_info *mtd, int new_state)
 	if (!chip->controller->active)
 		chip->controller->active = chip;
 
-	if (chip->controller->active == chip && chip->state == FL_READY) {
-		chip->state = new_state;
-		spin_unlock(lock);
-		return 0;
+	if (chip->controller->active == chip) {
+		if (chip->state == FL_READY) {
+			chip->state = new_state;
+			spin_unlock(lock);
+			return 0;
+		}
+		if (chip->state == FL_HANDOVER &&
+		    chip->controller->handover_waiter == &wait) {
+			chip->state = new_state;
+			spin_unlock(lock);
+			return 0;
+		}
 	}
 	if (new_state == FL_PM_SUSPENDED) {
 		if (chip->controller->active->state == FL_PM_SUSPENDED) {
@@ -856,7 +881,7 @@  nand_get_device(struct mtd_info *mtd, int new_state)
 		}
 	}
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	add_wait_queue(wq, &wait);
+	add_wait_queue_exclusive(wq, &wait);
 	spin_unlock(lock);
 	schedule();
 	remove_wait_queue(wq, &wait);
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index b63fa457febd..c855f4fd51b8 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -58,6 +58,7 @@  typedef enum {
 	FL_OTPING,
 	FL_PREPARING_ERASE,
 	FL_VERIFYING_ERASE,
+	FL_HANDOVER,
 
 	FL_UNKNOWN
 } flstate_t;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 5a9d1d4c2487..2686dc5dd51b 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -439,6 +439,7 @@  struct nand_hw_control {
 	spinlock_t lock;
 	struct nand_chip *active;
 	wait_queue_head_t wq;
+	wait_queue_t *handover_waiter;
 };
 
 /**