diff mbox series

[v5,3/4] mtd: core: protect access to MTD devices while in suspend

Message ID 20211102110204.3334609-4-sean@geanix.com
State Accepted
Headers show
Series mtd: core: protect access to mtd devices while in suspend | expand

Commit Message

Sean Nyekjaer Nov. 2, 2021, 11:02 a.m. UTC
Prevent MTD access while in a suspended state. Also
prevent suspending a device which is still currently in use.

Commit 013e6292aaf5 ("mtd: rawnand: Simplify the locking") allows the
rawnand layer to return errors rather than waiting in a blocking wait.

Tested on a iMX6ULL.

Suggested-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Sean Nyekjaer <sean@geanix.com>
---
 drivers/mtd/mtdcore.c   | 115 +++++++++++++++++++++++++++++++++++-----
 include/linux/mtd/mtd.h |  81 +++++++++++++++++++++++-----
 2 files changed, 169 insertions(+), 27 deletions(-)

Comments

Miquel Raynal Nov. 19, 2021, 6:35 p.m. UTC | #1
On Tue, 2021-11-02 at 11:02:03 UTC, Sean Nyekjaer wrote:
> Prevent MTD access while in a suspended state. Also
> prevent suspending a device which is still currently in use.
> 
> Commit 013e6292aaf5 ("mtd: rawnand: Simplify the locking") allows the
> rawnand layer to return errors rather than waiting in a blocking wait.
> 
> Tested on a iMX6ULL.
> 
> Suggested-by: Boris Brezillon <boris.brezillon@collabora.com>
> Signed-off-by: Sean Nyekjaer <sean@geanix.com>

Applied to https://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/next, thanks.

Miquel
Marek Szyprowski Nov. 23, 2021, 12:03 p.m. UTC | #2
Hi,

On 02.11.2021 12:02, Sean Nyekjaer wrote:
> Prevent MTD access while in a suspended state. Also
> prevent suspending a device which is still currently in use.
>
> Commit 013e6292aaf5 ("mtd: rawnand: Simplify the locking") allows the
> rawnand layer to return errors rather than waiting in a blocking wait.
>
> Tested on a iMX6ULL.
>
> Suggested-by: Boris Brezillon <boris.brezillon@collabora.com>
> Signed-off-by: Sean Nyekjaer <sean@geanix.com>

This patch landed recently in linux-next as commit 9d6abd489e70 ("mtd: 
core: protect access to MTD devices while in suspend"). I found that it 
triggers the following warning on my test systems:

INFO: trying to register non-static key.
The code is fine but needs lockdep annotation, or maybe
you didn't initialize this object before use?
turning off the locking correctness validator.
CPU: 1 PID: 1606 Comm: reboot Not tainted 5.16.0-rc1+ #4165
Hardware name: linux,dummy-virt (DT)
Call trace:
  dump_backtrace+0x0/0x1ac
  show_stack+0x18/0x24
  dump_stack_lvl+0x8c/0xb8
  dump_stack+0x18/0x34
  register_lock_class+0x4a0/0x4cc
  __lock_acquire+0x78/0x20cc
  lock_acquire.part.0+0xe0/0x230
  lock_acquire+0x68/0x84
  down_write+0x64/0xe0
  physmap_flash_shutdown+0x60/0x140
  platform_shutdown+0x28/0x40
  device_shutdown+0x160/0x340
  __do_sys_reboot+0x1f8/0x2a0
  __arm64_sys_reboot+0x28/0x34
  invoke_syscall+0x48/0x114
  el0_svc_common.constprop.0+0x60/0x11c
  do_el0_svc_compat+0x1c/0x50
  el0_svc_compat+0x4c/0x100
  el0t_32_sync_handler+0x90/0x140
  el0t_32_sync+0x1a4/0x1a8
Flash device refused suspend due to active operation (state 20)
------------[ cut here ]------------
DEBUG_RWSEMS_WARN_ON(sem->magic != sem): count = 0x1, magic = 0x0, owner 
= 0xffff588b4547c740, curr 0xffff588b4547c740, list not empty
WARNING: CPU: 1 PID: 1606 at kernel/locking/rwsem.c:1322 
up_write+0x144/0x1a0
Modules linked in: bluetooth ecdh_generic ecc rfkill ipv6
CPU: 1 PID: 1606 Comm: reboot Not tainted 5.16.0-rc1+ #4165
Hardware name: linux,dummy-virt (DT)
pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : up_write+0x144/0x1a0
lr : up_write+0x144/0x1a0
sp : ffff8000109ebbd0
x29: ffff8000109ebbd0 x28: ffff588b4547c740 x27: 0000000000000000
x26: ffffce0238d56470 x25: 0000000000000008 x24: ffffce0239bba030
x23: ffff588b451938b0 x22: 0000000000000000 x21: ffff588b44046c80
x20: ffffce02397a2000 x19: ffff588b451938b0 x18: 0000000000000030
x17: 0000000000000000 x16: 0000000000000000 x15: ffffffffffffffff
x14: 0000000000000005 x13: ffffce02397c5198 x12: 0000000000000390
x11: 0000000000000130 x10: ffffce023981d198 x9 : 00000000fffff000
x8 : ffffce02397c5198 x7 : ffffce023981d198 x6 : 0000000000000000
x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000
x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff588b4547c740
Call trace:
  up_write+0x144/0x1a0
  physmap_flash_shutdown+0x13c/0x140
  platform_shutdown+0x28/0x40
  device_shutdown+0x160/0x340
  __do_sys_reboot+0x1f8/0x2a0
  __arm64_sys_reboot+0x28/0x34
  invoke_syscall+0x48/0x114
  el0_svc_common.constprop.0+0x60/0x11c
  do_el0_svc_compat+0x1c/0x50
  el0_svc_compat+0x4c/0x100
  el0t_32_sync_handler+0x90/0x140
  el0t_32_sync+0x1a4/0x1a8
irq event stamp: 2541
hardirqs last  enabled at (2541): [<ffffce02382d94c8>] 
_raw_spin_unlock_irq+0x44/0x90
hardirqs last disabled at (2540): [<ffffce02382d98cc>] 
_raw_spin_lock_irq+0xac/0xb0
softirqs last  enabled at (2278): [<ffffce0237210470>] _stext+0x470/0x5e8
softirqs last disabled at (2273): [<ffffce023729d904>] 
__irq_exit_rcu+0x180/0x1ac
---[ end trace d06160a193b668c2 ]---
Flash device refused suspend due to active operation (state 20)


Reverting $subject patch on top of linux-next hides the warning. The 
above log has been gathered on QEMU/arm64 'virt' machine, during the 
reboot operation. If you need more information how to reproduce it, let 
me know.


> ---
>   drivers/mtd/mtdcore.c   | 115 +++++++++++++++++++++++++++++++++++-----
>   include/linux/mtd/mtd.h |  81 +++++++++++++++++++++++-----
>   2 files changed, 169 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
> index 153229198947..f02b602b3fa9 100644
> --- a/drivers/mtd/mtdcore.c
> +++ b/drivers/mtd/mtdcore.c
> @@ -777,6 +777,8 @@ static void mtd_set_dev_defaults(struct mtd_info *mtd)
>   	INIT_LIST_HEAD(&mtd->partitions);
>   	mutex_init(&mtd->master.partitions_lock);
>   	mutex_init(&mtd->master.chrdev_lock);
> +	init_waitqueue_head(&mtd->master.resume_wq);
> +	init_rwsem(&mtd->master.suspend_lock);
>   }
>   
>   static ssize_t mtd_otp_size(struct mtd_info *mtd, bool is_user)
> @@ -1267,7 +1269,9 @@ int mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
>   
>   	adjinstr.addr += mst_ofs;
>   
> +	mtd_start_access(master);
>   	ret = master->_erase(master, &adjinstr);
> +	mtd_end_access(master);
>   
>   	if (adjinstr.fail_addr != MTD_FAIL_ADDR_UNKNOWN) {
>   		instr->fail_addr = adjinstr.fail_addr - mst_ofs;
> @@ -1289,6 +1293,7 @@ int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen,
>   	      void **virt, resource_size_t *phys)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	*retlen = 0;
>   	*virt = NULL;
> @@ -1301,8 +1306,12 @@ int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen,
>   	if (!len)
>   		return 0;
>   
> +	mtd_start_access(master);
>   	from = mtd_get_master_ofs(mtd, from);
> -	return master->_point(master, from, len, retlen, virt, phys);
> +	ret = master->_point(master, from, len, retlen, virt, phys);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_point);
>   
> @@ -1310,6 +1319,7 @@ EXPORT_SYMBOL_GPL(mtd_point);
>   int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (!master->_unpoint)
>   		return -EOPNOTSUPP;
> @@ -1317,7 +1327,12 @@ int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
>   		return -EINVAL;
>   	if (!len)
>   		return 0;
> -	return master->_unpoint(master, mtd_get_master_ofs(mtd, from), len);
> +
> +	mtd_start_access(master);
> +	ret =  master->_unpoint(master, mtd_get_master_ofs(mtd, from), len);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_unpoint);
>   
> @@ -1372,6 +1387,7 @@ int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen,
>   	};
>   	int ret;
>   
> +	/* mtd_read_oob_std handles mtd access protection */
>   	ret = mtd_read_oob(mtd, from, &ops);
>   	*retlen = ops.retlen;
>   
> @@ -1388,6 +1404,7 @@ int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen,
>   	};
>   	int ret;
>   
> +	/* mtd_write_oob_std handles mtd access protection */
>   	ret = mtd_write_oob(mtd, to, &ops);
>   	*retlen = ops.retlen;
>   
> @@ -1464,11 +1481,13 @@ static int mtd_read_oob_std(struct mtd_info *mtd, loff_t from,
>   	int ret;
>   
>   	from = mtd_get_master_ofs(mtd, from);
> +	mtd_start_access(master);
>   	if (master->_read_oob)
>   		ret = master->_read_oob(master, from, ops);
>   	else
>   		ret = master->_read(master, from, ops->len, &ops->retlen,
>   				    ops->datbuf);
> +	mtd_end_access(master);
>   
>   	return ret;
>   }
> @@ -1480,11 +1499,13 @@ static int mtd_write_oob_std(struct mtd_info *mtd, loff_t to,
>   	int ret;
>   
>   	to = mtd_get_master_ofs(mtd, to);
> +	mtd_start_access(master);
>   	if (master->_write_oob)
>   		ret = master->_write_oob(master, to, ops);
>   	else
>   		ret = master->_write(master, to, ops->len, &ops->retlen,
>   				     ops->datbuf);
> +	mtd_end_access(master);
>   
>   	return ret;
>   }
> @@ -1992,12 +2013,18 @@ int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen,
>   			   struct otp_info *buf)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (!master->_get_fact_prot_info)
>   		return -EOPNOTSUPP;
>   	if (!len)
>   		return 0;
> -	return master->_get_fact_prot_info(master, len, retlen, buf);
> +
> +	mtd_start_access(master);
> +	ret = master->_get_fact_prot_info(master, len, retlen, buf);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_get_fact_prot_info);
>   
> @@ -2005,13 +2032,19 @@ int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len,
>   			   size_t *retlen, u_char *buf)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	*retlen = 0;
>   	if (!master->_read_fact_prot_reg)
>   		return -EOPNOTSUPP;
>   	if (!len)
>   		return 0;
> -	return master->_read_fact_prot_reg(master, from, len, retlen, buf);
> +
> +	mtd_start_access(master);
> +	ret = master->_read_fact_prot_reg(master, from, len, retlen, buf);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_read_fact_prot_reg);
>   
> @@ -2019,12 +2052,18 @@ int mtd_get_user_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen,
>   			   struct otp_info *buf)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (!master->_get_user_prot_info)
>   		return -EOPNOTSUPP;
>   	if (!len)
>   		return 0;
> -	return master->_get_user_prot_info(master, len, retlen, buf);
> +
> +	mtd_start_access(master);
> +	ret =  master->_get_user_prot_info(master, len, retlen, buf);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_get_user_prot_info);
>   
> @@ -2032,13 +2071,19 @@ int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len,
>   			   size_t *retlen, u_char *buf)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	*retlen = 0;
>   	if (!master->_read_user_prot_reg)
>   		return -EOPNOTSUPP;
>   	if (!len)
>   		return 0;
> -	return master->_read_user_prot_reg(master, from, len, retlen, buf);
> +
> +	mtd_start_access(master);
> +	ret = master->_read_user_prot_reg(master, from, len, retlen, buf);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_read_user_prot_reg);
>   
> @@ -2053,7 +2098,11 @@ int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len,
>   		return -EOPNOTSUPP;
>   	if (!len)
>   		return 0;
> +
> +	mtd_start_access(master);
>   	ret = master->_write_user_prot_reg(master, to, len, retlen, buf);
> +	mtd_end_access(master);
> +
>   	if (ret)
>   		return ret;
>   
> @@ -2068,24 +2117,36 @@ EXPORT_SYMBOL_GPL(mtd_write_user_prot_reg);
>   int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (!master->_lock_user_prot_reg)
>   		return -EOPNOTSUPP;
>   	if (!len)
>   		return 0;
> -	return master->_lock_user_prot_reg(master, from, len);
> +
> +	mtd_start_access(master);
> +	ret = master->_lock_user_prot_reg(master, from, len);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_lock_user_prot_reg);
>   
>   int mtd_erase_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (!master->_erase_user_prot_reg)
>   		return -EOPNOTSUPP;
>   	if (!len)
>   		return 0;
> -	return master->_erase_user_prot_reg(master, from, len);
> +
> +	mtd_start_access(master);
> +	ret = master->_erase_user_prot_reg(master, from, len);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_erase_user_prot_reg);
>   
> @@ -2093,6 +2154,7 @@ EXPORT_SYMBOL_GPL(mtd_erase_user_prot_reg);
>   int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (!master->_lock)
>   		return -EOPNOTSUPP;
> @@ -2106,13 +2168,18 @@ int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
>   		len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize;
>   	}
>   
> -	return master->_lock(master, mtd_get_master_ofs(mtd, ofs), len);
> +	mtd_start_access(master);
> +	ret = master->_lock(master, mtd_get_master_ofs(mtd, ofs), len);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_lock);
>   
>   int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (!master->_unlock)
>   		return -EOPNOTSUPP;
> @@ -2126,13 +2193,18 @@ int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
>   		len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize;
>   	}
>   
> -	return master->_unlock(master, mtd_get_master_ofs(mtd, ofs), len);
> +	mtd_start_access(master);
> +	ret = master->_unlock(master, mtd_get_master_ofs(mtd, ofs), len);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_unlock);
>   
>   int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (!master->_is_locked)
>   		return -EOPNOTSUPP;
> @@ -2146,13 +2218,18 @@ int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
>   		len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize;
>   	}
>   
> -	return master->_is_locked(master, mtd_get_master_ofs(mtd, ofs), len);
> +	mtd_start_access(master);
> +	ret = master->_is_locked(master, mtd_get_master_ofs(mtd, ofs), len);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_is_locked);
>   
>   int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (ofs < 0 || ofs >= mtd->size)
>   		return -EINVAL;
> @@ -2162,13 +2239,18 @@ int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs)
>   	if (mtd->flags & MTD_SLC_ON_MLC_EMULATION)
>   		ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize;
>   
> -	return master->_block_isreserved(master, mtd_get_master_ofs(mtd, ofs));
> +	mtd_start_access(master);
> +	ret = master->_block_isreserved(master, mtd_get_master_ofs(mtd, ofs));
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_block_isreserved);
>   
>   int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (ofs < 0 || ofs >= mtd->size)
>   		return -EINVAL;
> @@ -2178,7 +2260,11 @@ int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
>   	if (mtd->flags & MTD_SLC_ON_MLC_EMULATION)
>   		ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize;
>   
> -	return master->_block_isbad(master, mtd_get_master_ofs(mtd, ofs));
> +	mtd_start_access(master);
> +	ret = master->_block_isbad(master, mtd_get_master_ofs(mtd, ofs));
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(mtd_block_isbad);
>   
> @@ -2197,7 +2283,10 @@ int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs)
>   	if (mtd->flags & MTD_SLC_ON_MLC_EMULATION)
>   		ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize;
>   
> +	mtd_start_access(master);
>   	ret = master->_block_markbad(master, mtd_get_master_ofs(mtd, ofs));
> +	mtd_end_access(master);
> +
>   	if (ret)
>   		return ret;
>   
> diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
> index 88227044fc86..b074106e2d8e 100644
> --- a/include/linux/mtd/mtd.h
> +++ b/include/linux/mtd/mtd.h
> @@ -231,6 +231,8 @@ struct mtd_master {
>   	struct mutex partitions_lock;
>   	struct mutex chrdev_lock;
>   	unsigned int suspended : 1;
> +	wait_queue_head_t resume_wq;
> +	struct rw_semaphore suspend_lock;
>   };
>   
>   struct mtd_info {
> @@ -476,10 +478,47 @@ static inline u32 mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops)
>   	return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize;
>   }
>   
> +static inline void mtd_start_access(struct mtd_info *master)
> +{
> +	WARN_ON_ONCE(master != mtd_get_master(master));
> +
> +	/*
> +	 * Don't take the suspend_lock on devices that don't
> +	 * implement the suspend hook. Otherwise, lockdep will
> +	 * complain about nested locks when trying to suspend MTD
> +	 * partitions or MTD devices created by gluebi which are
> +	 * backed by real devices.
> +	 */
> +	if (!master->_suspend)
> +		return;
> +
> +	/*
> +	 * Wait until the device is resumed. Should we have a
> +	 * non-blocking mode here?
> +	 */
> +	while (1) {
> +		down_read(&master->master.suspend_lock);
> +		if (!master->master.suspended)
> +			return;
> +
> +		up_read(&master->master.suspend_lock);
> +		wait_event(master->master.resume_wq, !master->master.suspended);
> +	}
> +}
> +
> +static inline void mtd_end_access(struct mtd_info *master)
> +{
> +	if (!master->_suspend)
> +		return;
> +
> +	up_read(&master->master.suspend_lock);
> +}
> +
>   static inline int mtd_max_bad_blocks(struct mtd_info *mtd,
>   				     loff_t ofs, size_t len)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> +	int ret;
>   
>   	if (!master->_max_bad_blocks)
>   		return -ENOTSUPP;
> @@ -487,8 +526,12 @@ static inline int mtd_max_bad_blocks(struct mtd_info *mtd,
>   	if (mtd->size < (len + ofs) || ofs < 0)
>   		return -EINVAL;
>   
> -	return master->_max_bad_blocks(master, mtd_get_master_ofs(mtd, ofs),
> -				       len);
> +	mtd_start_access(master);
> +	ret = master->_max_bad_blocks(master, mtd_get_master_ofs(mtd, ofs),
> +				      len);
> +	mtd_end_access(master);
> +
> +	return ret;
>   }
>   
>   int mtd_wunit_to_pairing_info(struct mtd_info *mtd, int wunit,
> @@ -546,30 +589,40 @@ int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs);
>   static inline int mtd_suspend(struct mtd_info *mtd)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
> -	int ret;
> -
> -	if (master->master.suspended)
> -		return 0;
> +	int ret = 0;
>   
> -	ret = master->_suspend ? master->_suspend(master) : 0;
> -	if (ret)
> +	if (!master->_suspend)
>   		return ret;
>   
> -	master->master.suspended = 1;
> -	return 0;
> +	down_write(&master->master.suspend_lock);
> +	if (!master->master.suspended) {
> +		ret = master->_suspend(master);
> +		if (!ret)
> +			master->master.suspended = 1;
> +	}
> +	up_write(&master->master.suspend_lock);
> +
> +	return ret;
>   }
>   
>   static inline void mtd_resume(struct mtd_info *mtd)
>   {
>   	struct mtd_info *master = mtd_get_master(mtd);
>   
> -	if (!master->master.suspended)
> +	if (!master->_suspend)
>   		return;
>   
> -	if (master->_resume)
> -		master->_resume(master);
> +	down_write(&master->master.suspend_lock);
> +	if (master->master.suspended) {
> +		if (master->_resume)
> +			master->_resume(master);
> +
> +		master->master.suspended = 0;
>   
> -	master->master.suspended = 0;
> +		/* The MTD dev has been resumed, wake up all waiters. */
> +		wake_up_all(&master->master.resume_wq);
> +	}
> +	up_write(&master->master.suspend_lock);
>   }
>   
>   static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)

Best regards
Sean Nyekjaer Nov. 23, 2021, 12:50 p.m. UTC | #3
On Tue, Nov 23, 2021 at 01:03:52PM +0100, Marek Szyprowski wrote:
> Hi,
> 
> On 02.11.2021 12:02, Sean Nyekjaer wrote:
> > Prevent MTD access while in a suspended state. Also
> > prevent suspending a device which is still currently in use.
> >
> > Commit 013e6292aaf5 ("mtd: rawnand: Simplify the locking") allows the
> > rawnand layer to return errors rather than waiting in a blocking wait.
> >
> > Tested on a iMX6ULL.
> >
> > Suggested-by: Boris Brezillon <boris.brezillon@collabora.com>
> > Signed-off-by: Sean Nyekjaer <sean@geanix.com>
> 
> This patch landed recently in linux-next as commit 9d6abd489e70 ("mtd: 
> core: protect access to MTD devices while in suspend"). I found that it 
> triggers the following warning on my test systems:
> 
> INFO: trying to register non-static key.
> The code is fine but needs lockdep annotation, or maybe
> you didn't initialize this object before use?
> turning off the locking correctness validator.
> CPU: 1 PID: 1606 Comm: reboot Not tainted 5.16.0-rc1+ #4165
> Hardware name: linux,dummy-virt (DT)
> Call trace:
>   dump_backtrace+0x0/0x1ac
>   show_stack+0x18/0x24
>   dump_stack_lvl+0x8c/0xb8
>   dump_stack+0x18/0x34
>   register_lock_class+0x4a0/0x4cc
>   __lock_acquire+0x78/0x20cc
>   lock_acquire.part.0+0xe0/0x230
>   lock_acquire+0x68/0x84
>   down_write+0x64/0xe0
>   physmap_flash_shutdown+0x60/0x140
>   platform_shutdown+0x28/0x40
>   device_shutdown+0x160/0x340
>   __do_sys_reboot+0x1f8/0x2a0
>   __arm64_sys_reboot+0x28/0x34
>   invoke_syscall+0x48/0x114
>   el0_svc_common.constprop.0+0x60/0x11c
>   do_el0_svc_compat+0x1c/0x50
>   el0_svc_compat+0x4c/0x100
>   el0t_32_sync_handler+0x90/0x140
>   el0t_32_sync+0x1a4/0x1a8
> Flash device refused suspend due to active operation (state 20)
> ------------[ cut here ]------------
> DEBUG_RWSEMS_WARN_ON(sem->magic != sem): count = 0x1, magic = 0x0, owner 
> = 0xffff588b4547c740, curr 0xffff588b4547c740, list not empty
> WARNING: CPU: 1 PID: 1606 at kernel/locking/rwsem.c:1322 
> up_write+0x144/0x1a0
> Modules linked in: bluetooth ecdh_generic ecc rfkill ipv6
> CPU: 1 PID: 1606 Comm: reboot Not tainted 5.16.0-rc1+ #4165
> Hardware name: linux,dummy-virt (DT)
> pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : up_write+0x144/0x1a0
> lr : up_write+0x144/0x1a0
> sp : ffff8000109ebbd0
> x29: ffff8000109ebbd0 x28: ffff588b4547c740 x27: 0000000000000000
> x26: ffffce0238d56470 x25: 0000000000000008 x24: ffffce0239bba030
> x23: ffff588b451938b0 x22: 0000000000000000 x21: ffff588b44046c80
> x20: ffffce02397a2000 x19: ffff588b451938b0 x18: 0000000000000030
> x17: 0000000000000000 x16: 0000000000000000 x15: ffffffffffffffff
> x14: 0000000000000005 x13: ffffce02397c5198 x12: 0000000000000390
> x11: 0000000000000130 x10: ffffce023981d198 x9 : 00000000fffff000
> x8 : ffffce02397c5198 x7 : ffffce023981d198 x6 : 0000000000000000
> x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000
> x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff588b4547c740
> Call trace:
>   up_write+0x144/0x1a0
>   physmap_flash_shutdown+0x13c/0x140
>   platform_shutdown+0x28/0x40
>   device_shutdown+0x160/0x340
>   __do_sys_reboot+0x1f8/0x2a0
>   __arm64_sys_reboot+0x28/0x34
>   invoke_syscall+0x48/0x114
>   el0_svc_common.constprop.0+0x60/0x11c
>   do_el0_svc_compat+0x1c/0x50
>   el0_svc_compat+0x4c/0x100
>   el0t_32_sync_handler+0x90/0x140
>   el0t_32_sync+0x1a4/0x1a8
> irq event stamp: 2541
> hardirqs last  enabled at (2541): [<ffffce02382d94c8>] 
> _raw_spin_unlock_irq+0x44/0x90
> hardirqs last disabled at (2540): [<ffffce02382d98cc>] 
> _raw_spin_lock_irq+0xac/0xb0
> softirqs last  enabled at (2278): [<ffffce0237210470>] _stext+0x470/0x5e8
> softirqs last disabled at (2273): [<ffffce023729d904>] 
> __irq_exit_rcu+0x180/0x1ac
> ---[ end trace d06160a193b668c2 ]---
> Flash device refused suspend due to active operation (state 20)
> 
> 
> Reverting $subject patch on top of linux-next hides the warning. The 
> above log has been gathered on QEMU/arm64 'virt' machine, during the 
> reboot operation. If you need more information how to reproduce it, let 
> me know.
> 
>

Hi,

Thanks Marek :)

@Boris do we need to do something similar here to what we did with the
mtdconcat stuff?

/Sean
Boris Brezillon Nov. 23, 2021, 1:07 p.m. UTC | #4
On Tue, 23 Nov 2021 13:50:12 +0100
Sean Nyekjaer <sean@geanix.com> wrote:

> @Boris do we need to do something similar here to what we did with the
> mtdconcat stuff?

Absolutely, physmasp subdevices are never initialized/registered, so
you can't call any of the mtd helpers taking the suspend lock on those.
I guess it'd be better to call mtd_suspend/resume() on the concat device
in though:

static void physmap_flash_shutdown(struct platform_device *dev)
{
	struct physmap_flash_info *info = platform_get_drvdata(dev);

	mtd_suspend(info->cmtd);
}
Miquel Raynal Nov. 29, 2021, 9:19 a.m. UTC | #5
Hi Sean,

boris.brezillon@collabora.com wrote on Tue, 23 Nov 2021 14:07:15 +0100:

> On Tue, 23 Nov 2021 13:50:12 +0100
> Sean Nyekjaer <sean@geanix.com> wrote:
> 
> > @Boris do we need to do something similar here to what we did with the
> > mtdconcat stuff?  
> 
> Absolutely, physmasp subdevices are never initialized/registered, so
> you can't call any of the mtd helpers taking the suspend lock on those.
> I guess it'd be better to call mtd_suspend/resume() on the concat device
> in though:

Any chance that you will come up with a fix or v6 of the series anytime
soon?

> 
> static void physmap_flash_shutdown(struct platform_device *dev)
> {
> 	struct physmap_flash_info *info = platform_get_drvdata(dev);
> 
> 	mtd_suspend(info->cmtd);
> }


Thanks,
Miquèl
Sean Nyekjaer Nov. 29, 2021, 9:41 a.m. UTC | #6
On Mon, Nov 29, 2021 at 10:19:08AM +0100, Miquel Raynal wrote:
> Hi Sean,
> 
> boris.brezillon@collabora.com wrote on Tue, 23 Nov 2021 14:07:15 +0100:
> 
> > On Tue, 23 Nov 2021 13:50:12 +0100
> > Sean Nyekjaer <sean@geanix.com> wrote:
> > 
> > > @Boris do we need to do something similar here to what we did with the
> > > mtdconcat stuff?  
> > 
> > Absolutely, physmasp subdevices are never initialized/registered, so
> > you can't call any of the mtd helpers taking the suspend lock on those.
> > I guess it'd be better to call mtd_suspend/resume() on the concat device
> > in though:
> 
> Any chance that you will come up with a fix or v6 of the series anytime
> soon?
> 
> > 
> > static void physmap_flash_shutdown(struct platform_device *dev)
> > {
> > 	struct physmap_flash_info *info = platform_get_drvdata(dev);
> > 
> > 	mtd_suspend(info->cmtd);
> > }
> 
> 
> Thanks,
> Miquèl

Hi Miquèl,

I'm not 100% comfortable in doing this.

During this patch series I have mostly been Boris' tester and
implemented his ideas :/

I'm willing to give it a try, if Marek shares how to reproduce this with qemu :)

/Sean
Marek Szyprowski Nov. 29, 2021, 11:17 a.m. UTC | #7
Hi Sean,

On 29.11.2021 10:41, Sean Nyekjaer wrote:
> On Mon, Nov 29, 2021 at 10:19:08AM +0100, Miquel Raynal wrote:
>> boris.brezillon@collabora.com wrote on Tue, 23 Nov 2021 14:07:15 +0100:
>>> On Tue, 23 Nov 2021 13:50:12 +0100
>>> Sean Nyekjaer <sean@geanix.com> wrote:
>>>> @Boris do we need to do something similar here to what we did with the
>>>> mtdconcat stuff?
>>> Absolutely, physmasp subdevices are never initialized/registered, so
>>> you can't call any of the mtd helpers taking the suspend lock on those.
>>> I guess it'd be better to call mtd_suspend/resume() on the concat device
>>> in though:
>> Any chance that you will come up with a fix or v6 of the series anytime
>> soon?
>>
>>> static void physmap_flash_shutdown(struct platform_device *dev)
>>> {
>>> 	struct physmap_flash_info *info = platform_get_drvdata(dev);
>>>
>>> 	mtd_suspend(info->cmtd);
>>> }
>>
>> Thanks,
>> Miquèl
> Hi Miquèl,
>
> I'm not 100% comfortable in doing this.
>
> During this patch series I have mostly been Boris' tester and
> implemented his ideas :/
>
> I'm willing to give it a try, if Marek shares how to reproduce this with qemu :)

Frankly speaking there is nothing special in my setup, typical options 
to run ARM64/virt machine. Just run recent qemu (I did my test with 
5.2.0) with any basic arm/arm64 rootfs (I've used Debian). The qemu 
command line (copied somewhere from the Internet) I've used is:

qemu-system-aarch64 -serial stdio -monitor null -kernel Image -append 
"console=ttyAMA0 root=/dev/vda rootwait" -M virt -cpu cortex-a57 -smp 2 
-m 1024 -device virtio-blk-device,drive=virtio-blk0 -drive 
file=qemu-virt-rootfs.raw,id=virtio-blk0,if=none,format=raw -netdev 
user,id=user -device virtio-net-device,netdev=user -display none

Once it boots to shell, just type 'reboot' and wait until issue appears.

Best regards
Sean Nyekjaer Nov. 30, 2021, 12:41 p.m. UTC | #8
> On 29.11.2021 10:41, Sean Nyekjaer wrote:
> > On Mon, Nov 29, 2021 at 10:19:08AM +0100, Miquel Raynal wrote:
> >> boris.brezillon@collabora.com wrote on Tue, 23 Nov 2021 14:07:15 +0100:
> >>> On Tue, 23 Nov 2021 13:50:12 +0100
> >>> Sean Nyekjaer <sean@geanix.com> wrote:
> >>>> @Boris do we need to do something similar here to what we did with the
> >>>> mtdconcat stuff?
> >>> Absolutely, physmasp subdevices are never initialized/registered, so
> >>> you can't call any of the mtd helpers taking the suspend lock on those.
> >>> I guess it'd be better to call mtd_suspend/resume() on the concat device
> >>> in though:
> >> Any chance that you will come up with a fix or v6 of the series anytime
> >> soon?
> >>
> >>> static void physmap_flash_shutdown(struct platform_device *dev)
> >>> {
> >>> 	struct physmap_flash_info *info = platform_get_drvdata(dev);
> >>>
> >>> 	mtd_suspend(info->cmtd);

There is one more issue when using concat during boot, I think we should
start here :)
It uses uninitialized rwsm.


[    0.000000] Booting Linux on physical CPU 0x0000000000 [0x411fd070]
[    0.000000] Linux version 5.16.0-rc1-00013-g67bcbe202b48 (sean@zen) (aarch64-linux-gnu-gcc (GCC) 11.2.0, GNU ld (GNU Binutils) 2.37) #207 SMP PREEMPT Tue Nov 30 13:22:48 CET 2021
[    0.000000] Machine model: linux,dummy-virt
[    0.000000] efi: UEFI not found.
[    0.000000] NUMA: No NUMA configuration found

[ ... ]

[    1.848327] 0.flash: Found 2 x16 devices at 0x0 in 32-bit bank. Manufacturer ID 0x000000 Chip ID 0x000000
[    1.848567] Intel/Sharp Extended Query Table at 0x0031
[    1.848999] Using buffer write method
[    1.849237] Concatenating MTD devices:
[    1.849347] (0): "0.flash"
[    1.849637] (1): "0.flash"
[    1.849726] into device "0.flash"
[    1.904985] libphy: Fixed MDIO Bus: probed
[    1.915812] tun: Universal TUN/TAP device driver, 1.6

[ ... ]

[    6.064628] The code is fine but needs lockdep annotation, or maybe
[    6.064756] you didn't initialize this object before use?
[    6.064857] turning off the locking correctness validator.
[    6.065111] CPU: 0 PID: 49 Comm: kworker/0:1H Not tainted 5.16.0-rc1-00013-g67bcbe202b48 #207
[    6.065273] Hardware name: linux,dummy-virt (DT)
[    6.065572] Workqueue: kblockd blk_mq_run_work_fn
[    6.065879] Call trace:
[    6.065943]  dump_backtrace+0x0/0x1a0
[    6.066057]  show_stack+0x1c/0x30
[    6.066130]  dump_stack_lvl+0x8c/0xb8
[    6.066206]  dump_stack+0x1c/0x38
[    6.066270]  register_lock_class+0x49c/0x4c0
[    6.066352]  __lock_acquire+0x78/0x20cc
[    6.066422]  lock_acquire.part.0+0xe0/0x230
[    6.066497]  lock_acquire+0x6c/0x90
[    6.066562]  down_read+0x58/0x7c
[    6.066626]  mtd_read_oob_std+0x98/0x170
[    6.066703]  mtd_read_oob+0x80/0x13c
[    6.066770]  mtd_read+0x64/0xa0
[    6.066834]  concat_read+0xd4/0x1b0
[    6.066900]  mtd_read_oob_std+0x158/0x170
[    6.066972]  mtd_read_oob+0x80/0x13c
[    6.067038]  mtd_read+0x64/0xa0
[    6.067099]  mtdblock_readsect+0x6c/0x160
[    6.067169]  mtd_queue_rq+0x240/0x460
[    6.067234]  blk_mq_dispatch_rq_list+0x19c/0x830
[    6.067313]  __blk_mq_do_dispatch_sched+0x248/0x2d4
[    6.067397]  __blk_mq_sched_dispatch_requests+0x110/0x170
[    6.067487]  blk_mq_sched_dispatch_requests+0x3c/0x80
[    6.067573]  __blk_mq_run_hw_queue+0x60/0xa0
[    6.067647]  blk_mq_run_work_fn+0x24/0x30
[    6.067718]  process_one_work+0x28c/0x6e4
[    6.067790]  worker_thread+0x78/0x464
[    6.067856]  kthread+0x180/0x190
[    6.067919]  ret_from_fork+0x10/0x20
[    6.068868] ------------[ cut here ]------------
[    6.068993] DEBUG_RWSEMS_WARN_ON(sem->magic != sem): count = 0x100, magic = 0x0, owner = 0xffff528f84c617c1, curr 0xffff528f84c617c0, list not empty
[    6.069592] WARNING: CPU: 0 PID: 49 at kernel/locking/rwsem.c:1302 __up_read+0x1e8/0x270
[    6.069790] Modules linked in:
[    6.069963] CPU: 0 PID: 49 Comm: kworker/0:1H Not tainted 5.16.0-rc1-00013-g67bcbe202b48 #207
[    6.070099] Hardware name: linux,dummy-virt (DT)
[    6.070179] Workqueue: kblockd blk_mq_run_work_fn
[    6.070347] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    6.070487] pc : __up_read+0x1e8/0x270
[    6.070577] lr : __up_read+0x1e8/0x270
[    6.070660] sp : ffff80001026b740
[    6.070728] x29: ffff80001026b740 x28: 000000000003ff88 x27: 0000000000040000
[    6.070912] x26: ffff80001026ba28 x25: ffff528f84e92000 x24: ffff528f8576d000
[    6.071040] x23: 0000000000000000 x22: 0000000003ff0000 x21: ffff80001026b878
[    6.071165] x20: ffffadee32213000 x19: ffff528f84e918b0 x18: ffffffffffffffff
[    6.071292] x17: 672d33313030302d x16: 3163722d302e3631 x15: ffff80009026b457
[    6.071418] x14: 0000000000000007 x13: ffffadee32236658 x12: 0000000000000384
[    6.071543] x11: 000000000000012c x10: ffffadee32236658 x9 : ffffadee32236658
[    6.071692] x8 : 00000000ffffefff x7 : ffffadee3228e658 x6 : ffffadee3228e658
[    6.071817] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000
[    6.071942] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff528f84c617c0
[    6.072094] Call trace:
[    6.072163]  __up_read+0x1e8/0x270
[    6.072238]  up_read+0x44/0x300
[    6.072301]  mtd_read_oob_std+0xd0/0x170
[    6.072374]  mtd_read_oob+0x80/0x13c
[    6.072441]  mtd_read+0x64/0xa0
[    6.072502]  concat_read+0xd4/0x1b0
[    6.072567]  mtd_read_oob_std+0x158/0x170
[    6.072640]  mtd_read_oob+0x80/0x13c
[    6.072706]  mtd_read+0x64/0xa0
[    6.072766]  mtdblock_readsect+0x6c/0x160
[    6.072843]  mtd_queue_rq+0x240/0x460
[    6.072909]  blk_mq_dispatch_rq_list+0x19c/0x830
[    6.072990]  __blk_mq_do_dispatch_sched+0x248/0x2d4
[    6.073073]  __blk_mq_sched_dispatch_requests+0x110/0x170
[    6.073163]  blk_mq_sched_dispatch_requests+0x3c/0x80
[    6.073249]  __blk_mq_run_hw_queue+0x60/0xa0
[    6.073325]  blk_mq_run_work_fn+0x24/0x30
[    6.073421]  process_one_work+0x28c/0x6e4
[    6.073568]  worker_thread+0x78/0x464
[    6.073643]  kthread+0x180/0x190
[    6.073710]  ret_from_fork+0x10/0x20
[    6.073820] irq event stamp: 1529
[    6.073891] hardirqs last  enabled at (1529): [<ffffadee3114576c>] _raw_spin_unlock_irq+0x48/0x9c
[    6.074067] hardirqs last disabled at (1528): [<ffffadee31145b70>] _raw_spin_lock_irq+0xac/0xb0
[    6.074211] softirqs last  enabled at (1444): [<ffffadee30010528>] __do_softirq+0x478/0x5f0
[    6.074350] softirqs last disabled at (1429): [<ffffadee3009f064>] __irq_exit_rcu+0x184/0x1b0
[    6.074506] ---[ end trace 738f7ffe764b66d5 ]---

I can solve this with this hack from mtd_set_dev_defaults():

diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index f4a4274489b4..a4b69b9558c9 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -693,6 +693,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
 	concat->mtd.oobavail = subdev[0]->oobavail;
 
 	subdev_master = mtd_get_master(subdev[0]);
+	init_waitqueue_head(&subdev_master->master.resume_wq);
+	init_rwsem(&subdev_master->master.suspend_lock);
 	if (subdev_master->_writev)
 		concat->mtd._writev = concat_writev;
 	if (subdev_master->_read_oob)
@@ -740,6 +742,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
 		}
 
 		subdev_master = mtd_get_master(subdev[i]);
+		init_waitqueue_head(&subdev_master->master.resume_wq);
+		init_rwsem(&subdev_master->master.suspend_lock);
 		concat->mtd.size += subdev[i]->size;
 		concat->mtd.ecc_stats.badblocks +=
 			subdev[i]->ecc_stats.badblocks;

I do not have a great overview of what is happing here with all these
master devices :/

Any ideas Boris/Miquel?

/Sean
Boris Brezillon Nov. 30, 2021, 1:15 p.m. UTC | #9
On Tue, 30 Nov 2021 13:41:31 +0100
Sean Nyekjaer <sean@geanix.com> wrote:

> > On 29.11.2021 10:41, Sean Nyekjaer wrote:  
> > > On Mon, Nov 29, 2021 at 10:19:08AM +0100, Miquel Raynal wrote:  
> > >> boris.brezillon@collabora.com wrote on Tue, 23 Nov 2021 14:07:15 +0100:  
> > >>> On Tue, 23 Nov 2021 13:50:12 +0100
> > >>> Sean Nyekjaer <sean@geanix.com> wrote:  
> > >>>> @Boris do we need to do something similar here to what we did with the
> > >>>> mtdconcat stuff?  
> > >>> Absolutely, physmasp subdevices are never initialized/registered, so
> > >>> you can't call any of the mtd helpers taking the suspend lock on those.
> > >>> I guess it'd be better to call mtd_suspend/resume() on the concat device
> > >>> in though:  
> > >> Any chance that you will come up with a fix or v6 of the series anytime
> > >> soon?
> > >>  
> > >>> static void physmap_flash_shutdown(struct platform_device *dev)
> > >>> {
> > >>> 	struct physmap_flash_info *info = platform_get_drvdata(dev);
> > >>>
> > >>> 	mtd_suspend(info->cmtd);  
> 
> There is one more issue when using concat during boot, I think we should
> start here :)
> It uses uninitialized rwsm.
> 
> 
> [    0.000000] Booting Linux on physical CPU 0x0000000000 [0x411fd070]
> [    0.000000] Linux version 5.16.0-rc1-00013-g67bcbe202b48 (sean@zen) (aarch64-linux-gnu-gcc (GCC) 11.2.0, GNU ld (GNU Binutils) 2.37) #207 SMP PREEMPT Tue Nov 30 13:22:48 CET 2021
> [    0.000000] Machine model: linux,dummy-virt
> [    0.000000] efi: UEFI not found.
> [    0.000000] NUMA: No NUMA configuration found
> 
> [ ... ]
> 
> [    1.848327] 0.flash: Found 2 x16 devices at 0x0 in 32-bit bank. Manufacturer ID 0x000000 Chip ID 0x000000
> [    1.848567] Intel/Sharp Extended Query Table at 0x0031
> [    1.848999] Using buffer write method
> [    1.849237] Concatenating MTD devices:
> [    1.849347] (0): "0.flash"
> [    1.849637] (1): "0.flash"
> [    1.849726] into device "0.flash"
> [    1.904985] libphy: Fixed MDIO Bus: probed
> [    1.915812] tun: Universal TUN/TAP device driver, 1.6
> 
> [ ... ]
> 
> [    6.064628] The code is fine but needs lockdep annotation, or maybe
> [    6.064756] you didn't initialize this object before use?
> [    6.064857] turning off the locking correctness validator.
> [    6.065111] CPU: 0 PID: 49 Comm: kworker/0:1H Not tainted 5.16.0-rc1-00013-g67bcbe202b48 #207
> [    6.065273] Hardware name: linux,dummy-virt (DT)
> [    6.065572] Workqueue: kblockd blk_mq_run_work_fn
> [    6.065879] Call trace:
> [    6.065943]  dump_backtrace+0x0/0x1a0
> [    6.066057]  show_stack+0x1c/0x30
> [    6.066130]  dump_stack_lvl+0x8c/0xb8
> [    6.066206]  dump_stack+0x1c/0x38
> [    6.066270]  register_lock_class+0x49c/0x4c0
> [    6.066352]  __lock_acquire+0x78/0x20cc
> [    6.066422]  lock_acquire.part.0+0xe0/0x230
> [    6.066497]  lock_acquire+0x6c/0x90
> [    6.066562]  down_read+0x58/0x7c
> [    6.066626]  mtd_read_oob_std+0x98/0x170
> [    6.066703]  mtd_read_oob+0x80/0x13c
> [    6.066770]  mtd_read+0x64/0xa0
> [    6.066834]  concat_read+0xd4/0x1b0
> [    6.066900]  mtd_read_oob_std+0x158/0x170
> [    6.066972]  mtd_read_oob+0x80/0x13c
> [    6.067038]  mtd_read+0x64/0xa0
> [    6.067099]  mtdblock_readsect+0x6c/0x160
> [    6.067169]  mtd_queue_rq+0x240/0x460
> [    6.067234]  blk_mq_dispatch_rq_list+0x19c/0x830
> [    6.067313]  __blk_mq_do_dispatch_sched+0x248/0x2d4
> [    6.067397]  __blk_mq_sched_dispatch_requests+0x110/0x170
> [    6.067487]  blk_mq_sched_dispatch_requests+0x3c/0x80
> [    6.067573]  __blk_mq_run_hw_queue+0x60/0xa0
> [    6.067647]  blk_mq_run_work_fn+0x24/0x30
> [    6.067718]  process_one_work+0x28c/0x6e4
> [    6.067790]  worker_thread+0x78/0x464
> [    6.067856]  kthread+0x180/0x190
> [    6.067919]  ret_from_fork+0x10/0x20
> [    6.068868] ------------[ cut here ]------------
> [    6.068993] DEBUG_RWSEMS_WARN_ON(sem->magic != sem): count = 0x100, magic = 0x0, owner = 0xffff528f84c617c1, curr 0xffff528f84c617c0, list not empty
> [    6.069592] WARNING: CPU: 0 PID: 49 at kernel/locking/rwsem.c:1302 __up_read+0x1e8/0x270
> [    6.069790] Modules linked in:
> [    6.069963] CPU: 0 PID: 49 Comm: kworker/0:1H Not tainted 5.16.0-rc1-00013-g67bcbe202b48 #207
> [    6.070099] Hardware name: linux,dummy-virt (DT)
> [    6.070179] Workqueue: kblockd blk_mq_run_work_fn
> [    6.070347] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> [    6.070487] pc : __up_read+0x1e8/0x270
> [    6.070577] lr : __up_read+0x1e8/0x270
> [    6.070660] sp : ffff80001026b740
> [    6.070728] x29: ffff80001026b740 x28: 000000000003ff88 x27: 0000000000040000
> [    6.070912] x26: ffff80001026ba28 x25: ffff528f84e92000 x24: ffff528f8576d000
> [    6.071040] x23: 0000000000000000 x22: 0000000003ff0000 x21: ffff80001026b878
> [    6.071165] x20: ffffadee32213000 x19: ffff528f84e918b0 x18: ffffffffffffffff
> [    6.071292] x17: 672d33313030302d x16: 3163722d302e3631 x15: ffff80009026b457
> [    6.071418] x14: 0000000000000007 x13: ffffadee32236658 x12: 0000000000000384
> [    6.071543] x11: 000000000000012c x10: ffffadee32236658 x9 : ffffadee32236658
> [    6.071692] x8 : 00000000ffffefff x7 : ffffadee3228e658 x6 : ffffadee3228e658
> [    6.071817] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000
> [    6.071942] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff528f84c617c0
> [    6.072094] Call trace:
> [    6.072163]  __up_read+0x1e8/0x270
> [    6.072238]  up_read+0x44/0x300
> [    6.072301]  mtd_read_oob_std+0xd0/0x170
> [    6.072374]  mtd_read_oob+0x80/0x13c
> [    6.072441]  mtd_read+0x64/0xa0
> [    6.072502]  concat_read+0xd4/0x1b0
> [    6.072567]  mtd_read_oob_std+0x158/0x170
> [    6.072640]  mtd_read_oob+0x80/0x13c
> [    6.072706]  mtd_read+0x64/0xa0
> [    6.072766]  mtdblock_readsect+0x6c/0x160
> [    6.072843]  mtd_queue_rq+0x240/0x460
> [    6.072909]  blk_mq_dispatch_rq_list+0x19c/0x830
> [    6.072990]  __blk_mq_do_dispatch_sched+0x248/0x2d4
> [    6.073073]  __blk_mq_sched_dispatch_requests+0x110/0x170
> [    6.073163]  blk_mq_sched_dispatch_requests+0x3c/0x80
> [    6.073249]  __blk_mq_run_hw_queue+0x60/0xa0
> [    6.073325]  blk_mq_run_work_fn+0x24/0x30
> [    6.073421]  process_one_work+0x28c/0x6e4
> [    6.073568]  worker_thread+0x78/0x464
> [    6.073643]  kthread+0x180/0x190
> [    6.073710]  ret_from_fork+0x10/0x20
> [    6.073820] irq event stamp: 1529
> [    6.073891] hardirqs last  enabled at (1529): [<ffffadee3114576c>] _raw_spin_unlock_irq+0x48/0x9c
> [    6.074067] hardirqs last disabled at (1528): [<ffffadee31145b70>] _raw_spin_lock_irq+0xac/0xb0
> [    6.074211] softirqs last  enabled at (1444): [<ffffadee30010528>] __do_softirq+0x478/0x5f0
> [    6.074350] softirqs last disabled at (1429): [<ffffadee3009f064>] __irq_exit_rcu+0x184/0x1b0
> [    6.074506] ---[ end trace 738f7ffe764b66d5 ]---
> 
> I can solve this with this hack from mtd_set_dev_defaults():
> 
> diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
> index f4a4274489b4..a4b69b9558c9 100644
> --- a/drivers/mtd/mtdconcat.c
> +++ b/drivers/mtd/mtdconcat.c
> @@ -693,6 +693,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
>  	concat->mtd.oobavail = subdev[0]->oobavail;
>  
>  	subdev_master = mtd_get_master(subdev[0]);
> +	init_waitqueue_head(&subdev_master->master.resume_wq);
> +	init_rwsem(&subdev_master->master.suspend_lock);
>  	if (subdev_master->_writev)
>  		concat->mtd._writev = concat_writev;
>  	if (subdev_master->_read_oob)
> @@ -740,6 +742,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
>  		}
>  
>  		subdev_master = mtd_get_master(subdev[i]);
> +		init_waitqueue_head(&subdev_master->master.resume_wq);
> +		init_rwsem(&subdev_master->master.suspend_lock);
>  		concat->mtd.size += subdev[i]->size;
>  		concat->mtd.ecc_stats.badblocks +=
>  			subdev[i]->ecc_stats.badblocks;
> 
> I do not have a great overview of what is happing here with all these
> master devices :/
> 
> Any ideas Boris/Miquel?

It's the same problem, over and over again: the master device uses MTD
helpers on its unregistered/unitialized subdevices. The following diff
should fix the issue, but let's be honest, it's just papering over the
real design issue, that his, MTD implementations use the MTD helpers
assuming nothing needs to be initialized. So, I'm tempted to just drop
the series and work an a more robust solution. In the meantime, I guess
getting back to a raw-NAND-only fix would make sense.

--->8---

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 2739a48e9b64..7926137b3645 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -490,6 +490,9 @@ static inline void mtd_start_access(struct mtd_info *master)
        if (!master->_suspend)
                return;
 
+       if (WARN_ON_ONCE(!device_is_registered(&master->dev)))
+               return;
+
        /*
         * Wait until the device is resumed. Should we have a
         * non-blocking mode here?
@@ -509,6 +512,9 @@ static inline void mtd_end_access(struct mtd_info *master)
        if (!master->_suspend)
                return;
 
+       if (WARN_ON_ONCE(!device_is_registered(&master->dev)))
+               return;
+
        up_read(&master->master.suspend_lock);
 }
 
@@ -592,13 +598,17 @@ static inline int mtd_suspend(struct mtd_info *mtd)
        if (!master->_suspend)
                return ret;
 
-       down_write(&master->master.suspend_lock);
+       if (!WARN_ON_ONCE(!device_is_registered(&master->dev)))
+               down_write(&master->master.suspend_lock);
+
        if (!master->master.suspended) {
                ret = master->_suspend(master);
                if (!ret)
                        master->master.suspended = 1;
        }
-       up_write(&master->master.suspend_lock);
+
+       if (device_is_registered(&master->dev))
+               up_write(&master->master.suspend_lock);
 
        return ret;
 }
@@ -610,7 +620,9 @@ static inline void mtd_resume(struct mtd_info *mtd)
        if (!master->_suspend)
                return;
 
-       down_write(&master->master.suspend_lock);
+       if (!WARN_ON_ONCE(!device_is_registered(&master->dev)))
+               down_write(&master->master.suspend_lock);
+
        if (master->master.suspended) {
                if (master->_resume)
                        master->_resume(master);
@@ -620,7 +632,9 @@ static inline void mtd_resume(struct mtd_info *mtd)
                /* The MTD dev has been resumed, wake up all waiters. */
                wake_up_all(&master->master.resume_wq);
        }
-       up_write(&master->master.suspend_lock);
+
+       if (device_is_registered(&master->dev))
+               up_write(&master->master.suspend_lock);
 }
 
 static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
Sean Nyekjaer Nov. 30, 2021, 1:29 p.m. UTC | #10
On Tue, Nov 30, 2021 at 02:15:51PM +0100, Boris Brezillon wrote:
> On Tue, 30 Nov 2021 13:41:31 +0100
> Sean Nyekjaer <sean@geanix.com> wrote:
> 
> > > On 29.11.2021 10:41, Sean Nyekjaer wrote:  
> > > > On Mon, Nov 29, 2021 at 10:19:08AM +0100, Miquel Raynal wrote:  
> > > >> boris.brezillon@collabora.com wrote on Tue, 23 Nov 2021 14:07:15 +0100:  
> > > >>> On Tue, 23 Nov 2021 13:50:12 +0100
> > > >>> Sean Nyekjaer <sean@geanix.com> wrote:  
> > > >>>> @Boris do we need to do something similar here to what we did with the
> > > >>>> mtdconcat stuff?  
> > > >>> Absolutely, physmasp subdevices are never initialized/registered, so
> > > >>> you can't call any of the mtd helpers taking the suspend lock on those.
> > > >>> I guess it'd be better to call mtd_suspend/resume() on the concat device
> > > >>> in though:  
> > > >> Any chance that you will come up with a fix or v6 of the series anytime
> > > >> soon?
> > > >>  
> > > >>> static void physmap_flash_shutdown(struct platform_device *dev)
> > > >>> {
> > > >>> 	struct physmap_flash_info *info = platform_get_drvdata(dev);
> > > >>>
> > > >>> 	mtd_suspend(info->cmtd);  
> > 
> > There is one more issue when using concat during boot, I think we should
> > start here :)
> > It uses uninitialized rwsm.
> > 
> > 
> > [    0.000000] Booting Linux on physical CPU 0x0000000000 [0x411fd070]
> > [    0.000000] Linux version 5.16.0-rc1-00013-g67bcbe202b48 (sean@zen) (aarch64-linux-gnu-gcc (GCC) 11.2.0, GNU ld (GNU Binutils) 2.37) #207 SMP PREEMPT Tue Nov 30 13:22:48 CET 2021
> > [    0.000000] Machine model: linux,dummy-virt
> > [    0.000000] efi: UEFI not found.
> > [    0.000000] NUMA: No NUMA configuration found
> > 
> > [ ... ]
> > 
> > [    1.848327] 0.flash: Found 2 x16 devices at 0x0 in 32-bit bank. Manufacturer ID 0x000000 Chip ID 0x000000
> > [    1.848567] Intel/Sharp Extended Query Table at 0x0031
> > [    1.848999] Using buffer write method
> > [    1.849237] Concatenating MTD devices:
> > [    1.849347] (0): "0.flash"
> > [    1.849637] (1): "0.flash"
> > [    1.849726] into device "0.flash"
> > [    1.904985] libphy: Fixed MDIO Bus: probed
> > [    1.915812] tun: Universal TUN/TAP device driver, 1.6
> > 
> > [ ... ]
> > 
> > [    6.064628] The code is fine but needs lockdep annotation, or maybe
> > [    6.064756] you didn't initialize this object before use?
> > [    6.064857] turning off the locking correctness validator.
> > [    6.065111] CPU: 0 PID: 49 Comm: kworker/0:1H Not tainted 5.16.0-rc1-00013-g67bcbe202b48 #207
> > [    6.065273] Hardware name: linux,dummy-virt (DT)
> > [    6.065572] Workqueue: kblockd blk_mq_run_work_fn
> > [    6.065879] Call trace:
> > [    6.065943]  dump_backtrace+0x0/0x1a0
> > [    6.066057]  show_stack+0x1c/0x30
> > [    6.066130]  dump_stack_lvl+0x8c/0xb8
> > [    6.066206]  dump_stack+0x1c/0x38
> > [    6.066270]  register_lock_class+0x49c/0x4c0
> > [    6.066352]  __lock_acquire+0x78/0x20cc
> > [    6.066422]  lock_acquire.part.0+0xe0/0x230
> > [    6.066497]  lock_acquire+0x6c/0x90
> > [    6.066562]  down_read+0x58/0x7c
> > [    6.066626]  mtd_read_oob_std+0x98/0x170
> > [    6.066703]  mtd_read_oob+0x80/0x13c
> > [    6.066770]  mtd_read+0x64/0xa0
> > [    6.066834]  concat_read+0xd4/0x1b0
> > [    6.066900]  mtd_read_oob_std+0x158/0x170
> > [    6.066972]  mtd_read_oob+0x80/0x13c
> > [    6.067038]  mtd_read+0x64/0xa0
> > [    6.067099]  mtdblock_readsect+0x6c/0x160
> > [    6.067169]  mtd_queue_rq+0x240/0x460
> > [    6.067234]  blk_mq_dispatch_rq_list+0x19c/0x830
> > [    6.067313]  __blk_mq_do_dispatch_sched+0x248/0x2d4
> > [    6.067397]  __blk_mq_sched_dispatch_requests+0x110/0x170
> > [    6.067487]  blk_mq_sched_dispatch_requests+0x3c/0x80
> > [    6.067573]  __blk_mq_run_hw_queue+0x60/0xa0
> > [    6.067647]  blk_mq_run_work_fn+0x24/0x30
> > [    6.067718]  process_one_work+0x28c/0x6e4
> > [    6.067790]  worker_thread+0x78/0x464
> > [    6.067856]  kthread+0x180/0x190
> > [    6.067919]  ret_from_fork+0x10/0x20
> > [    6.068868] ------------[ cut here ]------------
> > [    6.068993] DEBUG_RWSEMS_WARN_ON(sem->magic != sem): count = 0x100, magic = 0x0, owner = 0xffff528f84c617c1, curr 0xffff528f84c617c0, list not empty
> > [    6.069592] WARNING: CPU: 0 PID: 49 at kernel/locking/rwsem.c:1302 __up_read+0x1e8/0x270
> > [    6.069790] Modules linked in:
> > [    6.069963] CPU: 0 PID: 49 Comm: kworker/0:1H Not tainted 5.16.0-rc1-00013-g67bcbe202b48 #207
> > [    6.070099] Hardware name: linux,dummy-virt (DT)
> > [    6.070179] Workqueue: kblockd blk_mq_run_work_fn
> > [    6.070347] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > [    6.070487] pc : __up_read+0x1e8/0x270
> > [    6.070577] lr : __up_read+0x1e8/0x270
> > [    6.070660] sp : ffff80001026b740
> > [    6.070728] x29: ffff80001026b740 x28: 000000000003ff88 x27: 0000000000040000
> > [    6.070912] x26: ffff80001026ba28 x25: ffff528f84e92000 x24: ffff528f8576d000
> > [    6.071040] x23: 0000000000000000 x22: 0000000003ff0000 x21: ffff80001026b878
> > [    6.071165] x20: ffffadee32213000 x19: ffff528f84e918b0 x18: ffffffffffffffff
> > [    6.071292] x17: 672d33313030302d x16: 3163722d302e3631 x15: ffff80009026b457
> > [    6.071418] x14: 0000000000000007 x13: ffffadee32236658 x12: 0000000000000384
> > [    6.071543] x11: 000000000000012c x10: ffffadee32236658 x9 : ffffadee32236658
> > [    6.071692] x8 : 00000000ffffefff x7 : ffffadee3228e658 x6 : ffffadee3228e658
> > [    6.071817] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000
> > [    6.071942] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff528f84c617c0
> > [    6.072094] Call trace:
> > [    6.072163]  __up_read+0x1e8/0x270
> > [    6.072238]  up_read+0x44/0x300
> > [    6.072301]  mtd_read_oob_std+0xd0/0x170
> > [    6.072374]  mtd_read_oob+0x80/0x13c
> > [    6.072441]  mtd_read+0x64/0xa0
> > [    6.072502]  concat_read+0xd4/0x1b0
> > [    6.072567]  mtd_read_oob_std+0x158/0x170
> > [    6.072640]  mtd_read_oob+0x80/0x13c
> > [    6.072706]  mtd_read+0x64/0xa0
> > [    6.072766]  mtdblock_readsect+0x6c/0x160
> > [    6.072843]  mtd_queue_rq+0x240/0x460
> > [    6.072909]  blk_mq_dispatch_rq_list+0x19c/0x830
> > [    6.072990]  __blk_mq_do_dispatch_sched+0x248/0x2d4
> > [    6.073073]  __blk_mq_sched_dispatch_requests+0x110/0x170
> > [    6.073163]  blk_mq_sched_dispatch_requests+0x3c/0x80
> > [    6.073249]  __blk_mq_run_hw_queue+0x60/0xa0
> > [    6.073325]  blk_mq_run_work_fn+0x24/0x30
> > [    6.073421]  process_one_work+0x28c/0x6e4
> > [    6.073568]  worker_thread+0x78/0x464
> > [    6.073643]  kthread+0x180/0x190
> > [    6.073710]  ret_from_fork+0x10/0x20
> > [    6.073820] irq event stamp: 1529
> > [    6.073891] hardirqs last  enabled at (1529): [<ffffadee3114576c>] _raw_spin_unlock_irq+0x48/0x9c
> > [    6.074067] hardirqs last disabled at (1528): [<ffffadee31145b70>] _raw_spin_lock_irq+0xac/0xb0
> > [    6.074211] softirqs last  enabled at (1444): [<ffffadee30010528>] __do_softirq+0x478/0x5f0
> > [    6.074350] softirqs last disabled at (1429): [<ffffadee3009f064>] __irq_exit_rcu+0x184/0x1b0
> > [    6.074506] ---[ end trace 738f7ffe764b66d5 ]---
> > 
> > I can solve this with this hack from mtd_set_dev_defaults():
> > 
> > diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
> > index f4a4274489b4..a4b69b9558c9 100644
> > --- a/drivers/mtd/mtdconcat.c
> > +++ b/drivers/mtd/mtdconcat.c
> > @@ -693,6 +693,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
> >  	concat->mtd.oobavail = subdev[0]->oobavail;
> >  
> >  	subdev_master = mtd_get_master(subdev[0]);
> > +	init_waitqueue_head(&subdev_master->master.resume_wq);
> > +	init_rwsem(&subdev_master->master.suspend_lock);
> >  	if (subdev_master->_writev)
> >  		concat->mtd._writev = concat_writev;
> >  	if (subdev_master->_read_oob)
> > @@ -740,6 +742,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
> >  		}
> >  
> >  		subdev_master = mtd_get_master(subdev[i]);
> > +		init_waitqueue_head(&subdev_master->master.resume_wq);
> > +		init_rwsem(&subdev_master->master.suspend_lock);
> >  		concat->mtd.size += subdev[i]->size;
> >  		concat->mtd.ecc_stats.badblocks +=
> >  			subdev[i]->ecc_stats.badblocks;
> > 
> > I do not have a great overview of what is happing here with all these
> > master devices :/
> > 
> > Any ideas Boris/Miquel?
> 
> It's the same problem, over and over again: the master device uses MTD
> helpers on its unregistered/unitialized subdevices. The following diff
> should fix the issue, but let's be honest, it's just papering over the
> real design issue, that his, MTD implementations use the MTD helpers
> assuming nothing needs to be initialized. So, I'm tempted to just drop
> the series and work an a more robust solution. In the meantime, I guess
> getting back to a raw-NAND-only fix would make sense.
> 

Fine by me, lets drop this series.

We have +10.000 devices that runs with this patch:

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 1f0d542d5923..58d48c3070fa 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4337,7 +4337,6 @@ static int nand_suspend(struct mtd_info *mtd)
 		ret = chip->ops.suspend(chip);
 	if (!ret)
 		chip->suspended = 1;
-	mutex_unlock(&chip->lock);
 
 	return ret;
 }
@@ -4350,7 +4349,6 @@ static void nand_resume(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 
-	mutex_lock(&chip->lock);
 	if (chip->suspended) {
 		if (chip->ops.resume)
 			chip->ops.resume(chip);

/Sean
Boris Brezillon Nov. 30, 2021, 1:37 p.m. UTC | #11
On Tue, 30 Nov 2021 14:29:12 +0100
Sean Nyekjaer <sean@geanix.com> wrote:

> On Tue, Nov 30, 2021 at 02:15:51PM +0100, Boris Brezillon wrote:
> > On Tue, 30 Nov 2021 13:41:31 +0100
> > Sean Nyekjaer <sean@geanix.com> wrote:
> >   
> > > > On 29.11.2021 10:41, Sean Nyekjaer wrote:    
> > > > > On Mon, Nov 29, 2021 at 10:19:08AM +0100, Miquel Raynal wrote:    
> > > > >> boris.brezillon@collabora.com wrote on Tue, 23 Nov 2021 14:07:15 +0100:    
> > > > >>> On Tue, 23 Nov 2021 13:50:12 +0100
> > > > >>> Sean Nyekjaer <sean@geanix.com> wrote:    
> > > > >>>> @Boris do we need to do something similar here to what we did with the
> > > > >>>> mtdconcat stuff?    
> > > > >>> Absolutely, physmasp subdevices are never initialized/registered, so
> > > > >>> you can't call any of the mtd helpers taking the suspend lock on those.
> > > > >>> I guess it'd be better to call mtd_suspend/resume() on the concat device
> > > > >>> in though:    
> > > > >> Any chance that you will come up with a fix or v6 of the series anytime
> > > > >> soon?
> > > > >>    
> > > > >>> static void physmap_flash_shutdown(struct platform_device *dev)
> > > > >>> {
> > > > >>> 	struct physmap_flash_info *info = platform_get_drvdata(dev);
> > > > >>>
> > > > >>> 	mtd_suspend(info->cmtd);    
> > > 
> > > There is one more issue when using concat during boot, I think we should
> > > start here :)
> > > It uses uninitialized rwsm.
> > > 
> > > 
> > > [    0.000000] Booting Linux on physical CPU 0x0000000000 [0x411fd070]
> > > [    0.000000] Linux version 5.16.0-rc1-00013-g67bcbe202b48 (sean@zen) (aarch64-linux-gnu-gcc (GCC) 11.2.0, GNU ld (GNU Binutils) 2.37) #207 SMP PREEMPT Tue Nov 30 13:22:48 CET 2021
> > > [    0.000000] Machine model: linux,dummy-virt
> > > [    0.000000] efi: UEFI not found.
> > > [    0.000000] NUMA: No NUMA configuration found
> > > 
> > > [ ... ]
> > > 
> > > [    1.848327] 0.flash: Found 2 x16 devices at 0x0 in 32-bit bank. Manufacturer ID 0x000000 Chip ID 0x000000
> > > [    1.848567] Intel/Sharp Extended Query Table at 0x0031
> > > [    1.848999] Using buffer write method
> > > [    1.849237] Concatenating MTD devices:
> > > [    1.849347] (0): "0.flash"
> > > [    1.849637] (1): "0.flash"
> > > [    1.849726] into device "0.flash"
> > > [    1.904985] libphy: Fixed MDIO Bus: probed
> > > [    1.915812] tun: Universal TUN/TAP device driver, 1.6
> > > 
> > > [ ... ]
> > > 
> > > [    6.064628] The code is fine but needs lockdep annotation, or maybe
> > > [    6.064756] you didn't initialize this object before use?
> > > [    6.064857] turning off the locking correctness validator.
> > > [    6.065111] CPU: 0 PID: 49 Comm: kworker/0:1H Not tainted 5.16.0-rc1-00013-g67bcbe202b48 #207
> > > [    6.065273] Hardware name: linux,dummy-virt (DT)
> > > [    6.065572] Workqueue: kblockd blk_mq_run_work_fn
> > > [    6.065879] Call trace:
> > > [    6.065943]  dump_backtrace+0x0/0x1a0
> > > [    6.066057]  show_stack+0x1c/0x30
> > > [    6.066130]  dump_stack_lvl+0x8c/0xb8
> > > [    6.066206]  dump_stack+0x1c/0x38
> > > [    6.066270]  register_lock_class+0x49c/0x4c0
> > > [    6.066352]  __lock_acquire+0x78/0x20cc
> > > [    6.066422]  lock_acquire.part.0+0xe0/0x230
> > > [    6.066497]  lock_acquire+0x6c/0x90
> > > [    6.066562]  down_read+0x58/0x7c
> > > [    6.066626]  mtd_read_oob_std+0x98/0x170
> > > [    6.066703]  mtd_read_oob+0x80/0x13c
> > > [    6.066770]  mtd_read+0x64/0xa0
> > > [    6.066834]  concat_read+0xd4/0x1b0
> > > [    6.066900]  mtd_read_oob_std+0x158/0x170
> > > [    6.066972]  mtd_read_oob+0x80/0x13c
> > > [    6.067038]  mtd_read+0x64/0xa0
> > > [    6.067099]  mtdblock_readsect+0x6c/0x160
> > > [    6.067169]  mtd_queue_rq+0x240/0x460
> > > [    6.067234]  blk_mq_dispatch_rq_list+0x19c/0x830
> > > [    6.067313]  __blk_mq_do_dispatch_sched+0x248/0x2d4
> > > [    6.067397]  __blk_mq_sched_dispatch_requests+0x110/0x170
> > > [    6.067487]  blk_mq_sched_dispatch_requests+0x3c/0x80
> > > [    6.067573]  __blk_mq_run_hw_queue+0x60/0xa0
> > > [    6.067647]  blk_mq_run_work_fn+0x24/0x30
> > > [    6.067718]  process_one_work+0x28c/0x6e4
> > > [    6.067790]  worker_thread+0x78/0x464
> > > [    6.067856]  kthread+0x180/0x190
> > > [    6.067919]  ret_from_fork+0x10/0x20
> > > [    6.068868] ------------[ cut here ]------------
> > > [    6.068993] DEBUG_RWSEMS_WARN_ON(sem->magic != sem): count = 0x100, magic = 0x0, owner = 0xffff528f84c617c1, curr 0xffff528f84c617c0, list not empty
> > > [    6.069592] WARNING: CPU: 0 PID: 49 at kernel/locking/rwsem.c:1302 __up_read+0x1e8/0x270
> > > [    6.069790] Modules linked in:
> > > [    6.069963] CPU: 0 PID: 49 Comm: kworker/0:1H Not tainted 5.16.0-rc1-00013-g67bcbe202b48 #207
> > > [    6.070099] Hardware name: linux,dummy-virt (DT)
> > > [    6.070179] Workqueue: kblockd blk_mq_run_work_fn
> > > [    6.070347] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > > [    6.070487] pc : __up_read+0x1e8/0x270
> > > [    6.070577] lr : __up_read+0x1e8/0x270
> > > [    6.070660] sp : ffff80001026b740
> > > [    6.070728] x29: ffff80001026b740 x28: 000000000003ff88 x27: 0000000000040000
> > > [    6.070912] x26: ffff80001026ba28 x25: ffff528f84e92000 x24: ffff528f8576d000
> > > [    6.071040] x23: 0000000000000000 x22: 0000000003ff0000 x21: ffff80001026b878
> > > [    6.071165] x20: ffffadee32213000 x19: ffff528f84e918b0 x18: ffffffffffffffff
> > > [    6.071292] x17: 672d33313030302d x16: 3163722d302e3631 x15: ffff80009026b457
> > > [    6.071418] x14: 0000000000000007 x13: ffffadee32236658 x12: 0000000000000384
> > > [    6.071543] x11: 000000000000012c x10: ffffadee32236658 x9 : ffffadee32236658
> > > [    6.071692] x8 : 00000000ffffefff x7 : ffffadee3228e658 x6 : ffffadee3228e658
> > > [    6.071817] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000
> > > [    6.071942] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff528f84c617c0
> > > [    6.072094] Call trace:
> > > [    6.072163]  __up_read+0x1e8/0x270
> > > [    6.072238]  up_read+0x44/0x300
> > > [    6.072301]  mtd_read_oob_std+0xd0/0x170
> > > [    6.072374]  mtd_read_oob+0x80/0x13c
> > > [    6.072441]  mtd_read+0x64/0xa0
> > > [    6.072502]  concat_read+0xd4/0x1b0
> > > [    6.072567]  mtd_read_oob_std+0x158/0x170
> > > [    6.072640]  mtd_read_oob+0x80/0x13c
> > > [    6.072706]  mtd_read+0x64/0xa0
> > > [    6.072766]  mtdblock_readsect+0x6c/0x160
> > > [    6.072843]  mtd_queue_rq+0x240/0x460
> > > [    6.072909]  blk_mq_dispatch_rq_list+0x19c/0x830
> > > [    6.072990]  __blk_mq_do_dispatch_sched+0x248/0x2d4
> > > [    6.073073]  __blk_mq_sched_dispatch_requests+0x110/0x170
> > > [    6.073163]  blk_mq_sched_dispatch_requests+0x3c/0x80
> > > [    6.073249]  __blk_mq_run_hw_queue+0x60/0xa0
> > > [    6.073325]  blk_mq_run_work_fn+0x24/0x30
> > > [    6.073421]  process_one_work+0x28c/0x6e4
> > > [    6.073568]  worker_thread+0x78/0x464
> > > [    6.073643]  kthread+0x180/0x190
> > > [    6.073710]  ret_from_fork+0x10/0x20
> > > [    6.073820] irq event stamp: 1529
> > > [    6.073891] hardirqs last  enabled at (1529): [<ffffadee3114576c>] _raw_spin_unlock_irq+0x48/0x9c
> > > [    6.074067] hardirqs last disabled at (1528): [<ffffadee31145b70>] _raw_spin_lock_irq+0xac/0xb0
> > > [    6.074211] softirqs last  enabled at (1444): [<ffffadee30010528>] __do_softirq+0x478/0x5f0
> > > [    6.074350] softirqs last disabled at (1429): [<ffffadee3009f064>] __irq_exit_rcu+0x184/0x1b0
> > > [    6.074506] ---[ end trace 738f7ffe764b66d5 ]---
> > > 
> > > I can solve this with this hack from mtd_set_dev_defaults():
> > > 
> > > diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
> > > index f4a4274489b4..a4b69b9558c9 100644
> > > --- a/drivers/mtd/mtdconcat.c
> > > +++ b/drivers/mtd/mtdconcat.c
> > > @@ -693,6 +693,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
> > >  	concat->mtd.oobavail = subdev[0]->oobavail;
> > >  
> > >  	subdev_master = mtd_get_master(subdev[0]);
> > > +	init_waitqueue_head(&subdev_master->master.resume_wq);
> > > +	init_rwsem(&subdev_master->master.suspend_lock);
> > >  	if (subdev_master->_writev)
> > >  		concat->mtd._writev = concat_writev;
> > >  	if (subdev_master->_read_oob)
> > > @@ -740,6 +742,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
> > >  		}
> > >  
> > >  		subdev_master = mtd_get_master(subdev[i]);
> > > +		init_waitqueue_head(&subdev_master->master.resume_wq);
> > > +		init_rwsem(&subdev_master->master.suspend_lock);
> > >  		concat->mtd.size += subdev[i]->size;
> > >  		concat->mtd.ecc_stats.badblocks +=
> > >  			subdev[i]->ecc_stats.badblocks;
> > > 
> > > I do not have a great overview of what is happing here with all these
> > > master devices :/
> > > 
> > > Any ideas Boris/Miquel?  
> > 
> > It's the same problem, over and over again: the master device uses MTD
> > helpers on its unregistered/unitialized subdevices. The following diff
> > should fix the issue, but let's be honest, it's just papering over the
> > real design issue, that his, MTD implementations use the MTD helpers
> > assuming nothing needs to be initialized. So, I'm tempted to just drop
> > the series and work an a more robust solution. In the meantime, I guess
> > getting back to a raw-NAND-only fix would make sense.
> >   
> 
> Fine by me, lets drop this series.
> 
> We have +10.000 devices that runs with this patch:
> 
> diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
> index 1f0d542d5923..58d48c3070fa 100644
> --- a/drivers/mtd/nand/raw/nand_base.c
> +++ b/drivers/mtd/nand/raw/nand_base.c
> @@ -4337,7 +4337,6 @@ static int nand_suspend(struct mtd_info *mtd)
>  		ret = chip->ops.suspend(chip);
>  	if (!ret)
>  		chip->suspended = 1;
> -	mutex_unlock(&chip->lock);
>  
>  	return ret;
>  }
> @@ -4350,7 +4349,6 @@ static void nand_resume(struct mtd_info *mtd)
>  {
>  	struct nand_chip *chip = mtd_to_nand(mtd);
>  
> -	mutex_lock(&chip->lock);
>  	if (chip->suspended) {
>  		if (chip->ops.resume)
>  			chip->ops.resume(chip);
> 

But it's abusing the chip lock... Use a wait queue as we did at the MTD
level, and make nand_get_device() wait on this wait queue when the
device is suspended.
Miquel Raynal Dec. 3, 2021, 1:39 p.m. UTC | #12
Hello,

> > Fine by me, lets drop this series.

FYI I've dropped the entire series from mtd/next. I'm waiting for the
fix discussed below (without abusing the chip mutex ;-) ).

Cheers,
Miquèl

> > We have +10.000 devices that runs with this patch:
> > 
> > diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
> > index 1f0d542d5923..58d48c3070fa 100644
> > --- a/drivers/mtd/nand/raw/nand_base.c
> > +++ b/drivers/mtd/nand/raw/nand_base.c
> > @@ -4337,7 +4337,6 @@ static int nand_suspend(struct mtd_info *mtd)
> >  		ret = chip->ops.suspend(chip);
> >  	if (!ret)
> >  		chip->suspended = 1;
> > -	mutex_unlock(&chip->lock);
> >  
> >  	return ret;
> >  }
> > @@ -4350,7 +4349,6 @@ static void nand_resume(struct mtd_info *mtd)
> >  {
> >  	struct nand_chip *chip = mtd_to_nand(mtd);
> >  
> > -	mutex_lock(&chip->lock);
> >  	if (chip->suspended) {
> >  		if (chip->ops.resume)
> >  			chip->ops.resume(chip);
> >   
> 
> But it's abusing the chip lock... Use a wait queue as we did at the MTD
> level, and make nand_get_device() wait on this wait queue when the
> device is suspended.
> 
> ______________________________________________________
> Linux MTD discussion mailing list
> http://lists.infradead.org/mailman/listinfo/linux-mtd/
Sean Nyekjaer Dec. 9, 2021, 2:07 p.m. UTC | #13
On Fri, Dec 03, 2021 at 02:39:58PM +0100, Miquel Raynal wrote:
> Hello,
> 
> > > Fine by me, lets drop this series.
> 
> FYI I've dropped the entire series from mtd/next. I'm waiting for the
> fix discussed below (without abusing the chip mutex ;-) ).

Cool, looking forward to test a patch series :)

/Sean
Miquel Raynal Dec. 9, 2021, 2:28 p.m. UTC | #14
Hi Sean,

sean@geanix.com wrote on Thu, 9 Dec 2021 15:07:21 +0100:

> On Fri, Dec 03, 2021 at 02:39:58PM +0100, Miquel Raynal wrote:
> > Hello,
> >   
> > > > Fine by me, lets drop this series.  
> > 
> > FYI I've dropped the entire series from mtd/next. I'm waiting for the
> > fix discussed below (without abusing the chip mutex ;-) ).  
> 
> Cool, looking forward to test a patch series :)

Test? You mean "write"? :)

Cheers,
Miquèl
Sean Nyekjaer Dec. 10, 2021, 1:25 p.m. UTC | #15
On Thu, Dec 09, 2021 at 03:28:11PM +0100, Miquel Raynal wrote:
> Hi Sean,
> 
> sean@geanix.com wrote on Thu, 9 Dec 2021 15:07:21 +0100:
> 
> > On Fri, Dec 03, 2021 at 02:39:58PM +0100, Miquel Raynal wrote:
> > > Hello,
> > >   
> > > > > Fine by me, lets drop this series.  
> > > 
> > > FYI I've dropped the entire series from mtd/next. I'm waiting for the
> > > fix discussed below (without abusing the chip mutex ;-) ).  
> > 
> > Cool, looking forward to test a patch series :)
> 
> Test? You mean "write"? :)
> 
> Cheers,
> Miquèl

Hi Miquel,

Should we us a atomic for the suspended variable?

/Sean

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index b3a9bc08b4bb..eb4ec9a42d49 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -338,16 +338,19 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs)
  *
  * Return: -EBUSY if the chip has been suspended, 0 otherwise
  */
-static int nand_get_device(struct nand_chip *chip)
+static void nand_get_device(struct nand_chip *chip)
 {
-	mutex_lock(&chip->lock);
-	if (chip->suspended) {
+	/* Wait until the device is resumed. */
+	while (1) {
+		mutex_lock(&chip->lock);
+		if (!chip->suspended) {
+			mutex_lock(&chip->controller->lock);
+			return;
+		}
 		mutex_unlock(&chip->lock);
-		return -EBUSY;
-	}
-	mutex_lock(&chip->controller->lock);
 
-	return 0;
+		wait_event(chip->resume_wq, !chip->suspended);
+	}
 }
 
 /**
@@ -576,9 +579,7 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs)
 		nand_erase_nand(chip, &einfo, 0);
 
 		/* Write bad block marker to OOB */
-		ret = nand_get_device(chip);
-		if (ret)
-			return ret;
+		nand_get_device(chip);
 
 		ret = nand_markbad_bbm(chip, ofs);
 		nand_release_device(chip);
@@ -3759,9 +3760,7 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
 	    ops->mode != MTD_OPS_RAW)
 		return -ENOTSUPP;
 
-	ret = nand_get_device(chip);
-	if (ret)
-		return ret;
+	nand_get_device(chip);
 
 	if (!ops->datbuf)
 		ret = nand_do_read_oob(chip, from, ops);
@@ -4352,9 +4351,7 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to,
 
 	ops->retlen = 0;
 
-	ret = nand_get_device(chip);
-	if (ret)
-		return ret;
+	nand_get_device(chip);
 
 	switch (ops->mode) {
 	case MTD_OPS_PLACE_OOB:
@@ -4414,9 +4411,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
 		return -EIO;
 
 	/* Grab the lock and see if the device is available */
-	ret = nand_get_device(chip);
-	if (ret)
-		return ret;
+	nand_get_device(chip);
 
 	/* Shift to get first page */
 	page = (int)(instr->addr >> chip->page_shift);
@@ -4503,7 +4498,7 @@ static void nand_sync(struct mtd_info *mtd)
 	pr_debug("%s: called\n", __func__);
 
 	/* Grab the lock and see if the device is available */
-	WARN_ON(nand_get_device(chip));
+	nand_get_device(chip);
 	/* Release it and go back */
 	nand_release_device(chip);
 }
@@ -4520,9 +4515,7 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
 	int ret;
 
 	/* Select the NAND device */
-	ret = nand_get_device(chip);
-	if (ret)
-		return ret;
+	nand_get_device(chip);
 
 	nand_select_target(chip, chipnr);
 
@@ -4593,6 +4586,8 @@ static void nand_resume(struct mtd_info *mtd)
 			__func__);
 	}
 	mutex_unlock(&chip->lock);
+
+	wake_up_all(&chip->resume_wq);
 }
 
 /**
@@ -5370,6 +5365,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
 	chip->cur_cs = -1;
 
 	mutex_init(&chip->lock);
+	init_waitqueue_head(&chip->resume_wq);
 
 	/* Enforce the right timings for reset/detection */
 	chip->current_interface_config = nand_get_reset_interface_config();
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index b2f9dd3cbd69..248054560581 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1294,6 +1294,7 @@ struct nand_chip {
 	/* Internals */
 	struct mutex lock;
 	unsigned int suspended : 1;
+	wait_queue_head_t resume_wq;
 	int cur_cs;
 	int read_retries;
 	struct nand_secure_region *secure_regions;
Miquel Raynal Dec. 13, 2021, 9:10 a.m. UTC | #16
Hi Sean,

sean@geanix.com wrote on Fri, 10 Dec 2021 14:25:35 +0100:

> On Thu, Dec 09, 2021 at 03:28:11PM +0100, Miquel Raynal wrote:
> > Hi Sean,
> > 
> > sean@geanix.com wrote on Thu, 9 Dec 2021 15:07:21 +0100:
> >   
> > > On Fri, Dec 03, 2021 at 02:39:58PM +0100, Miquel Raynal wrote:  
> > > > Hello,
> > > >     
> > > > > > Fine by me, lets drop this series.    
> > > > 
> > > > FYI I've dropped the entire series from mtd/next. I'm waiting for the
> > > > fix discussed below (without abusing the chip mutex ;-) ).    
> > > 
> > > Cool, looking forward to test a patch series :)  
> > 
> > Test? You mean "write"? :)
> > 
> > Cheers,
> > Miquèl  
> 
> Hi Miquel,
> 
> Should we us a atomic for the suspended variable?

I haven't thought about it extensively, an atomic variable sound fine
but I am definitely not a locking expert...

> 
> /Sean
> 
> diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
> index b3a9bc08b4bb..eb4ec9a42d49 100644
> --- a/drivers/mtd/nand/raw/nand_base.c
> +++ b/drivers/mtd/nand/raw/nand_base.c
> @@ -338,16 +338,19 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs)
>   *
>   * Return: -EBUSY if the chip has been suspended, 0 otherwise
>   */
> -static int nand_get_device(struct nand_chip *chip)
> +static void nand_get_device(struct nand_chip *chip)
>  {
> -	mutex_lock(&chip->lock);
> -	if (chip->suspended) {
> +	/* Wait until the device is resumed. */
> +	while (1) {
> +		mutex_lock(&chip->lock);
> +		if (!chip->suspended) {
> +			mutex_lock(&chip->controller->lock);
> +			return;
> +		}
>  		mutex_unlock(&chip->lock);
> -		return -EBUSY;
> -	}
> -	mutex_lock(&chip->controller->lock);
>  
> -	return 0;
> +		wait_event(chip->resume_wq, !chip->suspended);
> +	}
>  }
>  
>  /**
> @@ -576,9 +579,7 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs)
>  		nand_erase_nand(chip, &einfo, 0);
>  
>  		/* Write bad block marker to OOB */
> -		ret = nand_get_device(chip);
> -		if (ret)
> -			return ret;
> +		nand_get_device(chip);
>  
>  		ret = nand_markbad_bbm(chip, ofs);
>  		nand_release_device(chip);
> @@ -3759,9 +3760,7 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
>  	    ops->mode != MTD_OPS_RAW)
>  		return -ENOTSUPP;
>  
> -	ret = nand_get_device(chip);
> -	if (ret)
> -		return ret;
> +	nand_get_device(chip);
>  
>  	if (!ops->datbuf)
>  		ret = nand_do_read_oob(chip, from, ops);
> @@ -4352,9 +4351,7 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to,
>  
>  	ops->retlen = 0;
>  
> -	ret = nand_get_device(chip);
> -	if (ret)
> -		return ret;
> +	nand_get_device(chip);
>  
>  	switch (ops->mode) {
>  	case MTD_OPS_PLACE_OOB:
> @@ -4414,9 +4411,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
>  		return -EIO;
>  
>  	/* Grab the lock and see if the device is available */
> -	ret = nand_get_device(chip);
> -	if (ret)
> -		return ret;
> +	nand_get_device(chip);
>  
>  	/* Shift to get first page */
>  	page = (int)(instr->addr >> chip->page_shift);
> @@ -4503,7 +4498,7 @@ static void nand_sync(struct mtd_info *mtd)
>  	pr_debug("%s: called\n", __func__);
>  
>  	/* Grab the lock and see if the device is available */
> -	WARN_ON(nand_get_device(chip));
> +	nand_get_device(chip);
>  	/* Release it and go back */
>  	nand_release_device(chip);
>  }
> @@ -4520,9 +4515,7 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
>  	int ret;
>  
>  	/* Select the NAND device */
> -	ret = nand_get_device(chip);
> -	if (ret)
> -		return ret;
> +	nand_get_device(chip);
>  
>  	nand_select_target(chip, chipnr);
>  
> @@ -4593,6 +4586,8 @@ static void nand_resume(struct mtd_info *mtd)
>  			__func__);
>  	}
>  	mutex_unlock(&chip->lock);
> +
> +	wake_up_all(&chip->resume_wq);
>  }
>  
>  /**
> @@ -5370,6 +5365,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
>  	chip->cur_cs = -1;
>  
>  	mutex_init(&chip->lock);
> +	init_waitqueue_head(&chip->resume_wq);
>  
>  	/* Enforce the right timings for reset/detection */
>  	chip->current_interface_config = nand_get_reset_interface_config();
> diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
> index b2f9dd3cbd69..248054560581 100644
> --- a/include/linux/mtd/rawnand.h
> +++ b/include/linux/mtd/rawnand.h
> @@ -1294,6 +1294,7 @@ struct nand_chip {
>  	/* Internals */
>  	struct mutex lock;
>  	unsigned int suspended : 1;
> +	wait_queue_head_t resume_wq;
>  	int cur_cs;
>  	int read_retries;
>  	struct nand_secure_region *secure_regions;


Thanks,
Miquèl
Boris Brezillon Dec. 13, 2021, 9:28 a.m. UTC | #17
On Mon, 13 Dec 2021 10:10:25 +0100
Miquel Raynal <miquel.raynal@bootlin.com> wrote:

> Hi Sean,
> 
> sean@geanix.com wrote on Fri, 10 Dec 2021 14:25:35 +0100:
> 
> > On Thu, Dec 09, 2021 at 03:28:11PM +0100, Miquel Raynal wrote:  
> > > Hi Sean,
> > > 
> > > sean@geanix.com wrote on Thu, 9 Dec 2021 15:07:21 +0100:
> > >     
> > > > On Fri, Dec 03, 2021 at 02:39:58PM +0100, Miquel Raynal wrote:    
> > > > > Hello,
> > > > >       
> > > > > > > Fine by me, lets drop this series.      
> > > > > 
> > > > > FYI I've dropped the entire series from mtd/next. I'm waiting for the
> > > > > fix discussed below (without abusing the chip mutex ;-) ).      
> > > > 
> > > > Cool, looking forward to test a patch series :)    
> > > 
> > > Test? You mean "write"? :)
> > > 
> > > Cheers,
> > > Miquèl    
> > 
> > Hi Miquel,
> > 
> > Should we us a atomic for the suspended variable?  
> 
> I haven't thought about it extensively, an atomic variable sound fine
> but I am definitely not a locking expert...

No need to use an atomic if the variable is already protected by a lock
when accessed, and this seems to be case.

> 
> > 
> > /Sean
> > 
> > diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
> > index b3a9bc08b4bb..eb4ec9a42d49 100644
> > --- a/drivers/mtd/nand/raw/nand_base.c
> > +++ b/drivers/mtd/nand/raw/nand_base.c
> > @@ -338,16 +338,19 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs)
> >   *
> >   * Return: -EBUSY if the chip has been suspended, 0 otherwise

You need to fix the documentation and make it clear that the caller
will be blocked if the chip is suspended.

> >   */
> > -static int nand_get_device(struct nand_chip *chip)
> > +static void nand_get_device(struct nand_chip *chip)
> >  {
> > -	mutex_lock(&chip->lock);
> > -	if (chip->suspended) {
> > +	/* Wait until the device is resumed. */
> > +	while (1) {
> > +		mutex_lock(&chip->lock);
> > +		if (!chip->suspended) {
> > +			mutex_lock(&chip->controller->lock);
> > +			return;
> > +		}
> >  		mutex_unlock(&chip->lock);
> > -		return -EBUSY;
> > -	}
> > -	mutex_lock(&chip->controller->lock);
> >  
> > -	return 0;
> > +		wait_event(chip->resume_wq, !chip->suspended);
> > +	}
> >  }
> >  
> >  /**
> > @@ -576,9 +579,7 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs)
> >  		nand_erase_nand(chip, &einfo, 0);
> >  
> >  		/* Write bad block marker to OOB */
> > -		ret = nand_get_device(chip);
> > -		if (ret)
> > -			return ret;
> > +		nand_get_device(chip);
> >  
> >  		ret = nand_markbad_bbm(chip, ofs);
> >  		nand_release_device(chip);
> > @@ -3759,9 +3760,7 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
> >  	    ops->mode != MTD_OPS_RAW)
> >  		return -ENOTSUPP;
> >  
> > -	ret = nand_get_device(chip);
> > -	if (ret)
> > -		return ret;
> > +	nand_get_device(chip);
> >  
> >  	if (!ops->datbuf)
> >  		ret = nand_do_read_oob(chip, from, ops);
> > @@ -4352,9 +4351,7 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to,
> >  
> >  	ops->retlen = 0;
> >  
> > -	ret = nand_get_device(chip);
> > -	if (ret)
> > -		return ret;
> > +	nand_get_device(chip);
> >  
> >  	switch (ops->mode) {
> >  	case MTD_OPS_PLACE_OOB:
> > @@ -4414,9 +4411,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
> >  		return -EIO;
> >  
> >  	/* Grab the lock and see if the device is available */
> > -	ret = nand_get_device(chip);
> > -	if (ret)
> > -		return ret;
> > +	nand_get_device(chip);
> >  
> >  	/* Shift to get first page */
> >  	page = (int)(instr->addr >> chip->page_shift);
> > @@ -4503,7 +4498,7 @@ static void nand_sync(struct mtd_info *mtd)
> >  	pr_debug("%s: called\n", __func__);
> >  
> >  	/* Grab the lock and see if the device is available */
> > -	WARN_ON(nand_get_device(chip));
> > +	nand_get_device(chip);
> >  	/* Release it and go back */
> >  	nand_release_device(chip);
> >  }
> > @@ -4520,9 +4515,7 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
> >  	int ret;
> >  
> >  	/* Select the NAND device */
> > -	ret = nand_get_device(chip);
> > -	if (ret)
> > -		return ret;
> > +	nand_get_device(chip);
> >  
> >  	nand_select_target(chip, chipnr);
> >  
> > @@ -4593,6 +4586,8 @@ static void nand_resume(struct mtd_info *mtd)
> >  			__func__);
> >  	}
> >  	mutex_unlock(&chip->lock);
> > +
> > +	wake_up_all(&chip->resume_wq);
> >  }
> >  
> >  /**
> > @@ -5370,6 +5365,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
> >  	chip->cur_cs = -1;
> >  
> >  	mutex_init(&chip->lock);
> > +	init_waitqueue_head(&chip->resume_wq);
> >  
> >  	/* Enforce the right timings for reset/detection */
> >  	chip->current_interface_config = nand_get_reset_interface_config();
> > diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
> > index b2f9dd3cbd69..248054560581 100644
> > --- a/include/linux/mtd/rawnand.h
> > +++ b/include/linux/mtd/rawnand.h
> > @@ -1294,6 +1294,7 @@ struct nand_chip {
> >  	/* Internals */
> >  	struct mutex lock;
> >  	unsigned int suspended : 1;
> > +	wait_queue_head_t resume_wq;
> >  	int cur_cs;
> >  	int read_retries;
> >  	struct nand_secure_region *secure_regions;  
> 
> 
> Thanks,
> Miquèl
Miquel Raynal Dec. 13, 2021, 9:33 a.m. UTC | #18
Hello,

boris.brezillon@collabora.com wrote on Mon, 13 Dec 2021 10:28:01 +0100:

> On Mon, 13 Dec 2021 10:10:25 +0100
> Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> 
> > Hi Sean,
> > 
> > sean@geanix.com wrote on Fri, 10 Dec 2021 14:25:35 +0100:
> >   
> > > On Thu, Dec 09, 2021 at 03:28:11PM +0100, Miquel Raynal wrote:    
> > > > Hi Sean,
> > > > 
> > > > sean@geanix.com wrote on Thu, 9 Dec 2021 15:07:21 +0100:
> > > >       
> > > > > On Fri, Dec 03, 2021 at 02:39:58PM +0100, Miquel Raynal wrote:      
> > > > > > Hello,
> > > > > >         
> > > > > > > > Fine by me, lets drop this series.        
> > > > > > 
> > > > > > FYI I've dropped the entire series from mtd/next. I'm waiting for the
> > > > > > fix discussed below (without abusing the chip mutex ;-) ).        
> > > > > 
> > > > > Cool, looking forward to test a patch series :)      
> > > > 
> > > > Test? You mean "write"? :)
> > > > 
> > > > Cheers,
> > > > Miquèl      
> > > 
> > > Hi Miquel,
> > > 
> > > Should we us a atomic for the suspended variable?    
> > 
> > I haven't thought about it extensively, an atomic variable sound fine
> > but I am definitely not a locking expert...  
> 
> No need to use an atomic if the variable is already protected by a lock
> when accessed, and this seems to be case.

Maybe there was a confusion about this lock: I think Boris just do not
want the core to take any lock during a suspend operation. But you can
still use locks, as long as you release them before suspending.

And also, that chip lock might not be the one you want to take because
it's been introduced for another purpose.

> 
> >   
> > > 
> > > /Sean
> > > 
> > > diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
> > > index b3a9bc08b4bb..eb4ec9a42d49 100644
> > > --- a/drivers/mtd/nand/raw/nand_base.c
> > > +++ b/drivers/mtd/nand/raw/nand_base.c
> > > @@ -338,16 +338,19 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs)
> > >   *
> > >   * Return: -EBUSY if the chip has been suspended, 0 otherwise  
> 
> You need to fix the documentation and make it clear that the caller
> will be blocked if the chip is suspended.
> 
> > >   */
> > > -static int nand_get_device(struct nand_chip *chip)
> > > +static void nand_get_device(struct nand_chip *chip)
> > >  {
> > > -	mutex_lock(&chip->lock);
> > > -	if (chip->suspended) {
> > > +	/* Wait until the device is resumed. */
> > > +	while (1) {
> > > +		mutex_lock(&chip->lock);
> > > +		if (!chip->suspended) {
> > > +			mutex_lock(&chip->controller->lock);
> > > +			return;
> > > +		}
> > >  		mutex_unlock(&chip->lock);
> > > -		return -EBUSY;
> > > -	}
> > > -	mutex_lock(&chip->controller->lock);
> > >  
> > > -	return 0;
> > > +		wait_event(chip->resume_wq, !chip->suspended);
> > > +	}
> > >  }
> > >  
> > >  /**
> > > @@ -576,9 +579,7 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs)
> > >  		nand_erase_nand(chip, &einfo, 0);
> > >  
> > >  		/* Write bad block marker to OOB */
> > > -		ret = nand_get_device(chip);
> > > -		if (ret)
> > > -			return ret;
> > > +		nand_get_device(chip);
> > >  
> > >  		ret = nand_markbad_bbm(chip, ofs);
> > >  		nand_release_device(chip);
> > > @@ -3759,9 +3760,7 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
> > >  	    ops->mode != MTD_OPS_RAW)
> > >  		return -ENOTSUPP;
> > >  
> > > -	ret = nand_get_device(chip);
> > > -	if (ret)
> > > -		return ret;
> > > +	nand_get_device(chip);
> > >  
> > >  	if (!ops->datbuf)
> > >  		ret = nand_do_read_oob(chip, from, ops);
> > > @@ -4352,9 +4351,7 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to,
> > >  
> > >  	ops->retlen = 0;
> > >  
> > > -	ret = nand_get_device(chip);
> > > -	if (ret)
> > > -		return ret;
> > > +	nand_get_device(chip);
> > >  
> > >  	switch (ops->mode) {
> > >  	case MTD_OPS_PLACE_OOB:
> > > @@ -4414,9 +4411,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
> > >  		return -EIO;
> > >  
> > >  	/* Grab the lock and see if the device is available */
> > > -	ret = nand_get_device(chip);
> > > -	if (ret)
> > > -		return ret;
> > > +	nand_get_device(chip);
> > >  
> > >  	/* Shift to get first page */
> > >  	page = (int)(instr->addr >> chip->page_shift);
> > > @@ -4503,7 +4498,7 @@ static void nand_sync(struct mtd_info *mtd)
> > >  	pr_debug("%s: called\n", __func__);
> > >  
> > >  	/* Grab the lock and see if the device is available */
> > > -	WARN_ON(nand_get_device(chip));
> > > +	nand_get_device(chip);
> > >  	/* Release it and go back */
> > >  	nand_release_device(chip);
> > >  }
> > > @@ -4520,9 +4515,7 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
> > >  	int ret;
> > >  
> > >  	/* Select the NAND device */
> > > -	ret = nand_get_device(chip);
> > > -	if (ret)
> > > -		return ret;
> > > +	nand_get_device(chip);
> > >  
> > >  	nand_select_target(chip, chipnr);
> > >  
> > > @@ -4593,6 +4586,8 @@ static void nand_resume(struct mtd_info *mtd)
> > >  			__func__);
> > >  	}
> > >  	mutex_unlock(&chip->lock);
> > > +
> > > +	wake_up_all(&chip->resume_wq);
> > >  }
> > >  
> > >  /**
> > > @@ -5370,6 +5365,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
> > >  	chip->cur_cs = -1;
> > >  
> > >  	mutex_init(&chip->lock);
> > > +	init_waitqueue_head(&chip->resume_wq);
> > >  
> > >  	/* Enforce the right timings for reset/detection */
> > >  	chip->current_interface_config = nand_get_reset_interface_config();
> > > diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
> > > index b2f9dd3cbd69..248054560581 100644
> > > --- a/include/linux/mtd/rawnand.h
> > > +++ b/include/linux/mtd/rawnand.h
> > > @@ -1294,6 +1294,7 @@ struct nand_chip {
> > >  	/* Internals */
> > >  	struct mutex lock;
> > >  	unsigned int suspended : 1;
> > > +	wait_queue_head_t resume_wq;
> > >  	int cur_cs;
> > >  	int read_retries;
> > >  	struct nand_secure_region *secure_regions;    
> > 
> > 
> > Thanks,
> > Miquèl  
> 


Thanks,
Miquèl
Boris Brezillon Dec. 13, 2021, 9:53 a.m. UTC | #19
On Mon, 13 Dec 2021 10:33:50 +0100
Miquel Raynal <miquel.raynal@bootlin.com> wrote:

> Hello,
> 
> boris.brezillon@collabora.com wrote on Mon, 13 Dec 2021 10:28:01 +0100:
> 
> > On Mon, 13 Dec 2021 10:10:25 +0100
> > Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> >   
> > > Hi Sean,
> > > 
> > > sean@geanix.com wrote on Fri, 10 Dec 2021 14:25:35 +0100:
> > >     
> > > > On Thu, Dec 09, 2021 at 03:28:11PM +0100, Miquel Raynal wrote:      
> > > > > Hi Sean,
> > > > > 
> > > > > sean@geanix.com wrote on Thu, 9 Dec 2021 15:07:21 +0100:
> > > > >         
> > > > > > On Fri, Dec 03, 2021 at 02:39:58PM +0100, Miquel Raynal wrote:        
> > > > > > > Hello,
> > > > > > >           
> > > > > > > > > Fine by me, lets drop this series.          
> > > > > > > 
> > > > > > > FYI I've dropped the entire series from mtd/next. I'm waiting for the
> > > > > > > fix discussed below (without abusing the chip mutex ;-) ).          
> > > > > > 
> > > > > > Cool, looking forward to test a patch series :)        
> > > > > 
> > > > > Test? You mean "write"? :)
> > > > > 
> > > > > Cheers,
> > > > > Miquèl        
> > > > 
> > > > Hi Miquel,
> > > > 
> > > > Should we us a atomic for the suspended variable?      
> > > 
> > > I haven't thought about it extensively, an atomic variable sound fine
> > > but I am definitely not a locking expert...    
> > 
> > No need to use an atomic if the variable is already protected by a lock
> > when accessed, and this seems to be case.  
> 
> Maybe there was a confusion about this lock: I think Boris just do not
> want the core to take any lock during a suspend operation. But you can
> still use locks, as long as you release them before suspending.
> 
> And also, that chip lock might not be the one you want to take because
> it's been introduced for another purpose.

Access to the suspended field is already protected by the chip lock,
and I think it's just fine to keep it this way.
Sean Nyekjaer Dec. 13, 2021, 10:50 a.m. UTC | #20
Hi Miquel and Boris,

On Mon, Dec 13, 2021 at 10:53:36AM +0100, Boris Brezillon wrote:
> On Mon, 13 Dec 2021 10:33:50 +0100
> Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> 
> > Hello,
> > 
> > boris.brezillon@collabora.com wrote on Mon, 13 Dec 2021 10:28:01 +0100:
> > 
> > > On Mon, 13 Dec 2021 10:10:25 +0100
> > > Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> > >   
> > > > Hi Sean,
> > > > 
> > > > sean@geanix.com wrote on Fri, 10 Dec 2021 14:25:35 +0100:
> > > >     
> > > > > On Thu, Dec 09, 2021 at 03:28:11PM +0100, Miquel Raynal wrote:      
> > > > > > Hi Sean,
> > > > > > 
> > > > > > sean@geanix.com wrote on Thu, 9 Dec 2021 15:07:21 +0100:
> > > > > >         
> > > > > > > On Fri, Dec 03, 2021 at 02:39:58PM +0100, Miquel Raynal wrote:        
> > > > > > > > Hello,
> > > > > > > >           
> > > > > > > > > > Fine by me, lets drop this series.          
> > > > > > > > 
> > > > > > > > FYI I've dropped the entire series from mtd/next. I'm waiting for the
> > > > > > > > fix discussed below (without abusing the chip mutex ;-) ).          
> > > > > > > 
> > > > > > > Cool, looking forward to test a patch series :)        
> > > > > > 
> > > > > > Test? You mean "write"? :)
> > > > > > 
> > > > > > Cheers,
> > > > > > Miquèl        
> > > > > 
> > > > > Hi Miquel,
> > > > > 
> > > > > Should we us a atomic for the suspended variable?      
> > > > 
> > > > I haven't thought about it extensively, an atomic variable sound fine
> > > > but I am definitely not a locking expert...    
> > > 
> > > No need to use an atomic if the variable is already protected by a lock
> > > when accessed, and this seems to be case.  
> > 
> > Maybe there was a confusion about this lock: I think Boris just do not
> > want the core to take any lock during a suspend operation. But you can
> > still use locks, as long as you release them before suspending.
> > 
> > And also, that chip lock might not be the one you want to take because
> > it's been introduced for another purpose.
> 
> Access to the suspended field is already protected by the chip lock,
> and I think it's just fine to keep it this way.

I'm reading the suspended variable in wait_event() outside the lock :/

/Sean
Boris Brezillon Dec. 13, 2021, 11:06 a.m. UTC | #21
On Mon, 13 Dec 2021 12:50:12 +0200
Sean Nyekjaer <sean@geanix.com> wrote:

> Hi Miquel and Boris,
> 
> On Mon, Dec 13, 2021 at 10:53:36AM +0100, Boris Brezillon wrote:
> > On Mon, 13 Dec 2021 10:33:50 +0100
> > Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> >   
> > > Hello,
> > > 
> > > boris.brezillon@collabora.com wrote on Mon, 13 Dec 2021 10:28:01 +0100:
> > >   
> > > > On Mon, 13 Dec 2021 10:10:25 +0100
> > > > Miquel Raynal <miquel.raynal@bootlin.com> wrote:
> > > >     
> > > > > Hi Sean,
> > > > > 
> > > > > sean@geanix.com wrote on Fri, 10 Dec 2021 14:25:35 +0100:
> > > > >       
> > > > > > On Thu, Dec 09, 2021 at 03:28:11PM +0100, Miquel Raynal wrote:        
> > > > > > > Hi Sean,
> > > > > > > 
> > > > > > > sean@geanix.com wrote on Thu, 9 Dec 2021 15:07:21 +0100:
> > > > > > >           
> > > > > > > > On Fri, Dec 03, 2021 at 02:39:58PM +0100, Miquel Raynal wrote:          
> > > > > > > > > Hello,
> > > > > > > > >             
> > > > > > > > > > > Fine by me, lets drop this series.            
> > > > > > > > > 
> > > > > > > > > FYI I've dropped the entire series from mtd/next. I'm waiting for the
> > > > > > > > > fix discussed below (without abusing the chip mutex ;-) ).            
> > > > > > > > 
> > > > > > > > Cool, looking forward to test a patch series :)          
> > > > > > > 
> > > > > > > Test? You mean "write"? :)
> > > > > > > 
> > > > > > > Cheers,
> > > > > > > Miquèl          
> > > > > > 
> > > > > > Hi Miquel,
> > > > > > 
> > > > > > Should we us a atomic for the suspended variable?        
> > > > > 
> > > > > I haven't thought about it extensively, an atomic variable sound fine
> > > > > but I am definitely not a locking expert...      
> > > > 
> > > > No need to use an atomic if the variable is already protected by a lock
> > > > when accessed, and this seems to be case.    
> > > 
> > > Maybe there was a confusion about this lock: I think Boris just do not
> > > want the core to take any lock during a suspend operation. But you can
> > > still use locks, as long as you release them before suspending.
> > > 
> > > And also, that chip lock might not be the one you want to take because
> > > it's been introduced for another purpose.  
> > 
> > Access to the suspended field is already protected by the chip lock,
> > and I think it's just fine to keep it this way.  
> 
> I'm reading the suspended variable in wait_event() outside the lock :/

It doesn't matter because you're checking it again with the lock held
when doing a new loop iteration.
diff mbox series

Patch

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 153229198947..f02b602b3fa9 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -777,6 +777,8 @@  static void mtd_set_dev_defaults(struct mtd_info *mtd)
 	INIT_LIST_HEAD(&mtd->partitions);
 	mutex_init(&mtd->master.partitions_lock);
 	mutex_init(&mtd->master.chrdev_lock);
+	init_waitqueue_head(&mtd->master.resume_wq);
+	init_rwsem(&mtd->master.suspend_lock);
 }
 
 static ssize_t mtd_otp_size(struct mtd_info *mtd, bool is_user)
@@ -1267,7 +1269,9 @@  int mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
 
 	adjinstr.addr += mst_ofs;
 
+	mtd_start_access(master);
 	ret = master->_erase(master, &adjinstr);
+	mtd_end_access(master);
 
 	if (adjinstr.fail_addr != MTD_FAIL_ADDR_UNKNOWN) {
 		instr->fail_addr = adjinstr.fail_addr - mst_ofs;
@@ -1289,6 +1293,7 @@  int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen,
 	      void **virt, resource_size_t *phys)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	*retlen = 0;
 	*virt = NULL;
@@ -1301,8 +1306,12 @@  int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen,
 	if (!len)
 		return 0;
 
+	mtd_start_access(master);
 	from = mtd_get_master_ofs(mtd, from);
-	return master->_point(master, from, len, retlen, virt, phys);
+	ret = master->_point(master, from, len, retlen, virt, phys);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_point);
 
@@ -1310,6 +1319,7 @@  EXPORT_SYMBOL_GPL(mtd_point);
 int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (!master->_unpoint)
 		return -EOPNOTSUPP;
@@ -1317,7 +1327,12 @@  int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 		return -EINVAL;
 	if (!len)
 		return 0;
-	return master->_unpoint(master, mtd_get_master_ofs(mtd, from), len);
+
+	mtd_start_access(master);
+	ret =  master->_unpoint(master, mtd_get_master_ofs(mtd, from), len);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_unpoint);
 
@@ -1372,6 +1387,7 @@  int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen,
 	};
 	int ret;
 
+	/* mtd_read_oob_std handles mtd access protection */
 	ret = mtd_read_oob(mtd, from, &ops);
 	*retlen = ops.retlen;
 
@@ -1388,6 +1404,7 @@  int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen,
 	};
 	int ret;
 
+	/* mtd_write_oob_std handles mtd access protection */
 	ret = mtd_write_oob(mtd, to, &ops);
 	*retlen = ops.retlen;
 
@@ -1464,11 +1481,13 @@  static int mtd_read_oob_std(struct mtd_info *mtd, loff_t from,
 	int ret;
 
 	from = mtd_get_master_ofs(mtd, from);
+	mtd_start_access(master);
 	if (master->_read_oob)
 		ret = master->_read_oob(master, from, ops);
 	else
 		ret = master->_read(master, from, ops->len, &ops->retlen,
 				    ops->datbuf);
+	mtd_end_access(master);
 
 	return ret;
 }
@@ -1480,11 +1499,13 @@  static int mtd_write_oob_std(struct mtd_info *mtd, loff_t to,
 	int ret;
 
 	to = mtd_get_master_ofs(mtd, to);
+	mtd_start_access(master);
 	if (master->_write_oob)
 		ret = master->_write_oob(master, to, ops);
 	else
 		ret = master->_write(master, to, ops->len, &ops->retlen,
 				     ops->datbuf);
+	mtd_end_access(master);
 
 	return ret;
 }
@@ -1992,12 +2013,18 @@  int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen,
 			   struct otp_info *buf)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (!master->_get_fact_prot_info)
 		return -EOPNOTSUPP;
 	if (!len)
 		return 0;
-	return master->_get_fact_prot_info(master, len, retlen, buf);
+
+	mtd_start_access(master);
+	ret = master->_get_fact_prot_info(master, len, retlen, buf);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_get_fact_prot_info);
 
@@ -2005,13 +2032,19 @@  int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len,
 			   size_t *retlen, u_char *buf)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	*retlen = 0;
 	if (!master->_read_fact_prot_reg)
 		return -EOPNOTSUPP;
 	if (!len)
 		return 0;
-	return master->_read_fact_prot_reg(master, from, len, retlen, buf);
+
+	mtd_start_access(master);
+	ret = master->_read_fact_prot_reg(master, from, len, retlen, buf);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_read_fact_prot_reg);
 
@@ -2019,12 +2052,18 @@  int mtd_get_user_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen,
 			   struct otp_info *buf)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (!master->_get_user_prot_info)
 		return -EOPNOTSUPP;
 	if (!len)
 		return 0;
-	return master->_get_user_prot_info(master, len, retlen, buf);
+
+	mtd_start_access(master);
+	ret =  master->_get_user_prot_info(master, len, retlen, buf);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_get_user_prot_info);
 
@@ -2032,13 +2071,19 @@  int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len,
 			   size_t *retlen, u_char *buf)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	*retlen = 0;
 	if (!master->_read_user_prot_reg)
 		return -EOPNOTSUPP;
 	if (!len)
 		return 0;
-	return master->_read_user_prot_reg(master, from, len, retlen, buf);
+
+	mtd_start_access(master);
+	ret = master->_read_user_prot_reg(master, from, len, retlen, buf);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_read_user_prot_reg);
 
@@ -2053,7 +2098,11 @@  int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len,
 		return -EOPNOTSUPP;
 	if (!len)
 		return 0;
+
+	mtd_start_access(master);
 	ret = master->_write_user_prot_reg(master, to, len, retlen, buf);
+	mtd_end_access(master);
+
 	if (ret)
 		return ret;
 
@@ -2068,24 +2117,36 @@  EXPORT_SYMBOL_GPL(mtd_write_user_prot_reg);
 int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (!master->_lock_user_prot_reg)
 		return -EOPNOTSUPP;
 	if (!len)
 		return 0;
-	return master->_lock_user_prot_reg(master, from, len);
+
+	mtd_start_access(master);
+	ret = master->_lock_user_prot_reg(master, from, len);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_lock_user_prot_reg);
 
 int mtd_erase_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (!master->_erase_user_prot_reg)
 		return -EOPNOTSUPP;
 	if (!len)
 		return 0;
-	return master->_erase_user_prot_reg(master, from, len);
+
+	mtd_start_access(master);
+	ret = master->_erase_user_prot_reg(master, from, len);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_erase_user_prot_reg);
 
@@ -2093,6 +2154,7 @@  EXPORT_SYMBOL_GPL(mtd_erase_user_prot_reg);
 int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (!master->_lock)
 		return -EOPNOTSUPP;
@@ -2106,13 +2168,18 @@  int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 		len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize;
 	}
 
-	return master->_lock(master, mtd_get_master_ofs(mtd, ofs), len);
+	mtd_start_access(master);
+	ret = master->_lock(master, mtd_get_master_ofs(mtd, ofs), len);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_lock);
 
 int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (!master->_unlock)
 		return -EOPNOTSUPP;
@@ -2126,13 +2193,18 @@  int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 		len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize;
 	}
 
-	return master->_unlock(master, mtd_get_master_ofs(mtd, ofs), len);
+	mtd_start_access(master);
+	ret = master->_unlock(master, mtd_get_master_ofs(mtd, ofs), len);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_unlock);
 
 int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (!master->_is_locked)
 		return -EOPNOTSUPP;
@@ -2146,13 +2218,18 @@  int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 		len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize;
 	}
 
-	return master->_is_locked(master, mtd_get_master_ofs(mtd, ofs), len);
+	mtd_start_access(master);
+	ret = master->_is_locked(master, mtd_get_master_ofs(mtd, ofs), len);
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_is_locked);
 
 int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (ofs < 0 || ofs >= mtd->size)
 		return -EINVAL;
@@ -2162,13 +2239,18 @@  int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs)
 	if (mtd->flags & MTD_SLC_ON_MLC_EMULATION)
 		ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize;
 
-	return master->_block_isreserved(master, mtd_get_master_ofs(mtd, ofs));
+	mtd_start_access(master);
+	ret = master->_block_isreserved(master, mtd_get_master_ofs(mtd, ofs));
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_block_isreserved);
 
 int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (ofs < 0 || ofs >= mtd->size)
 		return -EINVAL;
@@ -2178,7 +2260,11 @@  int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
 	if (mtd->flags & MTD_SLC_ON_MLC_EMULATION)
 		ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize;
 
-	return master->_block_isbad(master, mtd_get_master_ofs(mtd, ofs));
+	mtd_start_access(master);
+	ret = master->_block_isbad(master, mtd_get_master_ofs(mtd, ofs));
+	mtd_end_access(master);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_block_isbad);
 
@@ -2197,7 +2283,10 @@  int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	if (mtd->flags & MTD_SLC_ON_MLC_EMULATION)
 		ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize;
 
+	mtd_start_access(master);
 	ret = master->_block_markbad(master, mtd_get_master_ofs(mtd, ofs));
+	mtd_end_access(master);
+
 	if (ret)
 		return ret;
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 88227044fc86..b074106e2d8e 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -231,6 +231,8 @@  struct mtd_master {
 	struct mutex partitions_lock;
 	struct mutex chrdev_lock;
 	unsigned int suspended : 1;
+	wait_queue_head_t resume_wq;
+	struct rw_semaphore suspend_lock;
 };
 
 struct mtd_info {
@@ -476,10 +478,47 @@  static inline u32 mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops)
 	return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize;
 }
 
+static inline void mtd_start_access(struct mtd_info *master)
+{
+	WARN_ON_ONCE(master != mtd_get_master(master));
+
+	/*
+	 * Don't take the suspend_lock on devices that don't
+	 * implement the suspend hook. Otherwise, lockdep will
+	 * complain about nested locks when trying to suspend MTD
+	 * partitions or MTD devices created by gluebi which are
+	 * backed by real devices.
+	 */
+	if (!master->_suspend)
+		return;
+
+	/*
+	 * Wait until the device is resumed. Should we have a
+	 * non-blocking mode here?
+	 */
+	while (1) {
+		down_read(&master->master.suspend_lock);
+		if (!master->master.suspended)
+			return;
+
+		up_read(&master->master.suspend_lock);
+		wait_event(master->master.resume_wq, !master->master.suspended);
+	}
+}
+
+static inline void mtd_end_access(struct mtd_info *master)
+{
+	if (!master->_suspend)
+		return;
+
+	up_read(&master->master.suspend_lock);
+}
+
 static inline int mtd_max_bad_blocks(struct mtd_info *mtd,
 				     loff_t ofs, size_t len)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
+	int ret;
 
 	if (!master->_max_bad_blocks)
 		return -ENOTSUPP;
@@ -487,8 +526,12 @@  static inline int mtd_max_bad_blocks(struct mtd_info *mtd,
 	if (mtd->size < (len + ofs) || ofs < 0)
 		return -EINVAL;
 
-	return master->_max_bad_blocks(master, mtd_get_master_ofs(mtd, ofs),
-				       len);
+	mtd_start_access(master);
+	ret = master->_max_bad_blocks(master, mtd_get_master_ofs(mtd, ofs),
+				      len);
+	mtd_end_access(master);
+
+	return ret;
 }
 
 int mtd_wunit_to_pairing_info(struct mtd_info *mtd, int wunit,
@@ -546,30 +589,40 @@  int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs);
 static inline int mtd_suspend(struct mtd_info *mtd)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
-	int ret;
-
-	if (master->master.suspended)
-		return 0;
+	int ret = 0;
 
-	ret = master->_suspend ? master->_suspend(master) : 0;
-	if (ret)
+	if (!master->_suspend)
 		return ret;
 
-	master->master.suspended = 1;
-	return 0;
+	down_write(&master->master.suspend_lock);
+	if (!master->master.suspended) {
+		ret = master->_suspend(master);
+		if (!ret)
+			master->master.suspended = 1;
+	}
+	up_write(&master->master.suspend_lock);
+
+	return ret;
 }
 
 static inline void mtd_resume(struct mtd_info *mtd)
 {
 	struct mtd_info *master = mtd_get_master(mtd);
 
-	if (!master->master.suspended)
+	if (!master->_suspend)
 		return;
 
-	if (master->_resume)
-		master->_resume(master);
+	down_write(&master->master.suspend_lock);
+	if (master->master.suspended) {
+		if (master->_resume)
+			master->_resume(master);
+
+		master->master.suspended = 0;
 
-	master->master.suspended = 0;
+		/* The MTD dev has been resumed, wake up all waiters. */
+		wake_up_all(&master->master.resume_wq);
+	}
+	up_write(&master->master.suspend_lock);
 }
 
 static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)