diff mbox

jffs2: GC deadlock reading a page that is used in jffs2_write_begin()

Message ID 149914202384.24318.7331828698981799313.stgit@kyeongy-dl.atlnz.lc
State Accepted
Delegated to: David Woodhouse
Headers show

Commit Message

Kyeong Yoo July 4, 2017, 4:22 a.m. UTC
GC task can deadlock in read_cache_page() because it may attempt
to release a page that is actually allocated by another task in
jffs2_write_begin().
The reason is that in jffs2_write_begin() there is a small window
a cache page is allocated for use but not set Uptodate yet.

This ends up with a deadlock between two tasks:
1) A task (e.g. file copy)
   - jffs2_write_begin() locks a cache page
   - jffs2_write_end() tries to lock "alloc_sem" from
	 jffs2_reserve_space() <-- STUCK
2) GC task (jffs2_gcd_mtd3)
   - jffs2_garbage_collect_pass() locks "alloc_sem"
   - try to lock the same cache page in read_cache_page() <-- STUCK

So to avoid this deadlock, hold "alloc_sem" in jffs2_write_begin()
while reading data in a cache page.

Signed-off-by: Kyeong Yoo <kyeong.yoo@alliedtelesis.co.nz>
---

Note: I'm resending this patch to linux-mtd.

 fs/jffs2/file.c |   40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

Comments

Kyeong Yoo July 17, 2017, 9:54 p.m. UTC | #1
Hi,

Can someone review this and give me any feedback?

This patch is to prevent deadlock in jffs2 GC.

Thanks,
Kyeong


On 04/07/17 16:22, Kyeong Yoo wrote:
> GC task can deadlock in read_cache_page() because it may attempt
> to release a page that is actually allocated by another task in
> jffs2_write_begin().
> The reason is that in jffs2_write_begin() there is a small window
> a cache page is allocated for use but not set Uptodate yet.
> 
> This ends up with a deadlock between two tasks:
> 1) A task (e.g. file copy)
>     - jffs2_write_begin() locks a cache page
>     - jffs2_write_end() tries to lock "alloc_sem" from
> 	 jffs2_reserve_space() <-- STUCK
> 2) GC task (jffs2_gcd_mtd3)
>     - jffs2_garbage_collect_pass() locks "alloc_sem"
>     - try to lock the same cache page in read_cache_page() <-- STUCK
> 
> So to avoid this deadlock, hold "alloc_sem" in jffs2_write_begin()
> while reading data in a cache page.
> 
> Signed-off-by: Kyeong Yoo <kyeong.yoo@alliedtelesis.co.nz>
> ---
> 
> Note: I'm resending this patch to linux-mtd.
> 
>   fs/jffs2/file.c |   40 +++++++++++++++++++++++++---------------
>   1 file changed, 25 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
> index c12476e309c6..eb4e4d784d26 100644
> --- a/fs/jffs2/file.c
> +++ b/fs/jffs2/file.c
> @@ -135,20 +135,15 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>   	struct page *pg;
>   	struct inode *inode = mapping->host;
>   	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
> +	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
>   	pgoff_t index = pos >> PAGE_SHIFT;
>   	uint32_t pageofs = index << PAGE_SHIFT;
>   	int ret = 0;
>   
> -	pg = grab_cache_page_write_begin(mapping, index, flags);
> -	if (!pg)
> -		return -ENOMEM;
> -	*pagep = pg;
> -
>   	jffs2_dbg(1, "%s()\n", __func__);
>   
>   	if (pageofs > inode->i_size) {
>   		/* Make new hole frag from old EOF to new page */
> -		struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
>   		struct jffs2_raw_inode ri;
>   		struct jffs2_full_dnode *fn;
>   		uint32_t alloc_len;
> @@ -159,7 +154,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>   		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
>   					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
>   		if (ret)
> -			goto out_page;
> +			goto out_err;
>   
>   		mutex_lock(&f->sem);
>   		memset(&ri, 0, sizeof(ri));
> @@ -189,7 +184,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>   			ret = PTR_ERR(fn);
>   			jffs2_complete_reservation(c);
>   			mutex_unlock(&f->sem);
> -			goto out_page;
> +			goto out_err;
>   		}
>   		ret = jffs2_add_full_dnode_to_inode(c, f, fn);
>   		if (f->metadata) {
> @@ -204,7 +199,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>   			jffs2_free_full_dnode(fn);
>   			jffs2_complete_reservation(c);
>   			mutex_unlock(&f->sem);
> -			goto out_page;
> +			goto out_err;
>   		}
>   		jffs2_complete_reservation(c);
>   		inode->i_size = pageofs;
> @@ -212,6 +207,19 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>   	}
>   
>   	/*
> +	 * While getting a page and reading data in, lock c->alloc_sem until
> +	 * the page is Uptodate. Otherwise GC task may attempt to read the same
> +	 * page in read_cache_page(), which causes a deadlock.
> +	 */
> +	mutex_lock(&c->alloc_sem);
> +	pg = grab_cache_page_write_begin(mapping, index, flags);
> +	if (!pg) {
> +		ret = -ENOMEM;
> +		goto release_sem;
> +	}
> +	*pagep = pg;
> +
> +	/*
>   	 * Read in the page if it wasn't already present. Cannot optimize away
>   	 * the whole page write case until jffs2_write_end can handle the
>   	 * case of a short-copy.
> @@ -220,15 +228,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>   		mutex_lock(&f->sem);
>   		ret = jffs2_do_readpage_nolock(inode, pg);
>   		mutex_unlock(&f->sem);
> -		if (ret)
> -			goto out_page;
> +		if (ret) {
> +			unlock_page(pg);
> +			put_page(pg);
> +			goto release_sem;
> +		}
>   	}
>   	jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
> -	return ret;
>   
> -out_page:
> -	unlock_page(pg);
> -	put_page(pg);
> +release_sem:
> +	mutex_unlock(&c->alloc_sem);
> +out_err:
>   	return ret;
>   }
>   
>
Richard Weinberger July 18, 2017, 7:17 a.m. UTC | #2
Kyeong Yoo,

On Mon, Jul 17, 2017 at 11:54 PM, Kyeong Yoo
<Kyeong.Yoo@alliedtelesis.co.nz> wrote:
> Hi,
>
> Can someone review this and give me any feedback?
>
> This patch is to prevent deadlock in jffs2 GC.
>
> Thanks,
> Kyeong
>
>
> On 04/07/17 16:22, Kyeong Yoo wrote:
>> GC task can deadlock in read_cache_page() because it may attempt
>> to release a page that is actually allocated by another task in
>> jffs2_write_begin().
>> The reason is that in jffs2_write_begin() there is a small window
>> a cache page is allocated for use but not set Uptodate yet.
>>
>> This ends up with a deadlock between two tasks:
>> 1) A task (e.g. file copy)
>>     - jffs2_write_begin() locks a cache page
>>     - jffs2_write_end() tries to lock "alloc_sem" from
>>        jffs2_reserve_space() <-- STUCK
>> 2) GC task (jffs2_gcd_mtd3)
>>     - jffs2_garbage_collect_pass() locks "alloc_sem"
>>     - try to lock the same cache page in read_cache_page() <-- STUCK

Your description does not match the code. Are you seeing this on a
recent mainline kernel?
Can t be that your kernel does not has this commit?
commit 157078f64b8a9cd7011b6b900b2f2498df850748
Author: Thomas Betker <thomas.betker@rohde-schwarz.com>
Date:   Tue Nov 10 22:18:15 2015 +0100

    Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin"

    This reverts commit 5ffd3412ae55
    ("jffs2: Fix lock acquisition order bug in jffs2_write_begin").
Kyeong Yoo July 18, 2017, 11 p.m. UTC | #3
Hi Richard,

This patch was on top of v4.12 and still applies cleanly to the latest 
mainline which has the following commit at the top:

commit 74cbd96bc2e00f5daa805e2ebf49e998f7045062 (origin/master, origin/HEAD)
Merge: bef85bd7db26 6409e84ec58f
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Tue Jul 18 11:51:08 2017 -0700

     Merge tag 'md/4.13-rc2' of 
git://git.kernel.org/pub/scm/linux/kernel/git/shli/md


And I can see the commit you mentioned exists in my repo.

  commit 157078f64b8a9cd7011b6b900b2f2498df850748
  Author: Thomas Betker <thomas.betker@rohde-schwarz.com>
  Date:   Tue Nov 10 22:18:15 2015 +0100

     Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin"


The logic of deadlock still exists with the above commit, that's what I 
described in the patch commit message. Actual kernel version I saw the 
issue is 4.4.6. But what I found is the logic regarding locking is same 
in the current mainline.
For you information, attached the kernel panic info below.

Thanks,
Kyeong

<3>INFO: task jffs2_gcd_mtd3:190 blocked for more than 120 seconds.
<3>      Tainted: P           O    4.4.6-at1 #1
<3>"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
<6>jffs2_gcd_mtd3  D 00000000     0   190      2 0x00000800
<6>Call Trace:
<6>[a712faf0] [a4003180] 0xa4003180 (unreliable)
<6>[a712fbb0] [8068ea54] __schedule+0x1f4/0x5f0
<6>[a712fbf0] [8068ee84] schedule+0x34/0xb0
<6>[a712fc00] [80691b48] schedule_timeout+0x188/0x1d0
<6>[a712fc50] [8068e824] io_schedule_timeout+0x84/0xc0
<6>[a712fc70] [8068f7c0] bit_wait_io+0x20/0x70
<6>[a712fc80] [8068f584] __wait_on_bit_lock+0xa4/0x140
<6>[a712fcb0] [800b51bc] __lock_page+0x9c/0xb0
<6>[a712fce0] [800b58c8] do_read_cache_page+0x168/0x200
<6>[a712fd20] [8022b670] jffs2_gc_fetch_page+0x30/0xd0
<6>[a712fd30] [80694934] jffs2_garbage_collect_live+0x958/0xe64
<6>[a712fde0] [80228934] jffs2_garbage_collect_pass+0x454/0x7e0
<6>[a712fe40] [8022a290] jffs2_garbage_collect_thread+0xc0/0x1d0
<6>[a712fef0] [8003fb38] kthread+0xc8/0xe0
<6>[a712ff40] [8000eed8] ret_from_kernel_thread+0x5c/0x64
<3>INFO: task syslog-ng:483 blocked for more than 120 seconds.
<3>      Tainted: P           O    4.4.6-at1 #1
<3>"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
<6>syslog-ng       D 0fc2101c     0   483      1 0x00000000
<6>Call Trace:
<6>[a663bb60] [a6346840] 0xa6346840 (unreliable)
<6>[a663bc20] [8068ea54] __schedule+0x1f4/0x5f0
<6>[a663bc60] [8068ee84] schedule+0x34/0xb0
<6>[a663bc70] [8068f230] schedule_preempt_disabled+0x10/0x20
<6>[a663bc80] [806908a0] __mutex_lock_slowpath+0xa0/0x140
<6>[a663bcc0] [80690994] mutex_lock+0x54/0x70
<6>[a663bcd0] [802237e0] jffs2_reserve_space+0x40/0x310
<6>[a663bd40] [802263b0] jffs2_write_inode_range+0xe0/0x2e0
<6>[a663bda0] [80220c04] jffs2_write_end+0xf4/0x2d0
<6>[a663bde0] [800b62b4] generic_perform_write+0x114/0x200
<6>[a663be40] [800b772c] __generic_file_write_iter+0x1bc/0x230
<6>[a663be80] [800b78b0] generic_file_write_iter+0x110/0x300
<6>[a663bea0] [80103cb0] __vfs_write+0xc0/0x130
<6>[a663bef0] [80104660] vfs_write+0xa0/0x1b0
<6>[a663bf10] [8010506c] SyS_write+0x4c/0xd0
<6>[a663bf40] [8000eda0] ret_from_syscall+0x0/0x3c
<6>--- interrupt: c01 at 0xfc2101c
<6>    LR = 0x10023e94
<3>INFO: task scp:15190 blocked for more than 120 seconds.
<3>      Tainted: P           O    4.4.6-at1 #1
<3>"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
<6>scp             D 1fdb3b4c     0 15190  15189 0x00000000
<6>Call Trace:
<6>[a468fb60] [0929effc] 0x929effc (unreliable)
<6>[a468fc20] [8068ea54] __schedule+0x1f4/0x5f0
<6>[a468fc60] [8068ee84] schedule+0x34/0xb0
<6>[a468fc70] [8068f230] schedule_preempt_disabled+0x10/0x20
<6>[a468fc80] [806908a0] __mutex_lock_slowpath+0xa0/0x140
<6>[a468fcc0] [80690994] mutex_lock+0x54/0x70
<6>[a468fcd0] [802237e0] jffs2_reserve_space+0x40/0x310
<6>[a468fd40] [802263b0] jffs2_write_inode_range+0xe0/0x2e0
<6>[a468fda0] [80220c04] jffs2_write_end+0xf4/0x2d0
<6>[a468fde0] [800b62b4] generic_perform_write+0x114/0x200
<6>[a468fe40] [800b772c] __generic_file_write_iter+0x1bc/0x230
<6>[a468fe80] [800b78b0] generic_file_write_iter+0x110/0x300
<6>[a468fea0] [80103cb0] __vfs_write+0xc0/0x130
<6>[a468fef0] [80104660] vfs_write+0xa0/0x1b0
<6>[a468ff10] [8010506c] SyS_write+0x4c/0xd0
<6>[a468ff40] [8000eda0] ret_from_syscall+0x0/0x3c
<6>--- interrupt: c01 at 0x1fdb3b4c
<6>    LR = 0x2021adcc

On 18/07/17 19:17, Richard Weinberger wrote:
> Kyeong Yoo,
> 
> On Mon, Jul 17, 2017 at 11:54 PM, Kyeong Yoo
> <Kyeong.Yoo@alliedtelesis.co.nz> wrote:
>> Hi,
>>
>> Can someone review this and give me any feedback?
>>
>> This patch is to prevent deadlock in jffs2 GC.
>>
>> Thanks,
>> Kyeong
>>
>>
>> On 04/07/17 16:22, Kyeong Yoo wrote:
>>> GC task can deadlock in read_cache_page() because it may attempt
>>> to release a page that is actually allocated by another task in
>>> jffs2_write_begin().
>>> The reason is that in jffs2_write_begin() there is a small window
>>> a cache page is allocated for use but not set Uptodate yet.
>>>
>>> This ends up with a deadlock between two tasks:
>>> 1) A task (e.g. file copy)
>>>      - jffs2_write_begin() locks a cache page
>>>      - jffs2_write_end() tries to lock "alloc_sem" from
>>>         jffs2_reserve_space() <-- STUCK
>>> 2) GC task (jffs2_gcd_mtd3)
>>>      - jffs2_garbage_collect_pass() locks "alloc_sem"
>>>      - try to lock the same cache page in read_cache_page() <-- STUCK
> 
> Your description does not match the code. Are you seeing this on a
> recent mainline kernel?
> Can t be that your kernel does not has this commit?
> commit 157078f64b8a9cd7011b6b900b2f2498df850748
> Author: Thomas Betker <thomas.betker@rohde-schwarz.com>
> Date:   Tue Nov 10 22:18:15 2015 +0100
> 
>      Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin"
> 
>      This reverts commit 5ffd3412ae55
>      ("jffs2: Fix lock acquisition order bug in jffs2_write_begin").
> 
>
Richard Weinberger Dec. 15, 2018, 10:37 p.m. UTC | #4
On Tue, Jul 4, 2017 at 6:23 AM Kyeong Yoo
<kyeong.yoo@alliedtelesis.co.nz> wrote:
>
> GC task can deadlock in read_cache_page() because it may attempt
> to release a page that is actually allocated by another task in
> jffs2_write_begin().
> The reason is that in jffs2_write_begin() there is a small window
> a cache page is allocated for use but not set Uptodate yet.
>
> This ends up with a deadlock between two tasks:
> 1) A task (e.g. file copy)
>    - jffs2_write_begin() locks a cache page
>    - jffs2_write_end() tries to lock "alloc_sem" from
>          jffs2_reserve_space() <-- STUCK
> 2) GC task (jffs2_gcd_mtd3)
>    - jffs2_garbage_collect_pass() locks "alloc_sem"
>    - try to lock the same cache page in read_cache_page() <-- STUCK
>
> So to avoid this deadlock, hold "alloc_sem" in jffs2_write_begin()
> while reading data in a cache page.
>
> Signed-off-by: Kyeong Yoo <kyeong.yoo@alliedtelesis.co.nz>
> ---
>
> Note: I'm resending this patch to linux-mtd.
>
>  fs/jffs2/file.c |   40 +++++++++++++++++++++++++---------------
>  1 file changed, 25 insertions(+), 15 deletions(-)
>
> diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
> index c12476e309c6..eb4e4d784d26 100644
> --- a/fs/jffs2/file.c
> +++ b/fs/jffs2/file.c
> @@ -135,20 +135,15 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>         struct page *pg;
>         struct inode *inode = mapping->host;
>         struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
> +       struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
>         pgoff_t index = pos >> PAGE_SHIFT;
>         uint32_t pageofs = index << PAGE_SHIFT;
>         int ret = 0;
>
> -       pg = grab_cache_page_write_begin(mapping, index, flags);
> -       if (!pg)
> -               return -ENOMEM;
> -       *pagep = pg;
> -
>         jffs2_dbg(1, "%s()\n", __func__);
>
>         if (pageofs > inode->i_size) {
>                 /* Make new hole frag from old EOF to new page */
> -               struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
>                 struct jffs2_raw_inode ri;
>                 struct jffs2_full_dnode *fn;
>                 uint32_t alloc_len;
> @@ -159,7 +154,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>                 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
>                                           ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
>                 if (ret)
> -                       goto out_page;
> +                       goto out_err;
>
>                 mutex_lock(&f->sem);
>                 memset(&ri, 0, sizeof(ri));
> @@ -189,7 +184,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>                         ret = PTR_ERR(fn);
>                         jffs2_complete_reservation(c);
>                         mutex_unlock(&f->sem);
> -                       goto out_page;
> +                       goto out_err;
>                 }
>                 ret = jffs2_add_full_dnode_to_inode(c, f, fn);
>                 if (f->metadata) {
> @@ -204,7 +199,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>                         jffs2_free_full_dnode(fn);
>                         jffs2_complete_reservation(c);
>                         mutex_unlock(&f->sem);
> -                       goto out_page;
> +                       goto out_err;
>                 }
>                 jffs2_complete_reservation(c);
>                 inode->i_size = pageofs;
> @@ -212,6 +207,19 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>         }
>
>         /*
> +        * While getting a page and reading data in, lock c->alloc_sem until
> +        * the page is Uptodate. Otherwise GC task may attempt to read the same
> +        * page in read_cache_page(), which causes a deadlock.
> +        */
> +       mutex_lock(&c->alloc_sem);
> +       pg = grab_cache_page_write_begin(mapping, index, flags);
> +       if (!pg) {
> +               ret = -ENOMEM;
> +               goto release_sem;
> +       }
> +       *pagep = pg;
> +
> +       /*
>          * Read in the page if it wasn't already present. Cannot optimize away
>          * the whole page write case until jffs2_write_end can handle the
>          * case of a short-copy.
> @@ -220,15 +228,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>                 mutex_lock(&f->sem);
>                 ret = jffs2_do_readpage_nolock(inode, pg);
>                 mutex_unlock(&f->sem);
> -               if (ret)
> -                       goto out_page;
> +               if (ret) {
> +                       unlock_page(pg);
> +                       put_page(pg);
> +                       goto release_sem;
> +               }
>         }
>         jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
> -       return ret;
>
> -out_page:
> -       unlock_page(pg);
> -       put_page(pg);
> +release_sem:
> +       mutex_unlock(&c->alloc_sem);
> +out_err:
>         return ret;
>  }

Kyeong,

first of all, sorry for the massive delay!

Right now I'm trying to get jffs2 back in shape and stumbled across your patch.
My test suite can actually trigger this deadlock and I think your
patch is likely the
right solution. I'm still reviewing and try to make very sure that it
works as expected.

David, unless you have objections I'd carry this patch via the MTD tree.
Hou Tao Jan. 25, 2019, 6:30 a.m. UTC | #5
Hi Richard,

On 2018/12/16 6:37, Richard Weinberger wrote:
> On Tue, Jul 4, 2017 at 6:23 AM Kyeong Yoo
> <kyeong.yoo@alliedtelesis.co.nz> wrote:
>>
>> GC task can deadlock in read_cache_page() because it may attempt
>> to release a page that is actually allocated by another task in
>> jffs2_write_begin().
>> The reason is that in jffs2_write_begin() there is a small window
>> a cache page is allocated for use but not set Uptodate yet.
>>
>> This ends up with a deadlock between two tasks:
>> 1) A task (e.g. file copy)
>>    - jffs2_write_begin() locks a cache page
>>    - jffs2_write_end() tries to lock "alloc_sem" from
>>          jffs2_reserve_space() <-- STUCK
>> 2) GC task (jffs2_gcd_mtd3)
>>    - jffs2_garbage_collect_pass() locks "alloc_sem"
>>    - try to lock the same cache page in read_cache_page() <-- STUCK
>>
>> So to avoid this deadlock, hold "alloc_sem" in jffs2_write_begin()
>> while reading data in a cache page.
>>

Acquiring alloc_sem may be too heavy, can we use it as a slow path and using
the following code as the fast path ?

    /*
     * Need to make sure this page is Uptodate before locking it,
     * else the write may dead-lock with the GC thread which tries
     * to wait for unlock a not-Uptodate page.
     */
    gfp_mask = mapping_gfp_mask(mapping);
    if (flags & AOP_FLAG_NOFS)
        gfp_mask &= ~__GFP_FS;
    pg = read_cache_page_gfp(mapping, index, gfp_mask);
    if (IS_ERR(pg)) {
        ret = PTR_ERR(pg);
        goto out_err;
    }

    lock_page(pg);
    /* If page is reclaimed, fall back to slow path */
    if (!pg->mapping) {
        unlock_page(pg);
        put_page(pg);

        /*
         * While getting a page and reading data in, lock c->alloc_sem until
         * the page is Uptodate. Otherwise GC task may attempt to read the same
         * page in read_cache_page(), which causes a deadlock.
         */
        mutex_lock(&c->alloc_sem);
        pg = grab_cache_page_write_begin(mapping, index, flags);

And will the fix go into v5.0 ?

Regards,
Tao

>> Signed-off-by: Kyeong Yoo <kyeong.yoo@alliedtelesis.co.nz>
>> ---
>>
>> Note: I'm resending this patch to linux-mtd.
>>
>>  fs/jffs2/file.c |   40 +++++++++++++++++++++++++---------------
>>  1 file changed, 25 insertions(+), 15 deletions(-)
>>
>> diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
>> index c12476e309c6..eb4e4d784d26 100644
>> --- a/fs/jffs2/file.c
>> +++ b/fs/jffs2/file.c
>> @@ -135,20 +135,15 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>>         struct page *pg;
>>         struct inode *inode = mapping->host;
>>         struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
>> +       struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
>>         pgoff_t index = pos >> PAGE_SHIFT;
>>         uint32_t pageofs = index << PAGE_SHIFT;
>>         int ret = 0;
>>
>> -       pg = grab_cache_page_write_begin(mapping, index, flags);
>> -       if (!pg)
>> -               return -ENOMEM;
>> -       *pagep = pg;
>> -
>>         jffs2_dbg(1, "%s()\n", __func__);
>>
>>         if (pageofs > inode->i_size) {
>>                 /* Make new hole frag from old EOF to new page */
>> -               struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
>>                 struct jffs2_raw_inode ri;
>>                 struct jffs2_full_dnode *fn;
>>                 uint32_t alloc_len;
>> @@ -159,7 +154,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>>                 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
>>                                           ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
>>                 if (ret)
>> -                       goto out_page;
>> +                       goto out_err;
>>
>>                 mutex_lock(&f->sem);
>>                 memset(&ri, 0, sizeof(ri));
>> @@ -189,7 +184,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>>                         ret = PTR_ERR(fn);
>>                         jffs2_complete_reservation(c);
>>                         mutex_unlock(&f->sem);
>> -                       goto out_page;
>> +                       goto out_err;
>>                 }
>>                 ret = jffs2_add_full_dnode_to_inode(c, f, fn);
>>                 if (f->metadata) {
>> @@ -204,7 +199,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>>                         jffs2_free_full_dnode(fn);
>>                         jffs2_complete_reservation(c);
>>                         mutex_unlock(&f->sem);
>> -                       goto out_page;
>> +                       goto out_err;
>>                 }
>>                 jffs2_complete_reservation(c);
>>                 inode->i_size = pageofs;
>> @@ -212,6 +207,19 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>>         }
>>
>>         /*
>> +        * While getting a page and reading data in, lock c->alloc_sem until
>> +        * the page is Uptodate. Otherwise GC task may attempt to read the same
>> +        * page in read_cache_page(), which causes a deadlock.
>> +        */
>> +       mutex_lock(&c->alloc_sem);
>> +       pg = grab_cache_page_write_begin(mapping, index, flags);
>> +       if (!pg) {
>> +               ret = -ENOMEM;
>> +               goto release_sem;
>> +       }
>> +       *pagep = pg;
>> +
>> +       /*
>>          * Read in the page if it wasn't already present. Cannot optimize away
>>          * the whole page write case until jffs2_write_end can handle the
>>          * case of a short-copy.
>> @@ -220,15 +228,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
>>                 mutex_lock(&f->sem);
>>                 ret = jffs2_do_readpage_nolock(inode, pg);
>>                 mutex_unlock(&f->sem);
>> -               if (ret)
>> -                       goto out_page;
>> +               if (ret) {
>> +                       unlock_page(pg);
>> +                       put_page(pg);
>> +                       goto release_sem;
>> +               }
>>         }
>>         jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
>> -       return ret;
>>
>> -out_page:
>> -       unlock_page(pg);
>> -       put_page(pg);
>> +release_sem:
>> +       mutex_unlock(&c->alloc_sem);
>> +out_err:
>>         return ret;
>>  }
> 
> Kyeong,
> 
> first of all, sorry for the massive delay!
> 
> Right now I'm trying to get jffs2 back in shape and stumbled across your patch.
> My test suite can actually trigger this deadlock and I think your
> patch is likely the
> right solution. I'm still reviewing and try to make very sure that it
> works as expected.
> 
> David, unless you have objections I'd carry this patch via the MTD tree.
>
Hamish Martin May 20, 2020, midnight UTC | #6
On Sat, 2018-12-15 at 23:37 +0100, Richard Weinberger wrote:
> On Tue, Jul 4, 2017 at 6:23 AM Kyeong Yoo
> <kyeong.yoo@alliedtelesis.co.nz> wrote:
> > 
> > 
> > GC task can deadlock in read_cache_page() because it may attempt
> > to release a page that is actually allocated by another task in
> > jffs2_write_begin().
> > The reason is that in jffs2_write_begin() there is a small window
> > a cache page is allocated for use but not set Uptodate yet.
> > 
> > This ends up with a deadlock between two tasks:
> > 1) A task (e.g. file copy)
> >    - jffs2_write_begin() locks a cache page
> >    - jffs2_write_end() tries to lock "alloc_sem" from
> >          jffs2_reserve_space() <-- STUCK
> > 2) GC task (jffs2_gcd_mtd3)
> >    - jffs2_garbage_collect_pass() locks "alloc_sem"
> >    - try to lock the same cache page in read_cache_page() <-- STUCK
> > 
> > So to avoid this deadlock, hold "alloc_sem" in jffs2_write_begin()
> > while reading data in a cache page.
> > 
> > Signed-off-by: Kyeong Yoo <kyeong.yoo@alliedtelesis.co.nz>
> > ---
> > 
> > Note: I'm resending this patch to linux-mtd.
> > 
> >  fs/jffs2/file.c |   40 +++++++++++++++++++++++++---------------
> >  1 file changed, 25 insertions(+), 15 deletions(-)
> > 
> > diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
> > index c12476e309c6..eb4e4d784d26 100644
> > --- a/fs/jffs2/file.c
> > +++ b/fs/jffs2/file.c
> > @@ -135,20 +135,15 @@ static int jffs2_write_begin(struct file
> > *filp, struct address_space *mapping,
> >         struct page *pg;
> >         struct inode *inode = mapping->host;
> >         struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
> > +       struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
> >         pgoff_t index = pos >> PAGE_SHIFT;
> >         uint32_t pageofs = index << PAGE_SHIFT;
> >         int ret = 0;
> > 
> > -       pg = grab_cache_page_write_begin(mapping, index, flags);
> > -       if (!pg)
> > -               return -ENOMEM;
> > -       *pagep = pg;
> > -
> >         jffs2_dbg(1, "%s()\n", __func__);
> > 
> >         if (pageofs > inode->i_size) {
> >                 /* Make new hole frag from old EOF to new page */
> > -               struct jffs2_sb_info *c = JFFS2_SB_INFO(inode-
> > >i_sb);
> >                 struct jffs2_raw_inode ri;
> >                 struct jffs2_full_dnode *fn;
> >                 uint32_t alloc_len;
> > @@ -159,7 +154,7 @@ static int jffs2_write_begin(struct file *filp,
> > struct address_space *mapping,
> >                 ret = jffs2_reserve_space(c, sizeof(ri),
> > &alloc_len,
> >                                           ALLOC_NORMAL,
> > JFFS2_SUMMARY_INODE_SIZE);
> >                 if (ret)
> > -                       goto out_page;
> > +                       goto out_err;
> > 
> >                 mutex_lock(&f->sem);
> >                 memset(&ri, 0, sizeof(ri));
> > @@ -189,7 +184,7 @@ static int jffs2_write_begin(struct file *filp,
> > struct address_space *mapping,
> >                         ret = PTR_ERR(fn);
> >                         jffs2_complete_reservation(c);
> >                         mutex_unlock(&f->sem);
> > -                       goto out_page;
> > +                       goto out_err;
> >                 }
> >                 ret = jffs2_add_full_dnode_to_inode(c, f, fn);
> >                 if (f->metadata) {
> > @@ -204,7 +199,7 @@ static int jffs2_write_begin(struct file *filp,
> > struct address_space *mapping,
> >                         jffs2_free_full_dnode(fn);
> >                         jffs2_complete_reservation(c);
> >                         mutex_unlock(&f->sem);
> > -                       goto out_page;
> > +                       goto out_err;
> >                 }
> >                 jffs2_complete_reservation(c);
> >                 inode->i_size = pageofs;
> > @@ -212,6 +207,19 @@ static int jffs2_write_begin(struct file
> > *filp, struct address_space *mapping,
> >         }
> > 
> >         /*
> > +        * While getting a page and reading data in, lock c-
> > >alloc_sem until
> > +        * the page is Uptodate. Otherwise GC task may attempt to
> > read the same
> > +        * page in read_cache_page(), which causes a deadlock.
> > +        */
> > +       mutex_lock(&c->alloc_sem);
> > +       pg = grab_cache_page_write_begin(mapping, index, flags);
> > +       if (!pg) {
> > +               ret = -ENOMEM;
> > +               goto release_sem;
> > +       }
> > +       *pagep = pg;
> > +
> > +       /*
> >          * Read in the page if it wasn't already present. Cannot
> > optimize away
> >          * the whole page write case until jffs2_write_end can
> > handle the
> >          * case of a short-copy.
> > @@ -220,15 +228,17 @@ static int jffs2_write_begin(struct file
> > *filp, struct address_space *mapping,
> >                 mutex_lock(&f->sem);
> >                 ret = jffs2_do_readpage_nolock(inode, pg);
> >                 mutex_unlock(&f->sem);
> > -               if (ret)
> > -                       goto out_page;
> > +               if (ret) {
> > +                       unlock_page(pg);
> > +                       put_page(pg);
> > +                       goto release_sem;
> > +               }
> >         }
> >         jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg-
> > >flags);
> > -       return ret;
> > 
> > -out_page:
> > -       unlock_page(pg);
> > -       put_page(pg);
> > +release_sem:
> > +       mutex_unlock(&c->alloc_sem);
> > +out_err:
> >         return ret;
> >  }
> Kyeong,
> 
> first of all, sorry for the massive delay!
> 
> Right now I'm trying to get jffs2 back in shape and stumbled across
> your patch.
> My test suite can actually trigger this deadlock and I think your
> patch is likely the
> right solution. I'm still reviewing and try to make very sure that it
> works as expected.
> 
> David, unless you have objections I'd carry this patch via the MTD
> tree.
> 

Hi Richard,

I'm interested to know what happened to this patch. I can't see that it
made it in to the Linus' tree or any other maintainers tree.

I'm keen to help finish it off and I note that you said you were able
to make the fault occur with your tests. Would you be able to share
what test you were running?

Please let me know if you'd like me to test a modified patch, or if
you'd like any further assistance to get this patch completed.

Thanks,
Hamish Martin
Richard Weinberger May 25, 2020, 7:45 p.m. UTC | #7
On Wed, May 20, 2020 at 2:00 AM Hamish Martin
<Hamish.Martin@alliedtelesis.co.nz> wrote:
> > David, unless you have objections I'd carry this patch via the MTD
> > tree.
> >
>
> Hi Richard,
>
> I'm interested to know what happened to this patch. I can't see that it
> made it in to the Linus' tree or any other maintainers tree.
>
> I'm keen to help finish it off and I note that you said you were able
> to make the fault occur with your tests. Would you be able to share
> what test you were running?
>
> Please let me know if you'd like me to test a modified patch, or if
> you'd like any further assistance to get this patch completed.

Let me figure why this patch got skipped. :-)
Thanks for letting me know.
Richard Weinberger June 2, 2020, 9 p.m. UTC | #8
On Mon, May 25, 2020 at 9:45 PM Richard Weinberger
<richard.weinberger@gmail.com> wrote:
> > Please let me know if you'd like me to test a modified patch, or if
> > you'd like any further assistance to get this patch completed.
>
> Let me figure why this patch got skipped. :-)
> Thanks for letting me know.

Applied to my fixes queue.
diff mbox

Patch

diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index c12476e309c6..eb4e4d784d26 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -135,20 +135,15 @@  static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 	struct page *pg;
 	struct inode *inode = mapping->host;
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
 	pgoff_t index = pos >> PAGE_SHIFT;
 	uint32_t pageofs = index << PAGE_SHIFT;
 	int ret = 0;
 
-	pg = grab_cache_page_write_begin(mapping, index, flags);
-	if (!pg)
-		return -ENOMEM;
-	*pagep = pg;
-
 	jffs2_dbg(1, "%s()\n", __func__);
 
 	if (pageofs > inode->i_size) {
 		/* Make new hole frag from old EOF to new page */
-		struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
 		struct jffs2_raw_inode ri;
 		struct jffs2_full_dnode *fn;
 		uint32_t alloc_len;
@@ -159,7 +154,7 @@  static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
 					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
 		if (ret)
-			goto out_page;
+			goto out_err;
 
 		mutex_lock(&f->sem);
 		memset(&ri, 0, sizeof(ri));
@@ -189,7 +184,7 @@  static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 			ret = PTR_ERR(fn);
 			jffs2_complete_reservation(c);
 			mutex_unlock(&f->sem);
-			goto out_page;
+			goto out_err;
 		}
 		ret = jffs2_add_full_dnode_to_inode(c, f, fn);
 		if (f->metadata) {
@@ -204,7 +199,7 @@  static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 			jffs2_free_full_dnode(fn);
 			jffs2_complete_reservation(c);
 			mutex_unlock(&f->sem);
-			goto out_page;
+			goto out_err;
 		}
 		jffs2_complete_reservation(c);
 		inode->i_size = pageofs;
@@ -212,6 +207,19 @@  static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 	}
 
 	/*
+	 * While getting a page and reading data in, lock c->alloc_sem until
+	 * the page is Uptodate. Otherwise GC task may attempt to read the same
+	 * page in read_cache_page(), which causes a deadlock.
+	 */
+	mutex_lock(&c->alloc_sem);
+	pg = grab_cache_page_write_begin(mapping, index, flags);
+	if (!pg) {
+		ret = -ENOMEM;
+		goto release_sem;
+	}
+	*pagep = pg;
+
+	/*
 	 * Read in the page if it wasn't already present. Cannot optimize away
 	 * the whole page write case until jffs2_write_end can handle the
 	 * case of a short-copy.
@@ -220,15 +228,17 @@  static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 		mutex_lock(&f->sem);
 		ret = jffs2_do_readpage_nolock(inode, pg);
 		mutex_unlock(&f->sem);
-		if (ret)
-			goto out_page;
+		if (ret) {
+			unlock_page(pg);
+			put_page(pg);
+			goto release_sem;
+		}
 	}
 	jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
-	return ret;
 
-out_page:
-	unlock_page(pg);
-	put_page(pg);
+release_sem:
+	mutex_unlock(&c->alloc_sem);
+out_err:
 	return ret;
 }