Message ID | 20180723121902.20201-1-tomasbortoli@gmail.com |
---|---|
State | Not Applicable, archived |
Delegated to: | David Miller |
Headers | show |
Series | net/9p/trans_fd.c: fix double list_del() | expand |
Tomas Bortoli wrote on Mon, Jul 23, 2018: > A double list_del(&req->req_list) is possible in p9_fd_cancel() as > shown by Syzbot. To prevent it we have to ensure that we have the > client->lock when deleting the list. Furthermore, we have to update > the status of the request before releasing the lock, to prevent the > race. Nice, so no need to change the list_del to list_del_init! I still have a nitpick on the last moved unlock, but it's mostly aesthetic - the change looks much better to me now. (Since that will require a v2 I'll be evil and go further than Yiwen about the commit message: let it breathe a bit! :) I think a line break before "furthermore" for example will make it easier to read) > > Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com> > Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com > --- > net/9p/trans_fd.c | 9 ++++----- > 1 file changed, 4 insertions(+), 5 deletions(-) > > diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c > index a64b01c56e30..370c6c69a05c 100644 > --- a/net/9p/trans_fd.c > +++ b/net/9p/trans_fd.c > @@ -199,15 +199,14 @@ static void p9_mux_poll_stop(struct p9_conn *m) > static void p9_conn_cancel(struct p9_conn *m, int err) > { > struct p9_req_t *req, *rtmp; > - unsigned long flags; > LIST_HEAD(cancel_list); > > p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); > > - spin_lock_irqsave(&m->client->lock, flags); > + spin_lock(&m->client->lock); > > if (m->err) { > - spin_unlock_irqrestore(&m->client->lock, flags); > + spin_unlock(&m->client->lock); > return; > } > > @@ -219,7 +218,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err) > list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { > list_move(&req->req_list, &cancel_list); > } > - spin_unlock_irqrestore(&m->client->lock, flags); > > list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { > p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); > @@ -228,6 +226,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) > 
req->t_err = err; > p9_client_cb(m->client, req, REQ_STATUS_ERROR); > } > + spin_unlock(&m->client->lock); > } > > static __poll_t > @@ -370,12 +369,12 @@ static void p9_read_work(struct work_struct *work) > if (m->req->status != REQ_STATUS_ERROR) > status = REQ_STATUS_RCVD; > list_del(&m->req->req_list); > - spin_unlock(&m->client->lock); > p9_client_cb(m->client, m->req, status); > m->rc.sdata = NULL; > m->rc.offset = 0; > m->rc.capacity = 0; > m->req = NULL; > + spin_unlock(&m->client->lock); It took me a while to understand why you extended this lock despite having just read the commit message, I'd suggest: - moving the spin_unlock to right after p9_client_cb (afterall that's what we want, the m->rc and m->req don't need to be protected) - add a comment before p9_client_cb saying something like 'updates req->status' or try to explain why it needs to be locked here but other transports don't need such a lock (they're not dependant on req->status like this)
On 07/23/2018 02:57 PM, Dominique Martinet wrote: > Tomas Bortoli wrote on Mon, Jul 23, 2018: >> A double list_del(&req->req_list) is possible in p9_fd_cancel() as >> shown by Syzbot. To prevent it we have to ensure that we have the >> client->lock when deleting the list. Furthermore, we have to update >> the status of the request before releasing the lock, to prevent the >> race. > > Nice, so no need to change the list_del to list_del_init! > > I still have a nitpick on the last moved unlock, but it's mostly > aesthetic - the change looks much better to me now. > > (Since that will require a v2 I'll be evil and go further than Yiwen > about the commit message: let it breathe a bit! :) I think a line break > before "furthermore" for example will make it easier to read) > agree >> >> Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com> >> Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com >> --- >> net/9p/trans_fd.c | 9 ++++----- >> 1 file changed, 4 insertions(+), 5 deletions(-) >> >> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c >> index a64b01c56e30..370c6c69a05c 100644 >> --- a/net/9p/trans_fd.c >> +++ b/net/9p/trans_fd.c >> @@ -199,15 +199,14 @@ static void p9_mux_poll_stop(struct p9_conn *m) >> static void p9_conn_cancel(struct p9_conn *m, int err) >> { >> struct p9_req_t *req, *rtmp; >> - unsigned long flags; >> LIST_HEAD(cancel_list); >> >> p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); >> >> - spin_lock_irqsave(&m->client->lock, flags); >> + spin_lock(&m->client->lock); >> >> if (m->err) { >> - spin_unlock_irqrestore(&m->client->lock, flags); >> + spin_unlock(&m->client->lock); >> return; >> } >> >> @@ -219,7 +218,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err) >> list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { >> list_move(&req->req_list, &cancel_list); >> } >> - spin_unlock_irqrestore(&m->client->lock, flags); >> >> list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { >> 
p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); >> @@ -228,6 +226,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) >> req->t_err = err; >> p9_client_cb(m->client, req, REQ_STATUS_ERROR); >> } >> + spin_unlock(&m->client->lock); >> } >> >> static __poll_t >> @@ -370,12 +369,12 @@ static void p9_read_work(struct work_struct *work) >> if (m->req->status != REQ_STATUS_ERROR) >> status = REQ_STATUS_RCVD; >> list_del(&m->req->req_list); >> - spin_unlock(&m->client->lock); >> p9_client_cb(m->client, m->req, status); >> m->rc.sdata = NULL; >> m->rc.offset = 0; >> m->rc.capacity = 0; >> m->req = NULL; >> + spin_unlock(&m->client->lock); > > It took me a while to understand why you extended this lock despite > having just read the commit message, I'd suggest: > - moving the spin_unlock to right after p9_client_cb (afterall that's > what we want, the m->rc and m->req don't need to be protected) yes, better. > - add a comment before p9_client_cb saying something like 'updates > req->status' or try to explain why it needs to be locked here but other > transports don't need such a lock (they're not dependant on req->status > like this) > ok thanks for the feedback
On 2018/7/23 20:19, Tomas Bortoli wrote: > A double list_del(&req->req_list) is possible in p9_fd_cancel() as > shown by Syzbot. To prevent it we have to ensure that we have the > client->lock when deleting the list. Furthermore, we have to update > the status of the request before releasing the lock, to prevent the > race. > > Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com> > Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com > --- > net/9p/trans_fd.c | 9 ++++----- > 1 file changed, 4 insertions(+), 5 deletions(-) > > diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c > index a64b01c56e30..370c6c69a05c 100644 > --- a/net/9p/trans_fd.c > +++ b/net/9p/trans_fd.c > @@ -199,15 +199,14 @@ static void p9_mux_poll_stop(struct p9_conn *m) > static void p9_conn_cancel(struct p9_conn *m, int err) > { > struct p9_req_t *req, *rtmp; > - unsigned long flags; > LIST_HEAD(cancel_list); > > p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); > > - spin_lock_irqsave(&m->client->lock, flags); > + spin_lock(&m->client->lock); > > if (m->err) { > - spin_unlock_irqrestore(&m->client->lock, flags); > + spin_unlock(&m->client->lock); > return; > } > > @@ -219,7 +218,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err) > list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { > list_move(&req->req_list, &cancel_list); > } > - spin_unlock_irqrestore(&m->client->lock, flags); > > list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { > p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); > @@ -228,6 +226,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) > req->t_err = err; > p9_client_cb(m->client, req, REQ_STATUS_ERROR); > } > + spin_unlock(&m->client->lock); If you want to expand the ranges of client->lock, the cancel_list will not be necessary, you can optimize this code. Thanks, Yiwen. 
> } > > static __poll_t > @@ -370,12 +369,12 @@ static void p9_read_work(struct work_struct *work) > if (m->req->status != REQ_STATUS_ERROR) > status = REQ_STATUS_RCVD; > list_del(&m->req->req_list); > - spin_unlock(&m->client->lock); > p9_client_cb(m->client, m->req, status); > m->rc.sdata = NULL; > m->rc.offset = 0; > m->rc.capacity = 0; > m->req = NULL; > + spin_unlock(&m->client->lock); > } > > end_clear: >
On 07/24/2018 03:40 AM, jiangyiwen wrote: > On 2018/7/23 20:19, Tomas Bortoli wrote: >> A double list_del(&req->req_list) is possible in p9_fd_cancel() as >> shown by Syzbot. To prevent it we have to ensure that we have the >> client->lock when deleting the list. Furthermore, we have to update >> the status of the request before releasing the lock, to prevent the >> race. >> >> Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com> >> Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com >> --- >> net/9p/trans_fd.c | 9 ++++----- >> 1 file changed, 4 insertions(+), 5 deletions(-) >> >> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c >> index a64b01c56e30..370c6c69a05c 100644 >> --- a/net/9p/trans_fd.c >> +++ b/net/9p/trans_fd.c >> @@ -199,15 +199,14 @@ static void p9_mux_poll_stop(struct p9_conn *m) >> static void p9_conn_cancel(struct p9_conn *m, int err) >> { >> struct p9_req_t *req, *rtmp; >> - unsigned long flags; >> LIST_HEAD(cancel_list); >> >> p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); >> >> - spin_lock_irqsave(&m->client->lock, flags); >> + spin_lock(&m->client->lock); >> >> if (m->err) { >> - spin_unlock_irqrestore(&m->client->lock, flags); >> + spin_unlock(&m->client->lock); >> return; >> } >> >> @@ -219,7 +218,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err) >> list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { >> list_move(&req->req_list, &cancel_list); >> } >> - spin_unlock_irqrestore(&m->client->lock, flags); >> >> list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { >> p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); >> @@ -228,6 +226,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) >> req->t_err = err; >> p9_client_cb(m->client, req, REQ_STATUS_ERROR); >> } >> + spin_unlock(&m->client->lock); > > If you want to expand the ranges of client->lock, the cancel_list will not > be necessary, you can optimize this code. > Unfortunately, not. 
Moving the spin_lock() before the for makes the crash appear again. This because the calls to list_move() in the for before delete all the elements from req->req_list, so the list is empty, another call to list_del() would trigger a double del. That's why we hold the lock to update the status of all those requests.. otherwise we have again the race with p9_fd_cancel(). Crash log at the bottom. > Thanks, > Yiwen. > >> } >> >> static __poll_t >> @@ -370,12 +369,12 @@ static void p9_read_work(struct work_struct *work) >> if (m->req->status != REQ_STATUS_ERROR) >> status = REQ_STATUS_RCVD; >> list_del(&m->req->req_list); >> - spin_unlock(&m->client->lock); >> p9_client_cb(m->client, m->req, status); >> m->rc.sdata = NULL; >> m->rc.offset = 0; >> m->rc.capacity = 0; >> m->req = NULL; >> + spin_unlock(&m->client->lock); >> } >> >> end_clear: >> > > Crash: syzkaller login: [ 55.691138] list_del corruption, ffff88004de337a8->next is LIST_POISON1 (dead000000000100) [ 55.693058] ------------[ cut here ]------------ [ 55.693910] kernel BUG at lib/list_debug.c:47! 
[ 55.695060] invalid opcode: 0000 [#1] SMP KASAN [ 55.696008] CPU: 1 PID: 9500 Comm: repro1 Not tainted 4.18.0-rc4+ #260 [ 55.696027] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 55.696027] RIP: 0010:__list_del_entry_valid+0xd3/0x150 [ 55.696027] Code: 00 00 00 49 8b 54 24 08 48 39 f2 75 3b 48 83 c4 08 b8 01 00 00 00 5b 41 5c 5d c3 4c 89 e2 48 c7 c7 80 06 b8 87 e8 21 c6 1d fe <0f> 0b 48 c7 c7 e0 06 b8 87 e8 13 c6 1d fe 0f 0b 48 c7 c7 40 07 b8 [ 55.696027] RSP: 0018:ffff88004de2f198 EFLAGS: 00010282 [ 55.696027] RAX: 000000000000004e RBX: dead000000000200 RCX: ffffffff815efe0e [ 55.696027] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88006c9265ac [ 55.696027] RBP: ffff88004de2f1b0 R08: ffffed000d924fc1 R09: 0000000000000001 [ 55.696027] R10: ffffed000cdc94e8 R11: ffff88006c927e07 R12: dead000000000100 [ 55.696027] R13: ffff880066e4a740 R14: ffff88004de337a8 R15: ffff88004de2f240 [ 55.696027] FS: 00007efc61d2e700(0000) GS:ffff88006c900000(0000) knlGS:0000000000000000 [ 55.696027] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 55.696027] CR2: 00007efc613ab330 CR3: 000000005e86f000 CR4: 00000000000006e0 [ 55.696027] Call Trace: [ 55.696027] ? _raw_spin_lock+0x32/0x40 [ 55.696027] p9_fd_cancel+0xf3/0x390 [ 55.696027] ? p9_fd_request+0x238/0x3e0 [ 55.696027] ? p9_fd_close+0x5a0/0x5a0 [ 55.696027] p9_client_rpc+0xacf/0x11b0 [ 55.696027] ? p9_client_prepare_req.part.11+0xd20/0xd20 [ 55.696027] ? __fget+0x378/0x5a0 [ 55.696027] ? iterate_fd+0x400/0x400 [ 55.696027] ? finish_wait+0x4b0/0x4b0 [ 55.696027] ? rcu_read_lock_sched_held+0x108/0x120 [ 55.696027] ? p9_fd_cancel+0x390/0x390 [ 55.696027] p9_client_create+0xa33/0x1600 [ 55.696027] ? v9fs_drop_inode+0x100/0x140 [ 55.696027] ? p9_client_read+0xbe0/0xbe0 [ 55.724517] ? __sched_text_start+0x8/0x8 [ 55.724517] ? find_held_lock+0x35/0x1d0 [ 55.724517] ? __lockdep_init_map+0xe4/0x650 [ 55.724517] ? lockdep_init_map+0x9/0x10 [ 55.724517] ? 
kasan_check_write+0x14/0x20 [ 55.724517] ? __init_rwsem+0x1ce/0x2b0 [ 55.724517] ? do_raw_write_unlock+0x2a0/0x2a0 [ 55.724517] ? rcu_read_lock_sched_held+0x108/0x120 [ 55.724517] ? __kmalloc_track_caller+0x49f/0x760 [ 55.724517] ? save_stack+0xa3/0xd0 [ 55.724517] v9fs_session_init+0x218/0x1980 [ 55.724517] ? v9fs_session_init+0x218/0x1980 [ 55.724517] ? v9fs_show_options+0x740/0x740 [ 55.724517] ? kasan_check_read+0x11/0x20 [ 55.724517] ? rcu_is_watching+0x8c/0x150 [ 55.724517] ? rcu_pm_notify+0xc0/0xc0 [ 55.736879] ? v9fs_mount+0x62/0x880 [ 55.736879] ? rcu_read_lock_sched_held+0x108/0x120 [ 55.738600] ? kmem_cache_alloc_trace+0x48d/0x740 [ 55.738600] v9fs_mount+0x81/0x880 [ 55.738600] ? v9fs_mount+0x81/0x880 [ 55.738600] mount_fs+0x66/0x2f0 [ 55.738600] vfs_kern_mount.part.26+0xcc/0x4a0 [ 55.738600] ? may_umount+0xa0/0xa0 [ 55.738600] ? _raw_read_unlock+0x22/0x30 [ 55.738600] ? __get_fs_type+0x8a/0xc0 [ 55.738600] do_mount+0xd86/0x2e90 [ 55.738600] ? kasan_check_read+0x11/0x20 [ 55.738600] ? do_raw_spin_unlock+0xa7/0x330 [ 55.738600] ? copy_mount_string+0x40/0x40 [ 55.738600] ? copy_mount_options+0x5f/0x2e0 [ 55.738600] ? rcu_read_lock_sched_held+0x108/0x120 [ 55.738600] ? kmem_cache_alloc_trace+0x48d/0x740 [ 55.738600] ? copy_mount_options+0x1f7/0x2e0 [ 55.738600] ksys_mount+0xab/0x120 [ 55.738600] __x64_sys_mount+0xbe/0x150 [ 55.738600] ? trace_hardirqs_on_caller+0x421/0x5c0 [ 55.738600] do_syscall_64+0x18c/0x760 [ 55.738600] ? finish_task_switch+0x186/0x9f0 [ 55.738600] ? syscall_return_slowpath+0x560/0x560 [ 55.738600] ? syscall_return_slowpath+0x2b0/0x560 [ 55.738600] ? __switch_to_asm+0x34/0x70 [ 55.738600] ? prepare_exit_to_usermode+0x360/0x360 [ 55.738600] ? __switch_to_asm+0x34/0x70 [ 55.738600] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 55.738600] ? 
trace_hardirqs_off_thunk+0x1a/0x1c [ 55.738600] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 55.738600] RIP: 0033:0x7efc61442b79 [ 55.763914] Code: f3 fa ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8f c2 2a 00 31 d2 48 29 c2 64 [ 55.763914] RSP: 002b:00007efc61d2de88 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [ 55.763914] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007efc61442b79 [ 55.769909] RDX: 0000000020000380 RSI: 0000000020000000 RDI: 0000000000000000 [ 55.769909] RBP: 00007efc61d2deb0 R08: 0000000020000240 R09: 00007efc61d2e9c0 [ 55.769909] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffd68da6aa0 [ 55.773854] R13: 00007efc61d2e9c0 R14: 00007efc61d39040 R15: 0000000000000003 [ 55.773854] Modules linked in: [ 55.776650] ---[ end trace 8de8057bee332983 ]--- [ 55.777631] RIP: 0010:__list_del_entry_valid+0xd3/0x150 [ 55.778754] Code: 00 00 00 49 8b 54 24 08 48 39 f2 75 3b 48 83 c4 08 b8 01 00 00 00 5b 41 5c 5d c3 4c 89 e2 48 c7 c7 80 06 b8 87 e8 21 c6 1d fe <0f> 0b 48 c7 c7 e0 06 b8 87 e8 13 c6 1d fe 0f 0b 48 c7 c7 40 07 b8 [ 55.782685] RSP: 0018:ffff88004de2f198 EFLAGS: 00010282 [ 55.783785] RAX: 000000000000004e RBX: dead000000000200 RCX: ffffffff815efe0e [ 55.785126] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88006c9265ac [ 55.786470] RBP: ffff88004de2f1b0 R08: ffffed000d924fc1 R09: 0000000000000001 [ 55.788007] R10: ffffed000cdc94e8 R11: ffff88006c927e07 R12: dead000000000100 [ 55.789517] R13: ffff880066e4a740 R14: ffff88004de337a8 R15: ffff88004de2f240 [ 55.791173] FS: 00007efc61d2e700(0000) GS:ffff88006c900000(0000) knlGS:0000000000000000 [ 55.792746] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 55.793882] CR2: 00007efc613ab330 CR3: 000000005e86f000 CR4: 00000000000006e0 [ 55.795410] Kernel panic - not syncing: Fatal exception [ 55.796384] Kernel Offset: disabled [ 55.796384] Rebooting in 86400 seconds..
Tomas Bortoli wrote on Tue, Jul 24, 2018: > >> @@ -228,6 +226,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) > >> req->t_err = err; > >> p9_client_cb(m->client, req, REQ_STATUS_ERROR); > >> } > >> + spin_unlock(&m->client->lock); > > > > If you want to expand the ranges of client->lock, the cancel_list will not > > be necessary, you can optimize this code. > > > > Unfortunately, not. Moving the spin_lock() before the for makes the > crash appear again. This because the calls to list_move() in the for > before delete all the elements from req->req_list, so the list is empty, > another call to list_del() would trigger a double del. > That's why we hold the lock to update the status of all those requests.. > otherwise we have again the race with p9_fd_cancel(). What (I think) he meant is that since you're holding the lock all the way, you don't need to transfer all the items to a temporary list to loop on it immediately afterwards, but you could call the client cb directly. I'm personally not a fan of this approach as that would duplicate the code, even if the loop isn't big... This code is only called at disconnect time so I think using the extra list doesn't hurt anyone; but as usual do what you feel is better; I don't mind much either way.
On 07/24/2018 12:19 PM, Dominique Martinet wrote: > Tomas Bortoli wrote on Tue, Jul 24, 2018: >>>> @@ -228,6 +226,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) >>>> req->t_err = err; >>>> p9_client_cb(m->client, req, REQ_STATUS_ERROR); >>>> } >>>> + spin_unlock(&m->client->lock); >>> >>> If you want to expand the ranges of client->lock, the cancel_list will not >>> be necessary, you can optimize this code. >>> >> >> Unfortunately, not. Moving the spin_lock() before the for makes the >> crash appear again. This because the calls to list_move() in the for >> before delete all the elements from req->req_list, so the list is empty, >> another call to list_del() would trigger a double del. >> That's why we hold the lock to update the status of all those requests.. >> otherwise we have again the race with p9_fd_cancel(). > > What (I think) he meant is that since you're holding the lock all the > way, you don't need to transfer all the items to a temporary list to > loop on it immediately afterwards, but you could call the client cb > directly. > Yeah that is possible. > I'm personally not a fan of this approach as that would duplicate the > code, even if the loop isn't big... Yep > > This code is only called at disconnect time so I think using the extra > list doesn't hurt anyone; but as usual do what you feel is better; I > don't mind much either way. > I think it's fine as it is.
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index a64b01c56e30..370c6c69a05c 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -199,15 +199,14 @@ static void p9_mux_poll_stop(struct p9_conn *m) static void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req_t *req, *rtmp; - unsigned long flags; LIST_HEAD(cancel_list); p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - spin_lock_irqsave(&m->client->lock, flags); + spin_lock(&m->client->lock); if (m->err) { - spin_unlock_irqrestore(&m->client->lock, flags); + spin_unlock(&m->client->lock); return; } @@ -219,7 +218,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); } - spin_unlock_irqrestore(&m->client->lock, flags); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); @@ -228,6 +226,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } + spin_unlock(&m->client->lock); } static __poll_t @@ -370,12 +369,12 @@ static void p9_read_work(struct work_struct *work) if (m->req->status != REQ_STATUS_ERROR) status = REQ_STATUS_RCVD; list_del(&m->req->req_list); - spin_unlock(&m->client->lock); p9_client_cb(m->client, m->req, status); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; m->req = NULL; + spin_unlock(&m->client->lock); } end_clear:
A double list_del(&req->req_list) is possible in p9_fd_cancel() as shown by Syzbot. To prevent it we have to ensure that we hold the client->lock when deleting from the list.

Furthermore, we have to update the status of the request before releasing the lock, to prevent the race.

Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com> Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com --- net/9p/trans_fd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-)