diff mbox series

[2/2] 9p: Add refcount to p9_req_t

Message ID 20180811144254.23665-2-tomasbortoli@gmail.com
State Not Applicable, archived
Delegated to: David Miller
Headers show
Series None | expand

Commit Message

Tomas Bortoli Aug. 11, 2018, 2:42 p.m. UTC
To avoid use-after-free(s), use a refcount to keep track of the
usable references to any instantiated struct p9_req_t.

This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as
wrappers to kref_put(), kref_get() and kref_get_unless_zero().
These are used by the client and the transports to keep track of
valid requests' references.

p9_free_req() is added back and used as callback by kref_put().

Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by
kmem_cache_free() will not be reused for another type until the rcu
synchronisation period is over, so an address gotten under rcu read
lock is safe to inc_ref() without corrupting random memory while
the lock is held.

Co-developed-by: Dominique Martinet <dominique.martinet@cea.fr>
Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
Reported-by: syzbot+467050c1ce275af2a5b8@syzkaller.appspotmail.com
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
---
 include/net/9p/client.h | 14 +++++++++++++
 net/9p/client.c         | 54 +++++++++++++++++++++++++++++++++++++++++++------
 net/9p/trans_fd.c       | 11 +++++++++-
 net/9p/trans_rdma.c     |  1 +
 4 files changed, 73 insertions(+), 7 deletions(-)

Comments

piaojun Aug. 13, 2018, 1:37 a.m. UTC | #1
Hi Tomas & Dominique,

Could you help paste the reason of the crash bug to help others understand
more clearly? And I have another question below.

On 2018/8/11 22:42, Tomas Bortoli wrote:
> To avoid use-after-free(s), use a refcount to keep track of the
> usable references to any instantiated struct p9_req_t.
> 
> This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as
> wrappers to kref_put(), kref_get() and kref_get_unless_zero().
> These are used by the client and the transports to keep track of
> valid requests' references.
> 
> p9_free_req() is added back and used as callback by kref_put().
> 
> Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by
> kmem_cache_free() will not be reused for another type until the rcu
> synchronisation period is over, so an address gotten under rcu read
> lock is safe to inc_ref() without corrupting random memory while
> the lock is held.
> 
> Co-developed-by: Dominique Martinet <dominique.martinet@cea.fr>
> Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
> Reported-by: syzbot+467050c1ce275af2a5b8@syzkaller.appspotmail.com
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> ---
>  include/net/9p/client.h | 14 +++++++++++++
>  net/9p/client.c         | 54 +++++++++++++++++++++++++++++++++++++++++++------
>  net/9p/trans_fd.c       | 11 +++++++++-
>  net/9p/trans_rdma.c     |  1 +
>  4 files changed, 73 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index 735f3979d559..947a570307a6 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -94,6 +94,7 @@ enum p9_req_status_t {
>  struct p9_req_t {
>  	int status;
>  	int t_err;
> +	struct kref refcount;
>  	wait_queue_head_t wq;
>  	struct p9_fcall tc;
>  	struct p9_fcall rc;
> @@ -233,6 +234,19 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
>  int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
>  void p9_fcall_fini(struct p9_fcall *fc);
>  struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
> +
> +static inline void p9_req_get(struct p9_req_t *r)
> +{
> +	kref_get(&r->refcount);
> +}
> +
> +static inline int p9_req_try_get(struct p9_req_t *r)
> +{
> +	return kref_get_unless_zero(&r->refcount);
> +}
> +
> +int p9_req_put(struct p9_req_t *r);
> +
>  void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
>  
>  int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
> diff --git a/net/9p/client.c b/net/9p/client.c
> index 7942c0bfcc5b..83f2f0aadc14 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -310,6 +310,18 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>  	if (tag < 0)
>  		goto free;
>  
> +	/*	Init ref to two because in the general case there is one ref
> +	 *	that is put asynchronously by a writer thread, one ref
> +	 *	temporarily given by p9_tag_lookup and put by p9_client_cb
> +	 *	in the recv thread, and one ref put by p9_remove_tag in the
> +	 *	main thread. The only exception is virtio that does not use
> +	 *	p9_tag_lookup but does not have a writer thread either
> +	 *	(the write happens synchronously in the request/zc_request
> +	 *	callback), so p9_client_cb eats the second ref there
> +	 *	as the pointer is duplicated directly by virtqueue_add_sgs()
> +	 */
> +	refcount_set(&req->refcount.refcount, 2);
> +
>  	return req;
>  
>  free:
> @@ -333,10 +345,21 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
>  	struct p9_req_t *req;
>  
>  	rcu_read_lock();
> +again:
>  	req = idr_find(&c->reqs, tag);
> -	/* There's no refcount on the req; a malicious server could cause
> -	 * us to dereference a NULL pointer
> -	 */
> +	if (req) {
> +		/* We have to be careful with the req found under rcu_read_lock
> +		 * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
> +		 * ref again without corrupting other data, then check again
> +		 * that the tag matches once we have the ref
> +		 */
> +		if (!p9_req_try_get(req))
> +			goto again;
> +		if (req->tc.tag != tag) {
> +			p9_req_put(req);
> +			goto again;
> +		}
> +	}
>  	rcu_read_unlock();
>  
>  	return req;
> @@ -350,7 +373,7 @@ EXPORT_SYMBOL(p9_tag_lookup);
>   *
>   * Context: Any context.
>   */
> -static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
> +static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
>  {
>  	unsigned long flags;
>  	u16 tag = r->tc.tag;
> @@ -359,11 +382,23 @@ static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
>  	spin_lock_irqsave(&c->lock, flags);
>  	idr_remove(&c->reqs, tag);
>  	spin_unlock_irqrestore(&c->lock, flags);
> +	return p9_req_put(r);
> +}
> +
> +static void p9_req_free(struct kref *ref)
> +{
> +	struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
>  	p9_fcall_fini(&r->tc);
>  	p9_fcall_fini(&r->rc);
>  	kmem_cache_free(p9_req_cache, r);
>  }
>  
> +int p9_req_put(struct p9_req_t *r)
> +{
> +	return kref_put(&r->refcount, p9_req_free);
> +}
> +EXPORT_SYMBOL(p9_req_put);
> +
>  /**
>   * p9_tag_cleanup - cleans up tags structure and reclaims resources
>   * @c:  v9fs client struct
> @@ -379,7 +414,9 @@ static void p9_tag_cleanup(struct p9_client *c)
>  	rcu_read_lock();
>  	idr_for_each_entry(&c->reqs, req, id) {
>  		pr_info("Tag %d still in use\n", id);
> -		p9_tag_remove(c, req);
> +		if (p9_tag_remove(c, req) == 0)
> +			pr_warn("Packet with tag %d has still references",
> +				req->tc.tag);
>  	}
>  	rcu_read_unlock();
>  }
> @@ -403,6 +440,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
>  
>  	wake_up(&req->wq);
>  	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
> +	p9_req_put(req);
>  }
>  EXPORT_SYMBOL(p9_client_cb);
>  
> @@ -682,6 +720,8 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
>  	return req;
>  reterr:
>  	p9_tag_remove(c, req);
> +	/* We have to put also the 2nd reference as it won't be used */
> +	p9_req_put(req);
>  	return ERR_PTR(err);
>  }
>  
> @@ -716,6 +756,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
>  
>  	err = c->trans_mod->request(c, req);
>  	if (err < 0) {
> +		/* write won't happen */
> +		p9_req_put(req);
>  		if (err != -ERESTARTSYS && err != -EFAULT)
>  			c->status = Disconnected;
>  		goto recalc_sigpending;
> @@ -2241,7 +2283,7 @@ EXPORT_SYMBOL(p9_client_readlink);
>  
>  int __init p9_client_init(void)
>  {
> -	p9_req_cache = KMEM_CACHE(p9_req_t, 0);
> +	p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
>  	return p9_req_cache ? 0 : -ENOMEM;
>  }
>  
> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
> index 20f46f13fe83..686e24e355d0 100644
> --- a/net/9p/trans_fd.c
> +++ b/net/9p/trans_fd.c
> @@ -132,6 +132,7 @@ struct p9_conn {
>  	struct list_head req_list;
>  	struct list_head unsent_req_list;
>  	struct p9_req_t *req;
> +	struct p9_req_t *wreq;

Why adding a wreq for write work? And I wonder we should rename req to
rreq?

>  	char tmp_buf[7];
>  	struct p9_fcall rc;
>  	int wpos;
> @@ -383,6 +384,7 @@ static void p9_read_work(struct work_struct *work)
>  		m->rc.sdata = NULL;
>  		m->rc.offset = 0;
>  		m->rc.capacity = 0;
> +		p9_req_put(m->req);
>  		m->req = NULL;
>  	}
>  
> @@ -472,6 +474,8 @@ static void p9_write_work(struct work_struct *work)
>  		m->wbuf = req->tc.sdata;
>  		m->wsize = req->tc.size;
>  		m->wpos = 0;
> +		p9_req_get(req);
> +		m->wreq = req;
>  		spin_unlock(&m->client->lock);
>  	}
>  
> @@ -492,8 +496,11 @@ static void p9_write_work(struct work_struct *work)
>  	}
>  
>  	m->wpos += err;
> -	if (m->wpos == m->wsize)
> +	if (m->wpos == m->wsize) {
>  		m->wpos = m->wsize = 0;
> +		p9_req_put(m->wreq);
> +		m->wreq = NULL;
> +	}
>  
>  end_clear:
>  	clear_bit(Wworksched, &m->wsched);
> @@ -694,6 +701,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
>  	if (req->status == REQ_STATUS_UNSENT) {
>  		list_del(&req->req_list);
>  		req->status = REQ_STATUS_FLSHD;
> +		p9_req_put(req);
>  		ret = 0;
>  	}
>  	spin_unlock(&client->lock);
> @@ -711,6 +719,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
>  	spin_lock(&client->lock);
>  	list_del(&req->req_list);
>  	spin_unlock(&client->lock);
> +	p9_req_put(req);
>  
>  	return 0;
>  }
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index c60655c90c9e..8cff368a11e3 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -365,6 +365,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
>  			    c->busa, c->req->tc.size,
>  			    DMA_TO_DEVICE);
>  	up(&rdma->sq_sem);
> +	p9_req_put(c->req);
>  	kfree(c);
>  }
>  
>
Dominique Martinet Aug. 13, 2018, 1:48 a.m. UTC | #2
piaojun wrote on Mon, Aug 13, 2018:
> Could you help paste the reason of the crash bug to help others
> understand more clearly? And I have another question below.

The problem for tcp (but other transports have a similar problem) is
that with a malicious server like syzkaller they can try to submit
replies before the request came in.

This leads in the writer thread trying to write a buffer that has
already been freed, and if memory has been reused could potentially leak
some information.

Now, with the previous patches this is based on this would be a slab and
the likeliness of it being sensitive information is rather low (it would
likely be some other packet being sent twice, or a mix and match of two
packets that would have been sent anyway), but it would nevertheless be
a use after free.


There is a second advantage to this reference counting, that is now we
have this system we will be able to implement flush asynchronously.
This will remove the need for the 'goto again' in p9_client_rpc which
was making 9p threads unkillable in practice if the server would not
reply to the flush requests.
Even if the server replies I've always found myself needing to hit ^C
multiple times to exit a process doing I/Os and I think fixing that
behaviour will make 9p more comfortable to use.


> > diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
> > index 20f46f13fe83..686e24e355d0 100644
> > --- a/net/9p/trans_fd.c
> > +++ b/net/9p/trans_fd.c
> > @@ -132,6 +132,7 @@ struct p9_conn {
> >  	struct list_head req_list;
> >  	struct list_head unsent_req_list;
> >  	struct p9_req_t *req;
> > +	struct p9_req_t *wreq;
> 
> Why adding a wreq for write work? And I wonder we should rename req to
> rreq?

We need to store a pointer to the request for the write thread because
we need to put the reference to it when we're done writing its content.

Previously, the worker would only store the write buffer there but
that's not enough to figure what request to dereference.


I personally don't think renaming req to rreq would bring much but it
could be done in another patch if you think that'd be helpful; I think
it shouldn't be done here at least to make the patch more readable.
Dmitry Vyukov Aug. 13, 2018, 1:04 p.m. UTC | #3
On Mon, Aug 13, 2018 at 3:48 AM, Dominique Martinet
<asmadeus@codewreck.org> wrote:
> piaojun wrote on Mon, Aug 13, 2018:
>> Could you help paste the reason of the crash bug to help others
>> understand more clearly? And I have another question below.
>
> The problem for tcp (but other transports have a similar problem) is
> that with a malicious server like syzkaller they can try to submit
> replies before the request came in.
>
> This leads in the writer thread trying to write a buffer that has
> already been freed, and if memory has been reused could potentially leak
> some information.
>
> Now, with the previous patches this is based on this would be a slab and
> the likeliness of it being sensitive information is rather low (it would
> likely be some other packet being sent twice, or a mix and match of two
> packets that would have been sent anyway), but it would nevertheless be
> a use after free.
>
>
> There is a second advantage to this reference counting, that is now we
> have this system we will be able to implement flush asynchronously.
> This will remove the need for the 'goto again' in p9_client_rpc which
> was making 9p threads unkillable in practice if the server would not
> reply to the flush requests.


Fixing unkillalble task would be nice. Don't know how much they are of
a problem in real life, but fixing them would allow fuzzer to find
other, potentially more critical bugs in 9p. These "task hung" crashes
are quite unpleasant for the fuzzer.

Thanks for all recent 9p work, Tomas!


> Even if the server replies I've always found myself needing to hit ^C
> multiple times to exit a process doing I/Os and I think fixing that
> behaviour will make 9p more comfortable to use.
>
>
>> > diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
>> > index 20f46f13fe83..686e24e355d0 100644
>> > --- a/net/9p/trans_fd.c
>> > +++ b/net/9p/trans_fd.c
>> > @@ -132,6 +132,7 @@ struct p9_conn {
>> >     struct list_head req_list;
>> >     struct list_head unsent_req_list;
>> >     struct p9_req_t *req;
>> > +   struct p9_req_t *wreq;
>>
>> Why adding a wreq for write work? And I wonder we should rename req to
>> rreq?
>
> We need to store a pointer to the request for the write thread because
> we need to put the reference to it when we're done writing its content.
>
> Previously, the worker would only store the write buffer there but
> that's not enough to figure what request to dereference.
>
>
> I personally don't think renaming req to rreq would bring much but it
> could be done in another patch if you think that'd be helpful; I think
> it shouldn't be done here at least to make the patch more readable.
>
> --
> Dominique
>
> --
> You received this message because you are subscribed to the Google Groups "syzkaller" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+unsubscribe@googlegroups.com.
> For more options, visit https://groups.google.com/d/optout.
Tomas Bortoli Aug. 13, 2018, 6:14 p.m. UTC | #4
On 08/13/2018 03:04 PM, Dmitry Vyukov wrote:
> On Mon, Aug 13, 2018 at 3:48 AM, Dominique Martinet
> <asmadeus@codewreck.org> wrote:
>> piaojun wrote on Mon, Aug 13, 2018:
>>> Could you help paste the reason of the crash bug to help others
>>> understand more clearly? And I have another question below.
>>
>> The problem for tcp (but other transports have a similar problem) is
>> that with a malicious server like syzkaller they can try to submit
>> replies before the request came in.
>>
>> This leads in the writer thread trying to write a buffer that has
>> already been freed, and if memory has been reused could potentially leak
>> some information.
>>
>> Now, with the previous patches this is based on this would be a slab and
>> the likeliness of it being sensitive information is rather low (it would
>> likely be some other packet being sent twice, or a mix and match of two
>> packets that would have been sent anyway), but it would nevertheless be
>> a use after free.
>>
>>
>> There is a second advantage to this reference counting, that is now we
>> have this system we will be able to implement flush asynchronously.
>> This will remove the need for the 'goto again' in p9_client_rpc which
>> was making 9p threads unkillable in practice if the server would not
>> reply to the flush requests.
> 
> 
> Fixing unkillalble task would be nice. Don't know how much they are of
> a problem in real life, but fixing them would allow fuzzer to find
> other, potentially more critical bugs in 9p. These "task hung" crashes
> are quite unpleasant for the fuzzer.
> 
> Thanks for all recent 9p work, Tomas!
> 

You are welcome, I have to thank Dominique that helped me a lot, I like
to help here, it's educative.

> 
>> Even if the server replies I've always found myself needing to hit ^C
>> multiple times to exit a process doing I/Os and I think fixing that
>> behaviour will make 9p more comfortable to use.
>>
>>
>>>> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
>>>> index 20f46f13fe83..686e24e355d0 100644
>>>> --- a/net/9p/trans_fd.c
>>>> +++ b/net/9p/trans_fd.c
>>>> @@ -132,6 +132,7 @@ struct p9_conn {
>>>>     struct list_head req_list;
>>>>     struct list_head unsent_req_list;
>>>>     struct p9_req_t *req;
>>>> +   struct p9_req_t *wreq;
>>>
>>> Why adding a wreq for write work? And I wonder we should rename req to
>>> rreq?
>>
>> We need to store a pointer to the request for the write thread because
>> we need to put the reference to it when we're done writing its content.
>>
>> Previously, the worker would only store the write buffer there but
>> that's not enough to figure what request to dereference.
>>
>>
>> I personally don't think renaming req to rreq would bring much but it
>> could be done in another patch if you think that'd be helpful; I think
>> it shouldn't be done here at least to make the patch more readable.
>>
>> --
>> Dominique
>>
>> --
>> You received this message because you are subscribed to the Google Groups "syzkaller" group.
>> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+unsubscribe@googlegroups.com.
>> For more options, visit https://groups.google.com/d/optout.
piaojun Aug. 14, 2018, 1:38 a.m. UTC | #5
Hi Tomas & Dominique,

On 2018/8/11 22:42, Tomas Bortoli wrote:
> To avoid use-after-free(s), use a refcount to keep track of the
> usable references to any instantiated struct p9_req_t.
> 
> This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as
> wrappers to kref_put(), kref_get() and kref_get_unless_zero().
> These are used by the client and the transports to keep track of
> valid requests' references.
> 
> p9_free_req() is added back and used as callback by kref_put().
> 
> Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by
> kmem_cache_free() will not be reused for another type until the rcu
> synchronisation period is over, so an address gotten under rcu read
> lock is safe to inc_ref() without corrupting random memory while
> the lock is held.
> 
> Co-developed-by: Dominique Martinet <dominique.martinet@cea.fr>
> Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
> Reported-by: syzbot+467050c1ce275af2a5b8@syzkaller.appspotmail.com
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> ---
>  include/net/9p/client.h | 14 +++++++++++++
>  net/9p/client.c         | 54 +++++++++++++++++++++++++++++++++++++++++++------
>  net/9p/trans_fd.c       | 11 +++++++++-
>  net/9p/trans_rdma.c     |  1 +
>  4 files changed, 73 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index 735f3979d559..947a570307a6 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -94,6 +94,7 @@ enum p9_req_status_t {
>  struct p9_req_t {
>  	int status;
>  	int t_err;
> +	struct kref refcount;
>  	wait_queue_head_t wq;
>  	struct p9_fcall tc;
>  	struct p9_fcall rc;
> @@ -233,6 +234,19 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
>  int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
>  void p9_fcall_fini(struct p9_fcall *fc);
>  struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
> +
> +static inline void p9_req_get(struct p9_req_t *r)
> +{
> +	kref_get(&r->refcount);
> +}
> +
> +static inline int p9_req_try_get(struct p9_req_t *r)
> +{
> +	return kref_get_unless_zero(&r->refcount);
> +}
> +
> +int p9_req_put(struct p9_req_t *r);
> +
>  void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
>  
>  int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
> diff --git a/net/9p/client.c b/net/9p/client.c
> index 7942c0bfcc5b..83f2f0aadc14 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -310,6 +310,18 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>  	if (tag < 0)
>  		goto free;
>  
> +	/*	Init ref to two because in the general case there is one ref
> +	 *	that is put asynchronously by a writer thread, one ref
> +	 *	temporarily given by p9_tag_lookup and put by p9_client_cb
> +	 *	in the recv thread, and one ref put by p9_remove_tag in the

There is a spell mistake, p9_remove_tag->p9_tag_remove, and sorry for not
pointing this in last comment.

Thanks,
Jun

> +	 *	main thread. The only exception is virtio that does not use
> +	 *	p9_tag_lookup but does not have a writer thread either
> +	 *	(the write happens synchronously in the request/zc_request
> +	 *	callback), so p9_client_cb eats the second ref there
> +	 *	as the pointer is duplicated directly by virtqueue_add_sgs()
> +	 */
> +	refcount_set(&req->refcount.refcount, 2);
> +
>  	return req;
>  
>  free:
> @@ -333,10 +345,21 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
>  	struct p9_req_t *req;
>  
>  	rcu_read_lock();
> +again:
>  	req = idr_find(&c->reqs, tag);
> -	/* There's no refcount on the req; a malicious server could cause
> -	 * us to dereference a NULL pointer
> -	 */
> +	if (req) {
> +		/* We have to be careful with the req found under rcu_read_lock
> +		 * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
> +		 * ref again without corrupting other data, then check again
> +		 * that the tag matches once we have the ref
> +		 */
> +		if (!p9_req_try_get(req))
> +			goto again;
> +		if (req->tc.tag != tag) {
> +			p9_req_put(req);
> +			goto again;
> +		}
> +	}
>  	rcu_read_unlock();
>  
>  	return req;
> @@ -350,7 +373,7 @@ EXPORT_SYMBOL(p9_tag_lookup);
>   *
>   * Context: Any context.
>   */
> -static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
> +static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
>  {
>  	unsigned long flags;
>  	u16 tag = r->tc.tag;
> @@ -359,11 +382,23 @@ static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
>  	spin_lock_irqsave(&c->lock, flags);
>  	idr_remove(&c->reqs, tag);
>  	spin_unlock_irqrestore(&c->lock, flags);
> +	return p9_req_put(r);
> +}
> +
> +static void p9_req_free(struct kref *ref)
> +{
> +	struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
>  	p9_fcall_fini(&r->tc);
>  	p9_fcall_fini(&r->rc);
>  	kmem_cache_free(p9_req_cache, r);
>  }
>  
> +int p9_req_put(struct p9_req_t *r)
> +{
> +	return kref_put(&r->refcount, p9_req_free);
> +}
> +EXPORT_SYMBOL(p9_req_put);
> +
>  /**
>   * p9_tag_cleanup - cleans up tags structure and reclaims resources
>   * @c:  v9fs client struct
> @@ -379,7 +414,9 @@ static void p9_tag_cleanup(struct p9_client *c)
>  	rcu_read_lock();
>  	idr_for_each_entry(&c->reqs, req, id) {
>  		pr_info("Tag %d still in use\n", id);
> -		p9_tag_remove(c, req);
> +		if (p9_tag_remove(c, req) == 0)
> +			pr_warn("Packet with tag %d has still references",
> +				req->tc.tag);
>  	}
>  	rcu_read_unlock();
>  }
> @@ -403,6 +440,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
>  
>  	wake_up(&req->wq);
>  	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
> +	p9_req_put(req);
>  }
>  EXPORT_SYMBOL(p9_client_cb);
>  
> @@ -682,6 +720,8 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
>  	return req;
>  reterr:
>  	p9_tag_remove(c, req);
> +	/* We have to put also the 2nd reference as it won't be used */
> +	p9_req_put(req);
>  	return ERR_PTR(err);
>  }
>  
> @@ -716,6 +756,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
>  
>  	err = c->trans_mod->request(c, req);
>  	if (err < 0) {
> +		/* write won't happen */
> +		p9_req_put(req);
>  		if (err != -ERESTARTSYS && err != -EFAULT)
>  			c->status = Disconnected;
>  		goto recalc_sigpending;
> @@ -2241,7 +2283,7 @@ EXPORT_SYMBOL(p9_client_readlink);
>  
>  int __init p9_client_init(void)
>  {
> -	p9_req_cache = KMEM_CACHE(p9_req_t, 0);
> +	p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
>  	return p9_req_cache ? 0 : -ENOMEM;
>  }
>  
> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
> index 20f46f13fe83..686e24e355d0 100644
> --- a/net/9p/trans_fd.c
> +++ b/net/9p/trans_fd.c
> @@ -132,6 +132,7 @@ struct p9_conn {
>  	struct list_head req_list;
>  	struct list_head unsent_req_list;
>  	struct p9_req_t *req;
> +	struct p9_req_t *wreq;
>  	char tmp_buf[7];
>  	struct p9_fcall rc;
>  	int wpos;
> @@ -383,6 +384,7 @@ static void p9_read_work(struct work_struct *work)
>  		m->rc.sdata = NULL;
>  		m->rc.offset = 0;
>  		m->rc.capacity = 0;
> +		p9_req_put(m->req);
>  		m->req = NULL;
>  	}
>  
> @@ -472,6 +474,8 @@ static void p9_write_work(struct work_struct *work)
>  		m->wbuf = req->tc.sdata;
>  		m->wsize = req->tc.size;
>  		m->wpos = 0;
> +		p9_req_get(req);
> +		m->wreq = req;
>  		spin_unlock(&m->client->lock);
>  	}
>  
> @@ -492,8 +496,11 @@ static void p9_write_work(struct work_struct *work)
>  	}
>  
>  	m->wpos += err;
> -	if (m->wpos == m->wsize)
> +	if (m->wpos == m->wsize) {
>  		m->wpos = m->wsize = 0;
> +		p9_req_put(m->wreq);
> +		m->wreq = NULL;
> +	}
>  
>  end_clear:
>  	clear_bit(Wworksched, &m->wsched);
> @@ -694,6 +701,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
>  	if (req->status == REQ_STATUS_UNSENT) {
>  		list_del(&req->req_list);
>  		req->status = REQ_STATUS_FLSHD;
> +		p9_req_put(req);
>  		ret = 0;
>  	}
>  	spin_unlock(&client->lock);
> @@ -711,6 +719,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
>  	spin_lock(&client->lock);
>  	list_del(&req->req_list);
>  	spin_unlock(&client->lock);
> +	p9_req_put(req);
>  
>  	return 0;
>  }
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index c60655c90c9e..8cff368a11e3 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -365,6 +365,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
>  			    c->busa, c->req->tc.size,
>  			    DMA_TO_DEVICE);
>  	up(&rdma->sq_sem);
> +	p9_req_put(c->req);
>  	kfree(c);
>  }
>  
>
Tomas Bortoli Aug. 14, 2018, 5:44 p.m. UTC | #6
On 08/14/2018 03:38 AM, piaojun wrote:
> Hi Tomas & Dominique,
> 
> On 2018/8/11 22:42, Tomas Bortoli wrote:
>> To avoid use-after-free(s), use a refcount to keep track of the
>> usable references to any instantiated struct p9_req_t.
>>
>> This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as
>> wrappers to kref_put(), kref_get() and kref_get_unless_zero().
>> These are used by the client and the transports to keep track of
>> valid requests' references.
>>
>> p9_free_req() is added back and used as callback by kref_put().
>>
>> Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by
>> kmem_cache_free() will not be reused for another type until the rcu
>> synchronisation period is over, so an address gotten under rcu read
>> lock is safe to inc_ref() without corrupting random memory while
>> the lock is held.
>>
>> Co-developed-by: Dominique Martinet <dominique.martinet@cea.fr>
>> Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
>> Reported-by: syzbot+467050c1ce275af2a5b8@syzkaller.appspotmail.com
>> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
>> ---
>>  include/net/9p/client.h | 14 +++++++++++++
>>  net/9p/client.c         | 54 +++++++++++++++++++++++++++++++++++++++++++------
>>  net/9p/trans_fd.c       | 11 +++++++++-
>>  net/9p/trans_rdma.c     |  1 +
>>  4 files changed, 73 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
>> index 735f3979d559..947a570307a6 100644
>> --- a/include/net/9p/client.h
>> +++ b/include/net/9p/client.h
>> @@ -94,6 +94,7 @@ enum p9_req_status_t {
>>  struct p9_req_t {
>>  	int status;
>>  	int t_err;
>> +	struct kref refcount;
>>  	wait_queue_head_t wq;
>>  	struct p9_fcall tc;
>>  	struct p9_fcall rc;
>> @@ -233,6 +234,19 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
>>  int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
>>  void p9_fcall_fini(struct p9_fcall *fc);
>>  struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
>> +
>> +static inline void p9_req_get(struct p9_req_t *r)
>> +{
>> +	kref_get(&r->refcount);
>> +}
>> +
>> +static inline int p9_req_try_get(struct p9_req_t *r)
>> +{
>> +	return kref_get_unless_zero(&r->refcount);
>> +}
>> +
>> +int p9_req_put(struct p9_req_t *r);
>> +
>>  void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
>>  
>>  int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
>> diff --git a/net/9p/client.c b/net/9p/client.c
>> index 7942c0bfcc5b..83f2f0aadc14 100644
>> --- a/net/9p/client.c
>> +++ b/net/9p/client.c
>> @@ -310,6 +310,18 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>>  	if (tag < 0)
>>  		goto free;
>>  
>> +	/*	Init ref to two because in the general case there is one ref
>> +	 *	that is put asynchronously by a writer thread, one ref
>> +	 *	temporarily given by p9_tag_lookup and put by p9_client_cb
>> +	 *	in the recv thread, and one ref put by p9_remove_tag in the
> 
> There is a spell mistake, p9_remove_tag->p9_tag_remove, and sorry for not
> pointing this in last comment.
> 
> Thanks,
> Jun
> 
>> +	 *	main thread. The only exception is virtio that does not use
>> +	 *	p9_tag_lookup but does not have a writer thread either
>> +	 *	(the write happens synchronously in the request/zc_request
>> +	 *	callback), so p9_client_cb eats the second ref there
>> +	 *	as the pointer is duplicated directly by virtqueue_add_sgs()
>> +	 */
>> +	refcount_set(&req->refcount.refcount, 2);
>> +
>>  	return req;
>>  
>>  free:
>> @@ -333,10 +345,21 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
>>  	struct p9_req_t *req;
>>  
>>  	rcu_read_lock();
>> +again:
>>  	req = idr_find(&c->reqs, tag);
>> -	/* There's no refcount on the req; a malicious server could cause
>> -	 * us to dereference a NULL pointer
>> -	 */
>> +	if (req) {
>> +		/* We have to be careful with the req found under rcu_read_lock
>> +		 * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
>> +		 * ref again without corrupting other data, then check again
>> +		 * that the tag matches once we have the ref
>> +		 */
>> +		if (!p9_req_try_get(req))
>> +			goto again;
>> +		if (req->tc.tag != tag) {
>> +			p9_req_put(req);
>> +			goto again;
>> +		}
>> +	}
>>  	rcu_read_unlock();
>>  
>>  	return req;
>> @@ -350,7 +373,7 @@ EXPORT_SYMBOL(p9_tag_lookup);
>>   *
>>   * Context: Any context.
>>   */
>> -static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
>> +static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
>>  {
>>  	unsigned long flags;
>>  	u16 tag = r->tc.tag;
>> @@ -359,11 +382,23 @@ static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
>>  	spin_lock_irqsave(&c->lock, flags);
>>  	idr_remove(&c->reqs, tag);
>>  	spin_unlock_irqrestore(&c->lock, flags);
>> +	return p9_req_put(r);
>> +}
>> +
>> +static void p9_req_free(struct kref *ref)
>> +{
>> +	struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
>>  	p9_fcall_fini(&r->tc);
>>  	p9_fcall_fini(&r->rc);
>>  	kmem_cache_free(p9_req_cache, r);
>>  }
>>  
>> +int p9_req_put(struct p9_req_t *r)
>> +{
>> +	return kref_put(&r->refcount, p9_req_free);
>> +}
>> +EXPORT_SYMBOL(p9_req_put);
>> +
>>  /**
>>   * p9_tag_cleanup - cleans up tags structure and reclaims resources
>>   * @c:  v9fs client struct
>> @@ -379,7 +414,9 @@ static void p9_tag_cleanup(struct p9_client *c)
>>  	rcu_read_lock();
>>  	idr_for_each_entry(&c->reqs, req, id) {
>>  		pr_info("Tag %d still in use\n", id);
>> -		p9_tag_remove(c, req);
>> +		if (p9_tag_remove(c, req) == 0)
>> +			pr_warn("Packet with tag %d has still references",
>> +				req->tc.tag);
>>  	}
>>  	rcu_read_unlock();
>>  }
>> @@ -403,6 +440,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
>>  
>>  	wake_up(&req->wq);
>>  	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
>> +	p9_req_put(req);
>>  }
>>  EXPORT_SYMBOL(p9_client_cb);
>>  
>> @@ -682,6 +720,8 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
>>  	return req;
>>  reterr:
>>  	p9_tag_remove(c, req);
>> +	/* We have to put also the 2nd reference as it won't be used */
>> +	p9_req_put(req);
>>  	return ERR_PTR(err);
>>  }
>>  
>> @@ -716,6 +756,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
>>  
>>  	err = c->trans_mod->request(c, req);
>>  	if (err < 0) {
>> +		/* write won't happen */
>> +		p9_req_put(req);
>>  		if (err != -ERESTARTSYS && err != -EFAULT)
>>  			c->status = Disconnected;
>>  		goto recalc_sigpending;
>> @@ -2241,7 +2283,7 @@ EXPORT_SYMBOL(p9_client_readlink);
>>  
>>  int __init p9_client_init(void)
>>  {
>> -	p9_req_cache = KMEM_CACHE(p9_req_t, 0);
>> +	p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
>>  	return p9_req_cache ? 0 : -ENOMEM;
>>  }
>>  
>> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
>> index 20f46f13fe83..686e24e355d0 100644
>> --- a/net/9p/trans_fd.c
>> +++ b/net/9p/trans_fd.c
>> @@ -132,6 +132,7 @@ struct p9_conn {
>>  	struct list_head req_list;
>>  	struct list_head unsent_req_list;
>>  	struct p9_req_t *req;
>> +	struct p9_req_t *wreq;
>>  	char tmp_buf[7];
>>  	struct p9_fcall rc;
>>  	int wpos;
>> @@ -383,6 +384,7 @@ static void p9_read_work(struct work_struct *work)
>>  		m->rc.sdata = NULL;
>>  		m->rc.offset = 0;
>>  		m->rc.capacity = 0;
>> +		p9_req_put(m->req);
>>  		m->req = NULL;
>>  	}
>>  
>> @@ -472,6 +474,8 @@ static void p9_write_work(struct work_struct *work)
>>  		m->wbuf = req->tc.sdata;
>>  		m->wsize = req->tc.size;
>>  		m->wpos = 0;
>> +		p9_req_get(req);
>> +		m->wreq = req;
>>  		spin_unlock(&m->client->lock);
>>  	}
>>  
>> @@ -492,8 +496,11 @@ static void p9_write_work(struct work_struct *work)
>>  	}
>>  
>>  	m->wpos += err;
>> -	if (m->wpos == m->wsize)
>> +	if (m->wpos == m->wsize) {
>>  		m->wpos = m->wsize = 0;
>> +		p9_req_put(m->wreq);
>> +		m->wreq = NULL;
>> +	}
>>  
>>  end_clear:
>>  	clear_bit(Wworksched, &m->wsched);
>> @@ -694,6 +701,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
>>  	if (req->status == REQ_STATUS_UNSENT) {
>>  		list_del(&req->req_list);
>>  		req->status = REQ_STATUS_FLSHD;
>> +		p9_req_put(req);
>>  		ret = 0;
>>  	}
>>  	spin_unlock(&client->lock);
>> @@ -711,6 +719,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
>>  	spin_lock(&client->lock);
>>  	list_del(&req->req_list);
>>  	spin_unlock(&client->lock);
>> +	p9_req_put(req);
>>  
>>  	return 0;
>>  }
>> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
>> index c60655c90c9e..8cff368a11e3 100644
>> --- a/net/9p/trans_rdma.c
>> +++ b/net/9p/trans_rdma.c
>> @@ -365,6 +365,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
>>  			    c->busa, c->req->tc.size,
>>  			    DMA_TO_DEVICE);
>>  	up(&rdma->sq_sem);
>> +	p9_req_put(c->req);
>>  	kfree(c);
>>  }
>>  
>>
> 

Spell fixed

Tomas
diff mbox series

Patch

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 735f3979d559..947a570307a6 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -94,6 +94,7 @@  enum p9_req_status_t {
 struct p9_req_t {
 	int status;
 	int t_err;
+	struct kref refcount;
 	wait_queue_head_t wq;
 	struct p9_fcall tc;
 	struct p9_fcall rc;
@@ -233,6 +234,19 @@  int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
 int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
 void p9_fcall_fini(struct p9_fcall *fc);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
+
+static inline void p9_req_get(struct p9_req_t *r)
+{
+	kref_get(&r->refcount);
+}
+
+static inline int p9_req_try_get(struct p9_req_t *r)
+{
+	return kref_get_unless_zero(&r->refcount);
+}
+
+int p9_req_put(struct p9_req_t *r);
+
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
 
 int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
diff --git a/net/9p/client.c b/net/9p/client.c
index 7942c0bfcc5b..83f2f0aadc14 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -310,6 +310,18 @@  p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
 	if (tag < 0)
 		goto free;
 
+	/*	Init ref to two because in the general case there is one ref
+	 *	that is put asynchronously by a writer thread, one ref
+	 *	temporarily given by p9_tag_lookup and put by p9_client_cb
+	 *	in the recv thread, and one ref put by p9_remove_tag in the
+	 *	main thread. The only exception is virtio that does not use
+	 *	p9_tag_lookup but does not have a writer thread either
+	 *	(the write happens synchronously in the request/zc_request
+	 *	callback), so p9_client_cb eats the second ref there
+	 *	as the pointer is duplicated directly by virtqueue_add_sgs()
+	 */
+	refcount_set(&req->refcount.refcount, 2);
+
 	return req;
 
 free:
@@ -333,10 +345,21 @@  struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
 	struct p9_req_t *req;
 
 	rcu_read_lock();
+again:
 	req = idr_find(&c->reqs, tag);
-	/* There's no refcount on the req; a malicious server could cause
-	 * us to dereference a NULL pointer
-	 */
+	if (req) {
+		/* We have to be careful with the req found under rcu_read_lock
+		 * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
+		 * ref again without corrupting other data, then check again
+		 * that the tag matches once we have the ref
+		 */
+		if (!p9_req_try_get(req))
+			goto again;
+		if (req->tc.tag != tag) {
+			p9_req_put(req);
+			goto again;
+		}
+	}
 	rcu_read_unlock();
 
 	return req;
@@ -350,7 +373,7 @@  EXPORT_SYMBOL(p9_tag_lookup);
  *
  * Context: Any context.
  */
-static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
+static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
 {
 	unsigned long flags;
 	u16 tag = r->tc.tag;
@@ -359,11 +382,23 @@  static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
 	spin_lock_irqsave(&c->lock, flags);
 	idr_remove(&c->reqs, tag);
 	spin_unlock_irqrestore(&c->lock, flags);
+	return p9_req_put(r);
+}
+
+static void p9_req_free(struct kref *ref)
+{
+	struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
 	p9_fcall_fini(&r->tc);
 	p9_fcall_fini(&r->rc);
 	kmem_cache_free(p9_req_cache, r);
 }
 
+int p9_req_put(struct p9_req_t *r)
+{
+	return kref_put(&r->refcount, p9_req_free);
+}
+EXPORT_SYMBOL(p9_req_put);
+
 /**
  * p9_tag_cleanup - cleans up tags structure and reclaims resources
  * @c:  v9fs client struct
@@ -379,7 +414,9 @@  static void p9_tag_cleanup(struct p9_client *c)
 	rcu_read_lock();
 	idr_for_each_entry(&c->reqs, req, id) {
 		pr_info("Tag %d still in use\n", id);
-		p9_tag_remove(c, req);
+		if (p9_tag_remove(c, req) == 0)
+			pr_warn("Packet with tag %d has still references",
+				req->tc.tag);
 	}
 	rcu_read_unlock();
 }
@@ -403,6 +440,7 @@  void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
 
 	wake_up(&req->wq);
 	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
+	p9_req_put(req);
 }
 EXPORT_SYMBOL(p9_client_cb);
 
@@ -682,6 +720,8 @@  static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
 	return req;
 reterr:
 	p9_tag_remove(c, req);
+	/* We have to put also the 2nd reference as it won't be used */
+	p9_req_put(req);
 	return ERR_PTR(err);
 }
 
@@ -716,6 +756,8 @@  p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
+		/* write won't happen */
+		p9_req_put(req);
 		if (err != -ERESTARTSYS && err != -EFAULT)
 			c->status = Disconnected;
 		goto recalc_sigpending;
@@ -2241,7 +2283,7 @@  EXPORT_SYMBOL(p9_client_readlink);
 
 int __init p9_client_init(void)
 {
-	p9_req_cache = KMEM_CACHE(p9_req_t, 0);
+	p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
 	return p9_req_cache ? 0 : -ENOMEM;
 }
 
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 20f46f13fe83..686e24e355d0 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -132,6 +132,7 @@  struct p9_conn {
 	struct list_head req_list;
 	struct list_head unsent_req_list;
 	struct p9_req_t *req;
+	struct p9_req_t *wreq;
 	char tmp_buf[7];
 	struct p9_fcall rc;
 	int wpos;
@@ -383,6 +384,7 @@  static void p9_read_work(struct work_struct *work)
 		m->rc.sdata = NULL;
 		m->rc.offset = 0;
 		m->rc.capacity = 0;
+		p9_req_put(m->req);
 		m->req = NULL;
 	}
 
@@ -472,6 +474,8 @@  static void p9_write_work(struct work_struct *work)
 		m->wbuf = req->tc.sdata;
 		m->wsize = req->tc.size;
 		m->wpos = 0;
+		p9_req_get(req);
+		m->wreq = req;
 		spin_unlock(&m->client->lock);
 	}
 
@@ -492,8 +496,11 @@  static void p9_write_work(struct work_struct *work)
 	}
 
 	m->wpos += err;
-	if (m->wpos == m->wsize)
+	if (m->wpos == m->wsize) {
 		m->wpos = m->wsize = 0;
+		p9_req_put(m->wreq);
+		m->wreq = NULL;
+	}
 
 end_clear:
 	clear_bit(Wworksched, &m->wsched);
@@ -694,6 +701,7 @@  static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 	if (req->status == REQ_STATUS_UNSENT) {
 		list_del(&req->req_list);
 		req->status = REQ_STATUS_FLSHD;
+		p9_req_put(req);
 		ret = 0;
 	}
 	spin_unlock(&client->lock);
@@ -711,6 +719,7 @@  static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
 	spin_lock(&client->lock);
 	list_del(&req->req_list);
 	spin_unlock(&client->lock);
+	p9_req_put(req);
 
 	return 0;
 }
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index c60655c90c9e..8cff368a11e3 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -365,6 +365,7 @@  send_done(struct ib_cq *cq, struct ib_wc *wc)
 			    c->busa, c->req->tc.size,
 			    DMA_TO_DEVICE);
 	up(&rdma->sq_sem);
+	p9_req_put(c->req);
 	kfree(c);
 }