diff mbox series

[093/104] virtiofsd: introduce inode refcount to prevent use-after-free

Message ID 20191212163904.159893-94-dgilbert@redhat.com
State New
Headers show
Series virtiofs daemon [all] | expand

Commit Message

Dr. David Alan Gilbert Dec. 12, 2019, 4:38 p.m. UTC
From: Stefan Hajnoczi <stefanha@redhat.com>

If thread A is using an inode it must not be deleted by thread B when
processing a FUSE_FORGET request.

The FUSE protocol itself already has a counter called nlookup that is
used in FUSE_FORGET messages.  We cannot trust this counter since the
untrusted client can manipulate it via FUSE_FORGET messages.

Introduce a new refcount to keep inodes alive for the required lifespan.
lo_inode_put() must be called to release a reference.  FUSE's nlookup
counter holds exactly one reference so that the inode stays alive as
long as the client still wants to remember it.

Note that the lo_inode->is_symlink field is moved to avoid creating a
hole in the struct due to struct field alignment.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tools/virtiofsd/passthrough_ll.c | 168 ++++++++++++++++++++++++++-----
 1 file changed, 145 insertions(+), 23 deletions(-)

Comments

Misono Tomohiro Jan. 16, 2020, 12:25 p.m. UTC | #1
> From: Stefan Hajnoczi <stefanha@redhat.com>
> 
> If thread A is using an inode it must not be deleted by thread B when
> processing a FUSE_FORGET request.
> 
> The FUSE protocol itself already has a counter called nlookup that is
> used in FUSE_FORGET messages.  We cannot trust this counter since the
> untrusted client can manipulate it via FUSE_FORGET messages.
> 
> Introduce a new refcount to keep inodes alive for the required lifespan.
> lo_inode_put() must be called to release a reference.  FUSE's nlookup
> counter holds exactly one reference so that the inode stays alive as
> long as the client still wants to remember it.
> 
> Note that the lo_inode->is_symlink field is moved to avoid creating a
> hole in the struct due to struct field alignment.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  tools/virtiofsd/passthrough_ll.c | 168 ++++++++++++++++++++++++++-----
>  1 file changed, 145 insertions(+), 23 deletions(-)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index b19c9ee328..8f4ab8351c 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -99,7 +99,13 @@ struct lo_key {
>  
>  struct lo_inode {
>      int fd;
> -    bool is_symlink;
> +
> +    /*
> +     * Atomic reference count for this object.  The nlookup field holds a
> +     * reference and release it when nlookup reaches 0.
> +     */
> +    gint refcount;
> +
>      struct lo_key key;
>  
>      /*
> @@ -118,6 +124,8 @@ struct lo_inode {
>      fuse_ino_t fuse_ino;
>      pthread_mutex_t plock_mutex;
>      GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
> +
> +    bool is_symlink;
>  };
>  
>  struct lo_cred {
> @@ -473,6 +481,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
>      return elem - lo_data(req)->ino_map.elems;
>  }
>  
> +static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
> +{
> +    struct lo_inode *inode = *inodep;
> +
> +    if (!inode) {
> +        return;
> +    }
> +
> +    *inodep = NULL;
> +
> +    if (g_atomic_int_dec_and_test(&inode->refcount)) {
> +        close(inode->fd);
> +        free(inode);
> +    }
> +}
> +
> +/* Caller must release refcount using lo_inode_put() */
>  static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
>  {
>      struct lo_data *lo = lo_data(req);
> @@ -480,6 +505,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
>  
>      pthread_mutex_lock(&lo->mutex);
>      elem = lo_map_get(&lo->ino_map, ino);
> +    if (elem) {
> +        g_atomic_int_inc(&elem->inode->refcount);
> +    }
>      pthread_mutex_unlock(&lo->mutex);
>  
>      if (!elem) {
> @@ -489,10 +517,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
>      return elem->inode;
>  }
>  
> +/*
> + * TODO Remove this helper and force callers to hold an inode refcount until
> + * they are done with the fd.  This will be done in a later patch to make
> + * review easier.
> + */
>  static int lo_fd(fuse_req_t req, fuse_ino_t ino)
>  {
>      struct lo_inode *inode = lo_inode(req, ino);
> -    return inode ? inode->fd : -1;
> +    int fd;
> +
> +    if (!inode) {
> +        return -1;
> +    }
> +
> +    fd = inode->fd;
> +    lo_inode_put(lo_data(req), &inode);
> +    return fd;
>  }
>  
>  static void lo_init(void *userdata, struct fuse_conn_info *conn)
> @@ -547,6 +588,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
>      fuse_reply_attr(req, &buf, lo->timeout);
>  }
>  
> +/*
> + * Increments parent->nlookup and caller must release refcount using
> + * lo_inode_put(&parent).
> + */
>  static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
>                                char path[PATH_MAX], struct lo_inode **parent)
>  {
> @@ -584,6 +629,7 @@ retry:
>          p = &lo->root;
>          pthread_mutex_lock(&lo->mutex);
>          p->nlookup++;
> +        g_atomic_int_inc(&p->refcount);
>          pthread_mutex_unlock(&lo->mutex);
>      } else {
>          *last = '\0';

We need lo_ionde_put() in error path, right?:
https://gitlab.com/virtio-fs/qemu/blob/virtio-fs-as-posted-2019-12-12/tools/virtiofsd/passthrough_ll.c#L680

nit: if yes, unref_inode_lolocked() is always paired with lo_inode_put().
So how about combine them in one function? As p->nloockup and p->refcount
are both incremented in one place (lo_find/lo_parent_and_name) in these case,
it seems natural for me to decrement them in one function as well.

Thanks,
Misono
Stefan Hajnoczi Jan. 16, 2020, 5:21 p.m. UTC | #2
On Thu, Jan 16, 2020 at 09:25:42PM +0900, Misono Tomohiro wrote:
> > From: Stefan Hajnoczi <stefanha@redhat.com>
> > 
> > If thread A is using an inode it must not be deleted by thread B when
> > processing a FUSE_FORGET request.
> > 
> > The FUSE protocol itself already has a counter called nlookup that is
> > used in FUSE_FORGET messages.  We cannot trust this counter since the
> > untrusted client can manipulate it via FUSE_FORGET messages.
> > 
> > Introduce a new refcount to keep inodes alive for the required lifespan.
> > lo_inode_put() must be called to release a reference.  FUSE's nlookup
> > counter holds exactly one reference so that the inode stays alive as
> > long as the client still wants to remember it.
> > 
> > Note that the lo_inode->is_symlink field is moved to avoid creating a
> > hole in the struct due to struct field alignment.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  tools/virtiofsd/passthrough_ll.c | 168 ++++++++++++++++++++++++++-----
> >  1 file changed, 145 insertions(+), 23 deletions(-)
> > 
> > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> > index b19c9ee328..8f4ab8351c 100644
> > --- a/tools/virtiofsd/passthrough_ll.c
> > +++ b/tools/virtiofsd/passthrough_ll.c
> > @@ -99,7 +99,13 @@ struct lo_key {
> >  
> >  struct lo_inode {
> >      int fd;
> > -    bool is_symlink;
> > +
> > +    /*
> > +     * Atomic reference count for this object.  The nlookup field holds a
> > +     * reference and release it when nlookup reaches 0.
> > +     */
> > +    gint refcount;
> > +
> >      struct lo_key key;
> >  
> >      /*
> > @@ -118,6 +124,8 @@ struct lo_inode {
> >      fuse_ino_t fuse_ino;
> >      pthread_mutex_t plock_mutex;
> >      GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
> > +
> > +    bool is_symlink;
> >  };
> >  
> >  struct lo_cred {
> > @@ -473,6 +481,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
> >      return elem - lo_data(req)->ino_map.elems;
> >  }
> >  
> > +static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
> > +{
> > +    struct lo_inode *inode = *inodep;
> > +
> > +    if (!inode) {
> > +        return;
> > +    }
> > +
> > +    *inodep = NULL;
> > +
> > +    if (g_atomic_int_dec_and_test(&inode->refcount)) {
> > +        close(inode->fd);
> > +        free(inode);
> > +    }
> > +}
> > +
> > +/* Caller must release refcount using lo_inode_put() */
> >  static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
> >  {
> >      struct lo_data *lo = lo_data(req);
> > @@ -480,6 +505,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
> >  
> >      pthread_mutex_lock(&lo->mutex);
> >      elem = lo_map_get(&lo->ino_map, ino);
> > +    if (elem) {
> > +        g_atomic_int_inc(&elem->inode->refcount);
> > +    }
> >      pthread_mutex_unlock(&lo->mutex);
> >  
> >      if (!elem) {
> > @@ -489,10 +517,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
> >      return elem->inode;
> >  }
> >  
> > +/*
> > + * TODO Remove this helper and force callers to hold an inode refcount until
> > + * they are done with the fd.  This will be done in a later patch to make
> > + * review easier.
> > + */
> >  static int lo_fd(fuse_req_t req, fuse_ino_t ino)
> >  {
> >      struct lo_inode *inode = lo_inode(req, ino);
> > -    return inode ? inode->fd : -1;
> > +    int fd;
> > +
> > +    if (!inode) {
> > +        return -1;
> > +    }
> > +
> > +    fd = inode->fd;
> > +    lo_inode_put(lo_data(req), &inode);
> > +    return fd;
> >  }
> >  
> >  static void lo_init(void *userdata, struct fuse_conn_info *conn)
> > @@ -547,6 +588,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
> >      fuse_reply_attr(req, &buf, lo->timeout);
> >  }
> >  
> > +/*
> > + * Increments parent->nlookup and caller must release refcount using
> > + * lo_inode_put(&parent).
> > + */
> >  static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
> >                                char path[PATH_MAX], struct lo_inode **parent)
> >  {
> > @@ -584,6 +629,7 @@ retry:
> >          p = &lo->root;
> >          pthread_mutex_lock(&lo->mutex);
> >          p->nlookup++;
> > +        g_atomic_int_inc(&p->refcount);
> >          pthread_mutex_unlock(&lo->mutex);
> >      } else {
> >          *last = '\0';
> 
> We need lo_ionde_put() in error path, right?:
> https://gitlab.com/virtio-fs/qemu/blob/virtio-fs-as-posted-2019-12-12/tools/virtiofsd/passthrough_ll.c#L680

Yes, thanks for spotting this bug!  The lo_parent_and_name() code should
look like this:

  fail_unref:
      unref_inode_lolocked(lo, p, 1);
      lo_inode_put(lo, &p);
  ...

> nit: if yes, unref_inode_lolocked() is always paired with lo_inode_put().
> So how about combine them in one function? As p->nloockup and p->refcount
> are both incremented in one place (lo_find/lo_parent_and_name) in these case,
> it seems natural for me to decrement them in one function as well.

Nice idea.  I would also drop the nlookup argument - this function will
only be used with nlookup=1.

Stefan
Dr. David Alan Gilbert Jan. 16, 2020, 5:42 p.m. UTC | #3
* Stefan Hajnoczi (stefanha@redhat.com) wrote:
> On Thu, Jan 16, 2020 at 09:25:42PM +0900, Misono Tomohiro wrote:
> > > From: Stefan Hajnoczi <stefanha@redhat.com>
> > > 
> > > If thread A is using an inode it must not be deleted by thread B when
> > > processing a FUSE_FORGET request.
> > > 
> > > The FUSE protocol itself already has a counter called nlookup that is
> > > used in FUSE_FORGET messages.  We cannot trust this counter since the
> > > untrusted client can manipulate it via FUSE_FORGET messages.
> > > 
> > > Introduce a new refcount to keep inodes alive for the required lifespan.
> > > lo_inode_put() must be called to release a reference.  FUSE's nlookup
> > > counter holds exactly one reference so that the inode stays alive as
> > > long as the client still wants to remember it.
> > > 
> > > Note that the lo_inode->is_symlink field is moved to avoid creating a
> > > hole in the struct due to struct field alignment.
> > > 
> > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > ---
> > >  tools/virtiofsd/passthrough_ll.c | 168 ++++++++++++++++++++++++++-----
> > >  1 file changed, 145 insertions(+), 23 deletions(-)
> > > 
> > > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> > > index b19c9ee328..8f4ab8351c 100644
> > > --- a/tools/virtiofsd/passthrough_ll.c
> > > +++ b/tools/virtiofsd/passthrough_ll.c
> > > @@ -99,7 +99,13 @@ struct lo_key {
> > >  
> > >  struct lo_inode {
> > >      int fd;
> > > -    bool is_symlink;
> > > +
> > > +    /*
> > > +     * Atomic reference count for this object.  The nlookup field holds a
> > > +     * reference and release it when nlookup reaches 0.
> > > +     */
> > > +    gint refcount;
> > > +
> > >      struct lo_key key;
> > >  
> > >      /*
> > > @@ -118,6 +124,8 @@ struct lo_inode {
> > >      fuse_ino_t fuse_ino;
> > >      pthread_mutex_t plock_mutex;
> > >      GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
> > > +
> > > +    bool is_symlink;
> > >  };
> > >  
> > >  struct lo_cred {
> > > @@ -473,6 +481,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
> > >      return elem - lo_data(req)->ino_map.elems;
> > >  }
> > >  
> > > +static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
> > > +{
> > > +    struct lo_inode *inode = *inodep;
> > > +
> > > +    if (!inode) {
> > > +        return;
> > > +    }
> > > +
> > > +    *inodep = NULL;
> > > +
> > > +    if (g_atomic_int_dec_and_test(&inode->refcount)) {
> > > +        close(inode->fd);
> > > +        free(inode);
> > > +    }
> > > +}
> > > +
> > > +/* Caller must release refcount using lo_inode_put() */
> > >  static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
> > >  {
> > >      struct lo_data *lo = lo_data(req);
> > > @@ -480,6 +505,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
> > >  
> > >      pthread_mutex_lock(&lo->mutex);
> > >      elem = lo_map_get(&lo->ino_map, ino);
> > > +    if (elem) {
> > > +        g_atomic_int_inc(&elem->inode->refcount);
> > > +    }
> > >      pthread_mutex_unlock(&lo->mutex);
> > >  
> > >      if (!elem) {
> > > @@ -489,10 +517,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
> > >      return elem->inode;
> > >  }
> > >  
> > > +/*
> > > + * TODO Remove this helper and force callers to hold an inode refcount until
> > > + * they are done with the fd.  This will be done in a later patch to make
> > > + * review easier.
> > > + */
> > >  static int lo_fd(fuse_req_t req, fuse_ino_t ino)
> > >  {
> > >      struct lo_inode *inode = lo_inode(req, ino);
> > > -    return inode ? inode->fd : -1;
> > > +    int fd;
> > > +
> > > +    if (!inode) {
> > > +        return -1;
> > > +    }
> > > +
> > > +    fd = inode->fd;
> > > +    lo_inode_put(lo_data(req), &inode);
> > > +    return fd;
> > >  }
> > >  
> > >  static void lo_init(void *userdata, struct fuse_conn_info *conn)
> > > @@ -547,6 +588,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
> > >      fuse_reply_attr(req, &buf, lo->timeout);
> > >  }
> > >  
> > > +/*
> > > + * Increments parent->nlookup and caller must release refcount using
> > > + * lo_inode_put(&parent).
> > > + */
> > >  static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
> > >                                char path[PATH_MAX], struct lo_inode **parent)
> > >  {
> > > @@ -584,6 +629,7 @@ retry:
> > >          p = &lo->root;
> > >          pthread_mutex_lock(&lo->mutex);
> > >          p->nlookup++;
> > > +        g_atomic_int_inc(&p->refcount);
> > >          pthread_mutex_unlock(&lo->mutex);
> > >      } else {
> > >          *last = '\0';
> > 
> > We need lo_ionde_put() in error path, right?:
> > https://gitlab.com/virtio-fs/qemu/blob/virtio-fs-as-posted-2019-12-12/tools/virtiofsd/passthrough_ll.c#L680
> 
> Yes, thanks for spotting this bug!  The lo_parent_and_name() code should
> look like this:
> 
>   fail_unref:
>       unref_inode_lolocked(lo, p, 1);
>       lo_inode_put(lo, &p);
>   ...

I've merged that one in.

> > nit: if yes, unref_inode_lolocked() is always paired with lo_inode_put().
> > So how about combine them in one function? As p->nloockup and p->refcount
> > are both incremented in one place (lo_find/lo_parent_and_name) in these case,
> > it seems natural for me to decrement them in one function as well.
> 
> Nice idea.  I would also drop the nlookup argument - this function will
> only be used with nlookup=1.

I'll leave that to you if you want to send a patch on top.

Dave

> Stefan


--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Tomohiro Misono (Fujitsu) Jan. 17, 2020, 12:47 a.m. UTC | #4
> > On Thu, Jan 16, 2020 at 09:25:42PM +0900, Misono Tomohiro wrote:
> > > > From: Stefan Hajnoczi <stefanha@redhat.com>
> > > >
> > > > If thread A is using an inode it must not be deleted by thread B
> > > > when processing a FUSE_FORGET request.
> > > >
> > > > The FUSE protocol itself already has a counter called nlookup that
> > > > is used in FUSE_FORGET messages.  We cannot trust this counter
> > > > since the untrusted client can manipulate it via FUSE_FORGET messages.
> > > >
> > > > Introduce a new refcount to keep inodes alive for the required lifespan.
> > > > lo_inode_put() must be called to release a reference.  FUSE's
> > > > nlookup counter holds exactly one reference so that the inode
> > > > stays alive as long as the client still wants to remember it.
> > > >
> > > > Note that the lo_inode->is_symlink field is moved to avoid
> > > > creating a hole in the struct due to struct field alignment.
> > > >
> > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > > ---
> > > >  tools/virtiofsd/passthrough_ll.c | 168
> > > > ++++++++++++++++++++++++++-----
> > > >  1 file changed, 145 insertions(+), 23 deletions(-)
> > > >
> > > > diff --git a/tools/virtiofsd/passthrough_ll.c
> > > > b/tools/virtiofsd/passthrough_ll.c
> > > > index b19c9ee328..8f4ab8351c 100644
> > > > --- a/tools/virtiofsd/passthrough_ll.c
> > > > +++ b/tools/virtiofsd/passthrough_ll.c
> > > > @@ -99,7 +99,13 @@ struct lo_key {
> > > >
> > > >  struct lo_inode {
> > > >      int fd;
> > > > -    bool is_symlink;
> > > > +
> > > > +    /*
> > > > +     * Atomic reference count for this object.  The nlookup field holds a
> > > > +     * reference and release it when nlookup reaches 0.
> > > > +     */
> > > > +    gint refcount;
> > > > +
> > > >      struct lo_key key;
> > > >
> > > >      /*
> > > > @@ -118,6 +124,8 @@ struct lo_inode {
> > > >      fuse_ino_t fuse_ino;
> > > >      pthread_mutex_t plock_mutex;
> > > >      GHashTable *posix_locks; /* protected by
> > > > lo_inode->plock_mutex */
> > > > +
> > > > +    bool is_symlink;
> > > >  };
> > > >
> > > >  struct lo_cred {
> > > > @@ -473,6 +481,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
> > > >      return elem - lo_data(req)->ino_map.elems;  }
> > > >
> > > > +static void lo_inode_put(struct lo_data *lo, struct lo_inode
> > > > +**inodep) {
> > > > +    struct lo_inode *inode = *inodep;
> > > > +
> > > > +    if (!inode) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    *inodep = NULL;
> > > > +
> > > > +    if (g_atomic_int_dec_and_test(&inode->refcount)) {
> > > > +        close(inode->fd);
> > > > +        free(inode);
> > > > +    }
> > > > +}
> > > > +
> > > > +/* Caller must release refcount using lo_inode_put() */
> > > >  static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
> > > > {
> > > >      struct lo_data *lo = lo_data(req); @@ -480,6 +505,9 @@ static
> > > > struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
> > > >
> > > >      pthread_mutex_lock(&lo->mutex);
> > > >      elem = lo_map_get(&lo->ino_map, ino);
> > > > +    if (elem) {
> > > > +        g_atomic_int_inc(&elem->inode->refcount);
> > > > +    }
> > > >      pthread_mutex_unlock(&lo->mutex);
> > > >
> > > >      if (!elem) {
> > > > @@ -489,10 +517,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
> > > >      return elem->inode;
> > > >  }
> > > >
> > > > +/*
> > > > + * TODO Remove this helper and force callers to hold an inode
> > > > +refcount until
> > > > + * they are done with the fd.  This will be done in a later patch
> > > > +to make
> > > > + * review easier.
> > > > + */
> > > >  static int lo_fd(fuse_req_t req, fuse_ino_t ino)  {
> > > >      struct lo_inode *inode = lo_inode(req, ino);
> > > > -    return inode ? inode->fd : -1;
> > > > +    int fd;
> > > > +
> > > > +    if (!inode) {
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    fd = inode->fd;
> > > > +    lo_inode_put(lo_data(req), &inode);
> > > > +    return fd;
> > > >  }
> > > >
> > > >  static void lo_init(void *userdata, struct fuse_conn_info *conn)
> > > > @@ -547,6 +588,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
> > > >      fuse_reply_attr(req, &buf, lo->timeout);  }
> > > >
> > > > +/*
> > > > + * Increments parent->nlookup and caller must release refcount
> > > > +using
> > > > + * lo_inode_put(&parent).
> > > > + */
> > > >  static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
> > > >                                char path[PATH_MAX], struct
> > > > lo_inode **parent)  { @@ -584,6 +629,7 @@ retry:
> > > >          p = &lo->root;
> > > >          pthread_mutex_lock(&lo->mutex);
> > > >          p->nlookup++;
> > > > +        g_atomic_int_inc(&p->refcount);
> > > >          pthread_mutex_unlock(&lo->mutex);
> > > >      } else {
> > > >          *last = '\0';
> > >
> > > We need lo_ionde_put() in error path, right?:
> > > https://gitlab.com/virtio-fs/qemu/blob/virtio-fs-as-posted-2019-12-1
> > > 2/tools/virtiofsd/passthrough_ll.c#L680
> >
> > Yes, thanks for spotting this bug!  The lo_parent_and_name() code
> > should look like this:
> >
> >   fail_unref:
> >       unref_inode_lolocked(lo, p, 1);
> >       lo_inode_put(lo, &p);
> >   ...
> 
> I've merged that one in.

Thanks, so with that:
 Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>

> 
> > > nit: if yes, unref_inode_lolocked() is always paired with lo_inode_put().
> > > So how about combine them in one function? As p->nloockup and
> > > p->refcount are both incremented in one place
> > > (lo_find/lo_parent_and_name) in these case, it seems natural for me to decrement them in one function as well.
> >
> > Nice idea.  I would also drop the nlookup argument - this function
> > will only be used with nlookup=1.
> 
> I'll leave that to you if you want to send a patch on top.
> 
> Dave
> 
> > Stefan
> 
> 
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Sergio Lopez Jan. 20, 2020, 10:28 a.m. UTC | #5
Dr. David Alan Gilbert (git) <dgilbert@redhat.com> writes:

> From: Stefan Hajnoczi <stefanha@redhat.com>
>
> If thread A is using an inode it must not be deleted by thread B when
> processing a FUSE_FORGET request.
>
> The FUSE protocol itself already has a counter called nlookup that is
> used in FUSE_FORGET messages.  We cannot trust this counter since the
> untrusted client can manipulate it via FUSE_FORGET messages.
>
> Introduce a new refcount to keep inodes alive for the required lifespan.
> lo_inode_put() must be called to release a reference.  FUSE's nlookup
> counter holds exactly one reference so that the inode stays alive as
> long as the client still wants to remember it.
>
> Note that the lo_inode->is_symlink field is moved to avoid creating a
> hole in the struct due to struct field alignment.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  tools/virtiofsd/passthrough_ll.c | 168 ++++++++++++++++++++++++++-----
>  1 file changed, 145 insertions(+), 23 deletions(-)

Reviewed-by: Sergio Lopez <slp@redhat.com>
diff mbox series

Patch

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b19c9ee328..8f4ab8351c 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -99,7 +99,13 @@  struct lo_key {
 
 struct lo_inode {
     int fd;
-    bool is_symlink;
+
+    /*
+     * Atomic reference count for this object.  The nlookup field holds a
+     * reference and release it when nlookup reaches 0.
+     */
+    gint refcount;
+
     struct lo_key key;
 
     /*
@@ -118,6 +124,8 @@  struct lo_inode {
     fuse_ino_t fuse_ino;
     pthread_mutex_t plock_mutex;
     GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
+
+    bool is_symlink;
 };
 
 struct lo_cred {
@@ -473,6 +481,23 @@  static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
     return elem - lo_data(req)->ino_map.elems;
 }
 
+static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
+{
+    struct lo_inode *inode = *inodep;
+
+    if (!inode) {
+        return;
+    }
+
+    *inodep = NULL;
+
+    if (g_atomic_int_dec_and_test(&inode->refcount)) {
+        close(inode->fd);
+        free(inode);
+    }
+}
+
+/* Caller must release refcount using lo_inode_put() */
 static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
 {
     struct lo_data *lo = lo_data(req);
@@ -480,6 +505,9 @@  static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
 
     pthread_mutex_lock(&lo->mutex);
     elem = lo_map_get(&lo->ino_map, ino);
+    if (elem) {
+        g_atomic_int_inc(&elem->inode->refcount);
+    }
     pthread_mutex_unlock(&lo->mutex);
 
     if (!elem) {
@@ -489,10 +517,23 @@  static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
     return elem->inode;
 }
 
+/*
+ * TODO Remove this helper and force callers to hold an inode refcount until
+ * they are done with the fd.  This will be done in a later patch to make
+ * review easier.
+ */
 static int lo_fd(fuse_req_t req, fuse_ino_t ino)
 {
     struct lo_inode *inode = lo_inode(req, ino);
-    return inode ? inode->fd : -1;
+    int fd;
+
+    if (!inode) {
+        return -1;
+    }
+
+    fd = inode->fd;
+    lo_inode_put(lo_data(req), &inode);
+    return fd;
 }
 
 static void lo_init(void *userdata, struct fuse_conn_info *conn)
@@ -547,6 +588,10 @@  static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
     fuse_reply_attr(req, &buf, lo->timeout);
 }
 
+/*
+ * Increments parent->nlookup and caller must release refcount using
+ * lo_inode_put(&parent).
+ */
 static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
                               char path[PATH_MAX], struct lo_inode **parent)
 {
@@ -584,6 +629,7 @@  retry:
         p = &lo->root;
         pthread_mutex_lock(&lo->mutex);
         p->nlookup++;
+        g_atomic_int_inc(&p->refcount);
         pthread_mutex_unlock(&lo->mutex);
     } else {
         *last = '\0';
@@ -665,6 +711,7 @@  fallback:
     if (res != -1) {
         res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
         unref_inode_lolocked(lo, parent, 1);
+        lo_inode_put(lo, &parent);
     }
 
     return res;
@@ -782,11 +829,13 @@  static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
             goto out_err;
         }
     }
+    lo_inode_put(lo, &inode);
 
     return lo_getattr(req, ino, fi);
 
 out_err:
     saverr = errno;
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
 }
 
@@ -803,6 +852,7 @@  static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
     if (p) {
         assert(p->nlookup > 0);
         p->nlookup++;
+        g_atomic_int_inc(&p->refcount);
     }
     pthread_mutex_unlock(&lo->mutex);
 
@@ -822,6 +872,10 @@  static void posix_locks_value_destroy(gpointer data)
     free(plock);
 }
 
+/*
+ * Increments nlookup and caller must release refcount using
+ * lo_inode_put(&parent).
+ */
 static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
                         struct fuse_entry_param *e)
 {
@@ -829,7 +883,8 @@  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
     int res;
     int saverr;
     struct lo_data *lo = lo_data(req);
-    struct lo_inode *inode, *dir = lo_inode(req, parent);
+    struct lo_inode *inode = NULL;
+    struct lo_inode *dir = lo_inode(req, parent);
 
     /*
      * name_to_handle_at() and open_by_handle_at() can reach here with fuse
@@ -870,6 +925,13 @@  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
         }
 
         inode->is_symlink = S_ISLNK(e->attr.st_mode);
+
+        /*
+         * One for the caller and one for nlookup (released in
+         * unref_inode_lolocked())
+         */
+        g_atomic_int_set(&inode->refcount, 2);
+
         inode->nlookup = 1;
         inode->fd = newfd;
         newfd = -1;
@@ -885,6 +947,8 @@  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
         pthread_mutex_unlock(&lo->mutex);
     }
     e->ino = inode->fuse_ino;
+    lo_inode_put(lo, &inode);
+    lo_inode_put(lo, &dir);
 
     fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent,
              name, (unsigned long long)e->ino);
@@ -896,6 +960,8 @@  out_err:
     if (newfd != -1) {
         close(newfd);
     }
+    lo_inode_put(lo, &inode);
+    lo_inode_put(lo, &dir);
     return saverr;
 }
 
@@ -976,6 +1042,7 @@  static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
 {
     int res;
     int saverr;
+    struct lo_data *lo = lo_data(req);
     struct lo_inode *dir;
     struct fuse_entry_param e;
     struct lo_cred old = {};
@@ -1017,9 +1084,11 @@  static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
              name, (unsigned long long)e.ino);
 
     fuse_reply_entry(req, &e);
+    lo_inode_put(lo, &dir);
     return;
 
 out:
+    lo_inode_put(lo, &dir);
     fuse_reply_err(req, saverr);
 }
 
@@ -1070,6 +1139,7 @@  fallback:
     if (res != -1) {
         res = linkat(parent->fd, path, dfd, name, 0);
         unref_inode_lolocked(lo, parent, 1);
+        lo_inode_put(lo, &parent);
     }
 
     return res;
@@ -1080,6 +1150,7 @@  static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
 {
     int res;
     struct lo_data *lo = lo_data(req);
+    struct lo_inode *parent_inode;
     struct lo_inode *inode;
     struct fuse_entry_param e;
     int saverr;
@@ -1089,17 +1160,18 @@  static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
         return;
     }
 
+    parent_inode = lo_inode(req, parent);
     inode = lo_inode(req, ino);
-    if (!inode) {
-        fuse_reply_err(req, EBADF);
-        return;
+    if (!parent_inode || !inode) {
+        errno = EBADF;
+        goto out_err;
     }
 
     memset(&e, 0, sizeof(struct fuse_entry_param));
     e.attr_timeout = lo->timeout;
     e.entry_timeout = lo->timeout;
 
-    res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
+    res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name);
     if (res == -1) {
         goto out_err;
     }
@@ -1118,13 +1190,18 @@  static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
              name, (unsigned long long)e.ino);
 
     fuse_reply_entry(req, &e);
+    lo_inode_put(lo, &parent_inode);
+    lo_inode_put(lo, &inode);
     return;
 
 out_err:
     saverr = errno;
+    lo_inode_put(lo, &parent_inode);
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
 }
 
+/* Increments nlookup and caller must release refcount using lo_inode_put() */
 static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
                                     const char *name)
 {
@@ -1161,6 +1238,7 @@  static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
 
     fuse_reply_err(req, res == -1 ? errno : 0);
     unref_inode_lolocked(lo, inode, 1);
+    lo_inode_put(lo, &inode);
 }
 
 static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
@@ -1168,8 +1246,10 @@  static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
                       unsigned int flags)
 {
     int res;
-    struct lo_inode *oldinode;
-    struct lo_inode *newinode;
+    struct lo_inode *parent_inode;
+    struct lo_inode *newparent_inode;
+    struct lo_inode *oldinode = NULL;
+    struct lo_inode *newinode = NULL;
     struct lo_data *lo = lo_data(req);
 
     if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
@@ -1177,6 +1257,13 @@  static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
         return;
     }
 
+    parent_inode = lo_inode(req, parent);
+    newparent_inode = lo_inode(req, newparent);
+    if (!parent_inode || !newparent_inode) {
+        fuse_reply_err(req, EBADF);
+        goto out;
+    }
+
     oldinode = lookup_name(req, parent, name);
     newinode = lookup_name(req, newparent, newname);
 
@@ -1189,8 +1276,8 @@  static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
 #ifndef SYS_renameat2
         fuse_reply_err(req, EINVAL);
 #else
-        res = syscall(SYS_renameat2, lo_fd(req, parent), name,
-                      lo_fd(req, newparent), newname, flags);
+        res = syscall(SYS_renameat2, parent_inode->fd, name,
+                      newparent_inode->fd, newname, flags);
         if (res == -1 && errno == ENOSYS) {
             fuse_reply_err(req, EINVAL);
         } else {
@@ -1200,12 +1287,16 @@  static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
         goto out;
     }
 
-    res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname);
+    res = renameat(parent_inode->fd, name, newparent_inode->fd, newname);
 
     fuse_reply_err(req, res == -1 ? errno : 0);
 out:
     unref_inode_lolocked(lo, oldinode, 1);
     unref_inode_lolocked(lo, newinode, 1);
+    lo_inode_put(lo, &oldinode);
+    lo_inode_put(lo, &newinode);
+    lo_inode_put(lo, &parent_inode);
+    lo_inode_put(lo, &newparent_inode);
 }
 
 static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
@@ -1229,6 +1320,7 @@  static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
 
     fuse_reply_err(req, res == -1 ? errno : 0);
     unref_inode_lolocked(lo, inode, 1);
+    lo_inode_put(lo, &inode);
 }
 
 static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
@@ -1250,8 +1342,9 @@  static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
         g_hash_table_destroy(inode->posix_locks);
         pthread_mutex_destroy(&inode->plock_mutex);
         pthread_mutex_unlock(&lo->mutex);
-        close(inode->fd);
-        free(inode);
+
+        /* Drop our refcount from lo_do_lookup() */
+        lo_inode_put(lo, &inode);
     } else {
         pthread_mutex_unlock(&lo->mutex);
     }
@@ -1265,6 +1358,7 @@  static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data)
     inode->nlookup = 0;
     lo_map_remove(&lo->ino_map, inode->fuse_ino);
     close(inode->fd);
+    lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */
 
     return TRUE;
 }
@@ -1291,6 +1385,7 @@  static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
              (unsigned long long)nlookup);
 
     unref_inode_lolocked(lo, inode, nlookup);
+    lo_inode_put(lo, &inode);
 }
 
 static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
@@ -1522,6 +1617,7 @@  static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
     err = 0;
 error:
     lo_dirp_put(&d);
+    lo_inode_put(lo, &dinode);
 
     /*
      * If there's an error, we can only signal it if we haven't stored
@@ -1580,6 +1676,7 @@  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
 {
     int fd;
     struct lo_data *lo = lo_data(req);
+    struct lo_inode *parent_inode;
     struct fuse_entry_param e;
     int err;
     struct lo_cred old = {};
@@ -1592,12 +1689,18 @@  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
         return;
     }
 
+    parent_inode = lo_inode(req, parent);
+    if (!parent_inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
     err = lo_change_cred(req, &old);
     if (err) {
         goto out;
     }
 
-    fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
+    fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
                 mode);
     err = fd == -1 ? errno : 0;
     lo_restore_cred(&old);
@@ -1610,8 +1713,8 @@  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
         pthread_mutex_unlock(&lo->mutex);
         if (fh == -1) {
             close(fd);
-            fuse_reply_err(req, ENOMEM);
-            return;
+            err = ENOMEM;
+            goto out;
         }
 
         fi->fh = fh;
@@ -1624,6 +1727,8 @@  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
     }
 
 out:
+    lo_inode_put(lo, &parent_inode);
+
     if (err) {
         fuse_reply_err(req, err);
     } else {
@@ -1697,16 +1802,18 @@  static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
     plock =
         lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
     if (!plock) {
-        pthread_mutex_unlock(&inode->plock_mutex);
-        fuse_reply_err(req, ret);
-        return;
+        saverr = ret;
+        goto out;
     }
 
     ret = fcntl(plock->fd, F_OFD_GETLK, lock);
     if (ret == -1) {
         saverr = errno;
     }
+
+out:
     pthread_mutex_unlock(&inode->plock_mutex);
+    lo_inode_put(lo, &inode);
 
     if (saverr) {
         fuse_reply_err(req, saverr);
@@ -1746,9 +1853,8 @@  static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
         lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
 
     if (!plock) {
-        pthread_mutex_unlock(&inode->plock_mutex);
-        fuse_reply_err(req, ret);
-        return;
+        saverr = ret;
+        goto out;
     }
 
     /* TODO: Is it alright to modify flock? */
@@ -1757,7 +1863,11 @@  static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
     if (ret == -1) {
         saverr = errno;
     }
+
+out:
     pthread_mutex_unlock(&inode->plock_mutex);
+    lo_inode_put(lo, &inode);
+
     fuse_reply_err(req, saverr);
 }
 
@@ -1883,6 +1993,7 @@  static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
     pthread_mutex_unlock(&inode->plock_mutex);
 
     res = close(dup(lo_fi_fd(req, fi)));
+    lo_inode_put(lo_data(req), &inode);
     fuse_reply_err(req, res == -1 ? errno : 0);
 }
 
@@ -2099,11 +2210,14 @@  out_free:
     if (fd >= 0) {
         close(fd);
     }
+
+    lo_inode_put(lo, &inode);
     return;
 
 out_err:
     saverr = errno;
 out:
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
     goto out_free;
 }
@@ -2174,11 +2288,14 @@  out_free:
     if (fd >= 0) {
         close(fd);
     }
+
+    lo_inode_put(lo, &inode);
     return;
 
 out_err:
     saverr = errno;
 out:
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
     goto out_free;
 }
@@ -2227,6 +2344,8 @@  out:
     if (fd >= 0) {
         close(fd);
     }
+
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
 }
 
@@ -2273,6 +2392,8 @@  out:
     if (fd >= 0) {
         close(fd);
     }
+
+    lo_inode_put(lo, &inode);
     fuse_reply_err(req, saverr);
 }
 
@@ -2656,6 +2777,7 @@  static void setup_root(struct lo_data *lo, struct lo_inode *root)
     root->key.ino = stat.st_ino;
     root->key.dev = stat.st_dev;
     root->nlookup = 2;
+    g_atomic_int_set(&root->refcount, 2);
 }
 
 static guint lo_key_hash(gconstpointer key)