diff mbox series

[037/104] virtiofsd: passthrough_ll: add fallback for racy ops

Message ID 20191212163904.159893-38-dgilbert@redhat.com
State New
Headers show
Series virtiofs daemon [all] | expand

Commit Message

Dr. David Alan Gilbert Dec. 12, 2019, 4:37 p.m. UTC
From: Miklos Szeredi <mszeredi@redhat.com>

We have two operations that cannot be done race-free on a symlink in
certain cases: utimes and link.

Add racy fallback for these if the race-free method doesn't work.  We do
our best to avoid races even in this case:

  - get absolute path by reading /proc/self/fd/NN symlink

  - lookup parent directory: after this we are safe against renames in
    ancestors

  - lookup name in parent directory, and verify that we got to the original
    inode,  if not retry the whole thing

Both utimes(2) and link(2) hold i_lock on the inode across the operation,
so a racing rename/delete by this fuse instance is not possible, only from
other entities changing the filesystem.

If the "norace" option is given, then disable the racy fallbacks.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 tools/virtiofsd/passthrough_ll.c | 159 +++++++++++++++++++++++++++----
 1 file changed, 142 insertions(+), 17 deletions(-)

Comments

Masayoshi Mizuma Jan. 18, 2020, 4:22 p.m. UTC | #1
On Thu, Dec 12, 2019 at 04:37:57PM +0000, Dr. David Alan Gilbert (git) wrote:
> From: Miklos Szeredi <mszeredi@redhat.com>
> 
> We have two operations that cannot be done race-free on a symlink in
> certain cases: utimes and link.
> 
> Add racy fallback for these if the race-free method doesn't work.  We do
> our best to avoid races even in this case:
> 
>   - get absolute path by reading /proc/self/fd/NN symlink
> 
>   - lookup parent directory: after this we are safe against renames in
>     ancestors
> 
>   - lookup name in parent directory, and verify that we got to the original
>     inode,  if not retry the whole thing
> 
> Both utimes(2) and link(2) hold i_lock on the inode across the operation,
> so a racing rename/delete by this fuse instance is not possible, only from
> other entities changing the filesystem.
> 
> If the "norace" option is given, then disable the racy fallbacks.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  tools/virtiofsd/passthrough_ll.c | 159 +++++++++++++++++++++++++++----
>  1 file changed, 142 insertions(+), 17 deletions(-)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index 93e74cce21..1faae2753f 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -98,6 +98,7 @@ enum {
>  struct lo_data {
>      pthread_mutex_t mutex;
>      int debug;
> +    int norace;
>      int writeback;
>      int flock;
>      int xattr;
> @@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
>      { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
>      { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
>      { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
> -
> +    { "norace", offsetof(struct lo_data, norace), 1 },
>      FUSE_OPT_END
>  };
>  
> +static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
> +
> +static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
> +
> +
>  static struct lo_data *lo_data(fuse_req_t req)
>  {
>      return (struct lo_data *)fuse_req_userdata(req);
> @@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
>      fuse_reply_attr(req, &buf, lo->timeout);
>  }
>  > -static int utimensat_empty_nofollow(struct lo_inode *inode,
> -                                    const struct timespec *tv)
> +static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
> +                              char path[PATH_MAX], struct lo_inode **parent)
>  {
> -    int res;
>      char procname[64];
> +    char *last;
> +    struct stat stat;
> +    struct lo_inode *p;
> +    int retries = 2;
> +    int res;
> +
> +retry:
> +    sprintf(procname, "/proc/self/fd/%i", inode->fd);
> +
> +    res = readlink(procname, path, PATH_MAX);
> +    if (res < 0) {

> +        fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink failed: %m\n");

I think it's better to use __func__ macro in case the function name is
changed in the future.

           fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);

> +        goto fail_noretry;
> +    }
> +
> +    if (res >= PATH_MAX) {

> +        fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink overflowed\n");

           fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);

> +        goto fail_noretry;
> +    }
> +    path[res] = '\0';
> +
> +    last = strrchr(path, '/');
> +    if (last == NULL) {
> +        /* Shouldn't happen */

> +        fuse_log(
> +            FUSE_LOG_WARNING,
> +            "lo_parent_and_name: INTERNAL ERROR: bad path read from proc\n");

           fuse_log(
               FUSE_LOG_WARNING,
            "%s: INTERNAL ERROR: bad path read from proc\n", __func__);

> +        goto fail_noretry;
> +    }
> +    if (last == path) {
> +        p = &lo->root;
> +        pthread_mutex_lock(&lo->mutex);
> +        p->refcount++;
> +        pthread_mutex_unlock(&lo->mutex);
> +    } else {
> +        *last = '\0';
> +        res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
> +        if (res == -1) {
> +            if (!retries) {

> +                fuse_log(FUSE_LOG_WARNING,
> +                         "lo_parent_and_name: failed to stat parent: %m\n");

                   fuse_log(FUSE_LOG_WARNING,
                            "%s: failed to stat parent: %m\n", __func__);

> +            }
> +            goto fail;
> +        }
> +        p = lo_find(lo, &stat);
> +        if (p == NULL) {
> +            if (!retries) {

> +                fuse_log(FUSE_LOG_WARNING,
> +                         "lo_parent_and_name: failed to find parent\n");

                   fuse_log(FUSE_LOG_WARNING,
                         "%s: failed to find parent\n", __func__);

> +            }
> +            goto fail;
> +        }
> +    }
> +    last++;
> +    res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
> +    if (res == -1) {
> +        if (!retries) {

> +            fuse_log(FUSE_LOG_WARNING,
> +                     "lo_parent_and_name: failed to stat last\n");

               fuse_log(FUSE_LOG_WARNING,
                     "%s: failed to stat last\n", __func__);

> +        }
> +        goto fail_unref;
> +    }
> +    if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
> +        if (!retries) {

> +            fuse_log(FUSE_LOG_WARNING,
> +                     "lo_parent_and_name: failed to match last\n");

               fuse_log(FUSE_LOG_WARNING,
                        "%s: failed to match last\n", __func__);

> +        }
> +        goto fail_unref;
> +    }
> +    *parent = p;
> +    memmove(path, last, strlen(last) + 1);
> +
> +    return 0;
> +
> +fail_unref:
> +    unref_inode(lo, p, 1);
> +fail:
> +    if (retries) {
> +        retries--;
> +        goto retry;
> +    }
> +fail_noretry:
> +    errno = EIO;
> +    return -1;
> +}
> +
> +static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
> +                           const struct timespec *tv)
> +{
> +    int res;
> +    struct lo_inode *parent;
> +    char path[PATH_MAX];
>  
>      if (inode->is_symlink) {
> -        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
> +        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
>          if (res == -1 && errno == EINVAL) {
>              /* Sorry, no race free way to set times on symlink. */
> -            errno = EPERM;
> +            if (lo->norace) {
> +                errno = EPERM;
> +            } else {
> +                goto fallback;
> +            }
>          }
>          return res;
>      }
> -    sprintf(procname, "/proc/self/fd/%i", inode->fd);
> +    sprintf(path, "/proc/self/fd/%i", inode->fd);
> +
> +    return utimensat(AT_FDCWD, path, tv, 0);
>  
> -    return utimensat(AT_FDCWD, procname, tv, 0);
> +fallback:
> +    res = lo_parent_and_name(lo, inode, path, &parent);
> +    if (res != -1) {
> +        res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
> +        unref_inode(lo, parent, 1);
> +    }
> +
> +    return res;
>  }
>  
>  static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
> @@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
>  {
>      int saverr;
>      char procname[64];
> +    struct lo_data *lo = lo_data(req);
>      struct lo_inode *inode;
>      int ifd;
>      int res;
> @@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
>          if (fi) {
>              res = futimens(fd, tv);
>          } else {
> -            res = utimensat_empty_nofollow(inode, tv);
> +            res = utimensat_empty(lo, inode, tv);
>          }
>          if (res == -1) {
>              goto out_err;
> @@ -692,24 +803,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
>      lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
>  }
>  
> -static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
> -                                 const char *name)
> +static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
> +                                 int dfd, const char *name)
>  {
>      int res;
> -    char procname[64];
> +    struct lo_inode *parent;
> +    char path[PATH_MAX];
>  
>      if (inode->is_symlink) {
>          res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
>          if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
>              /* Sorry, no race free way to hard-link a symlink. */
> -            errno = EPERM;
> +            if (lo->norace) {
> +                errno = EPERM;
> +            } else {
> +                goto fallback;
> +            }
>          }
>          return res;
>      }
>  
> -    sprintf(procname, "/proc/self/fd/%i", inode->fd);
> +    sprintf(path, "/proc/self/fd/%i", inode->fd);
> +
> +    return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
> +
> +fallback:
> +    res = lo_parent_and_name(lo, inode, path, &parent);
> +    if (res != -1) {
> +        res = linkat(parent->fd, path, dfd, name, 0);
> +        unref_inode(lo, parent, 1);
> +    }
>  
> -    return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
> +    return res;
>  }
>  
>  static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
> @@ -731,7 +856,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
>      e.attr_timeout = lo->timeout;
>      e.entry_timeout = lo->timeout;
>  
> -    res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
> +    res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
>      if (res == -1) {
>          goto out_err;
>      }
> @@ -1544,7 +1669,7 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
>      }
>  
>      if (inode->is_symlink) {
> -        /* Sorry, no race free way to setxattr on symlink. */
> +        /* Sorry, no race free way to removexattr on symlink. */
>          saverr = EPERM;
>          goto out;
>      }

Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Dr. David Alan Gilbert Jan. 20, 2020, 1:26 p.m. UTC | #2
* Masayoshi Mizuma (msys.mizuma@gmail.com) wrote:
> On Thu, Dec 12, 2019 at 04:37:57PM +0000, Dr. David Alan Gilbert (git) wrote:
> > From: Miklos Szeredi <mszeredi@redhat.com>
> > 
> > We have two operations that cannot be done race-free on a symlink in
> > certain cases: utimes and link.
> > 
> > Add racy fallback for these if the race-free method doesn't work.  We do
> > our best to avoid races even in this case:
> > 
> >   - get absolute path by reading /proc/self/fd/NN symlink
> > 
> >   - lookup parent directory: after this we are safe against renames in
> >     ancestors
> > 
> >   - lookup name in parent directory, and verify that we got to the original
> >     inode,  if not retry the whole thing
> > 
> > Both utimes(2) and link(2) hold i_lock on the inode across the operation,
> > so a racing rename/delete by this fuse instance is not possible, only from
> > other entities changing the filesystem.
> > 
> > If the "norace" option is given, then disable the racy fallbacks.
> > 
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> >  tools/virtiofsd/passthrough_ll.c | 159 +++++++++++++++++++++++++++----
> >  1 file changed, 142 insertions(+), 17 deletions(-)
> > 
> > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> > index 93e74cce21..1faae2753f 100644
> > --- a/tools/virtiofsd/passthrough_ll.c
> > +++ b/tools/virtiofsd/passthrough_ll.c
> > @@ -98,6 +98,7 @@ enum {
> >  struct lo_data {
> >      pthread_mutex_t mutex;
> >      int debug;
> > +    int norace;
> >      int writeback;
> >      int flock;
> >      int xattr;
> > @@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
> >      { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
> >      { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
> >      { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
> > -
> > +    { "norace", offsetof(struct lo_data, norace), 1 },
> >      FUSE_OPT_END
> >  };
> >  
> > +static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
> > +
> > +static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
> > +
> > +
> >  static struct lo_data *lo_data(fuse_req_t req)
> >  {
> >      return (struct lo_data *)fuse_req_userdata(req);
> > @@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
> >      fuse_reply_attr(req, &buf, lo->timeout);
> >  }
> >  > -static int utimensat_empty_nofollow(struct lo_inode *inode,
> > -                                    const struct timespec *tv)
> > +static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
> > +                              char path[PATH_MAX], struct lo_inode **parent)
> >  {
> > -    int res;
> >      char procname[64];
> > +    char *last;
> > +    struct stat stat;
> > +    struct lo_inode *p;
> > +    int retries = 2;
> > +    int res;
> > +
> > +retry:
> > +    sprintf(procname, "/proc/self/fd/%i", inode->fd);
> > +
> > +    res = readlink(procname, path, PATH_MAX);
> > +    if (res < 0) {
> 
> > +        fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink failed: %m\n");
> 
> I think it's better to use __func__ macro in case the function name is
> changed in the future.

Yes, agreed - I've changed them all

>            fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
> 
> > +        goto fail_noretry;
> > +    }
> > +
> > +    if (res >= PATH_MAX) {
> 
> > +        fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink overflowed\n");
> 
>            fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
> 
> > +        goto fail_noretry;
> > +    }
> > +    path[res] = '\0';
> > +
> > +    last = strrchr(path, '/');
> > +    if (last == NULL) {
> > +        /* Shouldn't happen */
> 
> > +        fuse_log(
> > +            FUSE_LOG_WARNING,
> > +            "lo_parent_and_name: INTERNAL ERROR: bad path read from proc\n");
> 
>            fuse_log(
>                FUSE_LOG_WARNING,
>             "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
> 
> > +        goto fail_noretry;
> > +    }
> > +    if (last == path) {
> > +        p = &lo->root;
> > +        pthread_mutex_lock(&lo->mutex);
> > +        p->refcount++;
> > +        pthread_mutex_unlock(&lo->mutex);
> > +    } else {
> > +        *last = '\0';
> > +        res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
> > +        if (res == -1) {
> > +            if (!retries) {
> 
> > +                fuse_log(FUSE_LOG_WARNING,
> > +                         "lo_parent_and_name: failed to stat parent: %m\n");
> 
>                    fuse_log(FUSE_LOG_WARNING,
>                             "%s: failed to stat parent: %m\n", __func__);
> 
> > +            }
> > +            goto fail;
> > +        }
> > +        p = lo_find(lo, &stat);
> > +        if (p == NULL) {
> > +            if (!retries) {
> 
> > +                fuse_log(FUSE_LOG_WARNING,
> > +                         "lo_parent_and_name: failed to find parent\n");
> 
>                    fuse_log(FUSE_LOG_WARNING,
>                          "%s: failed to find parent\n", __func__);
> 
> > +            }
> > +            goto fail;
> > +        }
> > +    }
> > +    last++;
> > +    res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
> > +    if (res == -1) {
> > +        if (!retries) {
> 
> > +            fuse_log(FUSE_LOG_WARNING,
> > +                     "lo_parent_and_name: failed to stat last\n");
> 
>                fuse_log(FUSE_LOG_WARNING,
>                      "%s: failed to stat last\n", __func__);
> 
> > +        }
> > +        goto fail_unref;
> > +    }
> > +    if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
> > +        if (!retries) {
> 
> > +            fuse_log(FUSE_LOG_WARNING,
> > +                     "lo_parent_and_name: failed to match last\n");
> 
>                fuse_log(FUSE_LOG_WARNING,
>                         "%s: failed to match last\n", __func__);
> 
> > +        }
> > +        goto fail_unref;
> > +    }
> > +    *parent = p;
> > +    memmove(path, last, strlen(last) + 1);
> > +
> > +    return 0;
> > +
> > +fail_unref:
> > +    unref_inode(lo, p, 1);
> > +fail:
> > +    if (retries) {
> > +        retries--;
> > +        goto retry;
> > +    }
> > +fail_noretry:
> > +    errno = EIO;
> > +    return -1;
> > +}
> > +
> > +static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
> > +                           const struct timespec *tv)
> > +{
> > +    int res;
> > +    struct lo_inode *parent;
> > +    char path[PATH_MAX];
> >  
> >      if (inode->is_symlink) {
> > -        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
> > +        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
> >          if (res == -1 && errno == EINVAL) {
> >              /* Sorry, no race free way to set times on symlink. */
> > -            errno = EPERM;
> > +            if (lo->norace) {
> > +                errno = EPERM;
> > +            } else {
> > +                goto fallback;
> > +            }
> >          }
> >          return res;
> >      }
> > -    sprintf(procname, "/proc/self/fd/%i", inode->fd);
> > +    sprintf(path, "/proc/self/fd/%i", inode->fd);
> > +
> > +    return utimensat(AT_FDCWD, path, tv, 0);
> >  
> > -    return utimensat(AT_FDCWD, procname, tv, 0);
> > +fallback:
> > +    res = lo_parent_and_name(lo, inode, path, &parent);
> > +    if (res != -1) {
> > +        res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
> > +        unref_inode(lo, parent, 1);
> > +    }
> > +
> > +    return res;
> >  }
> >  
> >  static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
> > @@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
> >  {
> >      int saverr;
> >      char procname[64];
> > +    struct lo_data *lo = lo_data(req);
> >      struct lo_inode *inode;
> >      int ifd;
> >      int res;
> > @@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
> >          if (fi) {
> >              res = futimens(fd, tv);
> >          } else {
> > -            res = utimensat_empty_nofollow(inode, tv);
> > +            res = utimensat_empty(lo, inode, tv);
> >          }
> >          if (res == -1) {
> >              goto out_err;
> > @@ -692,24 +803,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
> >      lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
> >  }
> >  
> > -static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
> > -                                 const char *name)
> > +static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
> > +                                 int dfd, const char *name)
> >  {
> >      int res;
> > -    char procname[64];
> > +    struct lo_inode *parent;
> > +    char path[PATH_MAX];
> >  
> >      if (inode->is_symlink) {
> >          res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
> >          if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
> >              /* Sorry, no race free way to hard-link a symlink. */
> > -            errno = EPERM;
> > +            if (lo->norace) {
> > +                errno = EPERM;
> > +            } else {
> > +                goto fallback;
> > +            }
> >          }
> >          return res;
> >      }
> >  
> > -    sprintf(procname, "/proc/self/fd/%i", inode->fd);
> > +    sprintf(path, "/proc/self/fd/%i", inode->fd);
> > +
> > +    return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
> > +
> > +fallback:
> > +    res = lo_parent_and_name(lo, inode, path, &parent);
> > +    if (res != -1) {
> > +        res = linkat(parent->fd, path, dfd, name, 0);
> > +        unref_inode(lo, parent, 1);
> > +    }
> >  
> > -    return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
> > +    return res;
> >  }
> >  
> >  static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
> > @@ -731,7 +856,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
> >      e.attr_timeout = lo->timeout;
> >      e.entry_timeout = lo->timeout;
> >  
> > -    res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
> > +    res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
> >      if (res == -1) {
> >          goto out_err;
> >      }
> > @@ -1544,7 +1669,7 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
> >      }
> >  
> >      if (inode->is_symlink) {
> > -        /* Sorry, no race free way to setxattr on symlink. */
> > +        /* Sorry, no race free way to removexattr on symlink. */

I've undone that change; since it looks bogus.

> >          saverr = EPERM;
> >          goto out;
> >      }
> 
> Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>

Thanks.

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 93e74cce21..1faae2753f 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -98,6 +98,7 @@  enum {
 struct lo_data {
     pthread_mutex_t mutex;
     int debug;
+    int norace;
     int writeback;
     int flock;
     int xattr;
@@ -124,10 +125,15 @@  static const struct fuse_opt lo_opts[] = {
     { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
     { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
     { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
-
+    { "norace", offsetof(struct lo_data, norace), 1 },
     FUSE_OPT_END
 };
 
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
+
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
+
+
 static struct lo_data *lo_data(fuse_req_t req)
 {
     return (struct lo_data *)fuse_req_userdata(req);
@@ -347,23 +353,127 @@  static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
     fuse_reply_attr(req, &buf, lo->timeout);
 }
 
-static int utimensat_empty_nofollow(struct lo_inode *inode,
-                                    const struct timespec *tv)
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
+                              char path[PATH_MAX], struct lo_inode **parent)
 {
-    int res;
     char procname[64];
+    char *last;
+    struct stat stat;
+    struct lo_inode *p;
+    int retries = 2;
+    int res;
+
+retry:
+    sprintf(procname, "/proc/self/fd/%i", inode->fd);
+
+    res = readlink(procname, path, PATH_MAX);
+    if (res < 0) {
+        fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink failed: %m\n");
+        goto fail_noretry;
+    }
+
+    if (res >= PATH_MAX) {
+        fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink overflowed\n");
+        goto fail_noretry;
+    }
+    path[res] = '\0';
+
+    last = strrchr(path, '/');
+    if (last == NULL) {
+        /* Shouldn't happen */
+        fuse_log(
+            FUSE_LOG_WARNING,
+            "lo_parent_and_name: INTERNAL ERROR: bad path read from proc\n");
+        goto fail_noretry;
+    }
+    if (last == path) {
+        p = &lo->root;
+        pthread_mutex_lock(&lo->mutex);
+        p->refcount++;
+        pthread_mutex_unlock(&lo->mutex);
+    } else {
+        *last = '\0';
+        res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
+        if (res == -1) {
+            if (!retries) {
+                fuse_log(FUSE_LOG_WARNING,
+                         "lo_parent_and_name: failed to stat parent: %m\n");
+            }
+            goto fail;
+        }
+        p = lo_find(lo, &stat);
+        if (p == NULL) {
+            if (!retries) {
+                fuse_log(FUSE_LOG_WARNING,
+                         "lo_parent_and_name: failed to find parent\n");
+            }
+            goto fail;
+        }
+    }
+    last++;
+    res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
+    if (res == -1) {
+        if (!retries) {
+            fuse_log(FUSE_LOG_WARNING,
+                     "lo_parent_and_name: failed to stat last\n");
+        }
+        goto fail_unref;
+    }
+    if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
+        if (!retries) {
+            fuse_log(FUSE_LOG_WARNING,
+                     "lo_parent_and_name: failed to match last\n");
+        }
+        goto fail_unref;
+    }
+    *parent = p;
+    memmove(path, last, strlen(last) + 1);
+
+    return 0;
+
+fail_unref:
+    unref_inode(lo, p, 1);
+fail:
+    if (retries) {
+        retries--;
+        goto retry;
+    }
+fail_noretry:
+    errno = EIO;
+    return -1;
+}
+
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
+                           const struct timespec *tv)
+{
+    int res;
+    struct lo_inode *parent;
+    char path[PATH_MAX];
 
     if (inode->is_symlink) {
-        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
         if (res == -1 && errno == EINVAL) {
             /* Sorry, no race free way to set times on symlink. */
-            errno = EPERM;
+            if (lo->norace) {
+                errno = EPERM;
+            } else {
+                goto fallback;
+            }
         }
         return res;
     }
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
+
+    return utimensat(AT_FDCWD, path, tv, 0);
 
-    return utimensat(AT_FDCWD, procname, tv, 0);
+fallback:
+    res = lo_parent_and_name(lo, inode, path, &parent);
+    if (res != -1) {
+        res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
+        unref_inode(lo, parent, 1);
+    }
+
+    return res;
 }
 
 static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
@@ -387,6 +497,7 @@  static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
 {
     int saverr;
     char procname[64];
+    struct lo_data *lo = lo_data(req);
     struct lo_inode *inode;
     int ifd;
     int res;
@@ -459,7 +570,7 @@  static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
         if (fi) {
             res = futimens(fd, tv);
         } else {
-            res = utimensat_empty_nofollow(inode, tv);
+            res = utimensat_empty(lo, inode, tv);
         }
         if (res == -1) {
             goto out_err;
@@ -692,24 +803,38 @@  static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
     lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
 }
 
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
-                                 const char *name)
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
+                                 int dfd, const char *name)
 {
     int res;
-    char procname[64];
+    struct lo_inode *parent;
+    char path[PATH_MAX];
 
     if (inode->is_symlink) {
         res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
         if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
             /* Sorry, no race free way to hard-link a symlink. */
-            errno = EPERM;
+            if (lo->norace) {
+                errno = EPERM;
+            } else {
+                goto fallback;
+            }
         }
         return res;
     }
 
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
+
+    return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
+
+fallback:
+    res = lo_parent_and_name(lo, inode, path, &parent);
+    if (res != -1) {
+        res = linkat(parent->fd, path, dfd, name, 0);
+        unref_inode(lo, parent, 1);
+    }
 
-    return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
+    return res;
 }
 
 static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
@@ -731,7 +856,7 @@  static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
     e.attr_timeout = lo->timeout;
     e.entry_timeout = lo->timeout;
 
-    res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
+    res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
     if (res == -1) {
         goto out_err;
     }
@@ -1544,7 +1669,7 @@  static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
     }
 
     if (inode->is_symlink) {
-        /* Sorry, no race free way to setxattr on symlink. */
+        /* Sorry, no race free way to removexattr on symlink. */
         saverr = EPERM;
         goto out;
     }