Message ID | 150229685736.21846.2809147507731700887.stgit@bahia.lan |
---|---|
State | New |
Headers | show |
Tested-by: Zhi Yong Wu <zhiyong.wu@ucloud.cn> Regards, Zhi Yong Wu At 2017-08-10 00:40:57, "Greg Kurz" <groug@kaod.org> wrote: >This function has to ensure it doesn't follow a symlink that could be used >to escape the virtfs directory. This could be easily achieved if fchmodat() >on Linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but >it doesn't. There was an attempt to implement a new fchmodat2() syscall >with the correct semantics: > >https://patchwork.kernel.org/patch/9596301/ > >but it didn't gain much momentum. Also it was suggested to look at an O_PATH >based solution in the first place. > >The current implementation covers most use-cases, but it notably fails if: >- the target path has access rights equal to 0000 (openat() returns EACCES), > => once you've done chmod(0000) on a file, you can never chmod() again >- the target path is a UNIX domain socket (openat() returns ENXIO) > => bind() of UNIX domain sockets fails if the file is on 9pfs > >The solution is to use O_PATH: openat() now succeeds in both cases, and we >can ensure the path isn't a symlink with fstat(). The associated entry in >"/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > >The previous behavior is kept for older systems that don't have O_PATH. 
> >Signed-off-by: Greg Kurz <groug@kaod.org> >Reviewed-by: Eric Blake <eblake@redhat.com> >--- >v4: - fixed #if condition > - moved out: label above #endif > - fixed typo in changelog > - added Eric's r-b > >v3: - O_PATH in a separate block of code > - added a reference to the fchmodat2() tentative in the changelog > >v2: - renamed OPENAT_DIR_O_PATH to O_PATH_9P_UTIL and use it as a replacement > for O_PATH to avoid build breaks on O_PATH-less systems > - keep current behavior for O_PATH-less systems > - added comments > - TODO in 2.11: add _nofollow suffix to openat_dir() and openat_file() >--- > hw/9pfs/9p-local.c | 43 ++++++++++++++++++++++++++++++++++++------- > hw/9pfs/9p-util.h | 24 +++++++++++++++--------- > 2 files changed, 51 insertions(+), 16 deletions(-) > >diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c >index 6e478f4765ef..d9ef57d343c9 100644 >--- a/hw/9pfs/9p-local.c >+++ b/hw/9pfs/9p-local.c >@@ -333,17 +333,27 @@ update_map_file: > > static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > { >+ struct stat stbuf; > int fd, ret; > > /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). >- * Unfortunately, the linux kernel doesn't implement it yet. As an >- * alternative, let's open the file and use fchmod() instead. This >- * may fail depending on the permissions of the file, but it is the >- * best we can do to avoid TOCTTOU. We first try to open read-only >- * in case name points to a directory. If that fails, we try write-only >- * in case name doesn't point to a directory. >+ * Unfortunately, the linux kernel doesn't implement it yet. > */ >- fd = openat_file(dirfd, name, O_RDONLY, 0); >+ >+ /* First, we clear non-racing symlinks out of the way. */ >+ if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { >+ return -1; >+ } >+ if (S_ISLNK(stbuf.st_mode)) { >+ errno = ELOOP; >+ return -1; >+ } >+ >+ /* Access modes are ignored when O_PATH is supported. 
We try O_RDONLY and >+ * O_WRONLY for old-systems that don't support O_PATH. >+ */ >+ fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); >+#if O_PATH_9P_UTIL == 0 > if (fd == -1) { > /* In case the file is writable-only and isn't a directory. */ > if (errno == EACCES) { >@@ -357,6 +367,25 @@ static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > return -1; > } > ret = fchmod(fd, mode); >+#else >+ /* Now we handle racing symlinks. */ >+ ret = fstat(fd, &stbuf); >+ if (ret) { >+ goto out; >+ } >+ if (S_ISLNK(stbuf.st_mode)) { >+ errno = ELOOP; >+ ret = -1; >+ goto out; >+ } >+ >+ { >+ char *proc_path = g_strdup_printf("/proc/self/fd/%d", fd); >+ ret = chmod(proc_path, mode); >+ g_free(proc_path); >+ } >+out: >+#endif > close_preserve_errno(fd); > return ret; > } >diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h >index 91299a24b8af..dc0d2e29aa3b 100644 >--- a/hw/9pfs/9p-util.h >+++ b/hw/9pfs/9p-util.h >@@ -13,6 +13,12 @@ > #ifndef QEMU_9P_UTIL_H > #define QEMU_9P_UTIL_H > >+#ifdef O_PATH >+#define O_PATH_9P_UTIL O_PATH >+#else >+#define O_PATH_9P_UTIL 0 >+#endif >+ > static inline void close_preserve_errno(int fd) > { > int serrno = errno; >@@ -22,13 +28,8 @@ static inline void close_preserve_errno(int fd) > > static inline int openat_dir(int dirfd, const char *name) > { >-#ifdef O_PATH >-#define OPENAT_DIR_O_PATH O_PATH >-#else >-#define OPENAT_DIR_O_PATH 0 >-#endif > return openat(dirfd, name, >- O_DIRECTORY | O_RDONLY | O_NOFOLLOW | OPENAT_DIR_O_PATH); >+ O_DIRECTORY | O_RDONLY | O_NOFOLLOW | O_PATH_9P_UTIL); > } > > static inline int openat_file(int dirfd, const char *name, int flags, >@@ -43,9 +44,14 @@ static inline int openat_file(int dirfd, const char *name, int flags, > } > > serrno = errno; >- /* O_NONBLOCK was only needed to open the file. Let's drop it. */ >- ret = fcntl(fd, F_SETFL, flags); >- assert(!ret); >+ /* O_NONBLOCK was only needed to open the file. Let's drop it. 
We don't >+ * do that with O_PATH since fcntl(F_SETFL) isn't supported, and openat() >+ * ignored it anyway. >+ */ >+ if (!(flags & O_PATH_9P_UTIL)) { >+ ret = fcntl(fd, F_SETFL, flags); >+ assert(!ret); >+ } > errno = serrno; > return fd; > } >
On Wed, 09 Aug 2017 18:40:57 +0200 Greg Kurz <groug@kaod.org> wrote: > This function has to ensure it doesn't follow a symlink that could be used > to escape the virtfs directory. This could be easily achieved if fchmodat() > on Linux honored the AT_SYMLINK_NOFOLLOW flag as described in POSIX, but > it doesn't. There was an attempt to implement a new fchmodat2() syscall > with the correct semantics: > > https://patchwork.kernel.org/patch/9596301/ > > but it didn't gain much momentum. Also it was suggested to look at an O_PATH > based solution in the first place. > > The current implementation covers most use-cases, but it notably fails if: > - the target path has access rights equal to 0000 (openat() returns EACCES), > => once you've done chmod(0000) on a file, you can never chmod() again > - the target path is a UNIX domain socket (openat() returns ENXIO) > => bind() of UNIX domain sockets fails if the file is on 9pfs > > The solution is to use O_PATH: openat() now succeeds in both cases, and we > can ensure the path isn't a symlink with fstat(). The associated entry in > "/proc/self/fd" can hence be safely passed to the regular chmod() syscall. > > The previous behavior is kept for older systems that don't have O_PATH. 
> > Signed-off-by: Greg Kurz <groug@kaod.org> > Reviewed-by: Eric Blake <eblake@redhat.com> > --- > v4: - fixed #if condition > - moved out: label above #endif > - fixed typo in changelog > - added Eric's r-b > > v3: - O_PATH in a separate block of code > - added a reference to the fchmodat2() tentative in the changelog > > v2: - renamed OPENAT_DIR_O_PATH to O_PATH_9P_UTIL and use it as a replacement > for O_PATH to avoid build breaks on O_PATH-less systems > - keep current behavior for O_PATH-less systems > - added comments > - TODO in 2.11: add _nofollow suffix to openat_dir() and openat_file() > --- > hw/9pfs/9p-local.c | 43 ++++++++++++++++++++++++++++++++++++------- > hw/9pfs/9p-util.h | 24 +++++++++++++++--------- > 2 files changed, 51 insertions(+), 16 deletions(-) > > diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c > index 6e478f4765ef..d9ef57d343c9 100644 > --- a/hw/9pfs/9p-local.c > +++ b/hw/9pfs/9p-local.c > @@ -333,17 +333,27 @@ update_map_file: > > static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > { > + struct stat stbuf; > int fd, ret; > > /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). > - * Unfortunately, the linux kernel doesn't implement it yet. As an > - * alternative, let's open the file and use fchmod() instead. This > - * may fail depending on the permissions of the file, but it is the > - * best we can do to avoid TOCTTOU. We first try to open read-only > - * in case name points to a directory. If that fails, we try write-only > - * in case name doesn't point to a directory. > + * Unfortunately, the linux kernel doesn't implement it yet. > */ > - fd = openat_file(dirfd, name, O_RDONLY, 0); > + > + /* First, we clear non-racing symlinks out of the way. */ > + if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { > + return -1; > + } > + if (S_ISLNK(stbuf.st_mode)) { > + errno = ELOOP; > + return -1; > + } > + > + /* Access modes are ignored when O_PATH is supported. 
We try O_RDONLY and > + * O_WRONLY for old-systems that don't support O_PATH. > + */ > + fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); > +#if O_PATH_9P_UTIL == 0 > if (fd == -1) { > /* In case the file is writable-only and isn't a directory. */ > if (errno == EACCES) { > @@ -357,6 +367,25 @@ static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) > return -1; > } > ret = fchmod(fd, mode); > +#else Oops, missing fd == -1 check... > + /* Now we handle racing symlinks. */ > + ret = fstat(fd, &stbuf); > + if (ret) { > + goto out; > + } > + if (S_ISLNK(stbuf.st_mode)) { > + errno = ELOOP; > + ret = -1; > + goto out; > + } > + > + { > + char *proc_path = g_strdup_printf("/proc/self/fd/%d", fd); > + ret = chmod(proc_path, mode); > + g_free(proc_path); > + } > +out: > +#endif > close_preserve_errno(fd); > return ret; > } > diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h > index 91299a24b8af..dc0d2e29aa3b 100644 > --- a/hw/9pfs/9p-util.h > +++ b/hw/9pfs/9p-util.h > @@ -13,6 +13,12 @@ > #ifndef QEMU_9P_UTIL_H > #define QEMU_9P_UTIL_H > > +#ifdef O_PATH > +#define O_PATH_9P_UTIL O_PATH > +#else > +#define O_PATH_9P_UTIL 0 > +#endif > + > static inline void close_preserve_errno(int fd) > { > int serrno = errno; > @@ -22,13 +28,8 @@ static inline void close_preserve_errno(int fd) > > static inline int openat_dir(int dirfd, const char *name) > { > -#ifdef O_PATH > -#define OPENAT_DIR_O_PATH O_PATH > -#else > -#define OPENAT_DIR_O_PATH 0 > -#endif > return openat(dirfd, name, > - O_DIRECTORY | O_RDONLY | O_NOFOLLOW | OPENAT_DIR_O_PATH); > + O_DIRECTORY | O_RDONLY | O_NOFOLLOW | O_PATH_9P_UTIL); > } > > static inline int openat_file(int dirfd, const char *name, int flags, > @@ -43,9 +44,14 @@ static inline int openat_file(int dirfd, const char *name, int flags, > } > > serrno = errno; > - /* O_NONBLOCK was only needed to open the file. Let's drop it. 
*/ > - ret = fcntl(fd, F_SETFL, flags); > - assert(!ret); > + /* O_NONBLOCK was only needed to open the file. Let's drop it. We don't > + * do that with O_PATH since fcntl(F_SETFL) isn't supported, and openat() > + * ignored it anyway. > + */ > + if (!(flags & O_PATH_9P_UTIL)) { > + ret = fcntl(fd, F_SETFL, flags); > + assert(!ret); > + } > errno = serrno; > return fd; > } > >
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 6e478f4765ef..d9ef57d343c9 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -333,17 +333,27 @@ update_map_file: static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) { + struct stat stbuf; int fd, ret; /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). - * Unfortunately, the linux kernel doesn't implement it yet. As an - * alternative, let's open the file and use fchmod() instead. This - * may fail depending on the permissions of the file, but it is the - * best we can do to avoid TOCTTOU. We first try to open read-only - * in case name points to a directory. If that fails, we try write-only - * in case name doesn't point to a directory. + * Unfortunately, the linux kernel doesn't implement it yet. */ - fd = openat_file(dirfd, name, O_RDONLY, 0); + + /* First, we clear non-racing symlinks out of the way. */ + if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) { + return -1; + } + if (S_ISLNK(stbuf.st_mode)) { + errno = ELOOP; + return -1; + } + + /* Access modes are ignored when O_PATH is supported. We try O_RDONLY and + * O_WRONLY for old-systems that don't support O_PATH. + */ + fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0); +#if O_PATH_9P_UTIL == 0 if (fd == -1) { /* In case the file is writable-only and isn't a directory. */ if (errno == EACCES) { @@ -357,6 +367,25 @@ static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) return -1; } ret = fchmod(fd, mode); +#else + /* Now we handle racing symlinks. 
*/ + ret = fstat(fd, &stbuf); + if (ret) { + goto out; + } + if (S_ISLNK(stbuf.st_mode)) { + errno = ELOOP; + ret = -1; + goto out; + } + + { + char *proc_path = g_strdup_printf("/proc/self/fd/%d", fd); + ret = chmod(proc_path, mode); + g_free(proc_path); + } +out: +#endif close_preserve_errno(fd); return ret; } diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index 91299a24b8af..dc0d2e29aa3b 100644 --- a/hw/9pfs/9p-util.h +++ b/hw/9pfs/9p-util.h @@ -13,6 +13,12 @@ #ifndef QEMU_9P_UTIL_H #define QEMU_9P_UTIL_H +#ifdef O_PATH +#define O_PATH_9P_UTIL O_PATH +#else +#define O_PATH_9P_UTIL 0 +#endif + static inline void close_preserve_errno(int fd) { int serrno = errno; @@ -22,13 +28,8 @@ static inline void close_preserve_errno(int fd) static inline int openat_dir(int dirfd, const char *name) { -#ifdef O_PATH -#define OPENAT_DIR_O_PATH O_PATH -#else -#define OPENAT_DIR_O_PATH 0 -#endif return openat(dirfd, name, - O_DIRECTORY | O_RDONLY | O_NOFOLLOW | OPENAT_DIR_O_PATH); + O_DIRECTORY | O_RDONLY | O_NOFOLLOW | O_PATH_9P_UTIL); } static inline int openat_file(int dirfd, const char *name, int flags, @@ -43,9 +44,14 @@ static inline int openat_file(int dirfd, const char *name, int flags, } serrno = errno; - /* O_NONBLOCK was only needed to open the file. Let's drop it. */ - ret = fcntl(fd, F_SETFL, flags); - assert(!ret); + /* O_NONBLOCK was only needed to open the file. Let's drop it. We don't + * do that with O_PATH since fcntl(F_SETFL) isn't supported, and openat() + * ignored it anyway. + */ + if (!(flags & O_PATH_9P_UTIL)) { + ret = fcntl(fd, F_SETFL, flags); + assert(!ret); + } errno = serrno; return fd; }