diff mbox

[v9,6/7] block: Enable qemu_open/close to work with fd sets

Message ID 1344690878-1555-7-git-send-email-coreyb@linux.vnet.ibm.com
State New
Headers show

Commit Message

Corey Bryant Aug. 11, 2012, 1:14 p.m. UTC
When qemu_open is passed a filename of the "/dev/fdset/nnn"
format (where nnn is the fdset ID), an fd with matching access
mode flags will be searched for within the specified monitor
fd set.  If the fd is found, a dup of the fd will be returned
from qemu_open.

Signed-off-by: Corey Bryant <coreyb@linux.vnet.ibm.com>
---
v2:
 -Get rid of file_open and move dup code to qemu_open
  (kwolf@redhat.com)
 -Use strtol wrapper instead of atoi (kwolf@redhat.com)

v3:
 -Add note about fd leakage (eblake@redhat.com)

v4
 -Moved patch to be later in series (lcapitulino@redhat.com)
 -Update qemu_open to check access mode flags and set flags that
  can be set (eblake@redhat.com, kwolf@redhat.com)

v5:
 -This patch was overhauled quite a bit in this version, with
  the addition of fd set and refcount support.
 -Use qemu_set_cloexec() on dup'd fd (eblake@redhat.com)
 -Modify flags set by fcntl on dup'd fd (eblake@redhat.com)
 -Reduce syscalls when setting flags for dup'd fd (eblake@redhat.com)
 -Fix O_RDWR, O_RDONLY, O_WRONLY checks (eblake@redhat.com)

v6:
 -Pass only the fd to qemu_close() and keep track of dup fds per fd
  set. (kwolf@redhat.com, eblake@redhat.com)
 -Handle refcount incr/decr in new dup_fd_add/remove fd functions.
 -Use qemu_set_cloexec() appropriately in qemu_dup() (kwolf@redhat.com)
 -Simplify setting of setfl_flags in qemu_dup() (kwolf@redhat.com)
 -Add preprocessor checks for F_DUPFD_CLOEXEC (eblake@redhat.com)
 -Simplify flag checking in monitor_fdset_get_fd() (kwolf@redhat.com)

v7:
 -Minor updates to reference global mon_fdsets, and to remove
  default_mon usage in osdep.c. (kwolf@redhat.com)

v8:
 -Use camel case for structures. (stefanha@linux.vnet.ibm.com)

v9:
 -Drop fdset refcount and check dup_fds instead. (eblake@redhat.com)
 -Fix dupfd leak in qemu_dup(). (eblake@redhat.com)
 -Always set O_CLOEXEC in qemu_dup(). (kwolf@redhat.com)
 -Change name of qemu_dup() to qemu_dup_flags(). (kwolf@redhat.com)

 cutils.c      |    5 +++
 monitor.c     |   83 +++++++++++++++++++++++++++++++++++++++++++-
 monitor.h     |    5 +++
 osdep.c       |  108 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-common.h |    1 +
 qemu-tool.c   |   20 +++++++++++
 6 files changed, 221 insertions(+), 1 deletion(-)

Comments

Eric Blake Aug. 11, 2012, 2:28 p.m. UTC | #1
On 08/11/2012 07:14 AM, Corey Bryant wrote:
> When qemu_open is passed a filename of the "/dev/fdset/nnn"
> format (where nnn is the fdset ID), an fd with matching access
> mode flags will be searched for within the specified monitor
> fd set.  If the fd is found, a dup of the fd will be returned
> from qemu_open.
> 

> v9:
>  -Drop fdset refcount and check dup_fds instead. (eblake@redhat.com)
>  -Fix dupfd leak in qemu_dup(). (eblake@redhat.com)
>  -Always set O_CLOEXEC in qemu_dup(). (kwolf@redhat.com)
>  -Change name of qemu_dup() to qemu_dup_flags(). (kwolf@redhat.com)
> 

> @@ -87,6 +146,40 @@ int qemu_open(const char *name, int flags, ...)
>      int ret;
>      int mode = 0;
>  
> +#ifndef _WIN32
> +    const char *fdset_id_str;
> +
> +    /* Attempt dup of fd from fd set */
> +    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
> +        int64_t fdset_id;
> +        int fd, dupfd;
> +
> +        fdset_id = qemu_parse_fdset(fdset_id_str);
> +        if (fdset_id == -1) {
> +            errno = EINVAL;
> +            return -1;
> +        }
> +
> +        fd = monitor_fdset_get_fd(fdset_id, flags);
> +        if (fd == -1) {
> +            return -1;
> +        }
> +
> +        dupfd = qemu_dup_flags(fd, flags);
> +        if (fd == -1) {

Checking the wrong condition:
s/fd/dupfd/

> +            return -1;
> +        }
> +
> +        ret = monitor_fdset_dup_fd_add(fdset_id, dupfd);
> +        if (ret == -1) {
> +            close(dupfd);
> +            return -1;

This function appears to promise a reasonable errno on failure.
However, I don't think monitor_fdset_dup_fd_add guarantees a reasonable
errno, and even if it does, close() can corrupt errno.  I think that
prior to returning here, you either need an explicit errno=ENOMEM, or
fix monitor_fdset_dup_fd to guarantee a nice errno, plus a save and
restore of errno here.  Unless no one cares about errno on failure, in
which case your earlier errno=EINVAL can be dropped.
Corey Bryant Aug. 13, 2012, 1:44 p.m. UTC | #2
I'll send a new version shortly with these updates also.
Eric Blake Aug. 13, 2012, 4:16 p.m. UTC | #3
On 08/13/2012 07:44 AM, Corey Bryant wrote:
> I'll send a new version shortly with these updates also.
> 

>>> +
>>> +        ret = monitor_fdset_dup_fd_add(fdset_id, dupfd);
>>> +        if (ret == -1) {
>>> +            close(dupfd);
>>> +            return -1;
>>
>> This function appears to promise a reasonable errno on failure.

Actually, looking at that function again,


+int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd_dup;
+
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        if (mon_fdset->id != fdset_id) {
+            continue;
+        }
+        QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) {
+            if (mon_fdset_fd_dup->fd == dup_fd) {
+                return -1;
+            }
+        }
+        mon_fdset_fd_dup = g_malloc0(sizeof(*mon_fdset_fd_dup));
+        mon_fdset_fd_dup->fd = dup_fd;
+        QLIST_INSERT_HEAD(&mon_fdset->dup_fds, mon_fdset_fd_dup, next);
+        return 0;
+    }
+    return -1;
+}

The only way it could fail is if we are trying to add an fd that is
already in the set, or if we don't find mon_fdset; both of which would
indicate logic bugs earlier in our program.  Would it be worth asserting
that these conditions are impossible, and making this function return
void (the addition is always successful if it returns, since g_malloc0
aborts rather than failing with ENOMEM)?

And the more I think about it, the more I think that qemu_open MUST
provide a sane errno value on exit, so you need to make sure that all
exit paths out of qemu_open have a sensible errno (whether or not the
helper functions also have to leave errno sane is a matter of taste).
Corey Bryant Aug. 13, 2012, 4:33 p.m. UTC | #4
On 08/13/2012 12:16 PM, Eric Blake wrote:
> On 08/13/2012 07:44 AM, Corey Bryant wrote:
>> I'll send a new version shortly with these updates also.
>>
>
>>>> +
>>>> +        ret = monitor_fdset_dup_fd_add(fdset_id, dupfd);
>>>> +        if (ret == -1) {
>>>> +            close(dupfd);
>>>> +            return -1;
>>>
>>> This function appears to promise a reasonable errno on failure.
>
> Actually, looking at that function again,
>
>
> +int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
> +{
> +    MonFdset *mon_fdset;
> +    MonFdsetFd *mon_fdset_fd_dup;
> +
> +    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
> +        if (mon_fdset->id != fdset_id) {
> +            continue;
> +        }
> +        QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) {
> +            if (mon_fdset_fd_dup->fd == dup_fd) {
> +                return -1;
> +            }
> +        }
> +        mon_fdset_fd_dup = g_malloc0(sizeof(*mon_fdset_fd_dup));
> +        mon_fdset_fd_dup->fd = dup_fd;
> +        QLIST_INSERT_HEAD(&mon_fdset->dup_fds, mon_fdset_fd_dup, next);
> +        return 0;
> +    }
> +    return -1;
> +}
>
> The only way it could fail is if we are trying to add an fd that is
> already in the set, or if we don't find mon_fdset; both of which would
> indicate logic bugs earlier in our program.  Would it be worth asserting
> that these conditions are impossible, and making this function return
> void (the addition is always successful if it returns, since g_malloc0
> aborts rather than failing with ENOMEM)?

I think what I did in v10 should suffice.  I didn't update 
monitor_fdset_dup_fd_add(), but I did update the calling code.  If the 
call fails then I set errno to EINVAL since (unless there's a bug) the 
only possible error is that the fdset ID was non-existent.

It makes sense to add the asserts, but at this point I'd like to stick 
with what we have in v10 if that's ok.

>
> And the more I think about it, the more I think that qemu_open MUST
> provide a sane errno value on exit, so you need to make sure that all
> exit paths out of qemu_open have a sensible errno (whether or not the
> helper functions also have to leave errno sane is a matter of taste).
>

Yes, I agree.  I went through the code and at this point (with the v10 
patches) we're always setting errno, or calling a library API that 
should be setting it.
Eric Blake Aug. 13, 2012, 5:13 p.m. UTC | #5
On 08/13/2012 10:33 AM, Corey Bryant wrote:

>> The only way it could fail is if we are trying to add an fd that is
>> already in the set, or if we don't find mon_fdset; both of which would
>> indicate logic bugs earlier in our program.  Would it be worth asserting
>> that these conditions are impossible, and making this function return
>> void (the addition is always successful if it returns, since g_malloc0
>> aborts rather than failing with ENOMEM)?
> 
> I think what I did in v10 should suffice.  I didn't update
> monitor_fdset_dup_fd_add(), but I did update the calling code.  If the
> call fails then I set errno to EINVAL since (unless there's a bug) the
> only possible error is that the fdset ID was non-existent.
> 
> It makes sense to add the asserts, but at this point I'd like to stick
> with what we have in v10 if that's ok.

The problems of reading my inbox in FIFO order - I see now that v10
landed before my comments on v9 :)  Yes, what you did in v10 is probably
fine.
Corey Bryant Aug. 13, 2012, 5:32 p.m. UTC | #6
On 08/13/2012 01:13 PM, Eric Blake wrote:
> On 08/13/2012 10:33 AM, Corey Bryant wrote:
>
>>> The only way it could fail is if we are trying to add an fd that is
>>> already in the set, or if we don't find mon_fdset; both of which would
>>> indicate logic bugs earlier in our program.  Would it be worth asserting
>>> that these conditions are impossible, and making this function return
>>> void (the addition is always successful if it returns, since g_malloc0
>>> aborts rather than failing with ENOMEM)?
>>
>> I think what I did in v10 should suffice.  I didn't update
>> monitor_fdset_dup_fd_add(), but I did update the calling code.  If the
>> call fails then I set errno to EINVAL since (unless there's a bug) the
>> only possible error is that the fdset ID was non-existent.
>>
>> It makes sense to add the asserts, but at this point I'd like to stick
>> with what we have in v10 if that's ok.
>
> The problems of reading my inbox in FIFO order - I see now that v10
> landed before my comments on v9 :)  Yes, what you did in v10 is probably
> fine.
>

Heh.  Well thanks again for the thorough review.  If you have any other 
comments on the code let me know.  But hopefully we're all set at this 
point and can make it into QEMU 1.2.
diff mbox

Patch

diff --git a/cutils.c b/cutils.c
index 9d4c570..8b0d2bb 100644
--- a/cutils.c
+++ b/cutils.c
@@ -382,3 +382,8 @@  int qemu_parse_fd(const char *param)
     }
     return fd;
 }
+
+int qemu_parse_fdset(const char *param)
+{
+    return qemu_parse_fd(param);
+}
diff --git a/monitor.c b/monitor.c
index c1851f0..4f6d2ce 100644
--- a/monitor.c
+++ b/monitor.c
@@ -154,6 +154,7 @@  typedef struct MonFdset MonFdset;
 struct MonFdset {
     int64_t id;
     QLIST_HEAD(, MonFdsetFd) fds;
+    QLIST_HEAD(, MonFdsetFd) dup_fds;
     QLIST_ENTRY(MonFdset) next;
 };
 
@@ -2421,7 +2422,7 @@  static void monitor_fdset_cleanup(MonFdset *mon_fdset)
         }
     }
 
-    if (QLIST_EMPTY(&mon_fdset->fds)) {
+    if (QLIST_EMPTY(&mon_fdset->fds) && QLIST_EMPTY(&mon_fdset->dup_fds)) {
         QLIST_REMOVE(mon_fdset, next);
         g_free(mon_fdset);
     }
@@ -2574,6 +2575,86 @@  FdsetInfoList *qmp_query_fdsets(Error **errp)
     return fdset_list;
 }
 
+int monitor_fdset_get_fd(int64_t fdset_id, int flags)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd;
+    int mon_fd_flags;
+
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        if (mon_fdset->id != fdset_id) {
+            continue;
+        }
+        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
+            mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL);
+            if (mon_fd_flags == -1) {
+                return -1;
+            }
+
+            if ((flags & O_ACCMODE) == (mon_fd_flags & O_ACCMODE)) {
+                return mon_fdset_fd->fd;
+            }
+        }
+        errno = EACCES;
+        return -1;
+    }
+    errno = ENOENT;
+    return -1;
+}
+
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd_dup;
+
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        if (mon_fdset->id != fdset_id) {
+            continue;
+        }
+        QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) {
+            if (mon_fdset_fd_dup->fd == dup_fd) {
+                return -1;
+            }
+        }
+        mon_fdset_fd_dup = g_malloc0(sizeof(*mon_fdset_fd_dup));
+        mon_fdset_fd_dup->fd = dup_fd;
+        QLIST_INSERT_HEAD(&mon_fdset->dup_fds, mon_fdset_fd_dup, next);
+        return 0;
+    }
+    return -1;
+}
+
+static int _monitor_fdset_dup_fd_find(int dup_fd, bool remove)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd_dup;
+
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) {
+            if (mon_fdset_fd_dup->fd == dup_fd) {
+                if (remove) {
+                    QLIST_REMOVE(mon_fdset_fd_dup, next);
+                    if (QLIST_EMPTY(&mon_fdset->dup_fds)) {
+                        monitor_fdset_cleanup(mon_fdset);
+                    }
+                }
+                return mon_fdset->id;
+            }
+        }
+    }
+    return -1;
+}
+
+int monitor_fdset_dup_fd_find(int dup_fd)
+{
+    return _monitor_fdset_dup_fd_find(dup_fd, false);
+}
+
+int monitor_fdset_dup_fd_remove(int dup_fd)
+{
+    return _monitor_fdset_dup_fd_find(dup_fd, true);
+}
+
 /* mon_cmds and info_cmds would be sorted at runtime */
 static mon_cmd_t mon_cmds[] = {
 #include "hmp-commands.h"
diff --git a/monitor.h b/monitor.h
index 5f4de1b..30b660f 100644
--- a/monitor.h
+++ b/monitor.h
@@ -86,4 +86,9 @@  int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret);
 
 int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret);
 
+int monitor_fdset_get_fd(int64_t fdset_id, int flags);
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd);
+int monitor_fdset_dup_fd_remove(int dup_fd);
+int monitor_fdset_dup_fd_find(int dup_fd);
+
 #endif /* !MONITOR_H */
diff --git a/osdep.c b/osdep.c
index 7f876ae..a326639 100644
--- a/osdep.c
+++ b/osdep.c
@@ -48,6 +48,7 @@  extern int madvise(caddr_t, size_t, int);
 #include "qemu-common.h"
 #include "trace.h"
 #include "qemu_socket.h"
+#include "monitor.h"
 
 static bool fips_enabled = false;
 
@@ -78,6 +79,64 @@  int qemu_madvise(void *addr, size_t len, int advice)
 #endif
 }
 
+/*
+ * Dups an fd and sets the flags
+ */
+static int qemu_dup_flags(int fd, int flags)
+{
+    int ret;
+    int serrno;
+    int dup_flags;
+    int setfl_flags;
+
+#ifdef F_DUPFD_CLOEXEC
+    ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+#else
+    ret = dup(fd);
+    if (ret != -1) {
+        qemu_set_cloexec(ret);
+    }
+#endif
+    if (ret == -1) {
+        goto fail;
+    }
+
+    dup_flags = fcntl(ret, F_GETFL);
+    if (dup_flags == -1) {
+        goto fail;
+    }
+
+    if ((flags & O_SYNC) != (dup_flags & O_SYNC)) {
+        errno = EINVAL;
+        goto fail;
+    }
+
+    /* Set/unset flags that we can with fcntl */
+    setfl_flags = O_APPEND | O_ASYNC | O_DIRECT | O_NOATIME | O_NONBLOCK;
+    dup_flags &= ~setfl_flags;
+    dup_flags |= (flags & setfl_flags);
+    if (fcntl(ret, F_SETFL, dup_flags) == -1) {
+        goto fail;
+    }
+
+    /* Truncate the file in the cases that open() would truncate it */
+    if (flags & O_TRUNC ||
+            ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
+        if (ftruncate(ret, 0) == -1) {
+            goto fail;
+        }
+    }
+
+    return ret;
+
+fail:
+    serrno = errno;
+    if (ret != -1) {
+        close(ret);
+    }
+    errno = serrno;
+    return -1;
+}
 
 /*
  * Opens a file with FD_CLOEXEC set
@@ -87,6 +146,40 @@  int qemu_open(const char *name, int flags, ...)
     int ret;
     int mode = 0;
 
+#ifndef _WIN32
+    const char *fdset_id_str;
+
+    /* Attempt dup of fd from fd set */
+    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
+        int64_t fdset_id;
+        int fd, dupfd;
+
+        fdset_id = qemu_parse_fdset(fdset_id_str);
+        if (fdset_id == -1) {
+            errno = EINVAL;
+            return -1;
+        }
+
+        fd = monitor_fdset_get_fd(fdset_id, flags);
+        if (fd == -1) {
+            return -1;
+        }
+
+        dupfd = qemu_dup_flags(fd, flags);
+        if (fd == -1) {
+            return -1;
+        }
+
+        ret = monitor_fdset_dup_fd_add(fdset_id, dupfd);
+        if (ret == -1) {
+            close(dupfd);
+            return -1;
+        }
+
+        return dupfd;
+    }
+#endif
+
     if (flags & O_CREAT) {
         va_list ap;
 
@@ -109,6 +202,21 @@  int qemu_open(const char *name, int flags, ...)
 
 int qemu_close(int fd)
 {
+    int64_t fdset_id;
+
+    /* Close fd that was dup'd from an fdset */
+    fdset_id = monitor_fdset_dup_fd_find(fd);
+    if (fdset_id != -1) {
+        int ret;
+
+        ret = close(fd);
+        if (ret == 0) {
+            monitor_fdset_dup_fd_remove(fd);
+        }
+
+        return ret;
+    }
+
     return close(fd);
 }
 
diff --git a/qemu-common.h b/qemu-common.h
index e53126d..9becb32 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -166,6 +166,7 @@  int qemu_fls(int i);
 int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
 int qemu_parse_fd(const char *param);
+int qemu_parse_fdset(const char *param);
 
 /*
  * strtosz() suffixes used to specify the default treatment of an
diff --git a/qemu-tool.c b/qemu-tool.c
index 318c5fc..b7622f5 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -57,6 +57,26 @@  void monitor_protocol_event(MonitorEvent event, QObject *data)
 {
 }
 
+int monitor_fdset_get_fd(int64_t fdset_id, int flags)
+{
+    return -1;
+}
+
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
+{
+    return -1;
+}
+
+int monitor_fdset_dup_fd_remove(int dup_fd)
+{
+    return -1;
+}
+
+int monitor_fdset_dup_fd_find(int dup_fd)
+{
+    return -1;
+}
+
 int64_t cpu_get_clock(void)
 {
     return qemu_get_clock_ns(rt_clock);