Patchwork [-V3,2/8] hw/9pfs: Add file descriptor reclaim support

login
register
mail settings
Submitter Aneesh Kumar K.V
Date March 5, 2011, 5:52 p.m.
Message ID <1299347533-17047-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/85542/
State New
Headers show

Comments

Aneesh Kumar K.V - March 5, 2011, 5:52 p.m.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/virtio-9p.c |   99 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 95 insertions(+), 4 deletions(-)
Stefan Hajnoczi - March 13, 2011, 4:08 p.m.
On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> @@ -107,7 +108,12 @@ static int v9fs_do_closedir(V9fsState *s, DIR *dir)
>
>  static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
>  {
> -    return s->ops->open(&s->ctx, path->data, flags);
> +    int fd;
> +    fd = s->ops->open(&s->ctx, path->data, flags);
> +    if (fd > P9_FD_RECLAIM_THRES) {
> +        v9fs_reclaim_fd(s);
> +    }

I think the threshold should depend on the file descriptor ulimit.
The hardcoded constant doesn't work if the ulimit is set to 1000 or
less (it would cause other users in QEMU to hit EMFILE errors).

> +            if (f->fsmap.fid_type == P9_FID_FILE) {
> +                /* FIXME!! should we remember the open flags ?*/
> +                if (f->fsmap.fs.fd == -1) {
> +                    f->fsmap.fs.fd = v9fs_do_open(s, &f->fsmap.path, O_RDWR);
> +                }

Please address the FIXME.  I think the case where O_RDWR breaks is if
QEMU has permissions to open the file for read only.  Then the client
is able to open the file for read but when the file descriptor is
resurrected we'll get EPERM here.

> @@ -516,7 +600,10 @@ static int free_fid(V9fsState *s, int32_t fid)
>     *fidpp = fidp->next;
>
>     if (fidp->fsmap.fid_type == P9_FID_FILE) {
> -        v9fs_do_close(s, fidp->fsmap.fs.fd);
> +        /* I we reclaimed the fd no need to close */

s/I //

> +        if (fidp->fsmap.fs.fd != -1) {
> +            v9fs_do_close(s, fidp->fsmap.fs.fd);
> +        }
>     } else if (fidp->fsmap.fid_type == P9_FID_DIR) {
>         v9fs_do_closedir(s, fidp->fsmap.fs.dir);
>     } else if (fidp->fsmap.fid_type == P9_FID_XATTR) {
> @@ -2719,7 +2806,11 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
>         err = -EINVAL;
>         goto out;
>     }
> -
> +    /*
> +     * IF the file is unlinked, we cannot reopen
> +     * the file later. So don't reclaim fd
> +     */
> +    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);

This poses a problem for the case where guest and host are both
accessing the file system.  If the fd is reclaimed and the host
deletes the file, then the guest cannot access its open file anymore.

The same issue also affects rename and has not been covered by this patch.

Stefan
Aneesh Kumar K.V - March 13, 2011, 6:57 p.m.
On Sun, 13 Mar 2011 16:08:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > @@ -107,7 +108,12 @@ static int v9fs_do_closedir(V9fsState *s, DIR *dir)
> >
> >  static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
> >  {
> > -    return s->ops->open(&s->ctx, path->data, flags);
> > +    int fd;
> > +    fd = s->ops->open(&s->ctx, path->data, flags);
> > +    if (fd > P9_FD_RECLAIM_THRES) {
> > +        v9fs_reclaim_fd(s);
> > +    }
> 
> I think the threshold should depend on the file descriptor ulimit.
> The hardcoded constant doesn't work if the ulimit is set to 1000 or
> less (it would cause other users in QEMU to hit EMFILE errors).

Yes. That is supposed to be a follow-up patch. I had that set to 100 for
all the early testing.

> 
> > +            if (f->fsmap.fid_type == P9_FID_FILE) {
> > +                /* FIXME!! should we remember the open flags ?*/
> > +                if (f->fsmap.fs.fd == -1) {
> > +                    f->fsmap.fs.fd = v9fs_do_open(s, &f->fsmap.path, O_RDWR);
> > +                }
> 
> Please address the FIXME.  I think the case where O_RDWR breaks is if
> QEMU has permissions to open the file for read only.  The the client
> is able to open the file for read but when the file descriptor is
> resurrected we'll get EPERM here.

The FIXME is fixed in the follow-up patch (patch 5).

> 
> > @@ -516,7 +600,10 @@ static int free_fid(V9fsState *s, int32_t fid)
> >     *fidpp = fidp->next;
> >
> >     if (fidp->fsmap.fid_type == P9_FID_FILE) {
> > -        v9fs_do_close(s, fidp->fsmap.fs.fd);
> > +        /* I we reclaimed the fd no need to close */
> 
> s/I //
> 
> > +        if (fidp->fsmap.fs.fd != -1) {
> > +            v9fs_do_close(s, fidp->fsmap.fs.fd);
> > +        }
> >     } else if (fidp->fsmap.fid_type == P9_FID_DIR) {
> >         v9fs_do_closedir(s, fidp->fsmap.fs.dir);
> >     } else if (fidp->fsmap.fid_type == P9_FID_XATTR) {
> > @@ -2719,7 +2806,11 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
> >         err = -EINVAL;
> >         goto out;
> >     }
> > -
> > +    /*
> > +     * IF the file is unlinked, we cannot reopen
> > +     * the file later. So don't reclaim fd
> > +     */
> > +    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);
> 
> This poses a problem for the case where guest and host are both
> accessing the file system.  If the fd is reclaimed and the host
> deletes the file, then the guest cannot access its open file anymore.
> 
> The same issue also affects rename and has not been covered by this patch.
> 

Currently VirtFS doesn't handle host rename/unlink. That is, we walk
a name to get the fid and then use the fid to open the file. If the file
gets removed or renamed in between, we will get an EINVAL.

All that will go away once we switch to handle based open.

-aneesh
Stefan Hajnoczi - March 14, 2011, 10:13 a.m.
On Sun, Mar 13, 2011 at 6:57 PM, Aneesh Kumar K. V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> On Sun, 13 Mar 2011 16:08:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
>> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> > @@ -107,7 +108,12 @@ static int v9fs_do_closedir(V9fsState *s, DIR *dir)
>> >
>> >  static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
>> >  {
>> > -    return s->ops->open(&s->ctx, path->data, flags);
>> > +    int fd;
>> > +    fd = s->ops->open(&s->ctx, path->data, flags);
>> > +    if (fd > P9_FD_RECLAIM_THRES) {
>> > +        v9fs_reclaim_fd(s);
>> > +    }
>>
>> I think the threshold should depend on the file descriptor ulimit.
>> The hardcoded constant doesn't work if the ulimit is set to 1000 or
>> less (it would cause other users in QEMU to hit EMFILE errors).
>
> Yes. That is suppose to be a follow up patch. I had that set to 100 for
> all the early testing.

Using getrlimit(2) to choose a good threshold at runtime shouldn't be
a lot of code.  Please add it to this patch so the threshold isn't
arbitrary and possibly ineffective due to ulimit.

>> > @@ -2719,7 +2806,11 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
>> >         err = -EINVAL;
>> >         goto out;
>> >     }
>> > -
>> > +    /*
>> > +     * IF the file is unlinked, we cannot reopen
>> > +     * the file later. So don't reclaim fd
>> > +     */
>> > +    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);
>>
>> This poses a problem for the case where guest and host are both
>> accessing the file system.  If the fd is reclaimed and the host
>> deletes the file, then the guest cannot access its open file anymore.
>>
>> The same issue also affects rename and has not been covered by this patch.
>>
>
> Currently virtFS don't handle the host rename/unlink. That we walk
> a name and get the fid and then use the fid to open the file. In between
> if the file get removed/renamed we will get an EINVAL.
>
> All that will go away once we switch to handle based open.

Can you explain this more?  Will multiple entities be able to safely
use the file system (e.g. host and guest)?

Stefan
Aneesh Kumar K.V - March 15, 2011, 8:35 a.m.
On Mon, 14 Mar 2011 10:13:59 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sun, Mar 13, 2011 at 6:57 PM, Aneesh Kumar K. V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > On Sun, 13 Mar 2011 16:08:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >> > @@ -107,7 +108,12 @@ static int v9fs_do_closedir(V9fsState *s, DIR *dir)
> >> >
> >> >  static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
> >> >  {
> >> > -    return s->ops->open(&s->ctx, path->data, flags);
> >> > +    int fd;
> >> > +    fd = s->ops->open(&s->ctx, path->data, flags);
> >> > +    if (fd > P9_FD_RECLAIM_THRES) {
> >> > +        v9fs_reclaim_fd(s);
> >> > +    }
> >>
> >> I think the threshold should depend on the file descriptor ulimit.
> >> The hardcoded constant doesn't work if the ulimit is set to 1000 or
> >> less (it would cause other users in QEMU to hit EMFILE errors).
> >
> > Yes. That is suppose to be a follow up patch. I had that set to 100 for
> > all the early testing.
> 
> Using getrlimit(2) to choose a good threshold at runtime shouldn't be
> a lot of code.  Please add it to this patch so the threshold isn't
> arbitrary and possibly ineffective due to ulimit.

ok.

> 
> >> > @@ -2719,7 +2806,11 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
> >> >         err = -EINVAL;
> >> >         goto out;
> >> >     }
> >> > -
> >> > +    /*
> >> > +     * IF the file is unlinked, we cannot reopen
> >> > +     * the file later. So don't reclaim fd
> >> > +     */
> >> > +    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);
> >>
> >> This poses a problem for the case where guest and host are both
> >> accessing the file system.  If the fd is reclaimed and the host
> >> deletes the file, then the guest cannot access its open file anymore.
> >>
> >> The same issue also affects rename and has not been covered by this patch.
> >>
> >
> > Currently virtFS don't handle the host rename/unlink. That we walk
> > a name and get the fid and then use the fid to open the file. In between
> > if the file get removed/renamed we will get an EINVAL.
> >
> > All that will go away once we switch to handle based open.
> 
> Can you explain this more?  Will multiple entities be able to safely
> use the file system (e.g. host and guest)?

Handles are stable across renames. So even if the host renames the file,
QEMU will still be able to access it. But we still won't be able to handle
an unlink on the host. That is true of other file servers as well: they
get ESTALE in that case.


-aneesh

Patch

diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index a9f52c6..811ac38 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -20,6 +20,7 @@ 
 #include "virtio-9p-xattr.h"
 
 int debug_9p_pdu;
+static void v9fs_reclaim_fd(V9fsState *s);
 
 enum {
     Oread   = 0x00,
@@ -107,7 +108,12 @@  static int v9fs_do_closedir(V9fsState *s, DIR *dir)
 
 static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
 {
-    return s->ops->open(&s->ctx, path->data, flags);
+    int fd;
+    fd = s->ops->open(&s->ctx, path->data, flags);
+    if (fd > P9_FD_RECLAIM_THRES) {
+        v9fs_reclaim_fd(s);
+    }
+    return fd;
 }
 
 static DIR *v9fs_do_opendir(V9fsState *s, V9fsString *path)
@@ -188,6 +194,7 @@  static int v9fs_do_fstat(V9fsState *s, int fd, struct stat *stbuf)
 static int v9fs_do_open2(V9fsState *s, char *fullname, uid_t uid, gid_t gid,
         int flags, int mode)
 {
+    int fd;
     FsCred cred;
 
     cred_init(&cred);
@@ -196,7 +203,11 @@  static int v9fs_do_open2(V9fsState *s, char *fullname, uid_t uid, gid_t gid,
     cred.fc_mode = mode & 07777;
     flags = flags;
 
-    return s->ops->open2(&s->ctx, fullname, flags, &cred);
+    fd = s->ops->open2(&s->ctx, fullname, flags, &cred);
+    if (fd > P9_FD_RECLAIM_THRES) {
+        v9fs_reclaim_fd(s);
+    }
+    return fd;
 }
 
 static int v9fs_do_symlink(V9fsState *s, V9fsFidState *fidp,
@@ -434,6 +445,23 @@  static V9fsFidState *lookup_fid(V9fsState *s, int32_t fid)
 
     for (f = s->fid_list; f; f = f->next) {
         if (f->fid == fid) {
+            /*
+             * check whether we need to reopen the
+             * file. We might have closed the fd
+             * while trying to free up some file
+             * descriptors.
+             */
+            if (f->fsmap.fid_type == P9_FID_FILE) {
+                /* FIXME!! should we remember the open flags ?*/
+                if (f->fsmap.fs.fd == -1) {
+                    f->fsmap.fs.fd = v9fs_do_open(s, &f->fsmap.path, O_RDWR);
+                }
+            }
+            /*
+             * Mark the fid as referenced so that the LRU
+             * reclaim won't close the file descriptor
+             */
+            f->fsmap.flags |= FID_REFERENCED;
             return f;
         }
     }
@@ -461,6 +489,62 @@  static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
     return f;
 }
 
+static void v9fs_reclaim_fd(V9fsState *s)
+{
+    int reclaim_count = 0;
+    V9fsFidState *f;
+
+    for (f = s->fid_list; f; f = f->next) {
+        /*
+         * Unlink fids cannot be reclaimed. Check
+         * for them and skip them
+         */
+        if (f->fsmap.flags & FID_NON_RECLAIMABLE) {
+            continue;
+        }
+        /*
+         * if it is a recently referenced fid
+         * we leave the fid untouched and clear the
+         * reference bit. We come back to it later
+         * in the next iteration. (a simple LRU without
+         * moving list elements around)
+         */
+        if (f->fsmap.flags & FID_REFERENCED) {
+            f->fsmap.flags  &= ~FID_REFERENCED;
+            continue;
+        }
+        /*
+         * reclaim fd, by closing the file descriptors
+         */
+        if (f->fsmap.fid_type == P9_FID_FILE) {
+            if (f->fsmap.fs.fd != -1) {
+                v9fs_do_close(s, f->fsmap.fs.fd);
+                f->fsmap.fs.fd = -1;
+                reclaim_count++;
+            }
+        }
+        if (reclaim_count >= P9_FD_RECLAIM_THRES/2) {
+            break;
+        }
+    }
+}
+
+static void v9fs_mark_fids_unreclaim(V9fsState *s, V9fsString *str)
+{
+    V9fsFidState *fidp;
+    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
+        if (!strcmp(fidp->fsmap.path.data, str->data)) {
+            /* Mark the fid non reclaimable. */
+            fidp->fsmap.flags  |= FID_NON_RECLAIMABLE;
+            /* reopen the file if already closed */
+            if (fidp->fsmap.fs.fd == -1) {
+                fidp->fsmap.fs.fd = v9fs_do_open(s, &fidp->fsmap.path, O_RDWR);
+            }
+        }
+    }
+}
+
+
 static int v9fs_xattr_fid_clunk(V9fsState *s, V9fsFidState *fidp)
 {
     int retval = 0;
@@ -516,7 +600,10 @@  static int free_fid(V9fsState *s, int32_t fid)
     *fidpp = fidp->next;
 
     if (fidp->fsmap.fid_type == P9_FID_FILE) {
-        v9fs_do_close(s, fidp->fsmap.fs.fd);
+        /* I we reclaimed the fd no need to close */
+        if (fidp->fsmap.fs.fd != -1) {
+            v9fs_do_close(s, fidp->fsmap.fs.fd);
+        }
     } else if (fidp->fsmap.fid_type == P9_FID_DIR) {
         v9fs_do_closedir(s, fidp->fsmap.fs.dir);
     } else if (fidp->fsmap.fid_type == P9_FID_XATTR) {
@@ -2719,7 +2806,11 @@  static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
         err = -EINVAL;
         goto out;
     }
-
+    /*
+     * IF the file is unlinked, we cannot reopen
+     * the file later. So don't reclaim fd
+     */
+    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);
     err = v9fs_do_remove(s, &vs->fidp->fsmap.path);
     v9fs_remove_post_remove(s, vs, err);
     return;