Patchwork [3/6] hw/9pfs: Add file descriptor reclaim support

login
register
mail settings
Submitter Aneesh Kumar K.V
Date June 6, 2011, 5:16 p.m.
Message ID <1307380618-1963-3-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/99016/
State New
Headers show

Comments

Aneesh Kumar K.V - June 6, 2011, 5:16 p.m.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/codir.c            |    4 +-
 hw/9pfs/cofile.c           |   19 ++++-
 hw/9pfs/virtio-9p-coth.h   |    4 +-
 hw/9pfs/virtio-9p-device.c |    1 +
 hw/9pfs/virtio-9p.c        |  193 +++++++++++++++++++++++++++++++++++++++++++-
 hw/9pfs/virtio-9p.h        |   23 +++++-
 6 files changed, 229 insertions(+), 15 deletions(-)
jvrao - June 10, 2011, 1:27 a.m.
On 06/06/2011 10:16 AM, Aneesh Kumar K.V wrote:
> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
Minor comments below; I think this looks good.

Reviewed-by : Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>

> ---
>   hw/9pfs/codir.c            |    4 +-
>   hw/9pfs/cofile.c           |   19 ++++-
>   hw/9pfs/virtio-9p-coth.h   |    4 +-
>   hw/9pfs/virtio-9p-device.c |    1 +
>   hw/9pfs/virtio-9p.c        |  193 +++++++++++++++++++++++++++++++++++++++++++-
>   hw/9pfs/virtio-9p.h        |   23 +++++-
>   6 files changed, 229 insertions(+), 15 deletions(-)
>
> diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> index 783d279..3347038 100644
> --- a/hw/9pfs/codir.c
> +++ b/hw/9pfs/codir.c
> @@ -101,12 +101,10 @@ int v9fs_co_opendir(V9fsState *s, V9fsFidState *fidp)
>       return err;
>   }
>
> -int v9fs_co_closedir(V9fsState *s, V9fsFidState *fidp)
> +int v9fs_co_closedir(V9fsState *s, DIR *dir)
>   {
>       int err;
> -    DIR *dir;
>
> -    dir = fidp->fs.dir;
>       v9fs_co_run_in_worker(
>           {
>               err = s->ops->closedir(&s->ctx, dir);
> diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
> index e388146..0caf1e3 100644
> --- a/hw/9pfs/cofile.c
> +++ b/hw/9pfs/cofile.c
> @@ -58,6 +58,12 @@ int v9fs_co_open(V9fsState *s, V9fsFidState *fidp, int flags)
>                   err = 0;
>               }
>           });
> +    if (!err) {
> +        total_open_fd++;
> +        if (total_open_fd>  open_fd_hw) {
> +            v9fs_reclaim_fd(s);
> +        }
> +    }
>       return err;
>   }
>
> @@ -79,15 +85,19 @@ int v9fs_co_open2(V9fsState *s, V9fsFidState *fidp, char *fullname, gid_t gid,
>                   err = -errno;
>               }
>           });
> +    if (!err) {
> +        total_open_fd++;
> +        if (total_open_fd>  open_fd_hw) {
> +            v9fs_reclaim_fd(s);
> +        }
> +    }
>       return err;
>   }
>
> -int v9fs_co_close(V9fsState *s, V9fsFidState *fidp)
> +int v9fs_co_close(V9fsState *s, int fd)
>   {
> -    int fd;
>       int err;
>
> -    fd = fidp->fs.fd;
>       v9fs_co_run_in_worker(
>           {
>               err = s->ops->close(&s->ctx, fd);
> @@ -95,6 +105,9 @@ int v9fs_co_close(V9fsState *s, V9fsFidState *fidp)
>                   err = -errno;
>               }
>           });
> +    if (!err) {
> +        total_open_fd--;
> +    }
>       return err;
>   }
>
> diff --git a/hw/9pfs/virtio-9p-coth.h b/hw/9pfs/virtio-9p-coth.h
> index 48defb7..b7be9b5 100644
> --- a/hw/9pfs/virtio-9p-coth.h
> +++ b/hw/9pfs/virtio-9p-coth.h
> @@ -83,8 +83,8 @@ extern int v9fs_co_open2(V9fsState *, V9fsFidState *, char *, gid_t, int, int);
>   extern int v9fs_co_lsetxattr(V9fsState *, V9fsString *, V9fsString *,
>                                void *, size_t, int);
>   extern int v9fs_co_lremovexattr(V9fsState *, V9fsString *, V9fsString *);
> -extern int v9fs_co_closedir(V9fsState *, V9fsFidState *);
> -extern int v9fs_co_close(V9fsState *, V9fsFidState *);
> +extern int v9fs_co_closedir(V9fsState *, DIR *);
> +extern int v9fs_co_close(V9fsState *, int);
>   extern int v9fs_co_fsync(V9fsState *, V9fsFidState *, int);
>   extern int v9fs_co_symlink(V9fsState *, V9fsFidState *, const char *,
>                              const char *, gid_t);
> diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
> index 7811103..70629b2 100644
> --- a/hw/9pfs/virtio-9p-device.c
> +++ b/hw/9pfs/virtio-9p-device.c
> @@ -171,6 +171,7 @@ static PCIDeviceInfo virtio_9p_info = {
>   static void virtio_9p_register_devices(void)
>   {
>       pci_qdev_register(&virtio_9p_info);
> +    virtio_9p_set_fd_limit();
>   }
>
>   device_init(virtio_9p_register_devices)
> diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
> index f3127a5..21e07fb 100644
> --- a/hw/9pfs/virtio-9p.c
> +++ b/hw/9pfs/virtio-9p.c
> @@ -22,6 +22,9 @@
>   #include "virtio-9p-coth.h"
>
>   int debug_9p_pdu;
> +int open_fd_hw;
> +int total_open_fd;
> +static int open_fd_rc;
>
>   enum {
>       Oread   = 0x00,
> @@ -234,11 +237,40 @@ static size_t v9fs_string_size(V9fsString *str)
>
>   static V9fsFidState *get_fid(V9fsState *s, int32_t fid)
>   {
> +    int err;
>       V9fsFidState *f;
>
>       for (f = s->fid_list; f; f = f->next) {
>           if (f->fid == fid&&  !f->clunked) {
> +            /*
> +             * Update the fid ref upfront so that
> +             * we don't get reclaimed when we yield
> +             * in open later.
> +             */
>               f->ref++;
> +
> +            /*
> +             * check whether we need to reopen the
> +             * file. We might have closed the fd
> +             * while trying to free up some file
> +             * descriptors.
> +             */
> +            if (f->fid_type == P9_FID_FILE) {
> +                if (f->fs.fd == -1) {
> +                    do {
> +                        err = v9fs_co_open(s, f, f->open_flags);
> +                    } while (err == -EINTR);
> +                    if (err<  0) {
> +                        f->ref--;
> +                        return NULL;
> +                    }
> +                }
> +            }
> +            /*
> +             * Mark the fid as referenced so that the LRU
> +             * reclaim won't close the file descriptor
> +             */
> +            f->flags |= FID_REFERENCED;
>               return f;
>           }
>       }
> @@ -259,6 +291,11 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
>       f->fid = fid;
>       f->fid_type = P9_FID_NONE;
>       f->ref = 1;
> +    /*
> +     * Mark the fid as referenced so that the LRU
> +     * reclaim won't close the file descriptor
> +     */
> +    f->flags |= FID_REFERENCED;
>       f->next = s->fid_list;
>       s->fid_list = f;
>
> @@ -304,9 +341,12 @@ static int release_fid(V9fsState *s, V9fsFidState *fidp)
>       int retval = 0;
>
>       if (fidp->fid_type == P9_FID_FILE) {
> -        retval = v9fs_co_close(s, fidp);
> +        /* If we reclaimed the fd no need to close */
> +        if (fidp->fs.fd != -1) {
> +            retval = v9fs_co_close(s, fidp->fs.fd);
> +        }
>       } else if (fidp->fid_type == P9_FID_DIR) {
> -        retval = v9fs_co_closedir(s, fidp);
> +        retval = v9fs_co_closedir(s, fidp->fs.dir);
>       } else if (fidp->fid_type == P9_FID_XATTR) {
>           retval = v9fs_xattr_fid_clunk(s, fidp);
>       }
> @@ -319,7 +359,10 @@ static void put_fid(V9fsState *s, V9fsFidState *fidp)
>   {
>       BUG_ON(!fidp->ref);
>       fidp->ref--;
> -    if (!fidp->ref&&  fidp->clunked) {
> +    /*
> +     * Don't free the fid if it is in reclaim list
> +     */
> +    if (!fidp->ref&&  fidp->clunked&&  !fidp->rclm_lst) {
>           release_fid(s, fidp);
>       }
>   }
> @@ -343,6 +386,105 @@ static int free_fid(V9fsState *s, int32_t fid)
>       return 0;
>   }
>
> +void v9fs_reclaim_fd(V9fsState *s)
> +{
> +    int reclaim_count = 0;
> +    V9fsFidState *f, head_fid, *reclaim_list = NULL;
> +
> +    /*
> +     * We don't want multiple coroutine to do reclaim
> +     */
> +    if (s->reclaim_in_prgs) {
> +        return;
> +    }
> +    s->reclaim_in_prgs = 1;
> +
> +    head_fid.next = s->fid_list;
> +    for (f = s->fid_list; f; f = f->next) {
> +        /*
> +         * Unlink fids cannot be reclaimed. Check
> +         * for them and skip them. Also skip fids
> +         * currently being operated on.
> +         */
> +        if (f->ref || f->flags&  FID_NON_RECLAIMABLE) {
> +            continue;
> +        }
> +        /*
> +         * if it is a recently referenced fid
> +         * we leave the fid untouched and clear the
> +         * reference bit. We come back to it later
> +         * in the next iteration. (a simple LRU without
> +         * moving list elements around)
> +         */
> +        if (f->flags&  FID_REFERENCED) {
> +            f->flags&= ~FID_REFERENCED;
> +            continue;
> +        }
> +        /*
> +         * Add fids to reclaim list.
> +         */
> +        if (f->fid_type == P9_FID_FILE) {
> +            if (f->fs.fd != -1) {
> +                f->rclm_lst = reclaim_list;
> +                reclaim_list = f;
> +                f->fs_reclaim.fd = f->fs.fd;
> +                f->fs.fd = -1;
> +                reclaim_count++;
> +            }
> +        }
> +        if (reclaim_count>= open_fd_rc) {
> +            break;
> +        }
> +    }
> +    /*
> +     * Now close the fid in reclaim list. Free them if they
> +     * are already clunked.
> +     */
> +    while (reclaim_list) {
> +        f = reclaim_list;
> +        reclaim_list = f->rclm_lst;
> +        if (f->clunked) {
> +            release_fid(s, f);
> +        } else {
> +            if (f->fid_type == P9_FID_FILE) {
> +                v9fs_co_close(s, f->fs_reclaim.fd);
> +            }
> +            f->rclm_lst = NULL;
> +        }
> +    }
> +    s->reclaim_in_prgs = 0;
> +}
> +
> +static int v9fs_mark_fids_unreclaim(V9fsState *s, V9fsString *str)
> +{
> +    int err;
> +    V9fsFidState *fidp, head_fid;
> +
> +    head_fid.next = s->fid_list;
> +    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
> +        if (!strcmp(fidp->path.data, str->data)) {
&& !( fidp->flags && FID_NON_RECLAIMABLE)  To avoid unnecessarily 
resetting the flag.
> +            /* Mark the fid non reclaimable. */
> +            fidp->flags |= FID_NON_RECLAIMABLE;
> +            /* reopen the file if already closed */
> +            if (fidp->fs.fd == -1) {
> +                do {
> +                    err = v9fs_co_open(s, fidp, fidp->open_flags);
> +                } while (err == -EINTR);
> +                if (err<  0) {
> +                    return -1;
> +                }
> +                /*
> +                 * Go back to head of fid list because
> +                 * the list could have got updated when
> +                 * switched to the worker thread
> +                 */
> +                fidp =&head_fid;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
>   #define P9_QID_TYPE_DIR         0x80
>   #define P9_QID_TYPE_SYMLINK     0x02
>
> @@ -1388,6 +1530,14 @@ static void v9fs_open(void *opaque)
>               goto out;
>           }
>           fidp->fid_type = P9_FID_FILE;
> +        fidp->open_flags = flags;
> +        if (flags&  O_EXCL) {
> +            /*
> +             * We let the host file system do O_EXCL check
> +             * We should not reclaim such fd
> +             */
> +            fidp->flags |= FID_NON_RECLAIMABLE;
> +        }
>           iounit = get_iounit(s,&fidp->path);
>           offset += pdu_marshal(pdu, offset, "Qd",&qid, iounit);
>           err = offset;
> @@ -1432,6 +1582,14 @@ static void v9fs_lcreate(void *opaque)
>           goto out;
>       }
>       fidp->fid_type = P9_FID_FILE;
> +    fidp->open_flags = flags;
> +    if (flags&  O_EXCL) {
> +        /*
> +         * We let the host file system do O_EXCL check
> +         * We should not reclaim such fd
> +         */
> +        fidp->flags |= FID_NON_RECLAIMABLE;
> +    }
>       iounit =  get_iounit(pdu->s,&fullname);
>
>       err = v9fs_co_lstat(pdu->s,&fullname,&stbuf);
> @@ -1993,6 +2151,14 @@ static void v9fs_create(void *opaque)
>               goto out;
>           }
>           fidp->fid_type = P9_FID_FILE;
> +        fidp->open_flags = omode_to_uflags(mode);
> +        if (fidp->open_flags&  O_EXCL) {
> +            /*
> +             * We let the host file system do O_EXCL check
> +             * We should not reclaim such fd
> +             */
> +            fidp->flags |= FID_NON_RECLAIMABLE;
> +        }
>       }
>       err = v9fs_co_lstat(pdu->s,&fullname,&stbuf);
>       if (err<  0) {
> @@ -2126,11 +2292,19 @@ static void v9fs_remove(void *opaque)
>           err = -EINVAL;
>           goto out_nofid;
>       }
> +    /*
> +     * IF the file is unlinked, we cannot reopen
> +     * the file later. So don't reclaim fd
> +     */
> +    err = v9fs_mark_fids_unreclaim(pdu->s,&fidp->path);
> +    if (err<  0) {
> +        goto out_err;
> +    }
>       err = v9fs_co_remove(pdu->s,&fidp->path);
>       if (!err) {
>           err = offset;
>       }
> -
> +out_err:
>       /* For TREMOVE we need to clunk the fid even on failed remove */
>       free_fid(pdu->s, fidp->fid);
>       put_fid(pdu->s, fidp);
> @@ -2826,3 +3000,14 @@ void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
>       }
>       free_pdu(s, pdu);
>   }
> +
> +void virtio_9p_set_fd_limit(void)
> +{
> +    struct rlimit rlim;
> +    if (getrlimit(RLIMIT_NOFILE,&rlim)<  0) {
> +        fprintf(stderr, "Failed to get the resource limit\n");
> +        exit(1);
> +    }
> +    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
> +    open_fd_rc = rlim.rlim_cur/2;
Why not call it LW?
> +}
> diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h
> index e16e5f4..36aa4a5 100644
> --- a/hw/9pfs/virtio-9p.h
> +++ b/hw/9pfs/virtio-9p.h
> @@ -5,6 +5,7 @@
>   #include<dirent.h>
>   #include<sys/time.h>
>   #include<utime.h>
> +#include<sys/resource.h>
>   #include "hw/virtio.h"
>   #include "fsdev/file-op-9p.h"
>
> @@ -101,6 +102,9 @@ enum p9_proto_version {
>   #define P9_NOTAG    (u16)(~0)
>   #define P9_NOFID    (u32)(~0)
>   #define P9_MAXWELEM 16
> +
> +#define FID_REFERENCED          0x1
> +#define FID_NON_RECLAIMABLE     0x2
>   static inline const char *rpath(FsContext *ctx, const char *path, char *buffer)
>   {
>       snprintf(buffer, PATH_MAX, "%s/%s", ctx->fs_root, path);
> @@ -198,14 +202,21 @@ struct V9fsFidState
>       int32_t fid;
>       V9fsString path;
>       union {
> -	int fd;
> -	DIR *dir;
> -	V9fsXattr xattr;
> +        int fd;
> +        DIR *dir;
> +        V9fsXattr xattr;
>       } fs;
> +    union {
> +        int fd;
> +        DIR *dir;
> +    } fs_reclaim;
> +    int flags;
> +    int open_flags;
>       uid_t uid;
>       int ref;
>       int clunked;
>       V9fsFidState *next;
> +    V9fsFidState *rclm_lst;
>   };
>
>   typedef struct V9fsState
> @@ -222,6 +233,7 @@ typedef struct V9fsState
>       size_t config_size;
>       enum p9_proto_version proto_version;
>       int32_t msize;
> +    int reclaim_in_prgs;
>   } V9fsState;
>
>   typedef struct V9fsStatState {
> @@ -354,6 +366,9 @@ typedef struct V9fsGetlock
>       V9fsString client_id;
>   } V9fsGetlock;
>
> +extern int open_fd_hw;
> +extern int total_open_fd;
> +
>   size_t pdu_packunpack(void *addr, struct iovec *sg, int sg_count,
>                         size_t offset, size_t size, int pack);
>
> @@ -364,4 +379,6 @@ static inline size_t do_pdu_unpack(void *dst, struct iovec *sg, int sg_count,
>   }
>
>   extern void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq);
> +extern void virtio_9p_set_fd_limit(void);
> +extern void v9fs_reclaim_fd(V9fsState *s);
>   #endif

Patch

diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index 783d279..3347038 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -101,12 +101,10 @@  int v9fs_co_opendir(V9fsState *s, V9fsFidState *fidp)
     return err;
 }
 
-int v9fs_co_closedir(V9fsState *s, V9fsFidState *fidp)
+int v9fs_co_closedir(V9fsState *s, DIR *dir)
 {
     int err;
-    DIR *dir;
 
-    dir = fidp->fs.dir;
     v9fs_co_run_in_worker(
         {
             err = s->ops->closedir(&s->ctx, dir);
diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
index e388146..0caf1e3 100644
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -58,6 +58,12 @@  int v9fs_co_open(V9fsState *s, V9fsFidState *fidp, int flags)
                 err = 0;
             }
         });
+    if (!err) {
+        total_open_fd++;
+        if (total_open_fd > open_fd_hw) {
+            v9fs_reclaim_fd(s);
+        }
+    }
     return err;
 }
 
@@ -79,15 +85,19 @@  int v9fs_co_open2(V9fsState *s, V9fsFidState *fidp, char *fullname, gid_t gid,
                 err = -errno;
             }
         });
+    if (!err) {
+        total_open_fd++;
+        if (total_open_fd > open_fd_hw) {
+            v9fs_reclaim_fd(s);
+        }
+    }
     return err;
 }
 
-int v9fs_co_close(V9fsState *s, V9fsFidState *fidp)
+int v9fs_co_close(V9fsState *s, int fd)
 {
-    int fd;
     int err;
 
-    fd = fidp->fs.fd;
     v9fs_co_run_in_worker(
         {
             err = s->ops->close(&s->ctx, fd);
@@ -95,6 +105,9 @@  int v9fs_co_close(V9fsState *s, V9fsFidState *fidp)
                 err = -errno;
             }
         });
+    if (!err) {
+        total_open_fd--;
+    }
     return err;
 }
 
diff --git a/hw/9pfs/virtio-9p-coth.h b/hw/9pfs/virtio-9p-coth.h
index 48defb7..b7be9b5 100644
--- a/hw/9pfs/virtio-9p-coth.h
+++ b/hw/9pfs/virtio-9p-coth.h
@@ -83,8 +83,8 @@  extern int v9fs_co_open2(V9fsState *, V9fsFidState *, char *, gid_t, int, int);
 extern int v9fs_co_lsetxattr(V9fsState *, V9fsString *, V9fsString *,
                              void *, size_t, int);
 extern int v9fs_co_lremovexattr(V9fsState *, V9fsString *, V9fsString *);
-extern int v9fs_co_closedir(V9fsState *, V9fsFidState *);
-extern int v9fs_co_close(V9fsState *, V9fsFidState *);
+extern int v9fs_co_closedir(V9fsState *, DIR *);
+extern int v9fs_co_close(V9fsState *, int);
 extern int v9fs_co_fsync(V9fsState *, V9fsFidState *, int);
 extern int v9fs_co_symlink(V9fsState *, V9fsFidState *, const char *,
                            const char *, gid_t);
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 7811103..70629b2 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -171,6 +171,7 @@  static PCIDeviceInfo virtio_9p_info = {
 static void virtio_9p_register_devices(void)
 {
     pci_qdev_register(&virtio_9p_info);
+    virtio_9p_set_fd_limit();
 }
 
 device_init(virtio_9p_register_devices)
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index f3127a5..21e07fb 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -22,6 +22,9 @@ 
 #include "virtio-9p-coth.h"
 
 int debug_9p_pdu;
+int open_fd_hw;
+int total_open_fd;
+static int open_fd_rc;
 
 enum {
     Oread   = 0x00,
@@ -234,11 +237,40 @@  static size_t v9fs_string_size(V9fsString *str)
 
 static V9fsFidState *get_fid(V9fsState *s, int32_t fid)
 {
+    int err;
     V9fsFidState *f;
 
     for (f = s->fid_list; f; f = f->next) {
         if (f->fid == fid && !f->clunked) {
+            /*
+             * Update the fid ref upfront so that
+             * we don't get reclaimed when we yield
+             * in open later.
+             */
             f->ref++;
+
+            /*
+             * check whether we need to reopen the
+             * file. We might have closed the fd
+             * while trying to free up some file
+             * descriptors.
+             */
+            if (f->fid_type == P9_FID_FILE) {
+                if (f->fs.fd == -1) {
+                    do {
+                        err = v9fs_co_open(s, f, f->open_flags);
+                    } while (err == -EINTR);
+                    if (err < 0) {
+                        f->ref--;
+                        return NULL;
+                    }
+                }
+            }
+            /*
+             * Mark the fid as referenced so that the LRU
+             * reclaim won't close the file descriptor
+             */
+            f->flags |= FID_REFERENCED;
             return f;
         }
     }
@@ -259,6 +291,11 @@  static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
     f->fid = fid;
     f->fid_type = P9_FID_NONE;
     f->ref = 1;
+    /*
+     * Mark the fid as referenced so that the LRU
+     * reclaim won't close the file descriptor
+     */
+    f->flags |= FID_REFERENCED;
     f->next = s->fid_list;
     s->fid_list = f;
 
@@ -304,9 +341,12 @@  static int release_fid(V9fsState *s, V9fsFidState *fidp)
     int retval = 0;
 
     if (fidp->fid_type == P9_FID_FILE) {
-        retval = v9fs_co_close(s, fidp);
+        /* If we reclaimed the fd no need to close */
+        if (fidp->fs.fd != -1) {
+            retval = v9fs_co_close(s, fidp->fs.fd);
+        }
     } else if (fidp->fid_type == P9_FID_DIR) {
-        retval = v9fs_co_closedir(s, fidp);
+        retval = v9fs_co_closedir(s, fidp->fs.dir);
     } else if (fidp->fid_type == P9_FID_XATTR) {
         retval = v9fs_xattr_fid_clunk(s, fidp);
     }
@@ -319,7 +359,10 @@  static void put_fid(V9fsState *s, V9fsFidState *fidp)
 {
     BUG_ON(!fidp->ref);
     fidp->ref--;
-    if (!fidp->ref && fidp->clunked) {
+    /*
+     * Don't free the fid if it is in reclaim list
+     */
+    if (!fidp->ref && fidp->clunked && !fidp->rclm_lst) {
         release_fid(s, fidp);
     }
 }
@@ -343,6 +386,105 @@  static int free_fid(V9fsState *s, int32_t fid)
     return 0;
 }
 
+void v9fs_reclaim_fd(V9fsState *s)
+{
+    int reclaim_count = 0;
+    V9fsFidState *f, head_fid, *reclaim_list = NULL;
+
+    /*
+     * We don't want multiple coroutine to do reclaim
+     */
+    if (s->reclaim_in_prgs) {
+        return;
+    }
+    s->reclaim_in_prgs = 1;
+
+    head_fid.next = s->fid_list;
+    for (f = s->fid_list; f; f = f->next) {
+        /*
+         * Unlink fids cannot be reclaimed. Check
+         * for them and skip them. Also skip fids
+         * currently being operated on.
+         */
+        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
+            continue;
+        }
+        /*
+         * if it is a recently referenced fid
+         * we leave the fid untouched and clear the
+         * reference bit. We come back to it later
+         * in the next iteration. (a simple LRU without
+         * moving list elements around)
+         */
+        if (f->flags & FID_REFERENCED) {
+            f->flags &= ~FID_REFERENCED;
+            continue;
+        }
+        /*
+         * Add fids to reclaim list.
+         */
+        if (f->fid_type == P9_FID_FILE) {
+            if (f->fs.fd != -1) {
+                f->rclm_lst = reclaim_list;
+                reclaim_list = f;
+                f->fs_reclaim.fd = f->fs.fd;
+                f->fs.fd = -1;
+                reclaim_count++;
+            }
+        }
+        if (reclaim_count >= open_fd_rc) {
+            break;
+        }
+    }
+    /*
+     * Now close the fid in reclaim list. Free them if they
+     * are already clunked.
+     */
+    while (reclaim_list) {
+        f = reclaim_list;
+        reclaim_list = f->rclm_lst;
+        if (f->clunked) {
+            release_fid(s, f);
+        } else {
+            if (f->fid_type == P9_FID_FILE) {
+                v9fs_co_close(s, f->fs_reclaim.fd);
+            }
+            f->rclm_lst = NULL;
+        }
+    }
+    s->reclaim_in_prgs = 0;
+}
+
+static int v9fs_mark_fids_unreclaim(V9fsState *s, V9fsString *str)
+{
+    int err;
+    V9fsFidState *fidp, head_fid;
+
+    head_fid.next = s->fid_list;
+    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
+        if (!strcmp(fidp->path.data, str->data)) {
+            /* Mark the fid non reclaimable. */
+            fidp->flags |= FID_NON_RECLAIMABLE;
+            /* reopen the file if already closed */
+            if (fidp->fs.fd == -1) {
+                do {
+                    err = v9fs_co_open(s, fidp, fidp->open_flags);
+                } while (err == -EINTR);
+                if (err < 0) {
+                    return -1;
+                }
+                /*
+                 * Go back to head of fid list because
+                 * the list could have got updated when
+                 * switched to the worker thread
+                 */
+                fidp = &head_fid;
+            }
+        }
+    }
+    return 0;
+}
+
 #define P9_QID_TYPE_DIR         0x80
 #define P9_QID_TYPE_SYMLINK     0x02
 
@@ -1388,6 +1530,14 @@  static void v9fs_open(void *opaque)
             goto out;
         }
         fidp->fid_type = P9_FID_FILE;
+        fidp->open_flags = flags;
+        if (flags & O_EXCL) {
+            /*
+             * We let the host file system do O_EXCL check
+             * We should not reclaim such fd
+             */
+            fidp->flags |= FID_NON_RECLAIMABLE;
+        }
         iounit = get_iounit(s, &fidp->path);
         offset += pdu_marshal(pdu, offset, "Qd", &qid, iounit);
         err = offset;
@@ -1432,6 +1582,14 @@  static void v9fs_lcreate(void *opaque)
         goto out;
     }
     fidp->fid_type = P9_FID_FILE;
+    fidp->open_flags = flags;
+    if (flags & O_EXCL) {
+        /*
+         * We let the host file system do O_EXCL check
+         * We should not reclaim such fd
+         */
+        fidp->flags |= FID_NON_RECLAIMABLE;
+    }
     iounit =  get_iounit(pdu->s, &fullname);
 
     err = v9fs_co_lstat(pdu->s, &fullname, &stbuf);
@@ -1993,6 +2151,14 @@  static void v9fs_create(void *opaque)
             goto out;
         }
         fidp->fid_type = P9_FID_FILE;
+        fidp->open_flags = omode_to_uflags(mode);
+        if (fidp->open_flags & O_EXCL) {
+            /*
+             * We let the host file system do O_EXCL check
+             * We should not reclaim such fd
+             */
+            fidp->flags |= FID_NON_RECLAIMABLE;
+        }
     }
     err = v9fs_co_lstat(pdu->s, &fullname, &stbuf);
     if (err < 0) {
@@ -2126,11 +2292,19 @@  static void v9fs_remove(void *opaque)
         err = -EINVAL;
         goto out_nofid;
     }
+    /*
+     * IF the file is unlinked, we cannot reopen
+     * the file later. So don't reclaim fd
+     */
+    err = v9fs_mark_fids_unreclaim(pdu->s, &fidp->path);
+    if (err < 0) {
+        goto out_err;
+    }
     err = v9fs_co_remove(pdu->s, &fidp->path);
     if (!err) {
         err = offset;
     }
-
+out_err:
     /* For TREMOVE we need to clunk the fid even on failed remove */
     free_fid(pdu->s, fidp->fid);
     put_fid(pdu->s, fidp);
@@ -2826,3 +3000,14 @@  void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
     }
     free_pdu(s, pdu);
 }
+
+void virtio_9p_set_fd_limit(void)
+{
+    struct rlimit rlim;
+    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
+        fprintf(stderr, "Failed to get the resource limit\n");
+        exit(1);
+    }
+    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
+    open_fd_rc = rlim.rlim_cur/2;
+}
diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h
index e16e5f4..36aa4a5 100644
--- a/hw/9pfs/virtio-9p.h
+++ b/hw/9pfs/virtio-9p.h
@@ -5,6 +5,7 @@ 
 #include <dirent.h>
 #include <sys/time.h>
 #include <utime.h>
+#include <sys/resource.h>
 #include "hw/virtio.h"
 #include "fsdev/file-op-9p.h"
 
@@ -101,6 +102,9 @@  enum p9_proto_version {
 #define P9_NOTAG    (u16)(~0)
 #define P9_NOFID    (u32)(~0)
 #define P9_MAXWELEM 16
+
+#define FID_REFERENCED          0x1
+#define FID_NON_RECLAIMABLE     0x2
 static inline const char *rpath(FsContext *ctx, const char *path, char *buffer)
 {
     snprintf(buffer, PATH_MAX, "%s/%s", ctx->fs_root, path);
@@ -198,14 +202,21 @@  struct V9fsFidState
     int32_t fid;
     V9fsString path;
     union {
-	int fd;
-	DIR *dir;
-	V9fsXattr xattr;
+        int fd;
+        DIR *dir;
+        V9fsXattr xattr;
     } fs;
+    union {
+        int fd;
+        DIR *dir;
+    } fs_reclaim;
+    int flags;
+    int open_flags;
     uid_t uid;
     int ref;
     int clunked;
     V9fsFidState *next;
+    V9fsFidState *rclm_lst;
 };
 
 typedef struct V9fsState
@@ -222,6 +233,7 @@  typedef struct V9fsState
     size_t config_size;
     enum p9_proto_version proto_version;
     int32_t msize;
+    int reclaim_in_prgs;
 } V9fsState;
 
 typedef struct V9fsStatState {
@@ -354,6 +366,9 @@  typedef struct V9fsGetlock
     V9fsString client_id;
 } V9fsGetlock;
 
+extern int open_fd_hw;
+extern int total_open_fd;
+
 size_t pdu_packunpack(void *addr, struct iovec *sg, int sg_count,
                       size_t offset, size_t size, int pack);
 
@@ -364,4 +379,6 @@  static inline size_t do_pdu_unpack(void *dst, struct iovec *sg, int sg_count,
 }
 
 extern void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq);
+extern void virtio_9p_set_fd_limit(void);
+extern void v9fs_reclaim_fd(V9fsState *s);
 #endif