diff mbox series

[v2,7/7] 9pfs: reduce latency of Twalk

Message ID 1a6701674afc4f08d40396e3aa2631e18a4dbb33.1622821729.git.qemu_oss@crudebyte.com
State New
Headers show
Series 9pfs: Twalk optimization | expand

Commit Message

Christian Schoenebeck June 4, 2021, 3:38 p.m. UTC
As with previous performance optimization on Treaddir handling;
reduce the overall latency, i.e. overall time spent on processing
a Twalk request by reducing the amount of thread hops between the
9p server's main thread and fs worker thread(s).

In fact this patch even reduces the thread hops for Twalk handling
to its theoritical minimum of exactly 2 thread hops:

main thread -> fs worker thread -> main thread

This is achieved by doing all the required fs driver tasks altogether
in a single v9fs_co_run_in_worker({ ... }); code block.

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
---
 hw/9pfs/9p.c | 89 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 70 insertions(+), 19 deletions(-)

Comments

Greg Kurz July 2, 2021, 2:36 p.m. UTC | #1
On Fri, 4 Jun 2021 17:38:31 +0200
Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:

> As with previous performance optimization on Treaddir handling;
> reduce the overall latency, i.e. overall time spent on processing
> a Twalk request by reducing the amount of thread hops between the
> 9p server's main thread and fs worker thread(s).
> 
> In fact this patch even reduces the thread hops for Twalk handling
> to its theoritical minimum of exactly 2 thread hops:
> 
> main thread -> fs worker thread -> main thread
> 
> This is achieved by doing all the required fs driver tasks altogether
> in a single v9fs_co_run_in_worker({ ... }); code block.
> 
> Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> ---
>  hw/9pfs/9p.c | 89 +++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 70 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index 7be07f2d68..2815257f42 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -1705,9 +1705,9 @@ static void coroutine_fn v9fs_walk(void *opaque)
>      int name_idx;
>      V9fsQID *qids = NULL;
>      int i, err = 0;
> -    V9fsPath dpath, path;
> +    V9fsPath dpath, path, *pathes = NULL;
>      uint16_t nwnames;
> -    struct stat stbuf;
> +    struct stat stbuf, fidst, *stbufs = NULL;
>      size_t offset = 7;
>      int32_t fid, newfid;
>      V9fsString *wnames = NULL;
> @@ -1733,6 +1733,8 @@ static void coroutine_fn v9fs_walk(void *opaque)
>      if (nwnames) {
>          wnames = g_new0(V9fsString, nwnames);
>          qids   = g_new0(V9fsQID, nwnames);
> +        stbufs = g_new0(struct stat, nwnames);
> +        pathes = g_new0(V9fsPath, nwnames);
>          for (i = 0; i < nwnames; i++) {
>              err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
>              if (err < 0) {
> @@ -1753,39 +1755,85 @@ static void coroutine_fn v9fs_walk(void *opaque)
>  
>      v9fs_path_init(&dpath);
>      v9fs_path_init(&path);
> +    /*
> +     * Both dpath and path initially point to fidp.
> +     * Needed to handle request with nwnames == 0
> +     */
> +    v9fs_path_copy(&dpath, &fidp->path);
> +    v9fs_path_copy(&path, &fidp->path);
>  
> -    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
> +    /*
> +     * To keep latency (i.e. overall execution time for processing this
> +     * Twalk client request) as small as possible, run all the required fs
> +     * driver code altogether inside the following block.
> +     */
> +    v9fs_co_run_in_worker({
> +        if (v9fs_request_cancelled(pdu)) {
> +            err = -EINTR;
> +            break;
> +        }
> +        err = s->ops->lstat(&s->ctx, &dpath, &fidst);
> +        if (err < 0) {
> +            err = -errno;
> +            break;
> +        }
> +        stbuf = fidst;
> +        for (name_idx = 0; name_idx < nwnames; name_idx++) {
> +            if (v9fs_request_cancelled(pdu)) {
> +                err = -EINTR;
> +                break;
> +            }
> +            if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
> +                strcmp("..", wnames[name_idx].data))
> +            {
> +                err = s->ops->name_to_path(&s->ctx, &dpath,
> +                                        wnames[name_idx].data, &path);

It seems you could pass &pathes[name_idx] instead of &path and...

> +                if (err < 0) {
> +                    err = -errno;
> +                    break;
> +                }
> +                if (v9fs_request_cancelled(pdu)) {
> +                    err = -EINTR;
> +                    break;
> +                }
> +                err = s->ops->lstat(&s->ctx, &path, &stbuf);
> +                if (err < 0) {
> +                    err = -errno;
> +                    break;
> +                }
> +                stbufs[name_idx] = stbuf;
> +                v9fs_path_copy(&dpath, &path);
> +                v9fs_path_copy(&pathes[name_idx], &path);

... avoid a copy.

Also, I believe the path -> dpath could be avoided as well in
the existing code, but this is a separate cleanup.

> +            }
> +        }
> +    });
> +    /*
> +     * Handle all the rest of this Twalk request on main thread ...
> +     */
>      if (err < 0) {
>          goto out;
>      }
> -    err = stat_to_qid(pdu, &stbuf, &qid);
> +
> +    err = stat_to_qid(pdu, &fidst, &qid);
>      if (err < 0) {
>          goto out;
>      }
> +    stbuf = fidst;
>  
> -    /*
> -     * Both dpath and path initially poin to fidp.
> -     * Needed to handle request with nwnames == 0
> -     */
> +    /* reset dpath and path */
>      v9fs_path_copy(&dpath, &fidp->path);
>      v9fs_path_copy(&path, &fidp->path);
> +
>      for (name_idx = 0; name_idx < nwnames; name_idx++) {
>          if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
> -            strcmp("..", wnames[name_idx].data)) {
> -            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
> -                                       &path);
> -            if (err < 0) {
> -                goto out;
> -            }
> -
> -            err = v9fs_co_lstat(pdu, &path, &stbuf);
> -            if (err < 0) {
> -                goto out;
> -            }
> +            strcmp("..", wnames[name_idx].data))
> +        {
> +            stbuf = stbufs[name_idx];
>              err = stat_to_qid(pdu, &stbuf, &qid);
>              if (err < 0) {
>                  goto out;
>              }
> +            v9fs_path_copy(&path, &pathes[name_idx]);
>              v9fs_path_copy(&dpath, &path);
>          }
>          memcpy(&qids[name_idx], &qid, sizeof(qid));
> @@ -1821,9 +1869,12 @@ out_nofid:
>      if (nwnames && nwnames <= P9_MAXWELEM) {
>          for (name_idx = 0; name_idx < nwnames; name_idx++) {
>              v9fs_string_free(&wnames[name_idx]);
> +            v9fs_path_free(&pathes[name_idx]);
>          }
>          g_free(wnames);
>          g_free(qids);
> +        g_free(stbufs);
> +        g_free(pathes);

All of these guys should be converted to g_autofree. Separate cleanup
again.

v9fs_walk() was already a bit hairy and the diffstat definitely
doesn't improve things... this being said, the change makes sense
and I haven't spotted anything wrong, so:

Reviewed-by: Greg Kurz <groug@kaod.org>

Improvements could be to :
- track the previous path with a V9fsPath * instead of copying
- have a separate path for the nwnames == 0 case

>      }
>  }
>
Christian Schoenebeck July 2, 2021, 3:05 p.m. UTC | #2
On Freitag, 2. Juli 2021 16:36:56 CEST Greg Kurz wrote:
> On Fri, 4 Jun 2021 17:38:31 +0200
> 
> Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:
> > As with previous performance optimization on Treaddir handling;
> > reduce the overall latency, i.e. overall time spent on processing
> > a Twalk request by reducing the amount of thread hops between the
> > 9p server's main thread and fs worker thread(s).
> > 
> > In fact this patch even reduces the thread hops for Twalk handling
> > to its theoritical minimum of exactly 2 thread hops:
> > 
> > main thread -> fs worker thread -> main thread
> > 
> > This is achieved by doing all the required fs driver tasks altogether
> > in a single v9fs_co_run_in_worker({ ... }); code block.
> > 
> > Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> > ---
> > 
> >  hw/9pfs/9p.c | 89 +++++++++++++++++++++++++++++++++++++++++-----------
> >  1 file changed, 70 insertions(+), 19 deletions(-)
> > 
> > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> > index 7be07f2d68..2815257f42 100644
> > --- a/hw/9pfs/9p.c
> > +++ b/hw/9pfs/9p.c
> > @@ -1705,9 +1705,9 @@ static void coroutine_fn v9fs_walk(void *opaque)
> > 
> >      int name_idx;
> >      V9fsQID *qids = NULL;
> >      int i, err = 0;
> > 
> > -    V9fsPath dpath, path;
> > +    V9fsPath dpath, path, *pathes = NULL;
> > 
> >      uint16_t nwnames;
> > 
> > -    struct stat stbuf;
> > +    struct stat stbuf, fidst, *stbufs = NULL;
> > 
> >      size_t offset = 7;
> >      int32_t fid, newfid;
> >      V9fsString *wnames = NULL;
> > 
> > @@ -1733,6 +1733,8 @@ static void coroutine_fn v9fs_walk(void *opaque)
> > 
> >      if (nwnames) {
> >      
> >          wnames = g_new0(V9fsString, nwnames);
> >          qids   = g_new0(V9fsQID, nwnames);
> > 
> > +        stbufs = g_new0(struct stat, nwnames);
> > +        pathes = g_new0(V9fsPath, nwnames);
> > 
> >          for (i = 0; i < nwnames; i++) {
> >          
> >              err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
> >              if (err < 0) {
> > 
> > @@ -1753,39 +1755,85 @@ static void coroutine_fn v9fs_walk(void *opaque)
> > 
> >      v9fs_path_init(&dpath);
> >      v9fs_path_init(&path);
> > 
> > +    /*
> > +     * Both dpath and path initially point to fidp.
> > +     * Needed to handle request with nwnames == 0
> > +     */
> > +    v9fs_path_copy(&dpath, &fidp->path);
> > +    v9fs_path_copy(&path, &fidp->path);
> > 
> > -    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
> > +    /*
> > +     * To keep latency (i.e. overall execution time for processing this
> > +     * Twalk client request) as small as possible, run all the required
> > fs
> > +     * driver code altogether inside the following block.
> > +     */
> > +    v9fs_co_run_in_worker({
> > +        if (v9fs_request_cancelled(pdu)) {
> > +            err = -EINTR;
> > +            break;
> > +        }
> > +        err = s->ops->lstat(&s->ctx, &dpath, &fidst);
> > +        if (err < 0) {
> > +            err = -errno;
> > +            break;
> > +        }
> > +        stbuf = fidst;
> > +        for (name_idx = 0; name_idx < nwnames; name_idx++) {
> > +            if (v9fs_request_cancelled(pdu)) {
> > +                err = -EINTR;
> > +                break;
> > +            }
> > +            if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
> > +                strcmp("..", wnames[name_idx].data))
> > +            {
> > +                err = s->ops->name_to_path(&s->ctx, &dpath,
> > +                                        wnames[name_idx].data, &path);
> 
> It seems you could pass &pathes[name_idx] instead of &path and...
> 
> > +                if (err < 0) {
> > +                    err = -errno;
> > +                    break;
> > +                }
> > +                if (v9fs_request_cancelled(pdu)) {
> > +                    err = -EINTR;
> > +                    break;
> > +                }
> > +                err = s->ops->lstat(&s->ctx, &path, &stbuf);
> > +                if (err < 0) {
> > +                    err = -errno;
> > +                    break;
> > +                }
> > +                stbufs[name_idx] = stbuf;
> > +                v9fs_path_copy(&dpath, &path);
> > +                v9fs_path_copy(&pathes[name_idx], &path);
> 
> ... avoid a copy.

It's been a while since I did this, but looks like you are right.

> Also, I believe the path -> dpath could be avoided as well in
> the existing code, but this is a separate cleanup.
> 
> > +            }
> > +        }
> > +    });
> > +    /*
> > +     * Handle all the rest of this Twalk request on main thread ...
> > +     */
> > 
> >      if (err < 0) {
> >      
> >          goto out;
> >      
> >      }
> > 
> > -    err = stat_to_qid(pdu, &stbuf, &qid);
> > +
> > +    err = stat_to_qid(pdu, &fidst, &qid);
> > 
> >      if (err < 0) {
> >      
> >          goto out;
> >      
> >      }
> > 
> > +    stbuf = fidst;
> > 
> > -    /*
> > -     * Both dpath and path initially poin to fidp.
> > -     * Needed to handle request with nwnames == 0
> > -     */
> > +    /* reset dpath and path */
> > 
> >      v9fs_path_copy(&dpath, &fidp->path);
> >      v9fs_path_copy(&path, &fidp->path);
> > 
> > +
> > 
> >      for (name_idx = 0; name_idx < nwnames; name_idx++) {
> >      
> >          if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
> > 
> > -            strcmp("..", wnames[name_idx].data)) {
> > -            err = v9fs_co_name_to_path(pdu, &dpath,
> > wnames[name_idx].data,
> > -                                       &path);
> > -            if (err < 0) {
> > -                goto out;
> > -            }
> > -
> > -            err = v9fs_co_lstat(pdu, &path, &stbuf);
> > -            if (err < 0) {
> > -                goto out;
> > -            }
> > +            strcmp("..", wnames[name_idx].data))
> > +        {
> > +            stbuf = stbufs[name_idx];
> > 
> >              err = stat_to_qid(pdu, &stbuf, &qid);
> >              if (err < 0) {
> >              
> >                  goto out;
> >              
> >              }
> > 
> > +            v9fs_path_copy(&path, &pathes[name_idx]);
> > 
> >              v9fs_path_copy(&dpath, &path);
> >          
> >          }
> >          memcpy(&qids[name_idx], &qid, sizeof(qid));
> > 
> > @@ -1821,9 +1869,12 @@ out_nofid:
> >      if (nwnames && nwnames <= P9_MAXWELEM) {
> >      
> >          for (name_idx = 0; name_idx < nwnames; name_idx++) {
> >          
> >              v9fs_string_free(&wnames[name_idx]);
> > 
> > +            v9fs_path_free(&pathes[name_idx]);
> > 
> >          }
> >          g_free(wnames);
> >          g_free(qids);
> > 
> > +        g_free(stbufs);
> > +        g_free(pathes);
> 
> All of these guys should be converted to g_autofree. Separate cleanup
> again.

Definitely agreed on that.

> v9fs_walk() was already a bit hairy and the diffstat definitely
> doesn't improve things... this being said, the change makes sense
> and I haven't spotted anything wrong, so:
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> Improvements could be to :
> - track the previous path with a V9fsPath * instead of copying
> - have a separate path for the nwnames == 0 case
> 
> >      }
> >  
> >  }

Yeah, there is a lot more to do on v9fs_walk(), both cleanup, as well as the 
previously (couple weeks ago) mentioned protocol fix (i.e. Twalk should only 
reply Rerror if there is an error on the very first path element).

If you don't mind, I queue this patch as is for now and prepare a PR for the 
current 9p patches in my queue in order to bring them into the soft freeze 
deadline.

Thanks for looking at this Greg, I appreciate it!

Best regards,
Christian Schoenebeck
Greg Kurz July 2, 2021, 3:26 p.m. UTC | #3
On Fri, 02 Jul 2021 17:05:32 +0200
Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:

> On Freitag, 2. Juli 2021 16:36:56 CEST Greg Kurz wrote:
> > On Fri, 4 Jun 2021 17:38:31 +0200
> > 
> > Christian Schoenebeck <qemu_oss@crudebyte.com> wrote:
> > > As with previous performance optimization on Treaddir handling;
> > > reduce the overall latency, i.e. overall time spent on processing
> > > a Twalk request by reducing the amount of thread hops between the
> > > 9p server's main thread and fs worker thread(s).
> > > 
> > > In fact this patch even reduces the thread hops for Twalk handling
> > > to its theoritical minimum of exactly 2 thread hops:
> > > 
> > > main thread -> fs worker thread -> main thread
> > > 
> > > This is achieved by doing all the required fs driver tasks altogether
> > > in a single v9fs_co_run_in_worker({ ... }); code block.
> > > 
> > > Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> > > ---
> > > 
> > >  hw/9pfs/9p.c | 89 +++++++++++++++++++++++++++++++++++++++++-----------
> > >  1 file changed, 70 insertions(+), 19 deletions(-)
> > > 

[...]

> 
> Yeah, there is a lot more to do on v9fs_walk(), both cleanup, as well as the 
> previously (couple weeks ago) mentioned protocol fix (i.e. Twalk should only 
> reply Rerror if there is an error on the very first path element).
> 

Ah yes... I had forgotten about this one. One more argument for a thorough
rewrite of this function.

> If you don't mind, I queue this patch as is for now and prepare a PR for the 
> current 9p patches in my queue in order to bring them into the soft freeze 
> deadline.
> 

Sure, please do.

> Thanks for looking at this Greg, I appreciate it!
> 

Upcoming soft freeze provided the extra motivation to finish the review :-)

> Best regards,
> Christian Schoenebeck
> 
> 

Cheers,

--
Greg
diff mbox series

Patch

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 7be07f2d68..2815257f42 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1705,9 +1705,9 @@  static void coroutine_fn v9fs_walk(void *opaque)
     int name_idx;
     V9fsQID *qids = NULL;
     int i, err = 0;
-    V9fsPath dpath, path;
+    V9fsPath dpath, path, *pathes = NULL;
     uint16_t nwnames;
-    struct stat stbuf;
+    struct stat stbuf, fidst, *stbufs = NULL;
     size_t offset = 7;
     int32_t fid, newfid;
     V9fsString *wnames = NULL;
@@ -1733,6 +1733,8 @@  static void coroutine_fn v9fs_walk(void *opaque)
     if (nwnames) {
         wnames = g_new0(V9fsString, nwnames);
         qids   = g_new0(V9fsQID, nwnames);
+        stbufs = g_new0(struct stat, nwnames);
+        pathes = g_new0(V9fsPath, nwnames);
         for (i = 0; i < nwnames; i++) {
             err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
             if (err < 0) {
@@ -1753,39 +1755,85 @@  static void coroutine_fn v9fs_walk(void *opaque)
 
     v9fs_path_init(&dpath);
     v9fs_path_init(&path);
+    /*
+     * Both dpath and path initially point to fidp.
+     * Needed to handle request with nwnames == 0
+     */
+    v9fs_path_copy(&dpath, &fidp->path);
+    v9fs_path_copy(&path, &fidp->path);
 
-    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
+    /*
+     * To keep latency (i.e. overall execution time for processing this
+     * Twalk client request) as small as possible, run all the required fs
+     * driver code altogether inside the following block.
+     */
+    v9fs_co_run_in_worker({
+        if (v9fs_request_cancelled(pdu)) {
+            err = -EINTR;
+            break;
+        }
+        err = s->ops->lstat(&s->ctx, &dpath, &fidst);
+        if (err < 0) {
+            err = -errno;
+            break;
+        }
+        stbuf = fidst;
+        for (name_idx = 0; name_idx < nwnames; name_idx++) {
+            if (v9fs_request_cancelled(pdu)) {
+                err = -EINTR;
+                break;
+            }
+            if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
+                strcmp("..", wnames[name_idx].data))
+            {
+                err = s->ops->name_to_path(&s->ctx, &dpath,
+                                        wnames[name_idx].data, &path);
+                if (err < 0) {
+                    err = -errno;
+                    break;
+                }
+                if (v9fs_request_cancelled(pdu)) {
+                    err = -EINTR;
+                    break;
+                }
+                err = s->ops->lstat(&s->ctx, &path, &stbuf);
+                if (err < 0) {
+                    err = -errno;
+                    break;
+                }
+                stbufs[name_idx] = stbuf;
+                v9fs_path_copy(&dpath, &path);
+                v9fs_path_copy(&pathes[name_idx], &path);
+            }
+        }
+    });
+    /*
+     * Handle all the rest of this Twalk request on main thread ...
+     */
     if (err < 0) {
         goto out;
     }
-    err = stat_to_qid(pdu, &stbuf, &qid);
+
+    err = stat_to_qid(pdu, &fidst, &qid);
     if (err < 0) {
         goto out;
     }
+    stbuf = fidst;
 
-    /*
-     * Both dpath and path initially poin to fidp.
-     * Needed to handle request with nwnames == 0
-     */
+    /* reset dpath and path */
     v9fs_path_copy(&dpath, &fidp->path);
     v9fs_path_copy(&path, &fidp->path);
+
     for (name_idx = 0; name_idx < nwnames; name_idx++) {
         if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
-            strcmp("..", wnames[name_idx].data)) {
-            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
-                                       &path);
-            if (err < 0) {
-                goto out;
-            }
-
-            err = v9fs_co_lstat(pdu, &path, &stbuf);
-            if (err < 0) {
-                goto out;
-            }
+            strcmp("..", wnames[name_idx].data))
+        {
+            stbuf = stbufs[name_idx];
             err = stat_to_qid(pdu, &stbuf, &qid);
             if (err < 0) {
                 goto out;
             }
+            v9fs_path_copy(&path, &pathes[name_idx]);
             v9fs_path_copy(&dpath, &path);
         }
         memcpy(&qids[name_idx], &qid, sizeof(qid));
@@ -1821,9 +1869,12 @@  out_nofid:
     if (nwnames && nwnames <= P9_MAXWELEM) {
         for (name_idx = 0; name_idx < nwnames; name_idx++) {
             v9fs_string_free(&wnames[name_idx]);
+            v9fs_path_free(&pathes[name_idx]);
         }
         g_free(wnames);
         g_free(qids);
+        g_free(stbufs);
+        g_free(pathes);
     }
 }