diff mbox series

[040/104] virtiofsd: Pass write iov's all the way through

Message ID 20191212163904.159893-41-dgilbert@redhat.com
State New
Headers show
Series virtiofs daemon [all] | expand

Commit Message

Dr. David Alan Gilbert Dec. 12, 2019, 4:38 p.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Pass the write iov pointing to guest RAM all the way through rather
than copying the data.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 tools/virtiofsd/fuse_virtio.c | 79 ++++++++++++++++++++++++++++++++---
 1 file changed, 73 insertions(+), 6 deletions(-)

Comments

Xiao Yang Jan. 19, 2020, 8:08 a.m. UTC | #1
On 2019/12/13 0:38, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert"<dgilbert@redhat.com>
>
> Pass the write iov pointing to guest RAM all the way through rather
> than copying the data.
>
> Signed-off-by: Dr. David Alan Gilbert<dgilbert@redhat.com>
> ---
>   tools/virtiofsd/fuse_virtio.c | 79 ++++++++++++++++++++++++++++++++---
>   1 file changed, 73 insertions(+), 6 deletions(-)
>
> diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
> index 99c877ea2e..3c778b6296 100644
> --- a/tools/virtiofsd/fuse_virtio.c
> +++ b/tools/virtiofsd/fuse_virtio.c
> @@ -452,6 +452,10 @@ static void *fv_queue_thread(void *opaque)
>                    __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
>
>           while (1) {
> +            bool allocated_bufv = false;
> +            struct fuse_bufvec bufv;
> +            struct fuse_bufvec *pbufv;
> +
>               /*
>                * An element contains one request and the space to send our
>                * response They're spread over multiple descriptors in a
> @@ -493,14 +497,76 @@ static void *fv_queue_thread(void *opaque)
>                            __func__, elem->index);
>                   assert(0); /* TODO */
>               }
> -            copy_from_iov(&fbuf, out_num, out_sg);
> -            fbuf.size = out_len;
> +            /* Copy just the first element and look at it */
> +            copy_from_iov(&fbuf, 1, out_sg);
> +
> +            if (out_num>  2&&
> +                out_sg[0].iov_len == sizeof(struct fuse_in_header)&&
> +                ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE&&
> +                out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
> +                /*
> +                 * For a write we don't actually need to copy the
> +                 * data, we can just do it straight out of guest memory
> +                 * but we must still copy the headers in case the guest
> +                 * was nasty and changed them while we were using them.
> +                 */
> +                fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
> +
> +                /* copy the fuse_write_in header after the fuse_in_header */
> +                fbuf.mem += out_sg->iov_len;
> +                copy_from_iov(&fbuf, 1, out_sg + 1);
> +                fbuf.mem -= out_sg->iov_len;
> +                fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
> +
> +                /* Allocate the bufv, with space for the rest of the iov */
> +                allocated_bufv = true;
> +                pbufv = malloc(sizeof(struct fuse_bufvec) +
> +                               sizeof(struct fuse_buf) * (out_num - 2));
> +                if (!pbufv) {
> +                    vu_queue_unpop(dev, q, elem, 0);
> +                    free(elem);
> +                    fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
> +                             __func__);
> +                    goto out;
> +                }
> +
> +                pbufv->count = 1;
> +                pbufv->buf[0] = fbuf;
> +
> +                size_t iovindex, pbufvindex;
> +                iovindex = 2; /* 2 headers, separate iovs */
> +                pbufvindex = 1; /* 2 headers, 1 fusebuf */
> +
> +                for (; iovindex<  out_num; iovindex++, pbufvindex++) {
> +                    pbufv->count++;
> +                    pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
> +                    pbufv->buf[pbufvindex].flags = 0;
> +                    pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
> +                    pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
> +                }
> +            } else {
> +                /* Normal (non fast write) path */
> +
> +                /* Copy the rest of the buffer */
> +                fbuf.mem += out_sg->iov_len;
> +                copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
> +                fbuf.mem -= out_sg->iov_len;
> +                fbuf.size = out_len;
>
> -            /* TODO! Endianness of header */
> +                /* TODO! Endianness of header */
>
> -            /* TODO: Add checks for fuse_session_exited */
> -            struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 };
> -            fuse_session_process_buf_int(se,&bufv,&ch);
> +                /* TODO: Add checks for fuse_session_exited */
> +                bufv.buf[0] = fbuf;
> +                bufv.count = 1;
> +                pbufv =&bufv;
> +            }
> +            pbufv->idx = 0;
> +            pbufv->off = 0;
> +            fuse_session_process_buf_int(se, pbufv,&ch);
> +
> +            if (allocated_bufv) {
> +                free(pbufv);
> +            }
>
>               if (!qi->reply_sent) {
>                   fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n",
> @@ -514,6 +580,7 @@ static void *fv_queue_thread(void *opaque)
>               elem = NULL;
>           }
>       }
> +out:
>       pthread_mutex_destroy(&ch.lock);
>       free(fbuf.mem);
>
Hi,

Tested the patch and got the correct data written by guest, so it looks 
fine to me.
Reviewed-by: Xiao Yang <yangx.jy@cn.fujitsu.com>

Best Regards,
Xiao Yang
Philippe Mathieu-Daudé Jan. 20, 2020, 8:24 a.m. UTC | #2
On 1/19/20 9:08 AM, Xiao Yang wrote:
> On 2019/12/13 0:38, Dr. David Alan Gilbert (git) wrote:
>> From: "Dr. David Alan Gilbert"<dgilbert@redhat.com>
>>
>> Pass the write iov pointing to guest RAM all the way through rather
>> than copying the data.
>>
>> Signed-off-by: Dr. David Alan Gilbert<dgilbert@redhat.com>
>> ---
>>   tools/virtiofsd/fuse_virtio.c | 79 ++++++++++++++++++++++++++++++++---
>>   1 file changed, 73 insertions(+), 6 deletions(-)
>>
[...]
> Hi,
> 
> Tested the patch and got the correct data written by guest, so it looks 
> fine to me.
> Reviewed-by: Xiao Yang <yangx.jy@cn.fujitsu.com>

So also:
Tested-by: Xiao Yang <yangx.jy@cn.fujitsu.com>

> 
> Best Regards,
> Xiao Yang
Dr. David Alan Gilbert Jan. 20, 2020, 1:28 p.m. UTC | #3
* Philippe Mathieu-Daudé (philmd@redhat.com) wrote:
> On 1/19/20 9:08 AM, Xiao Yang wrote:
> > On 2019/12/13 0:38, Dr. David Alan Gilbert (git) wrote:
> > > From: "Dr. David Alan Gilbert"<dgilbert@redhat.com>
> > > 
> > > Pass the write iov pointing to guest RAM all the way through rather
> > > than copying the data.
> > > 
> > > Signed-off-by: Dr. David Alan Gilbert<dgilbert@redhat.com>
> > > ---
> > >   tools/virtiofsd/fuse_virtio.c | 79 ++++++++++++++++++++++++++++++++---
> > >   1 file changed, 73 insertions(+), 6 deletions(-)
> > > 
> [...]
> > Hi,
> > 
> > Tested the patch and got the correct data written by guest, so it looks
> > fine to me.
> > Reviewed-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
> 
> So also:
> Tested-by: Xiao Yang <yangx.jy@cn.fujitsu.com>

I'd take that but only if that's directly on a Xiao Yang's mail.

> > 
> > Best Regards,
> > Xiao Yang
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 99c877ea2e..3c778b6296 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -452,6 +452,10 @@  static void *fv_queue_thread(void *opaque)
                  __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
 
         while (1) {
+            bool allocated_bufv = false;
+            struct fuse_bufvec bufv;
+            struct fuse_bufvec *pbufv;
+
             /*
              * An element contains one request and the space to send our
              * response They're spread over multiple descriptors in a
@@ -493,14 +497,76 @@  static void *fv_queue_thread(void *opaque)
                          __func__, elem->index);
                 assert(0); /* TODO */
             }
-            copy_from_iov(&fbuf, out_num, out_sg);
-            fbuf.size = out_len;
+            /* Copy just the first element and look at it */
+            copy_from_iov(&fbuf, 1, out_sg);
+
+            if (out_num > 2 &&
+                out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+                ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
+                out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
+                /*
+                 * For a write we don't actually need to copy the
+                 * data, we can just do it straight out of guest memory
+                 * but we must still copy the headers in case the guest
+                 * was nasty and changed them while we were using them.
+                 */
+                fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
+
+                /* copy the fuse_write_in header after the fuse_in_header */
+                fbuf.mem += out_sg->iov_len;
+                copy_from_iov(&fbuf, 1, out_sg + 1);
+                fbuf.mem -= out_sg->iov_len;
+                fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
+
+                /* Allocate the bufv, with space for the rest of the iov */
+                allocated_bufv = true;
+                pbufv = malloc(sizeof(struct fuse_bufvec) +
+                               sizeof(struct fuse_buf) * (out_num - 2));
+                if (!pbufv) {
+                    vu_queue_unpop(dev, q, elem, 0);
+                    free(elem);
+                    fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
+                             __func__);
+                    goto out;
+                }
+
+                pbufv->count = 1;
+                pbufv->buf[0] = fbuf;
+
+                size_t iovindex, pbufvindex;
+                iovindex = 2; /* 2 headers, separate iovs */
+                pbufvindex = 1; /* 2 headers, 1 fusebuf */
+
+                for (; iovindex < out_num; iovindex++, pbufvindex++) {
+                    pbufv->count++;
+                    pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
+                    pbufv->buf[pbufvindex].flags = 0;
+                    pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
+                    pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+                }
+            } else {
+                /* Normal (non fast write) path */
+
+                /* Copy the rest of the buffer */
+                fbuf.mem += out_sg->iov_len;
+                copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
+                fbuf.mem -= out_sg->iov_len;
+                fbuf.size = out_len;
 
-            /* TODO! Endianness of header */
+                /* TODO! Endianness of header */
 
-            /* TODO: Add checks for fuse_session_exited */
-            struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 };
-            fuse_session_process_buf_int(se, &bufv, &ch);
+                /* TODO: Add checks for fuse_session_exited */
+                bufv.buf[0] = fbuf;
+                bufv.count = 1;
+                pbufv = &bufv;
+            }
+            pbufv->idx = 0;
+            pbufv->off = 0;
+            fuse_session_process_buf_int(se, pbufv, &ch);
+
+            if (allocated_bufv) {
+                free(pbufv);
+            }
 
             if (!qi->reply_sent) {
                 fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n",
@@ -514,6 +580,7 @@  static void *fv_queue_thread(void *opaque)
             elem = NULL;
         }
     }
+out:
     pthread_mutex_destroy(&ch.lock);
     free(fbuf.mem);