Patchwork [1/2] hw/9pfs: replace iovec manipulation with QEMUIOVector

login
register
mail settings
Submitter Aneesh Kumar K.V
Date Dec. 13, 2011, 2:37 p.m.
Message ID <1323787065-587-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/131110/
State New
Headers show

Comments

Aneesh Kumar K.V - Dec. 13, 2011, 2:37 p.m.
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>

The v9fs_read() and v9fs_write() functions rely on iovec[] manipulation
code should be replaced with QEMUIOVector to avoid duplicating code.
In the future it may be possible to make the code even more concise by
using QEMUIOVector consistently across virtio and 9pfs.

The "v" format specifier for pdu_marshal() and pdu_unmarshal() is
dropped since it does not actually pack/unpack anything.  The specifier
was also not implemented to update the offset variable and could only be
used at the end of a format string, another sign that this shouldn't
really be a format specifier.  Instead, see the new
v9fs_init_qiov_from_pdu() function.

This change avoids a possible iovec[] buffer overflow when indirect
vrings are used since the number of vectors is now limited by the
underlying VirtQueueElement and cannot be out-of-bounds.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/virtio-9p.c |  162 +++++++++++++++++++--------------------------------
 1 files changed, 60 insertions(+), 102 deletions(-)
Stefan Hajnoczi - Dec. 13, 2011, 3:20 p.m.
On Tue, Dec 13, 2011 at 2:37 PM, Aneesh Kumar K.V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> This change avoids a possible iovec[] buffer overflow when indirect
> vrings are used since the number of vectors is now limited by the
> underlying VirtQueueElement and cannot be out-of-bounds.

Please consider both of these patches for stable and distro packages
that ship virtfs.

Stefan

Patch

diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index dd43209..c018916 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -674,40 +674,6 @@  static size_t pdu_pack(V9fsPDU *pdu, size_t offset, const void *src,
                              offset, size, 1);
 }
 
-static int pdu_copy_sg(V9fsPDU *pdu, size_t offset, int rx, struct iovec *sg)
-{
-    size_t pos = 0;
-    int i, j;
-    struct iovec *src_sg;
-    unsigned int num;
-
-    if (rx) {
-        src_sg = pdu->elem.in_sg;
-        num = pdu->elem.in_num;
-    } else {
-        src_sg = pdu->elem.out_sg;
-        num = pdu->elem.out_num;
-    }
-
-    j = 0;
-    for (i = 0; i < num; i++) {
-        if (offset <= pos) {
-            sg[j].iov_base = src_sg[i].iov_base;
-            sg[j].iov_len = src_sg[i].iov_len;
-            j++;
-        } else if (offset < (src_sg[i].iov_len + pos)) {
-            sg[j].iov_base = src_sg[i].iov_base;
-            sg[j].iov_len = src_sg[i].iov_len;
-            sg[j].iov_base += (offset - pos);
-            sg[j].iov_len -= (offset - pos);
-            j++;
-        }
-        pos += src_sg[i].iov_len;
-    }
-
-    return j;
-}
-
 static size_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
 {
     size_t old_offset = offset;
@@ -743,12 +709,6 @@  static size_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
             *valp = le64_to_cpu(val);
             break;
         }
-        case 'v': {
-            struct iovec *iov = va_arg(ap, struct iovec *);
-            int *iovcnt = va_arg(ap, int *);
-            *iovcnt = pdu_copy_sg(pdu, offset, 0, iov);
-            break;
-        }
         case 's': {
             V9fsString *str = va_arg(ap, V9fsString *);
             offset += pdu_unmarshal(pdu, offset, "w", &str->size);
@@ -827,12 +787,6 @@  static size_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
             offset += pdu_pack(pdu, offset, &val, sizeof(val));
             break;
         }
-        case 'v': {
-            struct iovec *iov = va_arg(ap, struct iovec *);
-            int *iovcnt = va_arg(ap, int *);
-            *iovcnt = pdu_copy_sg(pdu, offset, 1, iov);
-            break;
-        }
         case 's': {
             V9fsString *str = va_arg(ap, V9fsString *);
             offset += pdu_marshal(pdu, offset, "w", str->size);
@@ -1143,42 +1097,6 @@  static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf,
     stat_to_qid(stbuf, &v9lstat->qid);
 }
 
-static struct iovec *adjust_sg(struct iovec *sg, int len, int *iovcnt)
-{
-    while (len && *iovcnt) {
-        if (len < sg->iov_len) {
-            sg->iov_len -= len;
-            sg->iov_base += len;
-            len = 0;
-        } else {
-            len -= sg->iov_len;
-            sg++;
-            *iovcnt -= 1;
-        }
-    }
-
-    return sg;
-}
-
-static struct iovec *cap_sg(struct iovec *sg, int cap, int *cnt)
-{
-    int i;
-    int total = 0;
-
-    for (i = 0; i < *cnt; i++) {
-        if ((total + sg[i].iov_len) > cap) {
-            sg[i].iov_len -= ((total + sg[i].iov_len) - cap);
-            i++;
-            break;
-        }
-        total += sg[i].iov_len;
-    }
-
-    *cnt = i;
-
-    return sg;
-}
-
 static void print_sg(struct iovec *sg, int cnt)
 {
     int i;
@@ -1861,6 +1779,38 @@  out:
     return count;
 }
 
+/*
+ * Create a QEMUIOVector for a sub-region of PDU iovecs
+ *
+ * @qiov:       uninitialized QEMUIOVector
+ * @skip:       number of bytes to skip from beginning of PDU
+ * @size:       number of bytes to include
+ * @is_write:   true - write, false - read
+ *
+ * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
+ * with qemu_iovec_destroy().
+ */
+static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
+                                    uint64_t skip, size_t size,
+                                    bool is_write)
+{
+    QEMUIOVector elem;
+    struct iovec *iov;
+    unsigned int niov;
+
+    if (is_write) {
+        iov = pdu->elem.out_sg;
+        niov = pdu->elem.out_num;
+    } else {
+        iov = pdu->elem.in_sg;
+        niov = pdu->elem.in_num;
+    }
+
+    qemu_iovec_init_external(&elem, iov, niov);
+    qemu_iovec_init(qiov, niov);
+    qemu_iovec_copy(qiov, &elem, skip, size);
+}
+
 static void v9fs_read(void *opaque)
 {
     int32_t fid;
@@ -1895,21 +1845,21 @@  static void v9fs_read(void *opaque)
         err += pdu_marshal(pdu, offset, "d", count);
         err += count;
     } else if (fidp->fid_type == P9_FID_FILE) {
-        int32_t cnt;
+        QEMUIOVector qiov_full;
+        QEMUIOVector qiov;
         int32_t len;
-        struct iovec *sg;
-        struct iovec iov[128]; /* FIXME: bad, bad, bad */
 
-        sg = iov;
-        pdu_marshal(pdu, offset + 4, "v", sg, &cnt);
-        sg = cap_sg(sg, max_count, &cnt);
+        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
+        qemu_iovec_init(&qiov, qiov_full.niov);
         do {
+            qemu_iovec_reset(&qiov);
+            qemu_iovec_copy(&qiov, &qiov_full, count, qiov_full.size - count);
             if (0) {
-                print_sg(sg, cnt);
+                print_sg(qiov.iov, qiov.niov);
             }
             /* Loop in case of EINTR */
             do {
-                len = v9fs_co_preadv(pdu, fidp, sg, cnt, off);
+                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
                 if (len >= 0) {
                     off   += len;
                     count += len;
@@ -1920,11 +1870,12 @@  static void v9fs_read(void *opaque)
                 err = len;
                 goto out;
             }
-            sg = adjust_sg(sg, len, &cnt);
         } while (count < max_count && len > 0);
         err = offset;
         err += pdu_marshal(pdu, offset, "d", count);
         err += count;
+        qemu_iovec_destroy(&qiov);
+        qemu_iovec_destroy(&qiov_full);
     } else if (fidp->fid_type == P9_FID_XATTR) {
         err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
     } else {
@@ -2095,7 +2046,6 @@  out:
 
 static void v9fs_write(void *opaque)
 {
-    int cnt;
     ssize_t err;
     int32_t fid;
     int64_t off;
@@ -2104,13 +2054,14 @@  static void v9fs_write(void *opaque)
     int32_t total = 0;
     size_t offset = 7;
     V9fsFidState *fidp;
-    struct iovec iov[128]; /* FIXME: bad, bad, bad */
-    struct iovec *sg = iov;
     V9fsPDU *pdu = opaque;
     V9fsState *s = pdu->s;
+    QEMUIOVector qiov_full;
+    QEMUIOVector qiov;
 
-    pdu_unmarshal(pdu, offset, "dqdv", &fid, &off, &count, sg, &cnt);
-    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, cnt);
+    offset += pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
+    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
+    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
 
     fidp = get_fid(pdu, fid);
     if (fidp == NULL) {
@@ -2126,20 +2077,23 @@  static void v9fs_write(void *opaque)
         /*
          * setxattr operation
          */
-        err = v9fs_xattr_write(s, pdu, fidp, off, count, sg, cnt);
+        err = v9fs_xattr_write(s, pdu, fidp, off, count,
+                               qiov_full.iov, qiov_full.niov);
         goto out;
     } else {
         err = -EINVAL;
         goto out;
     }
-    sg = cap_sg(sg, count, &cnt);
+    qemu_iovec_init(&qiov, qiov_full.niov);
     do {
+        qemu_iovec_reset(&qiov);
+        qemu_iovec_copy(&qiov, &qiov_full, total, qiov_full.size - total);
         if (0) {
-            print_sg(sg, cnt);
+            print_sg(qiov.iov, qiov.niov);
         }
         /* Loop in case of EINTR */
         do {
-            len = v9fs_co_pwritev(pdu, fidp, sg, cnt, off);
+            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
             if (len >= 0) {
                 off   += len;
                 total += len;
@@ -2148,16 +2102,20 @@  static void v9fs_write(void *opaque)
         if (len < 0) {
             /* IO error return the error */
             err = len;
-            goto out;
+            goto out_qiov;
         }
-        sg = adjust_sg(sg, len, &cnt);
     } while (total < count && len > 0);
+
+    offset = 7;
     offset += pdu_marshal(pdu, offset, "d", total);
     err = offset;
     trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
+out_qiov:
+    qemu_iovec_destroy(&qiov);
 out:
     put_fid(pdu, fidp);
 out_nofid:
+    qemu_iovec_destroy(&qiov_full);
     complete_pdu(s, pdu, err);
 }