diff mbox

raw: Fix image header protection

Message ID 1283861325-23785-1-git-send-email-kwolf@redhat.com
State New
Headers show

Commit Message

Kevin Wolf Sept. 7, 2010, 12:08 p.m. UTC
Recenty a patch was committed to protect the first four bytes of an image to
avoid "converting" a probed raw image to a different format when a malicious
guest writes e.g. a qcow2 header to it.

This check relies on the assumption that all qiov entries are multiples of 512,
which isn't true in practice. At least the installers of some Windows versions
are reported to fail the corresponding assertion.

This patch removes the wrong assumption and fixes Win 2003 installation for me
(other Windows versions not tested, should be the same)

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/raw.c   |   57 ++++++++++++++++++++++++---------------------------------
 cutils.c      |   16 ++++++++++++----
 qemu-common.h |    1 +
 3 files changed, 37 insertions(+), 37 deletions(-)

Comments

Anthony Liguori Sept. 9, 2010, 12:30 p.m. UTC | #1
On 09/07/2010 07:08 AM, Kevin Wolf wrote:
> Recenty a patch was committed to protect the first four bytes of an image to
> avoid "converting" a probed raw image to a different format when a malicious
> guest writes e.g. a qcow2 header to it.
>
> This check relies on the assumption that all qiov entries are multiples of 512,
> which isn't true in practice. At least the installers of some Windows versions
> are reported to fail the corresponding assertion.
>
> This patch removes the wrong assumption and fixes Win 2003 installation for me
> (other Windows versions not tested, should be the same)
>
> Signed-off-by: Kevin Wolf<kwolf@redhat.com>
> ---
>   block/raw.c   |   57 ++++++++++++++++++++++++---------------------------------
>   cutils.c      |   16 ++++++++++++----
>   qemu-common.h |    1 +
>   3 files changed, 37 insertions(+), 37 deletions(-)
>
> diff --git a/block/raw.c b/block/raw.c
> index 61e6748..3286550 100644
> --- a/block/raw.c
> +++ b/block/raw.c
> @@ -99,6 +99,7 @@ typedef struct RawScrubberBounce
>   {
>       BlockDriverCompletionFunc *cb;
>       void *opaque;
> +    uint8_t *buf;
>       QEMUIOVector qiov;
>   } RawScrubberBounce;
>
> @@ -113,6 +114,7 @@ static void raw_aio_writev_scrubbed(void *opaque, int ret)
>       }
>
>       qemu_iovec_destroy(&b->qiov);
> +    qemu_free(b->buf);
>       qemu_free(b);
>   }
>
> @@ -120,46 +122,35 @@ static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
>       int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>       BlockDriverCompletionFunc *cb, void *opaque)
>   {
> -    const uint8_t *first_buf;
> -    int first_buf_index = 0, i;
> -
> -    /* This is probably being paranoid, but handle cases of zero size
> -       vectors. */
> -    for (i = 0; i<  qiov->niov; i++) {
> -        if (qiov->iov[i].iov_len) {
> -            assert(qiov->iov[i].iov_len>= 512);
> -            first_buf_index = i;
> -            break;
> -        }
> -    }
> +    if (bs->probed) {
> +        uint8_t first_buf[512];
> +        qemu_iovec_part_to_buffer(qiov, first_buf, 512);
>
> -    first_buf = qiov->iov[first_buf_index].iov_base;
> +        if (check_write_unsafe(bs, sector_num, first_buf, nb_sectors)) {
> +            RawScrubberBounce *b;
> +            int ret;
>
> -    if (check_write_unsafe(bs, sector_num, first_buf, nb_sectors)) {
> -        RawScrubberBounce *b;
> -        int ret;
> +            /* write the first sector using sync I/O */
> +            ret = raw_write_scrubbed_bootsect(bs, first_buf);
> +            if (ret<  0) {
> +                return NULL;
> +            }
>
> -        /* write the first sector using sync I/O */
> -        ret = raw_write_scrubbed_bootsect(bs, first_buf);
> -        if (ret<  0) {
> -            return NULL;
> -        }
> -
> -        /* adjust request to be everything but first sector */
> +            /* adjust request to be everything but first sector */
>
> -        b = qemu_malloc(sizeof(*b));
> -        b->cb = cb;
> -        b->opaque = opaque;
> +            b = qemu_malloc(sizeof(*b));
> +            b->cb = cb;
> +            b->opaque = opaque;
>
> -        qemu_iovec_init(&b->qiov, qiov->nalloc);
> -        qemu_iovec_concat(&b->qiov, qiov, qiov->size);
> +            b->buf = qemu_malloc(qiov->size);
> +            qemu_iovec_to_buffer(qiov, b->buf);
>    

Isn't this an unbounded, guest controlled, malloc?  IOW, a guest could 
do a request of 4GB and on a 32-bit system crash the qemu instance.

Regards,

Anthony Liguori

> -        b->qiov.size -= 512;
> -        b->qiov.iov[first_buf_index].iov_base += 512;
> -        b->qiov.iov[first_buf_index].iov_len -= 512;
> +            qemu_iovec_init(&b->qiov, 1);
> +            qemu_iovec_add(&b->qiov, b->buf + 512, qiov->size - 512);
>
> -        return bdrv_aio_writev(bs->file, sector_num + 1,&b->qiov,
> -                               nb_sectors - 1, raw_aio_writev_scrubbed, b);
> +            return bdrv_aio_writev(bs->file, sector_num + 1,&b->qiov,
> +                                   nb_sectors - 1, raw_aio_writev_scrubbed, b);
> +        }
>       }
>
>       return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
> diff --git a/cutils.c b/cutils.c
> index 036ae3c..0fb4968 100644
> --- a/cutils.c
> +++ b/cutils.c
> @@ -207,17 +207,25 @@ void qemu_iovec_reset(QEMUIOVector *qiov)
>       qiov->size = 0;
>   }
>
> -void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf)
> +void qemu_iovec_part_to_buffer(QEMUIOVector *qiov, void *buf, size_t len)
>   {
>       uint8_t *p = (uint8_t *)buf;
>       int i;
> +    size_t n;
>
> -    for (i = 0; i<  qiov->niov; ++i) {
> -        memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
> -        p += qiov->iov[i].iov_len;
> +    for (i = 0; len&&  i<  qiov->niov; ++i) {
> +        n = MIN(len, qiov->iov[i].iov_len);
> +        memcpy(p, qiov->iov[i].iov_base, n);
> +        p += n;
> +        len -= n;
>       }
>   }
>
> +void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf)
> +{
> +    qemu_iovec_part_to_buffer(qiov, buf, qiov->size);
> +}
> +
>   void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count)
>   {
>       const uint8_t *p = (const uint8_t *)buf;
> diff --git a/qemu-common.h b/qemu-common.h
> index dfd3dc0..c584539 100644
> --- a/qemu-common.h
> +++ b/qemu-common.h
> @@ -281,6 +281,7 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
>   void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size);
>   void qemu_iovec_destroy(QEMUIOVector *qiov);
>   void qemu_iovec_reset(QEMUIOVector *qiov);
> +void qemu_iovec_part_to_buffer(QEMUIOVector *qiov, void *buf, size_t len);
>   void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf);
>   void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count);
>
>
Kevin Wolf Sept. 9, 2010, 12:44 p.m. UTC | #2
Am 09.09.2010 14:30, schrieb Anthony Liguori:
> On 09/07/2010 07:08 AM, Kevin Wolf wrote:
>> Recenty a patch was committed to protect the first four bytes of an image to
>> avoid "converting" a probed raw image to a different format when a malicious
>> guest writes e.g. a qcow2 header to it.
>>
>> This check relies on the assumption that all qiov entries are multiples of 512,
>> which isn't true in practice. At least the installers of some Windows versions
>> are reported to fail the corresponding assertion.
>>
>> This patch removes the wrong assumption and fixes Win 2003 installation for me
>> (other Windows versions not tested, should be the same)
>>
>> Signed-off-by: Kevin Wolf<kwolf@redhat.com>
>> ---
>>   block/raw.c   |   57 ++++++++++++++++++++++++---------------------------------
>>   cutils.c      |   16 ++++++++++++----
>>   qemu-common.h |    1 +
>>   3 files changed, 37 insertions(+), 37 deletions(-)
>>
>> diff --git a/block/raw.c b/block/raw.c
>> index 61e6748..3286550 100644
>> --- a/block/raw.c
>> +++ b/block/raw.c
>> @@ -99,6 +99,7 @@ typedef struct RawScrubberBounce
>>   {
>>       BlockDriverCompletionFunc *cb;
>>       void *opaque;
>> +    uint8_t *buf;
>>       QEMUIOVector qiov;
>>   } RawScrubberBounce;
>>
>> @@ -113,6 +114,7 @@ static void raw_aio_writev_scrubbed(void *opaque, int ret)
>>       }
>>
>>       qemu_iovec_destroy(&b->qiov);
>> +    qemu_free(b->buf);
>>       qemu_free(b);
>>   }
>>
>> @@ -120,46 +122,35 @@ static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
>>       int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
>>       BlockDriverCompletionFunc *cb, void *opaque)
>>   {
>> -    const uint8_t *first_buf;
>> -    int first_buf_index = 0, i;
>> -
>> -    /* This is probably being paranoid, but handle cases of zero size
>> -       vectors. */
>> -    for (i = 0; i<  qiov->niov; i++) {
>> -        if (qiov->iov[i].iov_len) {
>> -            assert(qiov->iov[i].iov_len>= 512);
>> -            first_buf_index = i;
>> -            break;
>> -        }
>> -    }
>> +    if (bs->probed) {
>> +        uint8_t first_buf[512];
>> +        qemu_iovec_part_to_buffer(qiov, first_buf, 512);
>>
>> -    first_buf = qiov->iov[first_buf_index].iov_base;
>> +        if (check_write_unsafe(bs, sector_num, first_buf, nb_sectors)) {
>> +            RawScrubberBounce *b;
>> +            int ret;
>>
>> -    if (check_write_unsafe(bs, sector_num, first_buf, nb_sectors)) {
>> -        RawScrubberBounce *b;
>> -        int ret;
>> +            /* write the first sector using sync I/O */
>> +            ret = raw_write_scrubbed_bootsect(bs, first_buf);
>> +            if (ret<  0) {
>> +                return NULL;
>> +            }
>>
>> -        /* write the first sector using sync I/O */
>> -        ret = raw_write_scrubbed_bootsect(bs, first_buf);
>> -        if (ret<  0) {
>> -            return NULL;
>> -        }
>> -
>> -        /* adjust request to be everything but first sector */
>> +            /* adjust request to be everything but first sector */
>>
>> -        b = qemu_malloc(sizeof(*b));
>> -        b->cb = cb;
>> -        b->opaque = opaque;
>> +            b = qemu_malloc(sizeof(*b));
>> +            b->cb = cb;
>> +            b->opaque = opaque;
>>
>> -        qemu_iovec_init(&b->qiov, qiov->nalloc);
>> -        qemu_iovec_concat(&b->qiov, qiov, qiov->size);
>> +            b->buf = qemu_malloc(qiov->size);
>> +            qemu_iovec_to_buffer(qiov, b->buf);
>>    
> 
> Isn't this an unbounded, guest controlled, malloc?  IOW, a guest could 
> do a request of 4GB and on a 32-bit system crash the qemu instance.

If you're concerned about that, we need to ban qemu_iovec_to_buffer()
completely. Currently we do the same thing for every write request for
every format but raw. Or instead of completely removing it, we could add
a size limit, though I suspect that would mean violating some specs.

If I was a guest though and wanted to crash qemu, I would just mess up
the virtio ring a bit so that qemu would exit() voluntarily. ;-)

Kevin
Anthony Liguori Sept. 9, 2010, 12:52 p.m. UTC | #3
On 09/09/2010 07:44 AM, Kevin Wolf wrote:
>> Isn't this an unbounded, guest controlled, malloc?  IOW, a guest could
>> do a request of 4GB and on a 32-bit system crash the qemu instance.
>>      
> If you're concerned about that, we need to ban qemu_iovec_to_buffer()
> completely. Currently we do the same thing for every write request for
> every format but raw.

And QED ;-)

>   Or instead of completely removing it, we could add
> a size limit, though I suspect that would mean violating some specs.
>    

One thing I was thinking of trying was splitting off the first sector 
into a linear buffer, then allocating a new iovec and adjusting the new 
iovec to cover the new request minus the first sector.

> If I was a guest though and wanted to crash qemu, I would just mess up
> the virtio ring a bit so that qemu would exit() voluntarily. ;-)
>    

Yeah, we're terrible at this but we should try to avoid making things 
worse.  Particularly in a code path (like raw images) where we don't 
have this problem today.

Regards,

Anthony Liguori

> Kevin
>
Kevin Wolf Sept. 9, 2010, 1:02 p.m. UTC | #4
Am 09.09.2010 14:52, schrieb Anthony Liguori:
> On 09/09/2010 07:44 AM, Kevin Wolf wrote:
>>> Isn't this an unbounded, guest controlled, malloc?  IOW, a guest could
>>> do a request of 4GB and on a 32-bit system crash the qemu instance.
>>>      
>> If you're concerned about that, we need to ban qemu_iovec_to_buffer()
>> completely. Currently we do the same thing for every write request for
>> every format but raw.
> 
> And QED ;-)

qed doesn't exist. We have something some notices from a brainstorming
thread that should become a specification some day. And yes, there's
some prototype code. That's everything we have today.

Anyway, if you declare qemu_iovec_to_buffer() broken, it doesn't really
matter if n-1 formats or n-2 formats are broken...

>>   Or instead of completely removing it, we could add
>> a size limit, though I suspect that would mean violating some specs.
>>    
> 
> One thing I was thinking of trying was splitting off the first sector 
> into a linear buffer, then allocating a new iovec and adjusting the new 
> iovec to cover the new request minus the first sector.

That doesn't help any of the other use cases. Either we consider it a
problem or not. If we do, it must be fixed everywhere.

Kevin
Anthony Liguori Sept. 9, 2010, 1:16 p.m. UTC | #5
On 09/09/2010 08:02 AM, Kevin Wolf wrote:
>>>    Or instead of completely removing it, we could add
>>> a size limit, though I suspect that would mean violating some specs.
>>>
>>>        
>> One thing I was thinking of trying was splitting off the first sector
>> into a linear buffer, then allocating a new iovec and adjusting the new
>> iovec to cover the new request minus the first sector.
>>      
> That doesn't help any of the other use cases. Either we consider it a
> problem or not. If we do, it must be fixed everywhere.
>    

Yes, it's a problem.  In other places in the code base, we go to 
incredible lengths to avoid unbounded allocations.

I think we have to two choices: 1) refactor all of the code to not 
require qemu_iovec_to_buffer() or 2) cap the request size and fail a 
request if it's greater.

Regards,

Anthony Liguori

> Kevin
>
diff mbox

Patch

diff --git a/block/raw.c b/block/raw.c
index 61e6748..3286550 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -99,6 +99,7 @@  typedef struct RawScrubberBounce
 {
     BlockDriverCompletionFunc *cb;
     void *opaque;
+    uint8_t *buf;
     QEMUIOVector qiov;
 } RawScrubberBounce;
 
@@ -113,6 +114,7 @@  static void raw_aio_writev_scrubbed(void *opaque, int ret)
     }
 
     qemu_iovec_destroy(&b->qiov);
+    qemu_free(b->buf);
     qemu_free(b);
 }
 
@@ -120,46 +122,35 @@  static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
     int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
     BlockDriverCompletionFunc *cb, void *opaque)
 {
-    const uint8_t *first_buf;
-    int first_buf_index = 0, i;
-
-    /* This is probably being paranoid, but handle cases of zero size
-       vectors. */
-    for (i = 0; i < qiov->niov; i++) {
-        if (qiov->iov[i].iov_len) {
-            assert(qiov->iov[i].iov_len >= 512);
-            first_buf_index = i;
-            break;
-        }
-    }
+    if (bs->probed) {
+        uint8_t first_buf[512];
+        qemu_iovec_part_to_buffer(qiov, first_buf, 512);
 
-    first_buf = qiov->iov[first_buf_index].iov_base;
+        if (check_write_unsafe(bs, sector_num, first_buf, nb_sectors)) {
+            RawScrubberBounce *b;
+            int ret;
 
-    if (check_write_unsafe(bs, sector_num, first_buf, nb_sectors)) {
-        RawScrubberBounce *b;
-        int ret;
+            /* write the first sector using sync I/O */
+            ret = raw_write_scrubbed_bootsect(bs, first_buf);
+            if (ret < 0) {
+                return NULL;
+            }
 
-        /* write the first sector using sync I/O */
-        ret = raw_write_scrubbed_bootsect(bs, first_buf);
-        if (ret < 0) {
-            return NULL;
-        }
-
-        /* adjust request to be everything but first sector */
+            /* adjust request to be everything but first sector */
 
-        b = qemu_malloc(sizeof(*b));
-        b->cb = cb;
-        b->opaque = opaque;
+            b = qemu_malloc(sizeof(*b));
+            b->cb = cb;
+            b->opaque = opaque;
 
-        qemu_iovec_init(&b->qiov, qiov->nalloc);
-        qemu_iovec_concat(&b->qiov, qiov, qiov->size);
+            b->buf = qemu_malloc(qiov->size);
+            qemu_iovec_to_buffer(qiov, b->buf);
 
-        b->qiov.size -= 512;
-        b->qiov.iov[first_buf_index].iov_base += 512;
-        b->qiov.iov[first_buf_index].iov_len -= 512;
+            qemu_iovec_init(&b->qiov, 1);
+            qemu_iovec_add(&b->qiov, b->buf + 512, qiov->size - 512);
 
-        return bdrv_aio_writev(bs->file, sector_num + 1, &b->qiov,
-                               nb_sectors - 1, raw_aio_writev_scrubbed, b);
+            return bdrv_aio_writev(bs->file, sector_num + 1, &b->qiov,
+                                   nb_sectors - 1, raw_aio_writev_scrubbed, b);
+        }
     }
 
     return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
diff --git a/cutils.c b/cutils.c
index 036ae3c..0fb4968 100644
--- a/cutils.c
+++ b/cutils.c
@@ -207,17 +207,25 @@  void qemu_iovec_reset(QEMUIOVector *qiov)
     qiov->size = 0;
 }
 
-void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf)
+void qemu_iovec_part_to_buffer(QEMUIOVector *qiov, void *buf, size_t len)
 {
     uint8_t *p = (uint8_t *)buf;
     int i;
+    size_t n;
 
-    for (i = 0; i < qiov->niov; ++i) {
-        memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
-        p += qiov->iov[i].iov_len;
+    for (i = 0; len && i < qiov->niov; ++i) {
+        n = MIN(len, qiov->iov[i].iov_len);
+        memcpy(p, qiov->iov[i].iov_base, n);
+        p += n;
+        len -= n;
     }
 }
 
+void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf)
+{
+    qemu_iovec_part_to_buffer(qiov, buf, qiov->size);
+}
+
 void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count)
 {
     const uint8_t *p = (const uint8_t *)buf;
diff --git a/qemu-common.h b/qemu-common.h
index dfd3dc0..c584539 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -281,6 +281,7 @@  void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
 void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
+void qemu_iovec_part_to_buffer(QEMUIOVector *qiov, void *buf, size_t len);
 void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf);
 void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count);