diff mbox

[06/10] block/dmg: process XML plists

Message ID 1419692504-29373-7-git-send-email-peter@lekensteyn.nl
State New
Headers show

Commit Message

Peter Wu Dec. 27, 2014, 3:01 p.m. UTC
The format is simple enough to avoid using a full-blown XML parser.
The offsets are based on the description at
http://newosxbook.com/DMG.html

Signed-off-by: Peter Wu <peter@lekensteyn.nl>
---
 block/dmg.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

Comments

John Snow Jan. 3, 2015, 12:04 a.m. UTC | #1
On 12/27/2014 10:01 AM, Peter Wu wrote:
> The format is simple enough to avoid using a full-blown XML parser.
> The offsets are based on the description at
> http://newosxbook.com/DMG.html
>
> Signed-off-by: Peter Wu <peter@lekensteyn.nl>
> ---
>   block/dmg.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 69 insertions(+)
>
> diff --git a/block/dmg.c b/block/dmg.c
> index 19e4fe2..c03ea01 100644
> --- a/block/dmg.c
> +++ b/block/dmg.c
> @@ -26,6 +26,7 @@
>   #include "qemu/bswap.h"
>   #include "qemu/module.h"
>   #include <zlib.h>
> +#include <glib.h>
>
>   enum {
>       /* Limit chunk sizes to prevent unreasonable amounts of memory being used
> @@ -333,12 +334,66 @@ fail:
>       return ret;
>   }
>
> +static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
> +                              uint64_t info_begin, uint64_t info_length)
> +{
> +    BDRVDMGState *s = bs->opaque;
> +    int ret;
> +    uint8_t *buffer = NULL;
> +    char *data_begin, *data_end;
> +
> +    /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
> +     * safe upper cap on the data length. A test sample had a XML length of
> +     * about 1 MiB. */
> +    if (info_length == 0 || info_length > 16 * 1024 * 1024) {
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    buffer = g_malloc(info_length + 1);
> +    buffer[info_length] = '\0';
> +    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
> +    if (ret != info_length) {
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
> +     * decode. The actual data element has 431 (0x1af) bytes which includes tabs
> +     * and line feeds. */
> +    data_end = (char *)buffer;
> +    while ((data_begin = strstr(data_end, "<data>")) != NULL) {
> +        gsize out_len = 0;
> +
> +        data_begin += 6;
> +        data_end = strstr(data_begin, "</data>");
> +        /* malformed XML? */
> +        if (data_end == NULL) {
> +            ret = -EINVAL;
> +            goto fail;
> +        }
> +        *data_end++ = '\0';
> +        g_base64_decode_inplace(data_begin, &out_len);
> +        ret = dmg_read_mish_block(s, ds, (uint8_t *)data_begin,
> +                                  (uint32_t)out_len);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +    }
> +    ret = 0;
> +
> +fail:
> +    g_free(buffer);
> +    return ret;
> +}
> +

This starts to make me a little nervous, because we're ignoring so much
of the XML document structure here and effectively just performing a
regular-expression search for "<data>(.*)</data>".

Can we guarantee that the ONLY time the data element is used in this 
document is when it is being used in the exact context we are expecting 
here, where it contains the b64 mish data we expect it to?

i.e. it is always in a path like this as detailed by 
http://newosxbook.com/DMG.html :

plist/dict/key[text()='resource-fork']/following-sibling::dict/key[text()='blkx']/following-sibling::array/dict/key[text()='data']/following-sibling::data

I notice that this document says other sections MAY be present, do any 
of them ever need to be parsed? Has anyone written about them before?

Do we know if any use data sections?

I suppose at the very least, sections of interest are always going to 
include the "mish" magic, so that should probably keep us from doing 
anything too stupid ...

>   static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
>                       Error **errp)
>   {
>       BDRVDMGState *s = bs->opaque;
>       DmgHeaderState ds;
>       uint64_t rsrc_fork_offset, rsrc_fork_length;
> +    uint64_t plist_xml_offset, plist_xml_length;
>       int64_t offset;
>       int ret;
>
> @@ -366,12 +421,26 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
>       if (ret < 0) {
>           goto fail;
>       }
> +    /* offset of property list (XMLOffset) */
> +    ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +    ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
> +    if (ret < 0) {
> +        goto fail;
> +    }
>       if (rsrc_fork_offset != 0 && rsrc_fork_length != 0) {
>           ret = dmg_read_resource_fork(bs, &ds,
>                                        rsrc_fork_offset, rsrc_fork_length);
>           if (ret < 0) {
>               goto fail;
>           }
> +    } else if (plist_xml_offset != 0 && plist_xml_length != 0) {
> +        ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
> +        if (ret < 0) {
> +            goto fail;
> +        }
>       } else {
>           ret = -EINVAL;
>           goto fail;
>
Peter Wu Jan. 3, 2015, 11:54 a.m. UTC | #2
On Friday 02 January 2015 19:04:32 John Snow wrote:
> On 12/27/2014 10:01 AM, Peter Wu wrote:
> > The format is simple enough to avoid using a full-blown XML parser.
> > The offsets are based on the description at
> > http://newosxbook.com/DMG.html
> >
> > Signed-off-by: Peter Wu <peter@lekensteyn.nl>
> > ---
> >   block/dmg.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 69 insertions(+)
> >
> > diff --git a/block/dmg.c b/block/dmg.c
> > index 19e4fe2..c03ea01 100644
> > --- a/block/dmg.c
> > +++ b/block/dmg.c
> > @@ -26,6 +26,7 @@
> >   #include "qemu/bswap.h"
> >   #include "qemu/module.h"
> >   #include <zlib.h>
> > +#include <glib.h>
> >
> >   enum {
> >       /* Limit chunk sizes to prevent unreasonable amounts of memory being used
> > @@ -333,12 +334,66 @@ fail:
> >       return ret;
> >   }
> >
> > +static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
> > +                              uint64_t info_begin, uint64_t info_length)
> > +{
> > +    BDRVDMGState *s = bs->opaque;
> > +    int ret;
> > +    uint8_t *buffer = NULL;
> > +    char *data_begin, *data_end;
> > +
> > +    /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
> > +     * safe upper cap on the data length. A test sample had a XML length of
> > +     * about 1 MiB. */
> > +    if (info_length == 0 || info_length > 16 * 1024 * 1024) {
> > +        ret = -EINVAL;
> > +        goto fail;
> > +    }
> > +
> > +    buffer = g_malloc(info_length + 1);
> > +    buffer[info_length] = '\0';
> > +    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
> > +    if (ret != info_length) {
> > +        ret = -EINVAL;
> > +        goto fail;
> > +    }
> > +
> > +    /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
> > +     * decode. The actual data element has 431 (0x1af) bytes which includes tabs
> > +     * and line feeds. */
> > +    data_end = (char *)buffer;
> > +    while ((data_begin = strstr(data_end, "<data>")) != NULL) {
> > +        gsize out_len = 0;
> > +
> > +        data_begin += 6;
> > +        data_end = strstr(data_begin, "</data>");
> > +        /* malformed XML? */
> > +        if (data_end == NULL) {
> > +            ret = -EINVAL;
> > +            goto fail;
> > +        }
> > +        *data_end++ = '\0';
> > +        g_base64_decode_inplace(data_begin, &out_len);
> > +        ret = dmg_read_mish_block(s, ds, (uint8_t *)data_begin,
> > +                                  (uint32_t)out_len);
> > +        if (ret < 0) {
> > +            goto fail;
> > +        }
> > +    }
> > +    ret = 0;
> > +
> > +fail:
> > +    g_free(buffer);
> > +    return ret;
> > +}
> > +
> 
> This starts to make me a little nervous, because we're ignoring so much
> of the XML document structure here and effectively just performing a
> regular-expression search for "<data>(.*)</data>".
> 
> Can we guarantee that the ONLY time the data element is used in this 
> document is when it is being used in the exact context we are expecting 
> here, where it contains the b64 mish data we expect it to?
> 
> i.e. it is always in a path like this as detailed by 
> http://newosxbook.com/DMG.html :
> 
> plist/dict/key[text()='resource-fork']/following-sibling::dict/key[text()='blkx']/following-sibling::array/dict/key[text()='data']/following-sibling::data
> 
> I notice that this document says other sections MAY be present, do any 
> of them ever need to be parsed? Has anyone written about them before?
> 
> Do we know if any use data sections?
> 
> I suppose at the very least, sections of interest are always going to 
> include the "mish" magic, so that should probably keep us from doing 
> anything too stupid ...

I did not find DMG files with <data> elements at other locations. If that
were to occur, at worst we would fail to parse a DMG file. I think that
introducing an XML parser here would add risk for only a minor benefit
(being prepared for future cases).

Since this is a property list, in theory people could include all kinds
of data for different keys (which would then be matched by the current
implementation). But how likely is this for a disk image?

FWIW, I looked into the dmg2img program and that also looks for the
strings "<data>" and "</data>". Nobody has raised a bug for that program
so far.

Do you think that it is worth using an XML parser on potentially
insecure data? I suggest keeping it as is, and considering a different
approach if a problem is encountered.

Kind regards,
Peter

> >   static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
> >                       Error **errp)
> >   {
> >       BDRVDMGState *s = bs->opaque;
> >       DmgHeaderState ds;
> >       uint64_t rsrc_fork_offset, rsrc_fork_length;
> > +    uint64_t plist_xml_offset, plist_xml_length;
> >       int64_t offset;
> >       int ret;
> >
> > @@ -366,12 +421,26 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
> >       if (ret < 0) {
> >           goto fail;
> >       }
> > +    /* offset of property list (XMLOffset) */
> > +    ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
> > +    if (ret < 0) {
> > +        goto fail;
> > +    }
> > +    ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
> > +    if (ret < 0) {
> > +        goto fail;
> > +    }
> >       if (rsrc_fork_offset != 0 && rsrc_fork_length != 0) {
> >           ret = dmg_read_resource_fork(bs, &ds,
> >                                        rsrc_fork_offset, rsrc_fork_length);
> >           if (ret < 0) {
> >               goto fail;
> >           }
> > +    } else if (plist_xml_offset != 0 && plist_xml_length != 0) {
> > +        ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
> > +        if (ret < 0) {
> > +            goto fail;
> > +        }
> >       } else {
> >           ret = -EINVAL;
> >           goto fail;
> >
John Snow Jan. 5, 2015, 4:46 p.m. UTC | #3
On 01/03/2015 06:54 AM, Peter Wu wrote:
> On Friday 02 January 2015 19:04:32 John Snow wrote:
>> On 12/27/2014 10:01 AM, Peter Wu wrote:
>>> The format is simple enough to avoid using a full-blown XML parser.
>>> The offsets are based on the description at
>>> http://newosxbook.com/DMG.html
>>>
>>> Signed-off-by: Peter Wu <peter@lekensteyn.nl>
>>> ---
>>>    block/dmg.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 69 insertions(+)
>>>
>>> diff --git a/block/dmg.c b/block/dmg.c
>>> index 19e4fe2..c03ea01 100644
>>> --- a/block/dmg.c
>>> +++ b/block/dmg.c
>>> @@ -26,6 +26,7 @@
>>>    #include "qemu/bswap.h"
>>>    #include "qemu/module.h"
>>>    #include <zlib.h>
>>> +#include <glib.h>
>>>
>>>    enum {
>>>        /* Limit chunk sizes to prevent unreasonable amounts of memory being used
>>> @@ -333,12 +334,66 @@ fail:
>>>        return ret;
>>>    }
>>>
>>> +static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
>>> +                              uint64_t info_begin, uint64_t info_length)
>>> +{
>>> +    BDRVDMGState *s = bs->opaque;
>>> +    int ret;
>>> +    uint8_t *buffer = NULL;
>>> +    char *data_begin, *data_end;
>>> +
>>> +    /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
>>> +     * safe upper cap on the data length. A test sample had a XML length of
>>> +     * about 1 MiB. */
>>> +    if (info_length == 0 || info_length > 16 * 1024 * 1024) {
>>> +        ret = -EINVAL;
>>> +        goto fail;
>>> +    }
>>> +
>>> +    buffer = g_malloc(info_length + 1);
>>> +    buffer[info_length] = '\0';
>>> +    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
>>> +    if (ret != info_length) {
>>> +        ret = -EINVAL;
>>> +        goto fail;
>>> +    }
>>> +
>>> +    /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
>>> +     * decode. The actual data element has 431 (0x1af) bytes which includes tabs
>>> +     * and line feeds. */
>>> +    data_end = (char *)buffer;
>>> +    while ((data_begin = strstr(data_end, "<data>")) != NULL) {
>>> +        gsize out_len = 0;
>>> +
>>> +        data_begin += 6;
>>> +        data_end = strstr(data_begin, "</data>");
>>> +        /* malformed XML? */
>>> +        if (data_end == NULL) {
>>> +            ret = -EINVAL;
>>> +            goto fail;
>>> +        }
>>> +        *data_end++ = '\0';
>>> +        g_base64_decode_inplace(data_begin, &out_len);
>>> +        ret = dmg_read_mish_block(s, ds, (uint8_t *)data_begin,
>>> +                                  (uint32_t)out_len);
>>> +        if (ret < 0) {
>>> +            goto fail;
>>> +        }
>>> +    }
>>> +    ret = 0;
>>> +
>>> +fail:
>>> +    g_free(buffer);
>>> +    return ret;
>>> +}
>>> +
>>
>> This starts to make me a little nervous, because we're ignoring so much
>> of the XML document structure here and effectively just performing a
>> regular-expression search for "<data>(.*)</data>".
>>
>> Can we guarantee that the ONLY time the data element is used in this
>> document is when it is being used in the exact context we are expecting
>> here, where it contains the b64 mish data we expect it to?
>>
>> i.e. it is always in a path like this as detailed by
>> http://newosxbook.com/DMG.html :
>>
>> plist/dict/key[text()='resource-fork']/following-sibling::dict/key[text()='blkx']/following-sibling::array/dict/key[text()='data']/following-sibling::data
>>
>> I notice that this document says other sections MAY be present, do any
>> of them ever need to be parsed? Has anyone written about them before?
>>
>> Do we know if any use data sections?
>>
>> I suppose at the very least, sections of interest are always going to
>> include the "mish" magic, so that should probably keep us from doing
>> anything too stupid ...
>
> I did not find DMG files with <data> elements at other locations. If that
> were to occur, at worst we would fail to parse a DMG file. I think that
> introducing an XML parser here would add risk for only a minor benefit
> (being prepared for future cases).
>
> Since this is a property list, in theory people could include all kinds
> of data for different keys (which would then be matched by the current
> implementation). But how likely is this for a disk image?
>
> FWIW, I looked into the dmg2img program and that also looks for the
> strings "<data>" and "</data>". Nobody has raised a bug for that program
> so far.
>
> Do you think that it is worth using an XML parser on potentially
> insecure data? I suggest keeping it as is, and considering a different
> approach if a problem is encountered.
>
> Kind regards,
> Peter

No: I was just asking the questions. If dmg2img gets away with it, the 
worst that will happen is we will fail to parse/load a DMG file because 
we ignore everything without the "mish" magic, so this is OK.

I just wanted to check, since I didn't have a lot of DMG files on hand
and I couldn't really find a fuller reference to the types of XML that
show up.

Thanks!

>>>    static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
>>>                        Error **errp)
>>>    {
>>>        BDRVDMGState *s = bs->opaque;
>>>        DmgHeaderState ds;
>>>        uint64_t rsrc_fork_offset, rsrc_fork_length;
>>> +    uint64_t plist_xml_offset, plist_xml_length;
>>>        int64_t offset;
>>>        int ret;
>>>
>>> @@ -366,12 +421,26 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
>>>        if (ret < 0) {
>>>            goto fail;
>>>        }
>>> +    /* offset of property list (XMLOffset) */
>>> +    ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
>>> +    if (ret < 0) {
>>> +        goto fail;
>>> +    }
>>> +    ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
>>> +    if (ret < 0) {
>>> +        goto fail;
>>> +    }
>>>        if (rsrc_fork_offset != 0 && rsrc_fork_length != 0) {
>>>            ret = dmg_read_resource_fork(bs, &ds,
>>>                                         rsrc_fork_offset, rsrc_fork_length);
>>>            if (ret < 0) {
>>>                goto fail;
>>>            }
>>> +    } else if (plist_xml_offset != 0 && plist_xml_length != 0) {
>>> +        ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
>>> +        if (ret < 0) {
>>> +            goto fail;
>>> +        }
>>>        } else {
>>>            ret = -EINVAL;
>>>            goto fail;
>>>
>
John Snow Jan. 5, 2015, 4:54 p.m. UTC | #4
On 12/27/2014 10:01 AM, Peter Wu wrote:
> The format is simple enough to avoid using a full-blown XML parser.
> The offsets are based on the description at
> http://newosxbook.com/DMG.html
>
> Signed-off-by: Peter Wu <peter@lekensteyn.nl>
> ---
>   block/dmg.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 69 insertions(+)
>
> diff --git a/block/dmg.c b/block/dmg.c
> index 19e4fe2..c03ea01 100644
> --- a/block/dmg.c
> +++ b/block/dmg.c
> @@ -26,6 +26,7 @@
>   #include "qemu/bswap.h"
>   #include "qemu/module.h"
>   #include <zlib.h>
> +#include <glib.h>
>
>   enum {
>       /* Limit chunk sizes to prevent unreasonable amounts of memory being used
> @@ -333,12 +334,66 @@ fail:
>       return ret;
>   }
>
> +static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
> +                              uint64_t info_begin, uint64_t info_length)
> +{
> +    BDRVDMGState *s = bs->opaque;
> +    int ret;
> +    uint8_t *buffer = NULL;
> +    char *data_begin, *data_end;
> +
> +    /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
> +     * safe upper cap on the data length. A test sample had a XML length of
> +     * about 1 MiB. */
> +    if (info_length == 0 || info_length > 16 * 1024 * 1024) {
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    buffer = g_malloc(info_length + 1);
> +    buffer[info_length] = '\0';
> +    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
> +    if (ret != info_length) {
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
> +     * decode. The actual data element has 431 (0x1af) bytes which includes tabs
> +     * and line feeds. */
> +    data_end = (char *)buffer;
> +    while ((data_begin = strstr(data_end, "<data>")) != NULL) {
> +        gsize out_len = 0;
> +
> +        data_begin += 6;
> +        data_end = strstr(data_begin, "</data>");
> +        /* malformed XML? */
> +        if (data_end == NULL) {
> +            ret = -EINVAL;
> +            goto fail;
> +        }
> +        *data_end++ = '\0';
> +        g_base64_decode_inplace(data_begin, &out_len);
> +        ret = dmg_read_mish_block(s, ds, (uint8_t *)data_begin,
> +                                  (uint32_t)out_len);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +    }
> +    ret = 0;
> +
> +fail:
> +    g_free(buffer);
> +    return ret;
> +}
> +
>   static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
>                       Error **errp)
>   {
>       BDRVDMGState *s = bs->opaque;
>       DmgHeaderState ds;
>       uint64_t rsrc_fork_offset, rsrc_fork_length;
> +    uint64_t plist_xml_offset, plist_xml_length;
>       int64_t offset;
>       int ret;
>
> @@ -366,12 +421,26 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
>       if (ret < 0) {
>           goto fail;
>       }
> +    /* offset of property list (XMLOffset) */
> +    ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +    ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
> +    if (ret < 0) {
> +        goto fail;
> +    }
>       if (rsrc_fork_offset != 0 && rsrc_fork_length != 0) {
>           ret = dmg_read_resource_fork(bs, &ds,
>                                        rsrc_fork_offset, rsrc_fork_length);
>           if (ret < 0) {
>               goto fail;
>           }
> +    } else if (plist_xml_offset != 0 && plist_xml_length != 0) {
> +        ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
> +        if (ret < 0) {
> +            goto fail;
> +        }
>       } else {
>           ret = -EINVAL;
>           goto fail;
>

After discussion, and because the dmg2img program uses a similar 
strategy, this is probably safe enough -- failures do not seem likely 
and if they occur, we will simply ignore the erroneous data.

We can complicate this in the future if we need to, as stated.

Reviewed-by: John Snow <jsnow@redhat.com>
diff mbox

Patch

diff --git a/block/dmg.c b/block/dmg.c
index 19e4fe2..c03ea01 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -26,6 +26,7 @@ 
 #include "qemu/bswap.h"
 #include "qemu/module.h"
 #include <zlib.h>
+#include <glib.h>
 
 enum {
     /* Limit chunk sizes to prevent unreasonable amounts of memory being used
@@ -333,12 +334,66 @@  fail:
     return ret;
 }
 
+static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
+                              uint64_t info_begin, uint64_t info_length)
+{
+    BDRVDMGState *s = bs->opaque;
+    int ret;
+    uint8_t *buffer = NULL;
+    char *data_begin, *data_end;
+
+    /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
+     * safe upper cap on the data length. A test sample had a XML length of
+     * about 1 MiB. */
+    if (info_length == 0 || info_length > 16 * 1024 * 1024) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    buffer = g_malloc(info_length + 1);
+    buffer[info_length] = '\0';
+    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
+    if (ret != info_length) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
+     * decode. The actual data element has 431 (0x1af) bytes which includes tabs
+     * and line feeds. */
+    data_end = (char *)buffer;
+    while ((data_begin = strstr(data_end, "<data>")) != NULL) {
+        gsize out_len = 0;
+
+        data_begin += 6;
+        data_end = strstr(data_begin, "</data>");
+        /* malformed XML? */
+        if (data_end == NULL) {
+            ret = -EINVAL;
+            goto fail;
+        }
+        *data_end++ = '\0';
+        g_base64_decode_inplace(data_begin, &out_len);
+        ret = dmg_read_mish_block(s, ds, (uint8_t *)data_begin,
+                                  (uint32_t)out_len);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+    ret = 0;
+
+fail:
+    g_free(buffer);
+    return ret;
+}
+
 static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
     BDRVDMGState *s = bs->opaque;
     DmgHeaderState ds;
     uint64_t rsrc_fork_offset, rsrc_fork_length;
+    uint64_t plist_xml_offset, plist_xml_length;
     int64_t offset;
     int ret;
 
@@ -366,12 +421,26 @@  static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
     if (ret < 0) {
         goto fail;
     }
+    /* offset of property list (XMLOffset) */
+    ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
+    if (ret < 0) {
+        goto fail;
+    }
+    ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
+    if (ret < 0) {
+        goto fail;
+    }
     if (rsrc_fork_offset != 0 && rsrc_fork_length != 0) {
         ret = dmg_read_resource_fork(bs, &ds,
                                      rsrc_fork_offset, rsrc_fork_length);
         if (ret < 0) {
             goto fail;
         }
+    } else if (plist_xml_offset != 0 && plist_xml_length != 0) {
+        ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
+        if (ret < 0) {
+            goto fail;
+        }
     } else {
         ret = -EINVAL;
         goto fail;