diff mbox

[09/10] block/dmg: support bzip2 block entry types

Message ID 1419692504-29373-10-git-send-email-peter@lekensteyn.nl
State New
Headers show

Commit Message

Peter Wu Dec. 27, 2014, 3:01 p.m. UTC
This patch adds support for bzip2-compressed block entries as introduced
with OS X 10.4 (source: https://en.wikipedia.org/wiki/Apple_Disk_Image).

It was tested against a 5.2G "OS X Yosemite" installation image which
stores the BLXX block in the XML property list (instead of resource
forks) and has over 5k chunks.

New configure entries are added (--enable-bzip2 / --disable-bzip2) to
control inclusion of bzip2 functionality (which requires linking against
libbz2). The help message suggests that this option is needed for DMG
files, but the tests are generic enough that other parts of QEMU can use
bzip2 if needed.

The identifiers are based on http://newosxbook.com/DMG.html.

The decompression routines are based on the zlib case, but as there is
no way to reset the decompression state (unlike zlib), memory is
allocated and deallocated for every decompression. This should not be
problematic as the decompression takes most of the time and as blocks
are typically about/over 1 MiB in size, only one allocation is done
every 2000 sectors.

Signed-off-by: Peter Wu <peter@lekensteyn.nl>
---
 block/dmg.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
 configure   | 31 +++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 3 deletions(-)

Comments

John Snow Jan. 5, 2015, 7:32 p.m. UTC | #1
On 12/27/2014 10:01 AM, Peter Wu wrote:
> This patch adds support for bzip2-compressed block entries as introduced
> with OS X 10.4 (source: https://en.wikipedia.org/wiki/Apple_Disk_Image).
>
> It was tested against a 5.2G "OS X Yosemite" installation image which
> stores the BLXX block in the XML property list (instead of resource
> forks) and has over 5k chunks.
>
> New configure entries are added (--enable-bzip2 / --disable-bzip2) to
> control inclusion of bzip2 functionality (which requires linking against
> libbz2). The help message suggests that this option is needed for DMG
> files, but the tests are generic enough that other parts of QEMU can use
> bzip2 if needed.
>
> The identifiers are based on http://newosxbook.com/DMG.html.
>
> The decompression routines are based on the zlib case, but as there is
> no way to reset the decompression state (unlike zlib), memory is
> allocated and deallocated for every decompression. This should not be
> problematic as the decompression takes most of the time and as blocks
> are typically about/over 1 MiB in size, only one allocation is done
> every 2000 sectors.
>
> Signed-off-by: Peter Wu <peter@lekensteyn.nl>
> ---
>   block/dmg.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
>   configure   | 31 +++++++++++++++++++++++++++++++
>   2 files changed, 84 insertions(+), 3 deletions(-)
>
> diff --git a/block/dmg.c b/block/dmg.c
> index 93b597f..67d4e2b 100644
> --- a/block/dmg.c
> +++ b/block/dmg.c
> @@ -26,6 +26,9 @@
>   #include "qemu/bswap.h"
>   #include "qemu/module.h"
>   #include <zlib.h>
> +#ifdef CONFIG_BZIP2
> +#include <bzlib.h>
> +#endif
>   #include <glib.h>
>
>   enum {
> @@ -56,6 +59,9 @@ typedef struct BDRVDMGState {
>       uint8_t *compressed_chunk;
>       uint8_t *uncompressed_chunk;
>       z_stream zstream;
> +#ifdef CONFIG_BZIP2
> +    bz_stream bzstream;
> +#endif
>   } BDRVDMGState;
>
>   static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
> @@ -123,6 +129,7 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
>
>       switch (s->types[chunk]) {
>       case 0x80000005: /* zlib compressed */
> +    case 0x80000006: /* bzip2 compressed */
>           compressed_size = s->lengths[chunk];
>           uncompressed_sectors = s->sectorcounts[chunk];
>           break;
> @@ -187,6 +194,21 @@ typedef struct DmgHeaderState {
>       uint32_t max_sectors_per_chunk;
>   } DmgHeaderState;
>
> +static bool dmg_is_valid_block_type(uint32_t entry_type)
> +{
> +    switch (entry_type) {
> +    case 0x00000001:    /* uncompressed */
> +    case 0x00000002:    /* zeroes */
> +    case 0x80000005:    /* zlib */
> +#ifdef CONFIG_BZIP2
> +    case 0x80000006:    /* bzip2 */
> +#endif
> +        return true;
> +    default:
> +        return false;
> +    }
> +}
> +

Wish I had read this patch first before trying to read the blob of 
conditionals this replaces. :)

>   static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
>                                  uint8_t *buffer, uint32_t count)
>   {
> @@ -218,8 +240,7 @@ static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
>       for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
>           s->types[i] = buff_read_uint32(buffer, offset);
>           offset += 4;
> -        if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
> -            s->types[i] != 2) {
> +        if (!dmg_is_valid_block_type(s->types[i])) {
>               if (s->types[i] == 0xffffffff && i > 0) {
>                   ds->last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
>                   ds->last_out_offset = s->sectors[i - 1] +
> @@ -534,13 +555,14 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
>       if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
>           int ret;
>           uint32_t chunk = search_chunk(s, sector_num);
> +        uint64_t total_out;

This breaks compilation when configured with --disable-bzip2. (unused 
variable)

>
>           if (chunk >= s->n_chunks) {
>               return -1;
>           }
>
>           s->current_chunk = s->n_chunks;
> -        switch (s->types[chunk]) {
> +        switch (s->types[chunk]) { /* block entry type */
>           case 0x80000005: { /* zlib compressed */
>               /* we need to buffer, because only the chunk as whole can be
>                * inflated. */
> @@ -564,6 +586,34 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
>                   return -1;
>               }
>               break; }
> +#ifdef CONFIG_BZIP2
> +        case 0x80000006: /* bzip2 compressed */
> +            /* we need to buffer, because only the chunk as whole can be
> +             * inflated. */
> +            ret = bdrv_pread(bs->file, s->offsets[chunk],
> +                             s->compressed_chunk, s->lengths[chunk]);
> +            if (ret != s->lengths[chunk]) {
> +                return -1;
> +            }
> +
> +            ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0);
> +            if (ret != BZ_OK) {
> +                return -1;
> +            }
> +            s->bzstream.next_in = (char *)s->compressed_chunk;
> +            s->bzstream.avail_in = (unsigned int) s->lengths[chunk];
> +            s->bzstream.next_out = (char *)s->uncompressed_chunk;
> +            s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk];
> +            ret = BZ2_bzDecompress(&s->bzstream);
> +            total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) +
> +                        s->bzstream.total_out_lo32;
> +            BZ2_bzDecompressEnd(&s->bzstream);
> +            if (ret != BZ_STREAM_END ||
> +                total_out != 512 * s->sectorcounts[chunk]) {
> +                return -1;
> +            }
> +            break;
> +#endif /* CONFIG_BZIP2 */
>           case 1: /* copy */
>               ret = bdrv_pread(bs->file, s->offsets[chunk],
>                                s->uncompressed_chunk, s->lengths[chunk]);
> diff --git a/configure b/configure
> index cae588c..eadae63 100755
> --- a/configure
> +++ b/configure
> @@ -313,6 +313,7 @@ glx=""
>   zlib="yes"
>   lzo=""
>   snappy=""
> +bzip2=""
>   guest_agent=""
>   guest_agent_with_vss="no"
>   vss_win32_sdk=""
> @@ -1060,6 +1061,10 @@ for opt do
>     ;;
>     --enable-snappy) snappy="yes"
>     ;;
> +  --disable-bzip2) bzip2="no"
> +  ;;
> +  --enable-bzip2) bzip2="yes"
> +  ;;
>     --enable-guest-agent) guest_agent="yes"
>     ;;
>     --disable-guest-agent) guest_agent="no"
> @@ -1374,6 +1379,8 @@ Advanced options (experts only):
>     --enable-usb-redir       enable usb network redirection support
>     --enable-lzo             enable the support of lzo compression library
>     --enable-snappy          enable the support of snappy compression library
> +  --enable-bzip2           enable the support of bzip2 compression library (for
> +                           reading bzip2-compressed dmg images)
>     --disable-guest-agent    disable building of the QEMU Guest Agent
>     --enable-guest-agent     enable building of the QEMU Guest Agent
>     --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent
> @@ -1820,6 +1827,25 @@ EOF
>   fi
>
>   ##########################################
> +# bzip2 check
> +
> +if test "$bzip2" != "no" ; then
> +    cat > $TMPC << EOF
> +#include <bzlib.h>
> +int main(void) { BZ2_bzlibVersion(); return 0; }
> +EOF
> +    if compile_prog "" "-lbz2" ; then
> +        libs_softmmu="$libs_softmmu -lbz2"
> +        bzip2="yes"
> +    else
> +        if test "$bzip2" = "yes"; then
> +            feature_not_found "libbzip2" "Install libbzip2 devel"
> +        fi
> +        bzip2="no"
> +    fi
> +fi
> +
> +##########################################
>   # libseccomp check
>
>   if test "$seccomp" != "no" ; then
> @@ -4340,6 +4366,7 @@ echo "vhdx              $vhdx"
>   echo "Quorum            $quorum"
>   echo "lzo support       $lzo"
>   echo "snappy support    $snappy"
> +echo "bzip2 support     $bzip2"
>   echo "NUMA host support $numa"
>
>   if test "$sdl_too_old" = "yes"; then
> @@ -4695,6 +4722,10 @@ if test "$snappy" = "yes" ; then
>     echo "CONFIG_SNAPPY=y" >> $config_host_mak
>   fi
>
> +if test "$bzip2" = "yes" ; then
> +  echo "CONFIG_BZIP2=y" >> $config_host_mak
> +fi
> +
>   if test "$libiscsi" = "yes" ; then
>     echo "CONFIG_LIBISCSI=m" >> $config_host_mak
>     echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak
>

Looks good otherwise. CCing Paolo so he can take a quick peek at the 
configure script. It looks sane to me, though.
Paolo Bonzini Jan. 7, 2015, 10:29 a.m. UTC | #2
On 05/01/2015 20:32, John Snow wrote:
>>
>>   fi
>>
>>   ##########################################
>> +# bzip2 check
>> +
>> +if test "$bzip2" != "no" ; then
>> +    cat > $TMPC << EOF
>> +#include <bzlib.h>
>> +int main(void) { BZ2_bzlibVersion(); return 0; }
>> +EOF
>> +    if compile_prog "" "-lbz2" ; then
>> +        libs_softmmu="$libs_softmmu -lbz2"
>> +        bzip2="yes"
>> +    else
>> +        if test "$bzip2" = "yes"; then
>> +            feature_not_found "libbzip2" "Install libbzip2 devel"
>> +        fi
>> +        bzip2="no"
>> +    fi
>> +fi
>> +
>> +##########################################
>>   # libseccomp check
>>
>>   if test "$seccomp" != "no" ; then
>> @@ -4340,6 +4366,7 @@ echo "vhdx              $vhdx"
>>   echo "Quorum            $quorum"
>>   echo "lzo support       $lzo"
>>   echo "snappy support    $snappy"
>> +echo "bzip2 support     $bzip2"
>>   echo "NUMA host support $numa"
>>
>>   if test "$sdl_too_old" = "yes"; then
>> @@ -4695,6 +4722,10 @@ if test "$snappy" = "yes" ; then
>>     echo "CONFIG_SNAPPY=y" >> $config_host_mak
>>   fi
>>
>> +if test "$bzip2" = "yes" ; then
>> +  echo "CONFIG_BZIP2=y" >> $config_host_mak
>> +fi
>> +
>>   if test "$libiscsi" = "yes" ; then
>>     echo "CONFIG_LIBISCSI=m" >> $config_host_mak
>>     echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak
>>
> 
> Looks good otherwise. CCing Paolo so he can take a quick peek at the
> configure script. It looks sane to me, though.

It is sane, but instead of libs_softmmu="$libs_softmmu -lbz2" it's
better to follow what we do for libiscsi.  In configure:

CONFIG_BZIP2=y
LIBBZ2_CFLAGS=-lbz2

In Makefile.objs:

dmg.o-libs     := $(LIBBZ2_CFLAGS)

Paolo
Peter Wu Jan. 7, 2015, 10:31 a.m. UTC | #3
On Wednesday 07 January 2015 11:29:20 Paolo Bonzini wrote:
> 
> On 05/01/2015 20:32, John Snow wrote:
> >>
> >>   fi
> >>
> >>   ##########################################
> >> +# bzip2 check
> >> +
> >> +if test "$bzip2" != "no" ; then
> >> +    cat > $TMPC << EOF
> >> +#include <bzlib.h>
> >> +int main(void) { BZ2_bzlibVersion(); return 0; }
> >> +EOF
> >> +    if compile_prog "" "-lbz2" ; then
> >> +        libs_softmmu="$libs_softmmu -lbz2"
> >> +        bzip2="yes"
> >> +    else
> >> +        if test "$bzip2" = "yes"; then
> >> +            feature_not_found "libbzip2" "Install libbzip2 devel"
> >> +        fi
> >> +        bzip2="no"
> >> +    fi
> >> +fi
> >> +
> >> +##########################################
> >>   # libseccomp check
> >>
> >>   if test "$seccomp" != "no" ; then
> >> @@ -4340,6 +4366,7 @@ echo "vhdx              $vhdx"
> >>   echo "Quorum            $quorum"
> >>   echo "lzo support       $lzo"
> >>   echo "snappy support    $snappy"
> >> +echo "bzip2 support     $bzip2"
> >>   echo "NUMA host support $numa"
> >>
> >>   if test "$sdl_too_old" = "yes"; then
> >> @@ -4695,6 +4722,10 @@ if test "$snappy" = "yes" ; then
> >>     echo "CONFIG_SNAPPY=y" >> $config_host_mak
> >>   fi
> >>
> >> +if test "$bzip2" = "yes" ; then
> >> +  echo "CONFIG_BZIP2=y" >> $config_host_mak
> >> +fi
> >> +
> >>   if test "$libiscsi" = "yes" ; then
> >>     echo "CONFIG_LIBISCSI=m" >> $config_host_mak
> >>     echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak
> >>
> > 
> > Looks good otherwise. CCing Paolo so he can take a quick peek at the
> > configure script. It looks sane to me, though.
> 
> It is sane, but instead of libs_softmmu="$libs_softmmu -lbz2" it's
> better to follow what we do for libiscsi.  In configure:
> 
> CONFIG_BZIP2=y
> LIBBZ2_CFLAGS=-lbz2
> 
> In Makefile.objs:
> 
> dmg.o-libs     := $(LIBBZ2_CFLAGS)
> 
> Paolo

Already taken care of in v2 which was posted to the list yesterday :-)

I named it "BZIP2_LIBS" though (matching the package name).
Paolo Bonzini Jan. 7, 2015, 10:53 a.m. UTC | #4
On 07/01/2015 11:31, Peter Wu wrote:
> Already taken care of in v2 which was posted to the list yesterday :-)
> 
> I named it "BZIP2_LIBS" though (matching the package name).

Great. :)

Paolo
diff mbox

Patch

diff --git a/block/dmg.c b/block/dmg.c
index 93b597f..67d4e2b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -26,6 +26,9 @@ 
 #include "qemu/bswap.h"
 #include "qemu/module.h"
 #include <zlib.h>
+#ifdef CONFIG_BZIP2
+#include <bzlib.h>
+#endif
 #include <glib.h>
 
 enum {
@@ -56,6 +59,9 @@  typedef struct BDRVDMGState {
     uint8_t *compressed_chunk;
     uint8_t *uncompressed_chunk;
     z_stream zstream;
+#ifdef CONFIG_BZIP2
+    bz_stream bzstream;
+#endif
 } BDRVDMGState;
 
 static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -123,6 +129,7 @@  static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
 
     switch (s->types[chunk]) {
     case 0x80000005: /* zlib compressed */
+    case 0x80000006: /* bzip2 compressed */
         compressed_size = s->lengths[chunk];
         uncompressed_sectors = s->sectorcounts[chunk];
         break;
@@ -187,6 +194,21 @@  typedef struct DmgHeaderState {
     uint32_t max_sectors_per_chunk;
 } DmgHeaderState;
 
+static bool dmg_is_valid_block_type(uint32_t entry_type)
+{
+    switch (entry_type) {
+    case 0x00000001:    /* uncompressed */
+    case 0x00000002:    /* zeroes */
+    case 0x80000005:    /* zlib */
+#ifdef CONFIG_BZIP2
+    case 0x80000006:    /* bzip2 */
+#endif
+        return true;
+    default:
+        return false;
+    }
+}
+
 static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
                                uint8_t *buffer, uint32_t count)
 {
@@ -218,8 +240,7 @@  static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
     for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
         s->types[i] = buff_read_uint32(buffer, offset);
         offset += 4;
-        if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
-            s->types[i] != 2) {
+        if (!dmg_is_valid_block_type(s->types[i])) {
             if (s->types[i] == 0xffffffff && i > 0) {
                 ds->last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
                 ds->last_out_offset = s->sectors[i - 1] +
@@ -534,13 +555,14 @@  static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
     if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
         int ret;
         uint32_t chunk = search_chunk(s, sector_num);
+        uint64_t total_out;
 
         if (chunk >= s->n_chunks) {
             return -1;
         }
 
         s->current_chunk = s->n_chunks;
-        switch (s->types[chunk]) {
+        switch (s->types[chunk]) { /* block entry type */
         case 0x80000005: { /* zlib compressed */
             /* we need to buffer, because only the chunk as whole can be
              * inflated. */
@@ -564,6 +586,34 @@  static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
                 return -1;
             }
             break; }
+#ifdef CONFIG_BZIP2
+        case 0x80000006: /* bzip2 compressed */
+            /* we need to buffer, because only the chunk as whole can be
+             * inflated. */
+            ret = bdrv_pread(bs->file, s->offsets[chunk],
+                             s->compressed_chunk, s->lengths[chunk]);
+            if (ret != s->lengths[chunk]) {
+                return -1;
+            }
+
+            ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0);
+            if (ret != BZ_OK) {
+                return -1;
+            }
+            s->bzstream.next_in = (char *)s->compressed_chunk;
+            s->bzstream.avail_in = (unsigned int) s->lengths[chunk];
+            s->bzstream.next_out = (char *)s->uncompressed_chunk;
+            s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk];
+            ret = BZ2_bzDecompress(&s->bzstream);
+            total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) +
+                        s->bzstream.total_out_lo32;
+            BZ2_bzDecompressEnd(&s->bzstream);
+            if (ret != BZ_STREAM_END ||
+                total_out != 512 * s->sectorcounts[chunk]) {
+                return -1;
+            }
+            break;
+#endif /* CONFIG_BZIP2 */
         case 1: /* copy */
             ret = bdrv_pread(bs->file, s->offsets[chunk],
                              s->uncompressed_chunk, s->lengths[chunk]);
diff --git a/configure b/configure
index cae588c..eadae63 100755
--- a/configure
+++ b/configure
@@ -313,6 +313,7 @@  glx=""
 zlib="yes"
 lzo=""
 snappy=""
+bzip2=""
 guest_agent=""
 guest_agent_with_vss="no"
 vss_win32_sdk=""
@@ -1060,6 +1061,10 @@  for opt do
   ;;
   --enable-snappy) snappy="yes"
   ;;
+  --disable-bzip2) bzip2="no"
+  ;;
+  --enable-bzip2) bzip2="yes"
+  ;;
   --enable-guest-agent) guest_agent="yes"
   ;;
   --disable-guest-agent) guest_agent="no"
@@ -1374,6 +1379,8 @@  Advanced options (experts only):
   --enable-usb-redir       enable usb network redirection support
   --enable-lzo             enable the support of lzo compression library
   --enable-snappy          enable the support of snappy compression library
+  --enable-bzip2           enable the support of bzip2 compression library (for
+                           reading bzip2-compressed dmg images)
   --disable-guest-agent    disable building of the QEMU Guest Agent
   --enable-guest-agent     enable building of the QEMU Guest Agent
   --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent
@@ -1820,6 +1827,25 @@  EOF
 fi
 
 ##########################################
+# bzip2 check
+
+if test "$bzip2" != "no" ; then
+    cat > $TMPC << EOF
+#include <bzlib.h>
+int main(void) { BZ2_bzlibVersion(); return 0; }
+EOF
+    if compile_prog "" "-lbz2" ; then
+        libs_softmmu="$libs_softmmu -lbz2"
+        bzip2="yes"
+    else
+        if test "$bzip2" = "yes"; then
+            feature_not_found "libbzip2" "Install libbzip2 devel"
+        fi
+        bzip2="no"
+    fi
+fi
+
+##########################################
 # libseccomp check
 
 if test "$seccomp" != "no" ; then
@@ -4340,6 +4366,7 @@  echo "vhdx              $vhdx"
 echo "Quorum            $quorum"
 echo "lzo support       $lzo"
 echo "snappy support    $snappy"
+echo "bzip2 support     $bzip2"
 echo "NUMA host support $numa"
 
 if test "$sdl_too_old" = "yes"; then
@@ -4695,6 +4722,10 @@  if test "$snappy" = "yes" ; then
   echo "CONFIG_SNAPPY=y" >> $config_host_mak
 fi
 
+if test "$bzip2" = "yes" ; then
+  echo "CONFIG_BZIP2=y" >> $config_host_mak
+fi
+
 if test "$libiscsi" = "yes" ; then
   echo "CONFIG_LIBISCSI=m" >> $config_host_mak
   echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak