diff mbox

Add basic read, write and create support for AMD SimNow HDD images.

Message ID C3B7E30C-E010-4C64-BDF6-079F09F8538E@free.fr
State New
Headers show

Commit Message

François Revol Nov. 28, 2010, 7:08 p.m. UTC
$subj.
Someone asked about this format, wanting to try Haiku in SimNow, so I wrote this.
I got a report of successfully booting a converted image in SimNow.
It doesn't yet support automatically growing the file, so we just preallocate on create.

François.

From b0602bc2b02dcd7b15f0f9a143f850defd767509 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Revol?= <revol@free.fr>
Date: Sun, 28 Nov 2010 20:01:03 +0100
Subject: [PATCH] Add basic read, write and create support for AMD SimNow HDD images.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit


Signed-off-by: François Revol <revol@free.fr>
---
 Makefile.objs |    2 +-
 block/hdd.c   |  354 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 355 insertions(+), 1 deletions(-)
 create mode 100644 block/hdd.c

Comments

Stefan Hajnoczi Dec. 1, 2010, 10:38 a.m. UTC | #1
On Sun, Nov 28, 2010 at 7:08 PM, François Revol <revol@free.fr> wrote:
> From b0602bc2b02dcd7b15f0f9a143f850defd767509 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Fran=C3=A7ois=20Revol?= <revol@free.fr>
> Date: Sun, 28 Nov 2010 20:01:03 +0100
> Subject: [PATCH] Add basic read, write and create support for AMD SimNow HDD images.
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
>
> Signed-off-by: François Revol <revol@free.fr>
> ---
>  Makefile.objs |    2 +-
>  block/hdd.c   |  354 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 355 insertions(+), 1 deletions(-)
>  create mode 100644 block/hdd.c

This block driver does not implement the asynchronous APIs
(bdrv_aio_writev, bdrv_aio_readv, bdrv_aio_flush) which are necessary
for running a VM properly.  Some block drivers are currently written
without async support and that limits them to being used as qemu-img
formats.  It's a bad idea to run a VM with these block drivers because
I/O will block the VM from making progress (it is synchronous).

Anthony mentioned this on IRC and you explained that this requirement
isn't obvious from the QEMU source.  Luckily the changes needed are
small so it's definitely worth making SimNow HDD images async.

> +typedef struct BDRVHddState {
> +    uint8_t identify_data[SECTOR_SIZE];

Perhaps identify_data[] should be uint16_t since it gets casted on every use.

> +static void padstr(char *str, const char *src, int len)
> +{
> +    int i, v;
> +    for(i = 0; i < len; i++) {
> +        if (*src)
> +            v = *src++;
> +        else
> +            v = ' ';
> +        str[i^1] = v;
> +    }
> +}

This function is confusing, it uses int v instead of char.  The name
padstr() doesn't hint that it also byteswaps the string.

QEMU coding style uses {} even for one-line if statement bodies
(Section 4 in the CODING_STYLE file).

> +static int hdd_probe(const uint8_t *buf, int buf_size, const char *filename)
> +{
> +    int name_len;
> +    uint16_t *p = (uint16_t *)buf;
> +    int64_t nb_sectors;
> +    uint32_t nb_sectors_clipped;
> +    int result = 0;
> +    int i;
> +
> +    if (buf_size < SECTOR_SIZE) {
> +        /* Header too small, no VDI. */
> +        return 0;
> +    }
> +
> +    /* best effort sanity check */
> +    /* TODO: check more (CHS size...) */
> +
> +    /* serial number */
> +    for (i = 10 * 2; i < 10 * 2 + 20; i++) {
> +        if (!isvalid_ide_chr(buf[i])) {
> +            return 0;
> +        }
> +    }
> +    result += 20;
> +
> +    /* firmware version */
> +    for (i = 23 * 2; i < 23 * 2 + 8; i++) {
> +        if (!isvalid_ide_chr(buf[i])) {
> +            return 0;
> +        }
> +    }
> +    result += 8;
> +
> +    /* model */
> +    for (i = 27 * 2; i < 27 * 2 + 40; i++) {
> +        if (!isvalid_ide_chr(buf[i])) {
> +            return 0;
> +        }
> +    }
> +    result += 40;
> +
> +    nb_sectors = le16_to_cpu(p[100]);
> +    nb_sectors |= (uint64_t)le16_to_cpu(p[101]) << 16;
> +    nb_sectors |= (uint64_t)le16_to_cpu(p[102]) << 32;
> +    nb_sectors |= (uint64_t)le16_to_cpu(p[103]) << 48;
> +
> +    nb_sectors_clipped = le16_to_cpu(p[60]) | (le16_to_cpu(p[61]) << 16);
> +
> +    if (nb_sectors < 1 || ((uint32_t)nb_sectors) != nb_sectors_clipped) {
> +        return 0;
> +    }
> +    result += 10;
> +
> +    if (filename != NULL) {
> +        name_len = strlen(filename);
> +        if (name_len > 4 && !strcmp(filename + name_len - 4, ".hdd"))
> +            result += 20;
> +    }
> +
> +    return result;
> +}

HDD has no magic by which to identify valid files.  We need to avoid
false positives because existing image files could be corrupted or VMs
at least made unbootable.  Although using filename extensions to test
for formats is lame, in this case I don't think we have another
choice.

The result variable calculations are not needed.  Either we reject the
file and return 0, or we end up having added the same constants
(result = 20 + 8 + 40 + 10 + (endswith(filename, ".hdd") ? 20 : 0)).

> +    int sectors;
> +    int cylinders;
> +    int heads;

Unused variables?

> +    if (bdrv_read(bs->file, 0, s->identify_data, 1) < 0) {
> +        goto fail;
> +    }

We're assuming that BDRV_SECTOR_SIZE == SECTOR_SIZE == 512 throughout
the code.  It would be safer to explicitly calculate against
BDRV_SECTOR_SIZE.  It would be clearer to rename SECTOR_SIZE to
ATA_IDENTIFY_SIZE.

> +    if (hdd_probe(s->identify_data, SECTOR_SIZE, NULL) == 0) {
> +        goto fail;
> +    }
[...]
> + fail:
> +    return -1;

Please don't throw away specific error values.

> +    /* hints */
> +    /*
> +    bs->cyls = le16_to_cpu(p[54]);
> +    bs->heads = le16_to_cpu(p[55]);
> +    bs->secs = le16_to_cpu(p[56]);
> +    */

Deadcode.

> +static int hdd_read(BlockDriverState *bs, int64_t sector_num,
> +                    uint8_t *buf, int nb_sectors)
> +{
> +    int ret;
> +    if (bdrv_read(bs->file, sector_num + DATA_START, buf, nb_sectors) < 0) {
> +        return -1;
> +    }
> +    return 0;
> +}

Replace with async version:

static int hdd_aio_readv(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb,
void *opaque)
{
    return bdrv_aio_readv(bs->file, sector_num + DATA_START, qiov,
nb_sectors, cb, opaque);
}

> +static int hdd_write(BlockDriverState *bs, int64_t sector_num,
> +                    const uint8_t *buf, int nb_sectors)
> +{
> +    int ret;
> +
> +    if (sector_num > bs->total_sectors) {
> +        fprintf(stderr,
> +                "(HDD) Wrong offset: sector_num=0x%" PRIx64
> +                " total_sectors=0x%" PRIx64 "\n",
> +                sector_num, bs->total_sectors);
> +        return -1;
> +    }

Not needed since block.c already checks request range.

> +    /* TODO: check if already allocated, else truncate() */
> +    if (bdrv_write(bs->file, sector_num + DATA_START, buf, nb_sectors) < 0) {
> +        return -1;
> +    }
> +    return 0;
> +}

Should also be replaced with async version.

bdrv_aio_flush() is trivial too.  It is necessary for running VMs
since they may rely on flush working for data integrity when a mode
other than -drive ...,cache=writethrough is selected.

> +    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
> +              0644);
> +    if (fd < 0) {
> +        return -errno;
> +    }

Please use bdrv_create_file(), bdrv_file_open(), and bdrv_pwrite()
instead of POSIX file I/O.  See block/qcow2.c:qcow_create2 as an
example.

> +    /* TODO: specs says it can grow, so no need to always do this */
> +    if (static_image) {
> +        if (ftruncate(fd, sizeof(header) + blocks * SECTOR_SIZE)) {
> +            result = -errno;
> +        }
> +    }

Is there an issue with leaving ftruncate() out?  I don't see a reason
to truncate.  If we want to preallocate then ftruncate() isn't
appropriate anyway.

> +static void hdd_close(BlockDriverState *bs)
> +{
> +    BDRVHddState *s = bs->opaque;

Unused variable.

Stefan
François Revol Nov. 23, 2011, 2:20 p.m. UTC | #2
Hi,

Le 01/12/2010 11:38, Stefan Hajnoczi a écrit :
> This block driver does not implement the asynchronous APIs
> (bdrv_aio_writev, bdrv_aio_readv, bdrv_aio_flush) which are necessary
> for running a VM properly.  Some block drivers are currently written
> without async support and that limits them to being used as qemu-img
> formats.  It's a bad idea to run a VM with these block drivers because
> I/O will block the VM from making progress (it is synchronous).

I'm digging old stuff at the moment...
It seems to be the coroutine calls recently introduced makes aio
optional, does it ?

I added co versions of the calls in the code and it seems to work.
It passes the qemu-iotests:
Not run: 006 007 013 014 015 016 017 018 019 020 022 023 024 025 026 027 028
Passed all 11 tests

But before I submit the patch again I'll rather have it written correctly.

>> +typedef struct BDRVHddState {
>> +    uint8_t identify_data[SECTOR_SIZE];
> 
> Perhaps identify_data[] should be uint16_t since it gets casted on every use.

I tried doing so but you end up adding many other casts everywhere
and another #define to ...SIZE/sizeof(uint16_t) which doesn't look
better though.

>> +static void padstr(char *str, const char *src, int len)
>> +{
>> +    int i, v;
>> +    for(i = 0; i < len; i++) {
>> +        if (*src)
>> +            v = *src++;
>> +        else
>> +            v = ' ';
>> +        str[i^1] = v;
>> +    }
>> +}
> 
> This function is confusing, it uses int v instead of char.  The name
> padstr() doesn't hint that it also byteswaps the string.

Well it was copied verbatim from another file.
I now added a comment mentioning it.

> QEMU coding style uses {} even for one-line if statement bodies
> (Section 4 in the CODING_STYLE file).

Well it was copied verbatim from another file. :P

>> +static int hdd_probe(const uint8_t *buf, int buf_size, const char *filename)
> 
> HDD has no magic by which to identify valid files.  We need to avoid
> false positives because existing image files could be corrupted or VMs
> at least made unbootable.  Although using filename extensions to test
> for formats is lame, in this case I don't think we have another
> choice.

I suppose so, I didn't look any further but apart from checking the
geometry validity at several places in the header there is not much to
check for.

>> +    if (bdrv_read(bs->file, 0, s->identify_data, 1) < 0) {
>> +        goto fail;
>> +    }
> 
> We're assuming that BDRV_SECTOR_SIZE == SECTOR_SIZE == 512 throughout
> the code.  It would be safer to explicitly calculate against
> BDRV_SECTOR_SIZE.  It would be clearer to rename SECTOR_SIZE to
> ATA_IDENTIFY_SIZE.

Right, though the code would not work for SECTOR_SIZE != 512 since it's
the size of the header, so having it defined to something else would
make blocks unaligned anyway.
I'll use other defines but also an #error if SECTOR_SIZE doesn't fit to
make sure people don't try things without noticing.

>> +    /* TODO: specs says it can grow, so no need to always do this */
>> +    if (static_image) {
>> +        if (ftruncate(fd, sizeof(header) + blocks * SECTOR_SIZE)) {
>> +            result = -errno;
>> +        }
>> +    }
> 
> Is there an issue with leaving ftruncate() out?  I don't see a reason
> to truncate.  If we want to preallocate then ftruncate() isn't
> appropriate anyway.

Right, it doesn't write zeroes on ext2 anyway.
I'd have to test without it first.

François.
Kevin Wolf Nov. 23, 2011, 2:46 p.m. UTC | #3
Am 23.11.2011 15:20, schrieb François Revol:
> Hi,
> 
> Le 01/12/2010 11:38, Stefan Hajnoczi a écrit :
>> This block driver does not implement the asynchronous APIs
>> (bdrv_aio_writev, bdrv_aio_readv, bdrv_aio_flush) which are necessary
>> for running a VM properly.  Some block drivers are currently written
>> without async support and that limits them to being used as qemu-img
>> formats.  It's a bad idea to run a VM with these block drivers because
>> I/O will block the VM from making progress (it is synchronous).
> 
> I'm digging old stuff at the moment...
> It seems to be the coroutine calls recently introduced makes aio
> optional, does it ?

Yes. In fact, if you have co_* functions, aio_* will never be called
because the coroutine ones are always preferred.

> I added co versions of the calls in the code and it seems to work.
> It passes the qemu-iotests:
> Not run: 006 007 013 014 015 016 017 018 019 020 022 023 024 025 026 027 028
> Passed all 11 tests
> 
> But before I submit the patch again I'll rather have it written correctly.
> 
>>> +typedef struct BDRVHddState {
>>> +    uint8_t identify_data[SECTOR_SIZE];
>>
>> Perhaps identify_data[] should be uint16_t since it gets casted on every use.
> 
> I tried doing so but you end up adding many other casts everywhere
> and another #define to ...SIZE/sizeof(uint16_t) which doesn't look
> better though.

Yeah, I can see that there are many byte accesses as well. Probably best
to leave it as it is.

> 
>>> +static void padstr(char *str, const char *src, int len)
>>> +{
>>> +    int i, v;
>>> +    for(i = 0; i < len; i++) {
>>> +        if (*src)
>>> +            v = *src++;
>>> +        else
>>> +            v = ' ';
>>> +        str[i^1] = v;
>>> +    }
>>> +}
>>
>> This function is confusing, it uses int v instead of char.  The name
>> padstr() doesn't hint that it also byteswaps the string.
> 
> Well it was copied verbatim from another file.
> I now added a comment mentioning it.

Should be a common helper function somewhere then. Duplicating code is
always bad.

>> QEMU coding style uses {} even for one-line if statement bodies
>> (Section 4 in the CODING_STYLE file).
> 
> Well it was copied verbatim from another file. :P

While moving it to a common place, fix the code style. ;-)

>>> +static int hdd_probe(const uint8_t *buf, int buf_size, const char *filename)
>>
>> HDD has no magic by which to identify valid files.  We need to avoid
>> false positives because existing image files could be corrupted or VMs
>> at least made unbootable.  Although using filename extensions to test
>> for formats is lame, in this case I don't think we have another
>> choice.
> 
> I suppose so, I didn't look any further but apart from checking the
> geometry validity at several places in the header there is not much to
> check for.

We should make sure to return a very low value so that any other
matching format will take precedence.

Or we could consider hdd_probe() { return 0; } and require the user to
specify the format explicitly. Would be a bit nasty to use, though.

> 
>>> +    if (bdrv_read(bs->file, 0, s->identify_data, 1) < 0) {
>>> +        goto fail;
>>> +    }
>>
>> We're assuming that BDRV_SECTOR_SIZE == SECTOR_SIZE == 512 throughout
>> the code.  It would be safer to explicitly calculate against
>> BDRV_SECTOR_SIZE.  It would be clearer to rename SECTOR_SIZE to
>> ATA_IDENTIFY_SIZE.
> 
> Right, though the code would not work for SECTOR_SIZE != 512 since it's
> the size of the header, so having it defined to something else would
> make blocks unaligned anyway.
> I'll use other defines but also an #error if SECTOR_SIZE doesn't fit to
> make sure people don't try things without noticing.

I think QEMU_BUILD_BUG_ON() is what you're looking for.

>>> +    /* TODO: specs says it can grow, so no need to always do this */
>>> +    if (static_image) {
>>> +        if (ftruncate(fd, sizeof(header) + blocks * SECTOR_SIZE)) {
>>> +            result = -errno;
>>> +        }
>>> +    }
>>
>> Is there an issue with leaving ftruncate() out?  I don't see a reason
>> to truncate.  If we want to preallocate then ftruncate() isn't
>> appropriate anyway.
> 
> Right, it doesn't write zeroes on ext2 anyway.
> I'd have to test without it first.

Please use bdrv_* functions instead of POSIX ones to create the image.

Kevin
diff mbox

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 23b17ce..20e346d 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -20,7 +20,7 @@  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
 block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o hdd.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
 block-nested-$(CONFIG_CURL) += curl.o
diff --git a/block/hdd.c b/block/hdd.c
new file mode 100644
index 0000000..aed609e
--- /dev/null
+++ b/block/hdd.c
@@ -0,0 +1,354 @@ 
+/*
+ * Block driver for the AMD SimNow HDD disk image
+ *
+ * Copyright (c) 2010 François Revol
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Reference:
+ * http://developer.amd.com/Assets/SimNowUsersManual4.6.2.pdf page 181
+ * 
+ * "the hard-disk image file contains a 512-byte header before the raw data.
+ *  This 512-byte header is identical to the information provided by the drive
+ *  in response to the ATA command "IDENTIFY". Following the 512-byte header
+ *  is the data for each sector from the device."
+ */
+
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+
+/* Command line option for static images. */
+#define BLOCK_OPT_STATIC "static"
+
+#define SECTOR_SIZE 512
+#define DATA_START 1
+#define MAX_MULT_SECTORS 1
+
+typedef struct BDRVHddState {
+    uint8_t identify_data[SECTOR_SIZE];
+} BDRVHddState;
+
+
+static void padstr(char *str, const char *src, int len)
+{
+    int i, v;
+    for(i = 0; i < len; i++) {
+        if (*src)
+            v = *src++;
+        else
+            v = ' ';
+        str[i^1] = v;
+    }
+}
+
+static void put_le16(uint16_t *p, unsigned int v)
+{
+    *p = cpu_to_le16(v);
+}
+
+static int isvalid_ide_chr(char c)
+{
+    /* XXX: other chars also maybe? */
+    return (isalnum(c) || c == ' ' || c == '.');
+}
+
+static int hdd_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    int name_len;
+    uint16_t *p = (uint16_t *)buf;
+    int64_t nb_sectors;
+    uint32_t nb_sectors_clipped;
+    int result = 0;
+    int i;
+    
+    if (buf_size < SECTOR_SIZE) {
+        /* Header too small, no VDI. */
+        return 0;
+    }
+
+    /* best effort sanity check */
+    /* TODO: check more (CHS size...) */
+
+    /* serial number */
+    for (i = 10 * 2; i < 10 * 2 + 20; i++) {
+        if (!isvalid_ide_chr(buf[i])) {
+            return 0;
+        }
+    }
+    result += 20;
+
+    /* firmware version */
+    for (i = 23 * 2; i < 23 * 2 + 8; i++) {
+        if (!isvalid_ide_chr(buf[i])) {
+            return 0;
+        }
+    }
+    result += 8;
+
+    /* model */
+    for (i = 27 * 2; i < 27 * 2 + 40; i++) {
+        if (!isvalid_ide_chr(buf[i])) {
+            return 0;
+        }
+    }
+    result += 40;
+
+    nb_sectors = le16_to_cpu(p[100]);
+    nb_sectors |= (uint64_t)le16_to_cpu(p[101]) << 16;
+    nb_sectors |= (uint64_t)le16_to_cpu(p[102]) << 32;
+    nb_sectors |= (uint64_t)le16_to_cpu(p[103]) << 48;
+
+    nb_sectors_clipped = le16_to_cpu(p[60]) | (le16_to_cpu(p[61]) << 16);
+
+    if (nb_sectors < 1 || ((uint32_t)nb_sectors) != nb_sectors_clipped) {
+        return 0;
+    }
+    result += 10;
+
+    if (filename != NULL) {
+        name_len = strlen(filename);
+        if (name_len > 4 && !strcmp(filename + name_len - 4, ".hdd"))
+            result += 20;
+    }
+
+    return result;
+}
+
+static int hdd_open(BlockDriverState *bs, int flags)
+{
+    BDRVHddState *s = bs->opaque;
+    uint16_t *p = (uint16_t *)s->identify_data;
+    int64_t nb_sectors;
+    int sectors;
+    int cylinders;
+    int heads;
+
+    if (bdrv_read(bs->file, 0, s->identify_data, 1) < 0) {
+        goto fail;
+    }
+
+    if (hdd_probe(s->identify_data, SECTOR_SIZE, NULL) == 0) {
+        goto fail;
+    }
+
+    nb_sectors = le16_to_cpu(p[100]);
+    nb_sectors |= (uint64_t)le16_to_cpu(p[101]) << 16;
+    nb_sectors |= (uint64_t)le16_to_cpu(p[102]) << 32;
+    nb_sectors |= (uint64_t)le16_to_cpu(p[103]) << 48;
+
+    bs->total_sectors = nb_sectors;
+
+    /* hints */
+    /*
+    bs->cyls = le16_to_cpu(p[54]);
+    bs->heads = le16_to_cpu(p[55]);
+    bs->secs = le16_to_cpu(p[56]);
+    */
+
+    return 0;
+
+ fail:
+    return -1;
+}
+
+static int hdd_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    int ret;
+    if (bdrv_read(bs->file, sector_num + DATA_START, buf, nb_sectors) < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+static int hdd_write(BlockDriverState *bs, int64_t sector_num,
+                    const uint8_t *buf, int nb_sectors)
+{
+    int ret;
+
+    if (sector_num > bs->total_sectors) {
+        fprintf(stderr,
+                "(HDD) Wrong offset: sector_num=0x%" PRIx64
+                " total_sectors=0x%" PRIx64 "\n",
+                sector_num, bs->total_sectors);
+        return -1;
+    }
+    /* TODO: check if already allocated, else truncate() */
+    if (bdrv_write(bs->file, sector_num + DATA_START, buf, nb_sectors) < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+static int hdd_create(const char *filename, QEMUOptionParameter *options)
+{
+    int fd;
+    int result = 0;
+    int static_image = 1; /* make it default for now */
+    uint64_t bytes = 0;
+    uint32_t blocks;
+    uint8_t header[SECTOR_SIZE];
+    size_t i;
+    uint16_t *p;
+    unsigned int oldsize;
+    char drive_serial_str[20];
+    int mult_sectors = 0;
+    int64_t nb_sectors;
+    int sectors;
+    int cylinders;
+    int heads;
+
+    /* Read out options. */
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            bytes = options->value.n;
+        } else if (!strcmp(options->name, BLOCK_OPT_STATIC)) {
+            if (options->value.n) {
+                static_image = 1;
+            }
+        }
+        options++;
+    }
+
+    nb_sectors = bytes / SECTOR_SIZE;
+    /* we can't use bdrv_guess_geometry, use a standard physical disk geometry */
+    cylinders = nb_sectors / (16 * 63);
+    if (cylinders > 16383)
+        cylinders = 16383;
+    else if (cylinders < 2)
+        cylinders = 2;
+    heads = 16;
+    sectors = 63;
+
+    /* XXX: generate one ? */
+    snprintf(drive_serial_str, sizeof(drive_serial_str), "QM%05d", 0);
+
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+              0644);
+    if (fd < 0) {
+        return -errno;
+    }
+
+    /* We need enough blocks to store the given disk size,
+       so always round up. */
+    blocks = (bytes + SECTOR_SIZE - 1) / SECTOR_SIZE;
+
+    /* Stolen from hw/ide/core.c */
+    /* XXX: are the hw flags ok there ? */
+    memset(header, 0, sizeof(header));
+    p = (uint16_t *)header;
+    put_le16(p + 0, 0x0040);
+    put_le16(p + 1, cylinders);
+    put_le16(p + 3, heads);
+    put_le16(p + 4, 512 * sectors); /* XXX: retired, remove ? */
+    put_le16(p + 5, 512); /* XXX: retired, remove ? */
+    put_le16(p + 6, sectors);
+    padstr((char *)(p + 10), drive_serial_str, 20); /* serial number */
+    put_le16(p + 20, 3); /* XXX: retired, remove ? */
+    put_le16(p + 21, 512); /* cache size in sectors */
+    put_le16(p + 22, 4); /* ecc bytes */
+    padstr((char *)(p + 23), QEMU_VERSION/*s->version*/, 8); /* firmware version */
+    padstr((char *)(p + 27), "QEMU HARDDISK", 40); /* model */
+#if MAX_MULT_SECTORS > 1
+    put_le16(p + 47, 0x8000 /*| MAX_MULT_SECTORS*/);
+#endif
+    put_le16(p + 48, 1); /* dword I/O */
+    put_le16(p + 49, (1 << 11) | (1 << 9) | (1 << 8)); /* DMA and LBA supported */
+    put_le16(p + 51, 0x200); /* PIO transfer cycle */
+    put_le16(p + 52, 0x200); /* DMA transfer cycle */
+    put_le16(p + 53, 1 | (1 << 1) | (1 << 2)); /* words 54-58,64-70,88 are valid */
+    put_le16(p + 54, cylinders);
+    put_le16(p + 55, heads);
+    put_le16(p + 56, sectors);
+    oldsize = cylinders * heads * sectors;
+    put_le16(p + 57, oldsize);
+    put_le16(p + 58, oldsize >> 16);
+    if (mult_sectors)
+        put_le16(p + 59, 0x100 | mult_sectors);
+    put_le16(p + 60, nb_sectors);
+    put_le16(p + 61, nb_sectors >> 16);
+    put_le16(p + 62, 0x07); /* single word dma0-2 supported */
+    put_le16(p + 63, 0x07); /* mdma0-2 supported */
+    put_le16(p + 64, 0x03); /* pio3-4 supported */
+    put_le16(p + 65, 120);
+    put_le16(p + 66, 120);
+    put_le16(p + 67, 120);
+    put_le16(p + 68, 120);
+    put_le16(p + 80, 0xf0); /* ata3 -> ata6 supported */
+    put_le16(p + 81, 0x16); /* conforms to ata5 */
+    /* 14=NOP supported, 5=WCACHE supported, 0=SMART supported */
+    put_le16(p + 82, (1 << 14) | (1 << 5) | 1);
+    /* 13=flush_cache_ext,12=flush_cache,10=lba48 */
+    put_le16(p + 83, (1 << 14) | (1 << 13) | (1 <<12) | (1 << 10));
+    /* 14=set to 1, 1=SMART self test, 0=SMART error logging */
+    put_le16(p + 84, (1 << 14) | 0);
+    /* 14 = NOP supported, 5=WCACHE enabled, 0=SMART feature set enabled */
+    put_le16(p + 85, (1 << 14) | 1);
+    /* 13=flush_cache_ext,12=flush_cache,10=lba48 */
+    put_le16(p + 86, (1 << 14) | (1 << 13) | (1 <<12) | (1 << 10));
+    /* 14=set to 1, 1=smart self test, 0=smart error logging */
+    put_le16(p + 87, (1 << 14) | 0);
+    put_le16(p + 88, 0x3f | (1 << 13)); /* udma5 set and supported */
+    put_le16(p + 93, 1 | (1 << 14) | 0x2000);
+    put_le16(p + 100, nb_sectors);
+    put_le16(p + 101, nb_sectors >> 16);
+    put_le16(p + 102, nb_sectors >> 32);
+    put_le16(p + 103, nb_sectors >> 48);
+
+    if (write(fd, &header, sizeof(header)) < 0) {
+        result = -errno;
+    }
+
+    /* TODO: specs says it can grow, so no need to always do this */
+    if (static_image) {
+        if (ftruncate(fd, sizeof(header) + blocks * SECTOR_SIZE)) {
+            result = -errno;
+        }
+    }
+
+    if (close(fd) < 0) {
+        result = -errno;
+    }
+
+    return result;
+}
+
+static void hdd_close(BlockDriverState *bs)
+{
+    BDRVHddState *s = bs->opaque;
+}
+
+static BlockDriver bdrv_hdd = {
+    .format_name    = "hdd",
+    .instance_size  = sizeof(BDRVHddState),
+    .bdrv_probe     = hdd_probe,
+    .bdrv_open      = hdd_open,
+    .bdrv_read      = hdd_read,
+    .bdrv_write     = hdd_write,
+    .bdrv_close     = hdd_close,
+    .bdrv_create    = hdd_create,
+};
+
+static void bdrv_hdd_init(void)
+{
+    bdrv_register(&bdrv_hdd);
+}
+
+block_init(bdrv_hdd_init);