Patchwork support add-cow file format

login
register
mail settings
Submitter Robert Wang
Date Sept. 9, 2011, 5:48 a.m.
Message ID <1315547296-22066-1-git-send-email-wdongxu@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/114012/
State New
Headers show

Comments

Robert Wang - Sept. 9, 2011, 5:48 a.m.
The raw file format does not support backing files or copy-on-write, so
I add COW to it to support the backing_file option. I store the dirty bitmap
in an add-cow file. When executed, it looks like this:
qemu-img create -f add-cow -o backing_file=ubuntu.img,image_file=test.img test.add-cow
qemu -drive if=virtio,file=test.add-cow -m 1024 

(test.img is a raw-format file; test.add-cow stores the bitmap)

Signed-off-by: Dong Xu Wang <wdongxu@linux.vnet.ibm.com>
---
 Makefile.objs   |    1 +
 block.c         |   83 ++++++++++-
 block.h         |    2 +
 block/add-cow.c |  456 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 block_int.h     |    6 +
 qemu-img.c      |   10 ++
 6 files changed, 555 insertions(+), 3 deletions(-)
 create mode 100644 block/add-cow.c
Kevin Wolf - Sept. 9, 2011, 2:27 p.m.
Am 09.09.2011 07:48, schrieb Dong Xu Wang:
> As raw file format does not support backing_file and copy on write feature, so 
> I add COW to it to support backing_file option. I store dirty bitmap in an 
> add-cow file. When executed, it looks like this:
> qemu-img create -f add-cow -o backing_file=ubuntu.img,image_file=test.img test.add-cow
> qemu -drive if=virtio,file=test.add-cow -m 1024 
> 
> (test.img is a raw format file; test.add-cow stores bitmap)
> 
> Signed-off-by: Dong Xu Wang <wdongxu@linux.vnet.ibm.com>

You should not make any changes to generic code, except maybe add
something to bdrv_get_info(). In particular you shouldn't need to touch
bdrv_open() or bdrv_create() at all.

The one required change in the approach for this to work is that you
shouldn't view raw+add_cow as a unit, but add_cow should be treated as
something separate that happens to be stacked on a raw file (which is
created separately).

Then you can do almost everything in block/add-cow.c.

> ---
>  Makefile.objs   |    1 +
>  block.c         |   83 ++++++++++-
>  block.h         |    2 +
>  block/add-cow.c |  456 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block_int.h     |    6 +
>  qemu-img.c      |   10 ++
>  6 files changed, 555 insertions(+), 3 deletions(-)
>  create mode 100644 block/add-cow.c
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index 26b885b..1402f9f 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -31,6 +31,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>  
>  block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
>  block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
> +block-nested-y += add-cow.o
>  block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>  block-nested-y += qed-check.o
>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
> diff --git a/block.c b/block.c
> index a8c789a..c797cfc 100644
> --- a/block.c
> +++ b/block.c
> @@ -369,7 +369,7 @@ static int find_image_format(const char *filename, BlockDriver **pdrv)
>  {
>      int ret, score, score_max;
>      BlockDriver *drv1, *drv;
> -    uint8_t buf[2048];
> +    uint8_t buf[4096];
>      BlockDriverState *bs;

What's the reason for this change?

> diff --git a/block/add-cow.c b/block/add-cow.c
> new file mode 100644
> index 0000000..f4b67e5
> --- /dev/null
> +++ b/block/add-cow.c
> @@ -0,0 +1,456 @@
> +#include "qemu-common.h"
> +#include "block_int.h"
> +#include "module.h"
> +
> +#define ADD_COW_MAGIC  (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | \
> +                        ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \
> +                        ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \
> +                        ((uint64_t)'W' << 8) | 0xFF)
> +#define ADD_COW_VERSION 1
> +
> +struct add_cow_header {
> +    uint64_t magic;
> +    uint32_t version;
> +    char backing_file[1024];
> +    char image_file[1024];
> +    uint64_t size;
> +    uint32_t sectorsize;
> +} add_cow_header;

QEMU_PACKED

> +typedef struct BDRVAddCowState {
> +    CoMutex lock;
> +    CoMutex bitmap_lock;
> +} BDRVAddCowState;
> +
> +typedef struct AddCowAIOCB {
> +    BlockDriverAIOCB common;
> +    int64_t sector_num;
> +    QEMUIOVector *qiov;
> +    int remaining_sectors;
> +    int cur_nr_sectors;
> +    uint64_t bytes_done;
> +    bool is_write;
> +    QEMUIOVector hd_qiov;
> +    QEMUBH *bh;
> +
> +} AddCowAIOCB;

You shouldn't be using AIOCBs with a coroutine-based block driver.
Instead you should just use variables on the stack and function parameters.

> +static int add_cow_flush(BlockDriverState *bs)
> +{
> +    return bdrv_flush(bs->file);
> +}

What about bs->image_hd?

> @@ -208,6 +209,11 @@ struct BlockDriverState {
>      int in_use; /* users other than guest access, eg. block migration */
>      QTAILQ_ENTRY(BlockDriverState) list;
>      void *private;
> +
> +    char image_file[1024];
> +    BlockDriverState *image_hd;
> +    uint8_t *bitmap;
> +    uint64_t bitmap_size;
>  };

These belong in BDRVAddCowState.

Kevin
Robert Wang - Sept. 10, 2011, 12:54 a.m.
于Fri 09 Sep 2011 10:27:26 PM CST,Kevin Wolf写到:
> Am 09.09.2011 07:48, schrieb Dong Xu Wang:
>> As raw file format does not support backing_file and copy on write feature, so 
>> I add COW to it to support backing_file option. I store dirty bitmap in an 
>> add-cow file. When executed, it looks like this:
>> qemu-img create -f add-cow -o backing_file=ubuntu.img,image_file=test.img test.add-cow
>> qemu -drive if=virtio,file=test.add-cow -m 1024 
>>
>> (test.img is a raw format file; test.add-cow stores bitmap)
>>
>> Signed-off-by: Dong Xu Wang <wdongxu@linux.vnet.ibm.com>
>
> You should not make any changes to generic code, except maybe add
> something to bdrv_get_info(). In particular you shouldn't need to touch
> bdrv_open() or bdrv_create() at all.
>
> The one required change in the approach for this to work is that you
> shouldn't view raw+add_cow as a unit, but add_cow should be treated as
> something separate that happens to be stacked on a raw file (which is
> created separately).
>
> Then you can do almost everything in block/add-cow.c.
>
>> ---
>>  Makefile.objs   |    1 +
>>  block.c         |   83 ++++++++++-
>>  block.h         |    2 +
>>  block/add-cow.c |  456 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  block_int.h     |    6 +
>>  qemu-img.c      |   10 ++
>>  6 files changed, 555 insertions(+), 3 deletions(-)
>>  create mode 100644 block/add-cow.c
>>
>> diff --git a/Makefile.objs b/Makefile.objs
>> index 26b885b..1402f9f 100644
>> --- a/Makefile.objs
>> +++ b/Makefile.objs
>> @@ -31,6 +31,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>>  
>>  block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
>>  block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
>> +block-nested-y += add-cow.o
>>  block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>>  block-nested-y += qed-check.o
>>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
>> diff --git a/block.c b/block.c
>> index a8c789a..c797cfc 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -369,7 +369,7 @@ static int find_image_format(const char *filename, BlockDriver **pdrv)
>>  {
>>      int ret, score, score_max;
>>      BlockDriver *drv1, *drv;
>> -    uint8_t buf[2048];
>> +    uint8_t buf[4096];
>>      BlockDriverState *bs;
>
> What's the reason for this change?
>
The size of add_cow_header in my code is larger than 2048.
>> diff --git a/block/add-cow.c b/block/add-cow.c
>> new file mode 100644
>> index 0000000..f4b67e5
>> --- /dev/null
>> +++ b/block/add-cow.c
>> @@ -0,0 +1,456 @@
>> +#include "qemu-common.h"
>> +#include "block_int.h"
>> +#include "module.h"
>> +
>> +#define ADD_COW_MAGIC  (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | \
>> +                        ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \
>> +                        ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \
>> +                        ((uint64_t)'W' << 8) | 0xFF)
>> +#define ADD_COW_VERSION 1
>> +
>> +struct add_cow_header {
>> +    uint64_t magic;
>> +    uint32_t version;
>> +    char backing_file[1024];
>> +    char image_file[1024];
>> +    uint64_t size;
>> +    uint32_t sectorsize;
>> +} add_cow_header;
>
> QEMU_PACKED
Sorry, what does QEMU_PACKED mean?
>
>> +typedef struct BDRVAddCowState {
>> +    CoMutex lock;
>> +    CoMutex bitmap_lock;
>> +} BDRVAddCowState;
>> +
>> +typedef struct AddCowAIOCB {
>> +    BlockDriverAIOCB common;
>> +    int64_t sector_num;
>> +    QEMUIOVector *qiov;
>> +    int remaining_sectors;
>> +    int cur_nr_sectors;
>> +    uint64_t bytes_done;
>> +    bool is_write;
>> +    QEMUIOVector hd_qiov;
>> +    QEMUBH *bh;
>> +
>> +} AddCowAIOCB;
>
> You shouldn't be using AIOCBs with a coroutine-based block driver.
> Instead you should just use variables on the stack and function parameters.
>
>> +static int add_cow_flush(BlockDriverState *bs)
>> +{
>> +    return bdrv_flush(bs->file);
>> +}
>
> What about bs->image_hd?
>
>> @@ -208,6 +209,11 @@ struct BlockDriverState {
>>      int in_use; /* users other than guest access, eg. block migration */
>>      QTAILQ_ENTRY(BlockDriverState) list;
>>      void *private;
>> +
>> +    char image_file[1024];
>> +    BlockDriverState *image_hd;
>> +    uint8_t *bitmap;
>> +    uint64_t bitmap_size;
>>  };
>
> These belong in BDRVAddCowState.
>
> Kevin
>
>
Thanks for your comments, Kevin. I will produce a second version soon.
Kevin Wolf - Sept. 12, 2011, 7:59 a.m.
Am 10.09.2011 02:54, schrieb Dong Xu Wang:
> 于Fri 09 Sep 2011 10:27:26 PM CST,Kevin Wolf写到:
>> Am 09.09.2011 07:48, schrieb Dong Xu Wang:
>>> As raw file format does not support backing_file and copy on write feature, so 
>>> I add COW to it to support backing_file option. I store dirty bitmap in an 
>>> add-cow file. When executed, it looks like this:
>>> qemu-img create -f add-cow -o backing_file=ubuntu.img,image_file=test.img test.add-cow
>>> qemu -drive if=virtio,file=test.add-cow -m 1024 
>>>
>>> (test.img is a raw format file; test.add-cow stores bitmap)
>>>
>>> Signed-off-by: Dong Xu Wang <wdongxu@linux.vnet.ibm.com>
>>
>> You should not make any changes to generic code, except maybe add
>> something to bdrv_get_info(). In particular you shouldn't need to touch
>> bdrv_open() or bdrv_create() at all.
>>
>> The one required change in the approach for this to work is that you
>> shouldn't view raw+add_cow as a unit, but add_cow should be treated as
>> something separate that happens to be stacked on a raw file (which is
>> created separately).
>>
>> Then you can do almost everything in block/add-cow.c.
>>
>>> ---
>>>  Makefile.objs   |    1 +
>>>  block.c         |   83 ++++++++++-
>>>  block.h         |    2 +
>>>  block/add-cow.c |  456 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  block_int.h     |    6 +
>>>  qemu-img.c      |   10 ++
>>>  6 files changed, 555 insertions(+), 3 deletions(-)
>>>  create mode 100644 block/add-cow.c
>>>
>>> diff --git a/Makefile.objs b/Makefile.objs
>>> index 26b885b..1402f9f 100644
>>> --- a/Makefile.objs
>>> +++ b/Makefile.objs
>>> @@ -31,6 +31,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>>>  
>>>  block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
>>>  block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
>>> +block-nested-y += add-cow.o
>>>  block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>>>  block-nested-y += qed-check.o
>>>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
>>> diff --git a/block.c b/block.c
>>> index a8c789a..c797cfc 100644
>>> --- a/block.c
>>> +++ b/block.c
>>> @@ -369,7 +369,7 @@ static int find_image_format(const char *filename, BlockDriver **pdrv)
>>>  {
>>>      int ret, score, score_max;
>>>      BlockDriver *drv1, *drv;
>>> -    uint8_t buf[2048];
>>> +    uint8_t buf[4096];
>>>      BlockDriverState *bs;
>>
>> What's the reason for this change?
>>
> The size of add_cow_header in my code is larger than 2048.

Right, but the magic is in the first 8 bytes, so for probing 2048 bytes
should be more than enough.

>>> diff --git a/block/add-cow.c b/block/add-cow.c
>>> new file mode 100644
>>> index 0000000..f4b67e5
>>> --- /dev/null
>>> +++ b/block/add-cow.c
>>> @@ -0,0 +1,456 @@
>>> +#include "qemu-common.h"
>>> +#include "block_int.h"
>>> +#include "module.h"
>>> +
>>> +#define ADD_COW_MAGIC  (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | \
>>> +                        ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \
>>> +                        ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \
>>> +                        ((uint64_t)'W' << 8) | 0xFF)
>>> +#define ADD_COW_VERSION 1
>>> +
>>> +struct add_cow_header {
>>> +    uint64_t magic;
>>> +    uint32_t version;
>>> +    char backing_file[1024];
>>> +    char image_file[1024];
>>> +    uint64_t size;
>>> +    uint32_t sectorsize;
>>> +} add_cow_header;
>>
>> QEMU_PACKED
> Sorry, what does QEMU_PACKED mean?

This is an on-disk structure, so you need to pack the structure.
Otherwise the compiler would be free to add padding between the fields
in order to optimise alignment.

struct add_cow_header {
    ...
} QEMU_PACKED add_cow_header;

Hm, actually, do you really want to declare a global variable here? Or
is a typedef missing? Also, coding style requires the struct name to be
spelled AddCowHeader.

Kevin
Dong xu Wang - Sept. 13, 2011, 2:15 a.m.
于Mon 12 Sep 2011 03:59:07 PM CST,Kevin Wolf写到:
> Am 10.09.2011 02:54, schrieb Dong Xu Wang:
>> 于Fri 09 Sep 2011 10:27:26 PM CST,Kevin Wolf写到:
>>> Am 09.09.2011 07:48, schrieb Dong Xu Wang:
>>>> As raw file format does not support backing_file and copy on write feature, so 
>>>> I add COW to it to support backing_file option. I store dirty bitmap in an 
>>>> add-cow file. When executed, it looks like this:
>>>> qemu-img create -f add-cow -o backing_file=ubuntu.img,image_file=test.img test.add-cow
>>>> qemu -drive if=virtio,file=test.add-cow -m 1024 
>>>>
>>>> (test.img is a raw format file; test.add-cow stores bitmap)
>>>>
>>>> Signed-off-by: Dong Xu Wang <wdongxu@linux.vnet.ibm.com>
>>>
>>> You should not make any changes to generic code, except maybe add
>>> something to bdrv_get_info(). In particular you shouldn't need to touch
>>> bdrv_open() or bdrv_create() at all.
>>>
>>> The one required change in the approach for this to work is that you
>>> shouldn't view raw+add_cow as a unit, but add_cow should be treated as
>>> something separate that happens to be stacked on a raw file (which is
>>> created separately).
>>>
>>> Then you can do almost everything in block/add-cow.c.
>>>
>>>> ---
>>>>  Makefile.objs   |    1 +
>>>>  block.c         |   83 ++++++++++-
>>>>  block.h         |    2 +
>>>>  block/add-cow.c |  456 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  block_int.h     |    6 +
>>>>  qemu-img.c      |   10 ++
>>>>  6 files changed, 555 insertions(+), 3 deletions(-)
>>>>  create mode 100644 block/add-cow.c
>>>>
>>>> diff --git a/Makefile.objs b/Makefile.objs
>>>> index 26b885b..1402f9f 100644
>>>> --- a/Makefile.objs
>>>> +++ b/Makefile.objs
>>>> @@ -31,6 +31,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>>>>  
>>>>  block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
>>>>  block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
>>>> +block-nested-y += add-cow.o
>>>>  block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>>>>  block-nested-y += qed-check.o
>>>>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
>>>> diff --git a/block.c b/block.c
>>>> index a8c789a..c797cfc 100644
>>>> --- a/block.c
>>>> +++ b/block.c
>>>> @@ -369,7 +369,7 @@ static int find_image_format(const char *filename, BlockDriver **pdrv)
>>>>  {
>>>>      int ret, score, score_max;
>>>>      BlockDriver *drv1, *drv;
>>>> -    uint8_t buf[2048];
>>>> +    uint8_t buf[4096];
>>>>      BlockDriverState *bs;
>>>
>>> What's the reason for this change?
>>>
>> The size of add_cow_header in my code is larger than 2048.
>
> Right, but the magic is in the first 8 bytes, so for probing 2048 bytes
> should be more than enough.
>
>>>> diff --git a/block/add-cow.c b/block/add-cow.c
>>>> new file mode 100644
>>>> index 0000000..f4b67e5
>>>> --- /dev/null
>>>> +++ b/block/add-cow.c
>>>> @@ -0,0 +1,456 @@
>>>> +#include "qemu-common.h"
>>>> +#include "block_int.h"
>>>> +#include "module.h"
>>>> +
>>>> +#define ADD_COW_MAGIC  (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | \
>>>> +                        ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \
>>>> +                        ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \
>>>> +                        ((uint64_t)'W' << 8) | 0xFF)
>>>> +#define ADD_COW_VERSION 1
>>>> +
>>>> +struct add_cow_header {
>>>> +    uint64_t magic;
>>>> +    uint32_t version;
>>>> +    char backing_file[1024];
>>>> +    char image_file[1024];
>>>> +    uint64_t size;
>>>> +    uint32_t sectorsize;
>>>> +} add_cow_header;
>>>
>>> QEMU_PACKED
>> Sorry, what does QEMU_PACKED mean?
>
> This is an on-disk structure, so you need to pack the structure.
> Otherwise the compiler would be free to add padding between the fields
> in order to optimise alignment.
>
> struct add_cow_header {
>     ...
> } QEMU_PACKED add_cow_header;
>
> Hm, actually, do you really want to declare a global variable here? Or
> is a typedef missing? Also, coding style requires the struct name to be
> spelled AddCowHeader.
>
> Kevin
>
Thank you, Kevin. I will take this advice into account in the second version.

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 26b885b..1402f9f 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -31,6 +31,7 @@  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
 block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
+block-nested-y += add-cow.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
 block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
diff --git a/block.c b/block.c
index a8c789a..c797cfc 100644
--- a/block.c
+++ b/block.c
@@ -369,7 +369,7 @@  static int find_image_format(const char *filename, BlockDriver **pdrv)
 {
     int ret, score, score_max;
     BlockDriver *drv1, *drv;
-    uint8_t buf[2048];
+    uint8_t buf[4096];
     BlockDriverState *bs;
 
     ret = bdrv_file_open(&bs, filename, 0);
@@ -657,6 +657,10 @@  int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
         int back_flags;
         BlockDriver *back_drv = NULL;
 
+        char imaging_filename[PATH_MAX];
+        int cow_flags;
+        BlockDriver *cow_drv = NULL;
+
         bs->backing_hd = bdrv_new("");
 
         if (path_has_protocol(bs->backing_file)) {
@@ -686,6 +690,30 @@  int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
             /* base image inherits from "parent" */
             bs->backing_hd->keep_read_only = bs->keep_read_only;
         }
+
+        /* If there is a image_file, must be together with backing_file */
+        if (bs->image_file[0] != '\0') {
+            bs->image_hd = bdrv_new("");
+            if (path_has_protocol(bs->image_file)) {
+                pstrcpy(imaging_filename, sizeof(imaging_filename),
+                        bs->image_file);
+            } else {
+                path_combine(imaging_filename, sizeof(imaging_filename),
+                             filename, bs->image_file);
+            }
+
+            cow_drv = bdrv_find_format("add-cow");
+
+            cow_flags =
+                 (flags & (~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING))) | BDRV_O_RDWR;
+            bs->image_hd->keep_read_only = 0;
+
+            ret = bdrv_open(bs->image_hd, imaging_filename, cow_flags, back_drv);
+            if (ret < 0) {
+                bdrv_close(bs);
+                return ret;
+            }
+        }
     }
 
     if (!bdrv_key_required(bs)) {
@@ -711,6 +739,10 @@  void bdrv_close(BlockDriverState *bs)
             bdrv_delete(bs->backing_hd);
             bs->backing_hd = NULL;
         }
+        if (bs->image_hd) {
+            bdrv_delete(bs->image_hd);
+            bs->image_hd = NULL;
+        }
         bs->drv->bdrv_close(bs);
         g_free(bs->opaque);
 #ifdef _WIN32
@@ -851,7 +883,7 @@  int bdrv_commit(BlockDriverState *bs)
 
     if (!drv)
         return -ENOMEDIUM;
-    
+
     if (!bs->backing_hd) {
         return -ENOTSUP;
     }
@@ -2024,6 +2056,16 @@  void bdrv_get_backing_filename(BlockDriverState *bs,
     }
 }
 
+void bdrv_get_image_filename(BlockDriverState *bs,
+                               char *filename, int filename_size)
+{
+    if (!bs->image_file) {
+        pstrcpy(filename, filename_size, "");
+    } else {
+        pstrcpy(filename, filename_size, bs->image_file);
+    }
+}
+
 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                           const uint8_t *buf, int nb_sectors)
 {
@@ -3201,8 +3243,10 @@  int bdrv_img_create(const char *filename, const char *fmt,
     QEMUOptionParameter *param = NULL, *create_options = NULL;
     QEMUOptionParameter *backing_fmt, *backing_file, *size;
     BlockDriverState *bs = NULL;
-    BlockDriver *drv, *proto_drv;
+    BlockDriver *drv, *proto_drv, *cow_drv;;
     BlockDriver *backing_drv = NULL;
+    QEMUOptionParameter *cow_create_options = NULL;
+    QEMUOptionParameter *image_file;
     int ret = 0;
 
     /* Find driver and parse its options */
@@ -3225,10 +3269,16 @@  int bdrv_img_create(const char *filename, const char *fmt,
     create_options = append_option_parameters(create_options,
                                               proto_drv->create_options);
 
+    /* Just support raw format now*/
+    cow_drv = bdrv_find_format("raw");
+    cow_create_options = append_option_parameters(cow_create_options,
+                                              cow_drv->create_options);
+
     /* Create parameter list with default values */
     param = parse_option_parameters("", create_options, param);
 
     set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
+    set_option_parameter_int(cow_create_options, BLOCK_OPT_SIZE, img_size);
 
     /* Parse -o options */
     if (options) {
@@ -3269,6 +3319,25 @@  int bdrv_img_create(const char *filename, const char *fmt,
         }
     }
 
+    image_file = get_option_parameter(param, BLOCK_OPT_IMAGE_FILE);
+    if (image_file && image_file->value.s) {
+        if (!strcmp(filename, image_file->value.s)) {
+            error_report("Error: Trying to create an cow file with the "
+                         "same filename as the backing file");
+            ret = -EINVAL;
+            goto out;
+        }
+
+        if (backing_file && backing_file->value.s) {
+            if (!strcmp(image_file->value.s, backing_file->value.s)) {
+                error_report("Error: Trying to create an cow file with the "
+                             "same filename as the backing file");
+                ret = -EINVAL;
+                goto out;
+            }
+        }
+    }
+
     backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
     if (backing_fmt && backing_fmt->value.s) {
         backing_drv = bdrv_find_format(backing_fmt->value.s);
@@ -3300,6 +3369,7 @@  int bdrv_img_create(const char *filename, const char *fmt,
 
             snprintf(buf, sizeof(buf), "%" PRId64, size);
             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
+            set_option_parameter(cow_create_options, BLOCK_OPT_SIZE, buf);
         } else {
             error_report("Image creation needs a size parameter");
             ret = -EINVAL;
@@ -3326,9 +3396,16 @@  int bdrv_img_create(const char *filename, const char *fmt,
         }
     }
 
+    if (!strcmp(fmt, "add-cow") && image_file && image_file->value.s) {
+        printf("Formatting '%s', fmt= raw ", image_file->value.s);
+        print_option_parameters(cow_create_options);
+        puts("");
+        ret = bdrv_create(cow_drv, image_file->value.s, cow_create_options);
+    }
 out:
     free_option_parameters(create_options);
     free_option_parameters(param);
+    free_option_parameters(cow_create_options);
 
     if (bs) {
         bdrv_delete(bs);
diff --git a/block.h b/block.h
index 8ec409f..e55ce01 100644
--- a/block.h
+++ b/block.h
@@ -228,6 +228,8 @@  int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
                                char *filename, int filename_size);
+void bdrv_get_image_filename(BlockDriverState *bs,
+                               char *filename, int filename_size);
 int bdrv_can_snapshot(BlockDriverState *bs);
 int bdrv_is_snapshot(BlockDriverState *bs);
 BlockDriverState *bdrv_snapshots(void);
diff --git a/block/add-cow.c b/block/add-cow.c
new file mode 100644
index 0000000..f4b67e5
--- /dev/null
+++ b/block/add-cow.c
@@ -0,0 +1,456 @@ 
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+
+#define ADD_COW_MAGIC  (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | \
+                        ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \
+                        ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \
+                        ((uint64_t)'W' << 8) | 0xFF)
+#define ADD_COW_VERSION 1
+
+struct add_cow_header {
+    uint64_t magic;
+    uint32_t version;
+    char backing_file[1024];
+    char image_file[1024];
+    uint64_t size;
+    uint32_t sectorsize;
+} add_cow_header;
+
+typedef struct BDRVAddCowState {
+    CoMutex lock;
+    CoMutex bitmap_lock;
+} BDRVAddCowState;
+
+typedef struct AddCowAIOCB {
+    BlockDriverAIOCB common;
+    int64_t sector_num;
+    QEMUIOVector *qiov;
+    int remaining_sectors;
+    int cur_nr_sectors;
+    uint64_t bytes_done;
+    bool is_write;
+    QEMUIOVector hd_qiov;
+    QEMUBH *bh;
+
+} AddCowAIOCB;
+
+static int add_cow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const struct add_cow_header *add_cow_header = (const void *)buf;
+
+    if (buf_size >= sizeof(struct add_cow_header) &&
+        be64_to_cpu(add_cow_header->magic) == ADD_COW_MAGIC &&
+        be32_to_cpu(add_cow_header->version) == ADD_COW_VERSION) {
+        return 100;
+    } else {
+        return 0;
+    }
+}
+
+static int add_cow_open(BlockDriverState *bs, int flags)
+{
+    struct add_cow_header add_cow_header;
+    int64_t size;
+
+    if (bdrv_pread(bs->file, 0, &add_cow_header, sizeof(add_cow_header)) !=
+            sizeof(add_cow_header)) {
+        goto fail;
+    }
+
+    if (be64_to_cpu(add_cow_header.magic) != ADD_COW_MAGIC ||
+        be32_to_cpu(add_cow_header.version) != ADD_COW_VERSION) {
+        goto fail;
+    }
+
+    size = be64_to_cpu(add_cow_header.size);
+    bs->total_sectors = size / 512;
+
+    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
+            add_cow_header.backing_file);
+    pstrcpy(bs->image_file, sizeof(bs->image_file),
+            add_cow_header.image_file);
+
+    bs->bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(add_cow_header);
+    if (!bs->bitmap) {
+        bs->bitmap = g_malloc0(bs->bitmap_size);
+    }
+    if (bdrv_pread(bs->file, sizeof(add_cow_header), bs->bitmap,  \
+        bs->bitmap_size) != bs->bitmap_size) {
+        goto fail;
+    }
+
+    return 0;
+ fail:
+    if (bs->bitmap) {
+        g_free(bs->bitmap);
+        bs->bitmap = NULL;
+    }
+    return -1;
+}
+
+static inline int add_cow_set_bit(BlockDriverState *bs, int64_t bitnum)
+{
+    uint64_t offset = sizeof(struct add_cow_header) + bitnum / 8;
+    uint8_t bitmap;
+
+    qemu_co_mutex_lock(&(((BDRVAddCowState *)(bs->opaque))->bitmap_lock));
+    bitmap = (bs->bitmap[offset]) |= (1 << (bitnum % 8));
+    qemu_co_mutex_unlock(&((BDRVAddCowState *)(bs->opaque))->bitmap_lock);
+
+    return 0;
+}
+
+static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
+{
+    qemu_co_mutex_lock(&((BDRVAddCowState *)(bs->opaque))->bitmap_lock);
+    uint64_t offset = sizeof(struct add_cow_header) + bitnum / 8;
+    qemu_co_mutex_unlock(&((BDRVAddCowState *)(bs->opaque))->bitmap_lock);
+
+    return !!(bs->bitmap[offset] & (1 << (bitnum % 8)));
+}
+
+static int add_cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
+        int nb_sectors, int *num_same)
+{
+    int changed;
+
+    if (nb_sectors == 0) {
+        *num_same = nb_sectors;
+        return 0;
+    }
+
+    changed = is_bit_set(bs, sector_num);
+    if (changed < 0) {
+        return 0;
+    }
+
+    for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
+        if (is_bit_set(bs, sector_num + *num_same) != changed) {
+            break;
+        }
+    }
+
+    return changed;
+}
+
+static int add_cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
+        int nb_sectors)
+{
+    int error = 0;
+    int i;
+    int ret;
+    for (i = 0; i < nb_sectors; i++) {
+        error = add_cow_set_bit(bs, sector_num + i);
+        if (error) {
+            break;
+        }
+    }
+    ret = bdrv_pwrite_sync(bs->file, sizeof(struct add_cow_header), \
+            bs->bitmap, bs->bitmap_size);
+    if (ret < 0) {
+        return ret;
+    }
+    return error;
+}
+
+static void add_cow_close(BlockDriverState *bs)
+{
+    if (bs->bitmap) {
+        g_free(bs->bitmap);
+        bs->bitmap = NULL;
+    }
+}
+
+static int add_cow_create(const char *filename, QEMUOptionParameter *options)
+{
+    struct add_cow_header add_cow_header;
+    int64_t image_sectors = 0;
+    const char *backing_filename = NULL;
+    const char *image_filename = NULL;
+    int ret;
+    BlockDriverState *bs;
+
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            image_sectors = options->value.n / 512;
+        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+            backing_filename = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_IMAGE_FILE)) {
+            image_filename = options->value.s;
+        }
+        options++;
+    }
+    if (!backing_filename || !image_filename) {
+        fprintf(stderr, " backing_file and image_file can not be empty!\n");
+        return -EINVAL;
+    }
+    ret = bdrv_create_file(filename, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR);
+    if (ret < 0) {
+        return ret;
+    }
+
+    memset(&add_cow_header, 0, sizeof(add_cow_header));
+    add_cow_header.magic = cpu_to_be64(ADD_COW_MAGIC);
+    add_cow_header.version = cpu_to_be32(ADD_COW_VERSION);
+    pstrcpy(add_cow_header.backing_file, \
+                sizeof(add_cow_header.backing_file), backing_filename);
+    pstrcpy(add_cow_header.image_file, sizeof(add_cow_header.image_file),
+                image_filename);
+
+    add_cow_header.sectorsize = cpu_to_be32(512);
+    add_cow_header.size = cpu_to_be64(image_sectors * 512);
+
+    ret = bdrv_pwrite(bs, 0, &add_cow_header, sizeof(add_cow_header));
+    if (ret < 0) {
+        return ret;
+    }
+    bdrv_close(bs);
+
+    ret = bdrv_create_file(image_filename, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    BlockDriver *drv = bdrv_find_format("add-cow");
+    assert(drv != NULL);
+    ret = bdrv_open(bs, filename, BDRV_O_RDWR | BDRV_O_NO_FLUSH, drv);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = bdrv_truncate(bs, ((image_sectors + 7) >> 3));
+    if (ret < 0) {
+        return ret;
+    }
+    return ret;
+}
+
+/*
+ * Cancel hook installed in add_cow_aio_pool.
+ *
+ * Recovers the AddCowAIOCB that embeds the generic BlockDriverAIOCB as its
+ * "common" member and hands it to qemu_aio_release().
+ */
+static void add_cow_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    qemu_aio_release(container_of(blockacb, AddCowAIOCB, common));
+}
+
+/* Pool descriptor from which add_cow_aio_setup() obtains AddCowAIOCBs. */
+static AIOPool add_cow_aio_pool = {
+    .cancel     = add_cow_aio_cancel,
+    .aiocb_size = sizeof(AddCowAIOCB),
+};
+
+/*
+ * Obtain an AddCowAIOCB from add_cow_aio_pool and initialise its request
+ * bookkeeping for a transfer of nb_sectors sectors starting at sector_num.
+ *
+ * qiov is the caller's I/O vector; hd_qiov is initialised with room for
+ * qiov->niov entries.  is_write is stored in the request unchanged.
+ *
+ * Returns the new request, or NULL if qemu_aio_get() yields none.
+ */
+static AddCowAIOCB *add_cow_aio_setup(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        int is_write)
+{
+    AddCowAIOCB *req = qemu_aio_get(&add_cow_aio_pool, bs, NULL, NULL);
+
+    if (req == NULL) {
+        return NULL;
+    }
+
+    /* Describe the request as submitted by the caller. */
+    req->qiov = qiov;
+    req->is_write = is_write;
+    req->sector_num = sector_num;
+    req->remaining_sectors = nb_sectors;
+
+    /* Nothing has been transferred yet. */
+    req->cur_nr_sectors = 0;
+    req->bytes_done = 0;
+
+    qemu_iovec_init(&req->hd_qiov, qiov->niov);
+    return req;
+}
+
+/*
+ * Perform one step of a read request.
+ *
+ * The step first accounts for the chunk finished by the previous step
+ * (zero sectors on the first call) and then handles the next run of
+ * sectors, whose length is reported by add_cow_is_allocated():
+ *   - allocated sectors are read from bs->image_hd;
+ *   - unallocated sectors are read from bs->backing_hd if there is one;
+ *   - otherwise the corresponding part of the caller's buffer is zeroed.
+ *
+ * The caller holds s->lock; it is dropped around the actual read and
+ * re-taken before returning.
+ *
+ * Returns 0 once the whole request is complete, 1 if another step is
+ * required, or the negative value reported by bdrv_co_readv() on failure.
+ */
+static int add_cow_aio_read_cb(void *opaque)
+{
+    AddCowAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVAddCowState *s = bs->opaque;
+    BlockDriverState *src;
+    int allocated;
+    int ret;
+    int n;
+
+    acb->remaining_sectors -= acb->cur_nr_sectors;
+    acb->sector_num += acb->cur_nr_sectors;
+    acb->bytes_done += acb->cur_nr_sectors * 512;
+
+    if (acb->remaining_sectors == 0) {
+        return 0;
+    }
+
+    allocated = add_cow_is_allocated(bs, acb->sector_num,
+                                     acb->remaining_sectors, &n);
+    acb->cur_nr_sectors = n;
+
+    /*
+     * Point hd_qiov at the slice of the caller's buffer that belongs to
+     * this chunk.  This is done before choosing the data source so that
+     * the zero-fill path below also operates on the right slice instead of
+     * whatever hd_qiov held from a previous step.
+     */
+    qemu_iovec_reset(&acb->hd_qiov);
+    qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+                    acb->cur_nr_sectors * 512);
+
+    if (allocated) {
+        src = bs->image_hd;
+    } else if (bs->backing_hd) {
+        src = bs->backing_hd;
+    } else {
+        /* Unallocated and no backing file: the chunk reads as zeroes. */
+        qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
+        return 1;
+    }
+
+    qemu_co_mutex_unlock(&s->lock);
+    ret = bdrv_co_readv(src, acb->sector_num, acb->cur_nr_sectors,
+                        &acb->hd_qiov);
+    qemu_co_mutex_lock(&s->lock);
+    if (ret < 0) {
+        /* Propagate the real error, as add_cow_aio_write_cb() does. */
+        return ret;
+    }
+
+    return 1;
+}
+
+/*
+ * Perform one step of a write request.
+ *
+ * Accounts for the chunk finished by the previous step (zero sectors on
+ * the first call), then submits all remaining sectors to bs->image_hd as a
+ * single chunk.
+ *
+ * The caller holds s->lock; it is dropped around the actual write and
+ * re-taken before returning.
+ *
+ * Returns 0 once nothing remains to be written, 1 after a chunk has been
+ * written, or the negative value reported by bdrv_co_writev() on failure.
+ */
+static int add_cow_aio_write_cb(void *opaque)
+{
+    AddCowAIOCB *req = opaque;
+    BlockDriverState *bs = req->common.bs;
+    BDRVAddCowState *state = bs->opaque;
+    int rc = 0;
+
+    /* Account for the chunk completed by the previous step. */
+    req->remaining_sectors -= req->cur_nr_sectors;
+    req->sector_num += req->cur_nr_sectors;
+    req->bytes_done += req->cur_nr_sectors * 512;
+
+    if (req->remaining_sectors == 0) {
+        return 0;
+    }
+
+    /* Everything that is left goes out in one chunk. */
+    req->cur_nr_sectors = req->remaining_sectors;
+    qemu_iovec_reset(&req->hd_qiov);
+    qemu_iovec_copy(&req->hd_qiov, req->qiov, req->bytes_done,
+                    req->cur_nr_sectors * 512);
+
+    /* The lock is not held across the write to the image file. */
+    qemu_co_mutex_unlock(&state->lock);
+    rc = bdrv_co_writev(bs->image_hd, req->sector_num, req->cur_nr_sectors,
+                        &req->hd_qiov);
+    qemu_co_mutex_lock(&state->lock);
+
+    return rc < 0 ? rc : 1;
+}
+
+/*
+ * Coroutine read entry point of the add-cow driver.
+ *
+ * Sets up a request for nb_sectors sectors starting at sector_num and
+ * drives add_cow_aio_read_cb() under s->lock until it reports completion
+ * (0) or an error (negative).
+ *
+ * Returns 0 on success, -ENOMEM if no request could be set up, or the
+ * negative value reported by add_cow_aio_read_cb().
+ */
+static int add_cow_co_readv(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors, QEMUIOVector *qiov)
+{
+    BDRVAddCowState *s = bs->opaque;
+    AddCowAIOCB *acb;
+    int ret;
+
+    acb = add_cow_aio_setup(bs, sector_num, qiov, nb_sectors, 0);
+    if (!acb) {
+        /* add_cow_aio_setup() may return NULL; do not dereference it. */
+        return -ENOMEM;
+    }
+
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = add_cow_aio_read_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
+
+    qemu_iovec_destroy(&acb->hd_qiov);
+    qemu_aio_release(acb);
+    return ret;
+}
+
+/*
+ * Coroutine write entry point of the add-cow driver.
+ *
+ * Sets up a request for nb_sectors sectors starting at sector_num and
+ * drives add_cow_aio_write_cb() under s->lock until it reports completion
+ * (0) or an error (negative).  After a successful write the written range
+ * is passed to add_cow_update_bitmap().
+ *
+ * Returns 0 on success, -ENOMEM if no request could be set up, or the
+ * negative value reported by add_cow_aio_write_cb().
+ */
+static int add_cow_co_writev(BlockDriverState *bs, int64_t sector_num,
+                          int nb_sectors, QEMUIOVector *qiov)
+{
+    BDRVAddCowState *s = bs->opaque;
+    AddCowAIOCB *acb;
+    int ret;
+
+    acb = add_cow_aio_setup(bs, sector_num, qiov, nb_sectors, 1);
+    if (!acb) {
+        /* add_cow_aio_setup() may return NULL; do not dereference it. */
+        return -ENOMEM;
+    }
+
+    qemu_co_mutex_lock(&s->lock);
+    do {
+        ret = add_cow_aio_write_cb(acb);
+    } while (ret > 0);
+    qemu_co_mutex_unlock(&s->lock);
+    if (ret == 0) {
+        /*
+         * NOTE(review): the bitmap update runs after s->lock has been
+         * dropped, and if add_cow_update_bitmap() can fail that failure is
+         * not propagated to the caller - confirm both are intended.
+         */
+        add_cow_update_bitmap(bs, sector_num, nb_sectors);
+    }
+
+    qemu_iovec_destroy(&acb->hd_qiov);
+    qemu_aio_release(acb);
+
+    return ret;
+}
+
+/*
+ * Truncate callback of the add-cow driver.
+ *
+ * Resizes bs->file to offset plus sizeof(add_cow_header).
+ *
+ * Returns 0 on success or the negative value reported by bdrv_truncate().
+ */
+static int bdrv_add_cow_truncate(BlockDriverState *bs, int64_t offset)
+{
+    int rc = bdrv_truncate(bs->file, offset + sizeof(add_cow_header));
+
+    return rc < 0 ? rc : 0;
+}
+
+/*
+ * Options recognised by add_cow_create(): the virtual size, the backing
+ * file and the image file.  The list ends with a NULL-name entry.
+ */
+static QEMUOptionParameter add_cow_create_options[] = {
+    { .name = BLOCK_OPT_SIZE,
+      .type = OPT_SIZE,
+      .help = "Virtual disk size" },
+    { .name = BLOCK_OPT_BACKING_FILE,
+      .type = OPT_STRING,
+      .help = "File name of a base image" },
+    { .name = BLOCK_OPT_IMAGE_FILE,
+      .type = OPT_STRING,
+      .help = "File name of a image file" },
+    { NULL }
+};
+
+/*
+ * Flush callback of the add-cow driver.
+ *
+ * add_cow_co_writev() writes guest data to bs->image_hd, so that device is
+ * flushed in addition to bs->file.  The image file is flushed first and
+ * bs->file afterwards.
+ *
+ * Returns the first negative value reported by bdrv_flush(), otherwise the
+ * result of flushing bs->file.
+ */
+static int add_cow_flush(BlockDriverState *bs)
+{
+    int ret;
+
+    if (bs->image_hd) {
+        ret = bdrv_flush(bs->image_hd);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return bdrv_flush(bs->file);
+}
+
+/*
+ * Asynchronous flush callback of the add-cow driver.
+ *
+ * Forwards the request to bs->file; cb and opaque are passed through
+ * unchanged and the AIOCB returned by bdrv_aio_flush() is handed back.
+ *
+ * NOTE(review): bs->image_hd is not flushed here - confirm whether it
+ * should be.
+ */
+static BlockDriverAIOCB *add_cow_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverState *cow_file = bs->file;
+
+    return bdrv_aio_flush(cow_file, cb, opaque);
+}
+
+/* Driver description for the "add-cow" format, registered below. */
+static BlockDriver bdrv_add_cow = {
+    .format_name        = "add-cow",
+    .instance_size      = sizeof(BDRVAddCowState),
+
+    /* Image life cycle */
+    .bdrv_probe         = add_cow_probe,
+    .bdrv_create        = add_cow_create,
+    .create_options     = add_cow_create_options,
+    .bdrv_open          = add_cow_open,
+    .bdrv_close         = add_cow_close,
+    .bdrv_truncate      = bdrv_add_cow_truncate,
+
+    /* Data path */
+    .bdrv_is_allocated  = add_cow_is_allocated,
+    .bdrv_co_readv      = add_cow_co_readv,
+    .bdrv_co_writev     = add_cow_co_writev,
+
+    /* Flushing */
+    .bdrv_flush         = add_cow_flush,
+    .bdrv_aio_flush     = add_cow_aio_flush,
+};
+
+/* Register the add-cow driver description via bdrv_register(). */
+static void bdrv_add_cow_init(void)
+{
+    bdrv_register(&bdrv_add_cow);
+}
+
+/*
+ * NOTE(review): presumably arranges for bdrv_add_cow_init() to run when
+ * the block layer initialises its drivers - confirm against block_init().
+ */
+block_init(bdrv_add_cow_init);
diff --git a/block_int.h b/block_int.h
index 5dc0074..1acb2ad 100644
--- a/block_int.h
+++ b/block_int.h
@@ -42,6 +42,7 @@ 
 #define BLOCK_OPT_TABLE_SIZE    "table_size"
 #define BLOCK_OPT_PREALLOC      "preallocation"
 #define BLOCK_OPT_SUBFMT        "subformat"
+#define BLOCK_OPT_IMAGE_FILE    "image_file"
 
 typedef struct AIOPool {
     void (*cancel)(BlockDriverAIOCB *acb);
@@ -208,6 +209,11 @@  struct BlockDriverState {
     int in_use; /* users other than guest access, eg. block migration */
     QTAILQ_ENTRY(BlockDriverState) list;
     void *private;
+
+    char image_file[1024];
+    BlockDriverState *image_hd;
+    uint8_t *bitmap;
+    uint64_t bitmap_size;
 };
 
 struct BlockDriverAIOCB {
diff --git a/qemu-img.c b/qemu-img.c
index 6a39731..0c3f1c2 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1093,6 +1093,8 @@  static int img_info(int argc, char **argv)
     int64_t allocated_size;
     char backing_filename[1024];
     char backing_filename2[1024];
+    char image_filename[1024];
+    char image_filename2[1024];
     BlockDriverInfo bdi;
 
     fmt = NULL;
@@ -1153,6 +1155,14 @@  static int img_info(int argc, char **argv)
                backing_filename,
                backing_filename2);
     }
+    bdrv_get_image_filename(bs, image_filename, sizeof(image_filename));
+    if (image_filename[0] != '\0') {
+        path_combine(image_filename2, sizeof(image_filename2),
+                     filename, image_filename);
+        printf("image file: %s (actual path: %s)\n",
+               image_filename,
+               image_filename2);
+    }
     dump_snapshots(bs);
     bdrv_delete(bs);
     return 0;