diff --git a/block/Makefile.objs b/block/Makefile.objs
index c067f38..e045440 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,6 @@
 block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
+block-obj-y += irow.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
diff --git a/block/irow.c b/block/irow.c
new file mode 100644
index 0000000..99b8579
--- /dev/null
+++ b/block/irow.c
@@ -0,0 +1,2257 @@
+/* IROW (Improved ROW) Disk Format
+ * */
+
+/*
+ * iROW (improved Redirect-on-Write) is a disk format supporting high-efficiency VM disk snapshots.
+ * iROW uses a bitmap to reduce the amount of metadata, so that the performance of both the key VM disk
+ * snapshot operations and the VM disk I/O is improved at the same time.
+ *
+ *The iROW VM disk image consists of a meta file and several snapshots.
+ *
+ *A snapshot consists of 2 files: a bitmap file (btmp file) and a VM disk data file (irvd file).
+ *The current state of the iROW VM disk also occupies a snapshot.
+ *
+ *The meta file consists of the meta header and the snapshot information. The meta header is used to
+ *store basic information about the VM disk image. The snapshot information sequentially stores every snapshot's name,
+ *id and other related information.
+ *
+ *The btmp file consists of a bitmap and the VM state data. The bitmap is used to indicate whether the
+ *clusters exist in corresponding irvd file. Each cluster in the VM disk image is mapped to a bit in the bitmap.
+ *
+ *The irvd file is used to store the actual data of the VM disk image. The smallest unit of storage is cluster.
+ *iROW does not decide the address of the data clusters. It just writes the clusters to the same VM disk image
+ *addresses as the virtual addresses of the clusters. Because the host machine's file system supports sparse files,
+ *iROW also achieves the gradual growth of the VM disk image size with the actual disk usage.
+ *
+ */
+
+#include "qemu-common.h"
+#include "include/block/block_int.h"
+#include "include/qemu/module.h"
+#include "block/irow.h"
+
+#include <linux/falloc.h>
+
+/*
+ * Lazily opened ancestor snapshot states, indexed by snapshot index.
+ * Allocated by irow_init_birows_cache() and released by irow_free_birows_cache().
+ * NOTE(review): this is a process-wide global, so it appears to be shared by
+ * every open iROW image - confirm that only one image is open at a time.
+ */
+BDRVIrowState **birows_cache = NULL;
+/*
+ * Single-cluster read cache; created in irow_open_meta() with a buffer of one
+ * cluster. cluster_num == -1 means the cache holds no valid cluster.
+ */
+ClusterCache *cluster_cache = NULL;
+
+/*
+ * Return log2(size) when size is an exact power of two.
+ * Returns -1 when size is zero or is not a power of two.
+ */
+static int get_bits_from_size(size_t size)
+{
+	int bits;
+
+	/* A power of two has exactly one bit set. */
+	if (size == 0 || (size & (size - 1)) != 0) {
+		return -1;
+	}
+
+	for (bits = 0; size > 1; size >>= 1) {
+		bits++;
+	}
+	return bits;
+}
+
+/*
+ * Format probe. Scores 100 when buf is at least sizeof(IRowMeta) bytes long
+ * and its big-endian magic and version fields equal IROW_MAGIC and
+ * IROW_VERSION; scores 0 otherwise. The filename argument is not used.
+ */
+static int irow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+	const IRowMeta *header = (const void *)buf;
+
+	if (buf_size < sizeof(IRowMeta)) {
+		return 0;
+	}
+	if (be32_to_cpu(header->magic) != IROW_MAGIC) {
+		return 0;
+	}
+	if (be32_to_cpu(header->version) != IROW_VERSION) {
+		return 0;
+	}
+	return 100;
+}
+
+/*
+ * Release the in-memory bitmap and the BlockDriverState of the btmp file.
+ * Both pointers are reset to NULL, so the function may be called repeatedly.
+ */
+static void irow_close_btmp(BDRVIrowState *s) {
+	if(s->bitmap) {
+		/* The bitmap comes from qemu_memalign() (see irow_open_btmp), whose
+		 * matching release function is qemu_vfree(), not g_free(). */
+		qemu_vfree(s->bitmap);
+		s->bitmap = NULL;
+	}
+
+	if(s->irow_btmp) {
+		bdrv_delete(s->irow_btmp);
+		s->irow_btmp = NULL;
+	}
+}
+
+/* Release the BlockDriverState of the irvd data file, if one is attached. */
+static void irow_close_irvd(BDRVIrowState *s) {
+	if(s->irow_irvd == NULL) {
+		return;
+	}
+
+	bdrv_delete(s->irow_irvd);
+	s->irow_irvd = NULL;
+}
+
+/*
+ * Free a snapshot table: every string owned by each of the nb_snapshots
+ * entries, then the table itself. A NULL table is ignored.
+ */
+static void irow_close_snapshots2(IRowSnapshot *snapshots, int nb_snapshots) {
+	int idx;
+
+	if(snapshots == NULL) {
+		return;
+	}
+
+	for(idx = 0; idx < nb_snapshots; idx++) {
+		IRowSnapshot *entry = &snapshots[idx];
+
+		/* g_free() accepts NULL, so no per-field guard is required. */
+		g_free(entry->btmp_file);
+		entry->btmp_file = NULL;
+
+		g_free(entry->irvd_file);
+		entry->irvd_file = NULL;
+
+		g_free(entry->father_btmp_file);
+		entry->father_btmp_file = NULL;
+
+		g_free(entry->id_str);
+		entry->id_str = NULL;
+
+		g_free(entry->name);
+		entry->name = NULL;
+	}
+
+	g_free(snapshots);
+}
+
+/* Free the snapshot table owned by birows and clear the pointer to it. */
+static void irow_close_snapshots(BDRVIrowState *birows) {
+	IRowSnapshot *table = birows->snapshots;
+
+	birows->snapshots = NULL;
+	irow_close_snapshots2(table, birows->nb_snapshots);
+}
+
+/*
+ * Release everything that was loaded from the meta file: the cached file
+ * names, the meta BlockDriverState and the snapshot table. Every pointer is
+ * reset to NULL afterwards.
+ */
+static void irow_close_meta(BDRVIrowState *s) {
+	/* g_free() accepts NULL, so the names can be released unconditionally. */
+	g_free(s->meta_file);
+	s->meta_file = NULL;
+
+	g_free(s->current_btmp_file);
+	s->current_btmp_file = NULL;
+
+	g_free(s->father_btmp_file);
+	s->father_btmp_file = NULL;
+
+	g_free(s->irvd_file);
+	s->irvd_file = NULL;
+
+	g_free(s->opened_btmp_file);
+	s->opened_btmp_file = NULL;
+
+	if(s->irow_meta != NULL) {
+		bdrv_delete(s->irow_meta);
+		s->irow_meta = NULL;
+	}
+
+	if(s->snapshots != NULL) {
+		irow_close_snapshots(s);
+	}
+}
+
+/*
+ * Tear down one iROW state: meta file data first, then the bitmap (btmp)
+ * file, then the data (irvd) file. The BDRVIrowState itself is not freed.
+ */
+static void irow_close_state(BDRVIrowState *s) {
+
+	irow_close_meta(s);
+	irow_close_btmp(s);
+	irow_close_irvd(s);
+
+}
+
+/*
+ * Report whether every cluster of the image is present in this snapshot.
+ *
+ * Returns 1 when all total_clusters bits of the bitmap are set, 0 otherwise.
+ * The bitmap holds (total_clusters + 7) / 8 bytes, so when total_clusters is
+ * not a multiple of 8 the last byte carries padding bits that irow_set_bit()
+ * never sets; those bits are ignored here instead of being required to be 1.
+ */
+static int irow_check_bitmap(BDRVIrowState *birows) {
+	uint64_t i;
+	uint64_t full_bytes = birows->bitmap_size;
+	unsigned int tail_bits = (unsigned int)(birows->total_clusters & 0x7);
+
+	/* With a partial last byte, only the bytes before it must be 0xff. */
+	if(tail_bits != 0 && full_bytes > 0) {
+		full_bytes -= 1;
+	} else {
+		tail_bits = 0;
+	}
+
+	for(i = 0; i < full_bytes; i++) {
+		if(birows->bitmap[i] != 0xff)
+			return 0;
+	}
+
+	if(tail_bits != 0) {
+		/* Cluster n lives in bit (n & 7), so valid bits are the low ones. */
+		unsigned int tail_mask = (1u << tail_bits) - 1;
+		if((birows->bitmap[full_bytes] & tail_mask) != tail_mask)
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Write the in-memory bitmap back to the btmp file when it is dirty, and
+ * resize the btmp file to bitmap_size + vm_state_size when the bitmap was
+ * flushed or VM state was saved.
+ *
+ * Returns -1 when the bitmap could not be written, otherwise the result of
+ * bdrv_truncate() (0 when nothing had to be done). The resize is performed
+ * once so that a failure cannot be masked by a later successful resize.
+ */
+static int irow_update_btmp(BDRVIrowState *birows) {
+	int need_truncate = 0;
+
+	if(birows->bitmap_is_dirty) {
+		if(bdrv_pwrite(birows->irow_btmp, 0, birows->bitmap, birows->bitmap_size) != birows->bitmap_size) {
+			fprintf(stderr, "Failed to write the IROW bitmap data to %s\n", birows->opened_btmp_file);
+			return -1;
+		}
+		birows->bitmap_is_dirty = 0;
+		if(irow_check_bitmap(birows)) {
+			birows->complete_image = 1;
+		}
+		need_truncate = 1;
+	}
+
+	if(birows->vmstate_is_saved) {
+		birows->vmstate_is_saved = 0;
+		need_truncate = 1;
+	}
+
+	if(!need_truncate) {
+		return 0;
+	}
+	return bdrv_truncate(birows->irow_btmp, birows->bitmap_size + birows->vm_state_size);
+}
+
+/*
+ * Rewrite the meta file so that it matches the in-memory state.
+ *
+ * birows                       state whose meta file is updated
+ * current_btmp                 new value for the header's current_btmp field,
+ *                              or NULL to leave it unchanged
+ * change_copy_on_demand_state  non-zero toggles the header's copy_on_demand flag
+ *
+ * The snapshot table is rewritten only when birows->snapshots_is_dirty is set.
+ * Returns -1 on a read/write failure or a missing btmp/irvd name, otherwise
+ * the result of the final bdrv_truncate().
+ *
+ * NOTE(review): the strncpy() calls fill fixed-size fields and do not
+ * NUL-terminate a source that is as long as the field - confirm readers cope.
+ * NOTE(review): snap_magic is stored big-endian while the other numeric
+ * snapshot fields are copied without byte swapping - confirm this is intended.
+ * NOTE(review): entries are written at sizeof(meta) + i * sizeof(header) while
+ * irow_open_snapshots() reads from IROW_SNAPSHOT_OFFSET - confirm they agree.
+ */
+static int irow_update_meta(BDRVIrowState *birows, const char *current_btmp, int change_copy_on_demand_state) {
+	int i,  ret = 0;
+	uint32_t copy_on_demand;
+	IRowMeta meta;
+	IRowSnapshotHeader snap_header;
+	IRowSnapshot *snap_ptr;
+
+	/* Nothing requested and nothing dirty: leave the file untouched. */
+	if(change_copy_on_demand_state == 0 && birows->snapshots_is_dirty == 0 && current_btmp == NULL)
+		goto end;
+
+	/* Start from the header currently on disk and patch it. */
+	if(bdrv_pread (birows->irow_meta, 0, &meta, sizeof(meta)) != sizeof(meta)) {
+			fprintf (stderr, "Failed to read the meta data from %s\n", birows->meta_file);
+			ret = -1;
+			goto end;
+	}
+	if(change_copy_on_demand_state) {
+		/* Flip the on-disk (big-endian) flag between 0 and 1. */
+		copy_on_demand = meta.copy_on_demand;
+		be32_to_cpus(&copy_on_demand);
+		copy_on_demand = copy_on_demand ? 0 : 1;
+		meta.copy_on_demand = cpu_to_be32(copy_on_demand);
+	}
+	if(current_btmp != NULL) {
+		memset(meta.current_btmp, 0, MAX_FILE_NAME_LENGTH);
+		strncpy(meta.current_btmp, current_btmp, MAX_FILE_NAME_LENGTH);
+	}
+
+	if(birows->snapshots_is_dirty) {
+		meta.nb_snapshots = cpu_to_be32(birows->nb_snapshots);
+		/* Serialize every snapshot entry right after the meta header. */
+		for(i = 0; i < birows->nb_snapshots; i++) {
+			memset(&snap_header, 0, sizeof(snap_header));
+			snap_ptr = birows->snapshots + i;
+			snap_header.snap_magic = cpu_to_be32(IROW_SNAPHEADER_MAGIC);
+			snap_header.date_sec = snap_ptr->date_sec;
+			snap_header.date_nsec = snap_ptr->date_nsec;
+			snap_header.vm_clock_nsec = snap_ptr->vm_clock_nsec;
+			snap_header.vm_state_size = snap_ptr->vm_state_size;
+			snap_header.nb_children = snap_ptr->nb_children;
+			snap_header.is_deleted = snap_ptr->is_deleted;
+			if(snap_ptr->id_str != NULL)
+				strncpy(snap_header.id_str, snap_ptr->id_str, 128);
+			if(snap_ptr->name != NULL)
+				strncpy(snap_header.name, snap_ptr->name, 256);
+			/* btmp and irvd names are mandatory; the father name is optional. */
+			if(snap_ptr->btmp_file == NULL) {
+				fprintf(stderr, "Void btmp filename\n");
+				ret = -1;
+				goto end;
+			}
+			strncpy(snap_header.btmp_file, snap_ptr->btmp_file, MAX_FILE_NAME_LENGTH);
+			if(snap_ptr->irvd_file == NULL) {
+				fprintf(stderr, "Void irvd filename\n");
+				ret = -1;
+				goto end;
+			}
+			strncpy(snap_header.irvd_file, snap_ptr->irvd_file, MAX_FILE_NAME_LENGTH);
+			if(snap_ptr->father_btmp_file != NULL)
+				strncpy(snap_header.father_btmp_file, snap_ptr->father_btmp_file, MAX_FILE_NAME_LENGTH);
+
+			if(bdrv_pwrite(birows->irow_meta, sizeof(meta) + i * sizeof(IRowSnapshotHeader), &snap_header, sizeof(snap_header)) != sizeof(snap_header)) {
+				fprintf (stderr, "Failed to write the snapshot #%d info to %s\n", i, birows->meta_file);
+				ret = -1;
+				goto end;
+			}
+		}
+		/* Only reached when every entry was written; on failure the flag stays set. */
+		birows->snapshots_is_dirty = 0;
+	}
+
+	/* The header is written after the snapshot entries. */
+	if(bdrv_pwrite(birows->irow_meta, 0, &meta, sizeof(meta)) != sizeof(meta)) {
+		fprintf (stderr, "Failed to write the meta data to %s\n", birows->meta_file);
+		ret = -1;
+		goto end;
+	}
+
+	/* Trim the file to header + nb_snapshots entries. */
+	ret = bdrv_truncate(birows->irow_meta, sizeof(meta) + (birows->nb_snapshots) * sizeof(IRowSnapshotHeader));
+
+end:
+	return ret;
+}
+
+/*
+ * Dispose of a state created by irow_open_previous_state(), including the
+ * BDRVIrowState allocation itself. Such a state shares the irow_meta
+ * BlockDriverState of the state it was derived from, so the pointer is
+ * cleared first to keep irow_close_meta() from deleting it.
+ */
+static void irow_close_previous_state(BDRVIrowState *birows) {
+	birows->irow_meta = NULL;
+	irow_close_state(birows);
+	g_free(birows);
+}
+
+/*
+ * Close every cached ancestor state and release the global cache array.
+ * birows->nb_snapshots is used as the length of the array.
+ */
+static void irow_free_birows_cache(BDRVIrowState *birows) {
+	int slot = 0;
+
+	if(birows_cache == NULL) {
+		return;
+	}
+
+	while(slot < birows->nb_snapshots) {
+		if(birows_cache[slot] != NULL) {
+			irow_close_previous_state(birows_cache[slot]);
+		}
+		slot++;
+	}
+
+	g_free(birows_cache);
+	birows_cache = NULL;
+}
+
+/*
+ * Close an iROW image: first the cached ancestor states (which share this
+ * state's meta BlockDriverState), then the state stored in bs->opaque.
+ */
+static void irow_close(BlockDriverState *bs) {
+
+	BDRVIrowState *s = bs->opaque;
+
+	irow_free_birows_cache(s);
+	irow_close_state(s);
+
+}
+
+/*
+ * Load the snapshot table from the meta file into birows->snapshots.
+ *
+ * Reads birows->nb_snapshots IRowSnapshotHeader records starting at
+ * IROW_SNAPSHOT_OFFSET. id_str, name and father_btmp_file are optional and
+ * stay NULL when empty on disk; btmp_file and irvd_file are mandatory.
+ * Each string gets one extra zeroed byte so it is always NUL-terminated,
+ * even when the on-disk field is completely filled.
+ *
+ * Returns 0 on success. On failure returns -1 after freeing the partially
+ * built table (birows->snapshots is reset to NULL).
+ */
+static int irow_open_snapshots(BDRVIrowState *birows) {
+	int i, ret = 0;
+	IRowSnapshotHeader snap_header;
+	IRowSnapshot *snap_ptr;
+	int64_t offset;
+
+	birows->snapshots = g_malloc0(sizeof(IRowSnapshot) * birows->nb_snapshots);
+	offset = IROW_SNAPSHOT_OFFSET;
+	for(i = 0; i < birows->nb_snapshots; i++) {
+		if(bdrv_pread(birows->irow_meta, offset, &snap_header, sizeof(snap_header)) != sizeof(snap_header)) {
+			fprintf(stderr, "Failed to read snapshot #%d info from %s\n", i, birows->meta_file);
+			ret = -1;
+			goto fail;
+		}
+		snap_ptr = birows->snapshots + i;
+		snap_ptr->date_sec = snap_header.date_sec;
+		snap_ptr->date_nsec = snap_header.date_nsec;
+		snap_ptr->vm_clock_nsec = snap_header.vm_clock_nsec;
+		snap_ptr->vm_state_size = snap_header.vm_state_size;
+		snap_ptr->nb_children = snap_header.nb_children;
+		snap_ptr->is_deleted = snap_header.is_deleted;
+
+		if(snap_header.id_str[0] != '\0') {
+			snap_ptr->id_str = g_malloc0(128 + 1);
+			strncpy(snap_ptr->id_str, snap_header.id_str, 128);
+		}
+		if(snap_header.name[0] != '\0') {
+			snap_ptr->name = g_malloc0(256 + 1);
+			strncpy(snap_ptr->name, snap_header.name, 256);
+		}
+		/* Test the first character: comparing the array itself with '\0'
+		 * compares an address with 0 and can never be true. */
+		if(snap_header.btmp_file[0] == '\0') {
+			fprintf(stderr, "Invalid btmp file name. (snapshot #%d)\n", i);
+			ret = -1;
+			goto fail;
+		}
+		snap_ptr->btmp_file = g_malloc0(MAX_FILE_NAME_LENGTH + 1);
+		strncpy(snap_ptr->btmp_file, snap_header.btmp_file, MAX_FILE_NAME_LENGTH);
+		if(snap_header.irvd_file[0] == '\0') {
+			fprintf(stderr, "Invalid irvd file name. (snapshot #%d)\n", i);
+			ret = -1;
+			goto fail;
+		}
+		snap_ptr->irvd_file = g_malloc0(MAX_FILE_NAME_LENGTH + 1);
+		strncpy(snap_ptr->irvd_file, snap_header.irvd_file, MAX_FILE_NAME_LENGTH);
+		if(snap_header.father_btmp_file[0] != '\0') {
+			snap_ptr->father_btmp_file = g_malloc0(MAX_FILE_NAME_LENGTH + 1);
+			strncpy(snap_ptr->father_btmp_file, snap_header.father_btmp_file, MAX_FILE_NAME_LENGTH);
+		}
+		offset += sizeof(snap_header);
+	}
+	birows->snapshots_is_dirty = 0;
+
+	return ret;
+fail:
+	irow_close_snapshots(birows);
+	return ret;
+
+}
+
+/*
+ * Open the meta file, validate its header and load it into birows.
+ *
+ * bs        block device being opened; total_sectors and backing_file are
+ *           filled in from the header
+ * birows    state to populate
+ * filename  path of the meta file
+ * flags     open flags passed on to bdrv_file_open()
+ *
+ * Also creates the global single-cluster cache on first use and loads the
+ * snapshot table. Returns 0 on success and a negative value on failure.
+ * Nothing is released here on failure; irow_open() calls irow_close() on its
+ * error path.
+ *
+ * NOTE(review): meta_file and current_btmp_file come from g_malloc() and are
+ * filled with strncpy(), so a source of MAX_FILE_NAME_LENGTH characters or
+ * more leaves them without a terminating NUL - confirm inputs are shorter.
+ * NOTE(review): cluster_cache is a global sized from the cluster_size of the
+ * first image opened - confirm that all images use the same cluster size.
+ */
+static int irow_open_meta(BlockDriverState *bs, BDRVIrowState *birows, const char *filename, int flags) {
+	int ret = 0;
+	IRowMeta meta;
+
+	birows->irow_meta = bdrv_new ("");
+	ret = bdrv_file_open(&birows->irow_meta, filename, flags);
+	if (ret < 0) {
+		fprintf (stderr, "Failed to open %s\n", filename);
+		goto end;
+	}
+	if (bdrv_pread (birows->irow_meta, 0, &meta, sizeof(meta)) != sizeof(meta)) {
+		fprintf (stderr, "Failed to read the IROW meta data from %s\n", filename);
+		ret = -1;
+		goto end;
+	}
+	/* Header integers are stored big-endian on disk. */
+	be32_to_cpus(&meta.magic);
+	be32_to_cpus(&meta.version);
+	be32_to_cpus(&meta.copy_on_demand);
+	be32_to_cpus(&meta.cluster_size);
+	be32_to_cpus(&meta.cluster_bits);
+	be64_to_cpus(&meta.total_clusters);
+	be32_to_cpus(&meta.sectors_per_cluster);
+	be64_to_cpus(&meta.disk_size);
+	be32_to_cpus(&meta.nb_snapshots);
+
+	/* Sanity checks: the redundant geometry fields must agree with each other. */
+	if(meta.magic != IROW_MAGIC || meta.version != IROW_VERSION) {
+		fprintf (stderr, "Invalid magic number or version number!\n");
+		ret = -1;
+		goto end;
+	}
+	if((meta.cluster_bits < MIN_CLUSTER_BITS) || (meta.cluster_bits > MAX_CLUSTER_BITS)) {
+		fprintf (stderr, "Invalid cluster_bits!\n");
+		ret = -1;
+		goto end;
+	}
+	if(meta.cluster_bits != get_bits_from_size(meta.cluster_size)) {
+		fprintf (stderr, "cluster_size and cluster_bits do not match!\n");
+		ret = -1;
+		goto end;
+	}
+	if(meta.total_clusters != ((meta.disk_size + meta.cluster_size - 1) >> meta.cluster_bits)) {
+		fprintf (stderr, "total_clusters and disk_size do not match!\n");
+		ret = -1;
+		goto end;
+	}
+	if(meta.sectors_per_cluster != (meta.cluster_size >> BDRV_SECTOR_BITS)) {
+		fprintf (stderr, "Invalid sectors_per_cluster!\n");
+		ret = -1;
+		goto end;
+	}
+	birows->copy_on_demand = meta.copy_on_demand;
+	birows->cluster_size = meta.cluster_size;
+	birows->cluster_bits = meta.cluster_bits;
+	birows->total_clusters = meta.total_clusters;
+	birows->sectors_per_cluster = meta.sectors_per_cluster;
+	birows->disk_size = meta.disk_size;
+	bs->total_sectors = meta.disk_size / BDRV_SECTOR_SIZE;
+	/* One bit per cluster, rounded up to whole bytes. */
+	birows->bitmap_size = (birows->total_clusters + 7) >> 3;
+	birows->nb_snapshots = meta.nb_snapshots;
+	birows->meta_file = g_malloc(MAX_FILE_NAME_LENGTH);
+	strncpy(birows->meta_file, filename, MAX_FILE_NAME_LENGTH);
+	birows->current_btmp_file = g_malloc(MAX_FILE_NAME_LENGTH);
+	strncpy(birows->current_btmp_file, meta.current_btmp, MAX_FILE_NAME_LENGTH);
+	strncpy(bs->backing_file, meta.backing_file, sizeof(bs->backing_file));
+
+	/* Create the global one-cluster cache once; -1 marks it as empty. */
+	if(cluster_cache == NULL) {
+		cluster_cache = g_malloc0(sizeof(ClusterCache));
+		if(cluster_cache != NULL) {
+			cluster_cache->cache = qemu_memalign(512, birows->cluster_size);
+			if(cluster_cache->cache != NULL)
+				memset(cluster_cache->cache, 0, birows->cluster_size);
+			else {
+				fprintf(stderr, "Failed to create father cache\n");
+				ret = -1;
+				goto end;
+			}
+			cluster_cache->cluster_num = -1;
+		} else {
+			fprintf(stderr, "Failed to create father cache\n");
+			ret = -1;
+			goto end;
+		}
+	}
+
+	if(irow_open_snapshots(birows) < 0) {
+		fprintf(stderr, "Failed to read snapshots info from %s\n", birows->meta_file);
+		ret = -1;
+		goto end;
+	}
+
+end:
+	return ret;
+}
+
+/*
+ * Open the bitmap (btmp) file and read its first bitmap_size bytes into a
+ * freshly allocated birows->bitmap. Resets the dirty/vmstate flags and sets
+ * complete_image according to irow_check_bitmap().
+ *
+ * Returns the bdrv_file_open() result on success or open failure, and -1
+ * when the bitmap cannot be read.
+ */
+static int irow_open_btmp(BDRVIrowState *birows,  const char *filename, int flags) {
+	int open_ret;
+
+	birows->irow_btmp = bdrv_new ("");
+	open_ret = bdrv_file_open(&birows->irow_btmp, filename, flags);
+	if (open_ret < 0) {
+		return open_ret;
+	}
+
+	birows->bitmap = qemu_memalign(512, birows->bitmap_size);
+	if(bdrv_pread(birows->irow_btmp, 0, birows->bitmap, birows->bitmap_size) != birows->bitmap_size) {
+		fprintf(stderr, "Failed to read bitmap from %s\n", filename);
+		return -1;
+	}
+
+	birows->bitmap_is_dirty = 0;
+	birows->vmstate_is_saved = 0;
+	birows->complete_image = irow_check_bitmap(birows) ? 1 : 0;
+
+	return open_ret;
+}
+
+/*
+ * Open the data (irvd) file into birows->irow_irvd.
+ * Returns the result of bdrv_file_open().
+ */
+static int irow_open_vd(BDRVIrowState *birows, const char *filename, int flags) {
+	birows->irow_irvd = bdrv_new ("");
+	return bdrv_file_open(&birows->irow_irvd, filename, flags);
+}
+
+/*
+ * Open the btmp file and then the irvd file named by birows->opened_btmp_file
+ * and birows->irvd_file. Returns 0 on success and -1 when a name is missing
+ * or empty, or when either file cannot be opened.
+ */
+static int irow_open_data(BDRVIrowState *birows, int flags) {
+	const char *btmp_name = birows->opened_btmp_file;
+
+	if(btmp_name == NULL || btmp_name[0] == '\0') {
+		fprintf (stderr, "Void btmp file name\n");
+		return -1;
+	}
+	if(irow_open_btmp(birows, birows->opened_btmp_file, flags) < 0) {
+		fprintf (stderr, "Failed to open %s\n", birows->opened_btmp_file);
+		return -1;
+	}
+
+	if(birows->irvd_file == NULL || birows->irvd_file[0] == '\0') {
+		fprintf (stderr, "Void irvd file name\n");
+		return -1;
+	}
+	if(irow_open_vd(birows, birows->irvd_file, flags) < 0) {
+		fprintf (stderr, "Failed to open %s\n", birows->irvd_file);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up a snapshot by the name of its btmp file.
+ * Returns the index of the first entry whose btmp_file equals btmp, or -1
+ * when no entry matches. Entries without a btmp_file are skipped.
+ */
+static int irow_find_snapshot_by_btmp(BDRVIrowState *birows, const char *btmp) {
+	int idx = 0;
+
+	while(idx < birows->nb_snapshots) {
+		const char *candidate = birows->snapshots[idx].btmp_file;
+
+		if(candidate != NULL && strcmp(candidate, btmp) == 0) {
+			return idx;
+		}
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * Copy the file names and VM state size of snapshot #snapshot_index into
+ * birows (opened_btmp_file, irvd_file, optional father_btmp_file,
+ * vm_state_size).
+ *
+ * Returns 0 on success, -1 when the index is outside the snapshot table, the
+ * table is not loaded, or the snapshot lacks a btmp or irvd file name.
+ * Names are copied into zeroed MAX_FILE_NAME_LENGTH buffers, leaving the last
+ * byte untouched so that the result is always NUL-terminated.
+ */
+static int irow_load_info_from_snapshot(BDRVIrowState *birows, int snapshot_index) {
+	IRowSnapshot *snap;
+	int ret = 0;
+
+	/* Reject indexes on either side of the table, e.g. the -1 returned by
+	 * irow_find_snapshot_by_btmp() when nothing matches. */
+	if(snapshot_index < 0 ||
+	   (int64_t)snapshot_index >= (int64_t)birows->nb_snapshots ||
+	   birows->snapshots == NULL) {
+		fprintf (stderr, "Invalid snapshot index.\n");
+		ret = -1;
+		goto end;
+	}
+	snap = birows->snapshots + snapshot_index;
+	if(snap->btmp_file == NULL) {
+		fprintf (stderr, "Void btmp file name in snap info\n");
+		ret = -1;
+		goto end;
+	}
+	if(snap->irvd_file == NULL) {
+		fprintf (stderr, "Void irvd file name in snap info\n");
+		ret = -1;
+		goto end;
+	}
+	birows->opened_btmp_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+	birows->irvd_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+	strncpy(birows->opened_btmp_file, snap->btmp_file, MAX_FILE_NAME_LENGTH - 1);
+	strncpy(birows->irvd_file, snap->irvd_file, MAX_FILE_NAME_LENGTH - 1);
+	if(snap->father_btmp_file) {
+		birows->father_btmp_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+		strncpy(birows->father_btmp_file, snap->father_btmp_file, MAX_FILE_NAME_LENGTH - 1);
+	}
+	birows->vm_state_size = snap->vm_state_size;
+end:
+	return ret;
+}
+
+/*
+ * Build a secondary state for snapshot #snap_index so that its bitmap and
+ * data file can be read.
+ *
+ * The new state copies the geometry of birows, shares its irow_meta
+ * BlockDriverState, loads its own copy of the snapshot table and opens the
+ * btmp/irvd files of the requested snapshot with birows->open_flags.
+ *
+ * Returns the new state, or NULL on failure (everything allocated so far is
+ * released through irow_close_previous_state(), which does not delete the
+ * shared meta BlockDriverState).
+ */
+static BDRVIrowState *irow_open_previous_state(BDRVIrowState *birows, int snap_index) {
+	BDRVIrowState *new_birows = g_malloc0(sizeof(BDRVIrowState));
+
+	new_birows->cluster_size = birows->cluster_size;
+	new_birows->cluster_bits = birows->cluster_bits;
+	new_birows->total_clusters = birows->total_clusters;
+	new_birows->sectors_per_cluster = birows->sectors_per_cluster;
+	new_birows->disk_size = birows->disk_size;
+	new_birows->bitmap_size = birows->bitmap_size;
+	new_birows->current_btmp_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+	/* Bounded copy into the zeroed buffer: never overruns, always terminated. */
+	strncpy(new_birows->current_btmp_file, birows->current_btmp_file, MAX_FILE_NAME_LENGTH - 1);
+
+	new_birows->nb_snapshots = birows->nb_snapshots;
+	new_birows->irow_meta = birows->irow_meta;
+	/* On failure the snapshot table is NULL and must not be indexed below. */
+	if(irow_open_snapshots(new_birows) < 0) {
+		goto fail;
+	}
+
+	if(irow_load_info_from_snapshot(new_birows, snap_index) < 0) {
+		goto fail;
+	}
+	new_birows->open_flags = birows->open_flags;
+	if(irow_open_data(new_birows, new_birows->open_flags) < 0) {
+		goto fail;
+	}
+
+	return new_birows;
+
+fail:
+	irow_close_previous_state(new_birows);
+	return NULL;
+}
+
+/*
+ * Allocate the global ancestor-state cache with one zeroed slot per
+ * snapshot. Returns 0 on success and -1 when the allocation yields NULL.
+ */
+static int irow_init_birows_cache(BDRVIrowState *birows) {
+	birows_cache = g_malloc0(sizeof(BDRVIrowState *) * birows->nb_snapshots);
+
+	return (birows_cache != NULL) ? 0 : -1;
+}
+
+/*
+ * Open an iROW image: load the meta file named by bs->filename, locate the
+ * snapshot that represents the current state, open its btmp/irvd files and
+ * allocate the ancestor-state cache.
+ *
+ * Returns 0 on success. On any failure irow_close() is called and -1 is
+ * returned.
+ */
+static int irow_open(BlockDriverState *bs, int flags) {
+	BDRVIrowState *state = bs->opaque;
+	int current_index;
+	int opened = 0;
+
+	state->open_flags = flags;
+
+	do {
+		if(irow_open_meta(bs, state, bs->filename, flags) < 0) {
+			fprintf (stderr, "Failed to open %s\n", bs->filename);
+			break;
+		}
+
+		current_index = irow_find_snapshot_by_btmp(state, state->current_btmp_file);
+		if(irow_load_info_from_snapshot(state, current_index) < 0) {
+			fprintf (stderr, "Failed to load filename from snapshot\n");
+			break;
+		}
+
+		if(irow_open_data(state, flags) < 0) {
+			break;
+		}
+
+		if(irow_init_birows_cache(state) < 0) {
+			fprintf (stderr, "Failed to create birows_cache\n");
+			break;
+		}
+
+		opened = 1;
+	} while(0);
+
+	if(opened) {
+		return 0;
+	}
+
+	irow_close (bs);
+	return -1;
+}
+
+/*
+ * Return the bitmap bit of a cluster: 1 when the cluster is present in this
+ * snapshot's data file, 0 otherwise. Cluster n is bit (n & 7) of byte (n >> 3).
+ * The index is not range-checked here.
+ */
+static int irow_get_bit(BDRVIrowState *birows, int64_t cluster_index) {
+	int64_t byte_pos = cluster_index >> 3;
+	int mask = 1 << (cluster_index & 0x7);
+
+	return (birows->bitmap[byte_pos] & mask) != 0;
+}
+
+/*
+ * Mark a cluster as present in this snapshot. The bitmap is flagged dirty
+ * only when the bit actually changes. If the global cluster cache currently
+ * holds this cluster, the cache is invalidated.
+ */
+static void irow_set_bit(BDRVIrowState *birows, int64_t cluster_index) {
+	int64_t byte_pos = cluster_index >> 3;
+	int mask = 1 << (cluster_index & 0x7);
+
+	if(cluster_cache != NULL && cluster_cache->cluster_num == cluster_index) {
+		cluster_cache->cluster_num = -1;
+	}
+
+	if((birows->bitmap[byte_pos] & mask) == 0) {
+		birows->bitmap[byte_pos] |= mask;
+		birows->bitmap_is_dirty = 1;
+	}
+}
+
+/*
+ * Fill buf with the clusters [start_cluster, start_cluster + nb_clusters)
+ * as seen from the state `birows`, recursing into ancestor snapshots.
+ *
+ * bs          top-level device; only its backing_hd/backing_file are used
+ * birows      state whose bitmap is consulted (current image or an ancestor)
+ * buf         destination; cluster k of the range goes to
+ *             buf + (buf_start + k) * cluster_size
+ * buf_bitmap  one byte per buffer slot; set to 1 for every cluster whose bit
+ *             is 0 in `birows` (it is never cleared here)
+ * buf_start   index of the first buffer slot used by this call
+ *
+ * The range is split into runs of missing (bit 0) and present (bit 1)
+ * clusters:
+ *  - a present run is read from this state's irvd file, but only when this
+ *    state is not the current image (opened_btmp_file != current_btmp_file);
+ *    the first cluster of the run may come from the global cluster cache and
+ *    the last cluster of the run is stored into that cache afterwards;
+ *  - a missing run is delegated to the father snapshot (opened on demand into
+ *    birows_cache) or, when there is no father, read from bs->backing_hd,
+ *    clipped to the backing image length.
+ *
+ * Returns 0 on success and -1 on failure.
+ *
+ * NOTE(review): irow_find_snapshot_by_btmp() may return -1, which is used as
+ * an index into birows_cache without a check.
+ * NOTE(review): the result of the recursive call is stored in ret but not
+ * tested, so a later successful step can overwrite an earlier failure.
+ * NOTE(review): `drv` is assigned but never read.
+ */
+static int irow_read_missing_clusters2(BlockDriverState *bs, BDRVIrowState *birows, int64_t start_cluster, int64_t nb_clusters, uint8_t *buf, uint8_t *buf_bitmap, uint64_t buf_start) {
+	int64_t continuous_missing_clusters, continuous_appearing_clusters, i, cluster_index, buf_index;
+	int64_t backing_len, backing_sector_num, backing_nb_sectors;
+	uint8_t *backing_buf;
+	int snap_index, ret = 0;
+	BlockDriver *drv;
+
+	continuous_missing_clusters = 0;
+	continuous_appearing_clusters = 0;
+	for(i = 0; i < nb_clusters; i++) {
+		if(irow_get_bit(birows, start_cluster + i) == 0) {
+			/* Cluster is absent from this state: extend the missing run. */
+			buf_bitmap[buf_start + i] = 1;
+			continuous_missing_clusters += 1;
+			if(continuous_appearing_clusters != 0) {
+				/* A present run just ended at cluster i - 1: flush it. */
+				if(strcmp(birows->current_btmp_file, birows->opened_btmp_file) != 0) {
+					cluster_index = start_cluster + i - continuous_appearing_clusters;
+					buf_index = buf_start + i - continuous_appearing_clusters;
+					if(cluster_cache != NULL) {
+						if(cluster_cache->cache != NULL) {
+							if(cluster_index == cluster_cache->cluster_num) {
+								/* First cluster of the run is cached: copy it and shrink the run. */
+								memcpy(buf + buf_index * birows->cluster_size, cluster_cache->cache, birows->cluster_size);
+								cluster_index += 1;
+								buf_index += 1;
+								continuous_appearing_clusters -= 1;
+								if(continuous_appearing_clusters == 0) {
+									continue;
+								}
+							}
+						}
+					}
+					drv = birows->irow_irvd->drv;
+					if(bdrv_read(birows->irow_irvd,
+								cluster_index * birows->sectors_per_cluster,
+								buf + buf_index * birows->cluster_size,
+								continuous_appearing_clusters * birows->sectors_per_cluster) < 0) {
+							fprintf(stderr, "Failed to read clusters from %s\n", birows->irvd_file);
+							ret = -1;
+							goto end;
+						}
+					if(cluster_cache != NULL) {
+						if(cluster_cache->cache != NULL) {
+							/* Remember the last cluster of the run (cluster i - 1). */
+							memcpy(cluster_cache->cache, buf + (buf_start + i - 1) * birows->cluster_size, birows->cluster_size);
+							cluster_cache->cluster_num = start_cluster + i - 1;
+						}
+					}
+					}
+				continuous_appearing_clusters = 0;
+			}
+		} else {
+			/* Cluster is present in this state: extend the present run. */
+			continuous_appearing_clusters += 1;
+			if(continuous_missing_clusters != 0) {
+				/* A missing run just ended at cluster i - 1: resolve it. */
+				if(birows->father_btmp_file != NULL) {
+					snap_index = irow_find_snapshot_by_btmp(birows, birows->father_btmp_file);
+					if(birows_cache[snap_index] == NULL) {
+						/* Open the father state on first use and keep it cached. */
+						birows_cache[snap_index] = irow_open_previous_state(birows, snap_index);
+						if(birows_cache[snap_index] == NULL) {
+							ret = -1;
+							goto end;
+						}
+					}
+					ret = irow_read_missing_clusters2(bs,
+																birows_cache[snap_index],
+																start_cluster + i - continuous_missing_clusters,
+																continuous_missing_clusters,
+																buf,
+																buf_bitmap,
+																buf_start + i - continuous_missing_clusters);
+
+				} else {
+					/* Root of the snapshot chain: fall back to the backing image, if any. */
+					if(bs->backing_hd) {
+					    backing_len = bdrv_getlength(bs->backing_hd) / 512;
+					    backing_sector_num = (start_cluster + i - continuous_missing_clusters) * birows->sectors_per_cluster;
+					    backing_nb_sectors = continuous_missing_clusters * birows->sectors_per_cluster;
+					    backing_buf = buf + (buf_start + i - continuous_missing_clusters) * birows->cluster_size;
+					    if(backing_sector_num < backing_len) {
+					    	/* Do not read past the end of the backing image. */
+					    	if(backing_nb_sectors  > backing_len - backing_sector_num) {
+					    		backing_nb_sectors = backing_len - backing_sector_num;
+					    	}
+					    	if(bdrv_read(bs->backing_hd, backing_sector_num, backing_buf, backing_nb_sectors)<0) {
+					    		fprintf(stderr, "failed to read base image: %s\n", bs->backing_file);
+					    		ret = -1;
+								goto end;
+					    	}
+					    }
+					}
+				}
+				continuous_missing_clusters = 0;
+			}
+		}
+	}
+	/* Flush a missing run that reaches the end of the range (i == nb_clusters here). */
+	if(continuous_missing_clusters != 0) {
+		if(birows->father_btmp_file != NULL) {
+			snap_index = irow_find_snapshot_by_btmp(birows, birows->father_btmp_file);
+			if(birows_cache[snap_index] == NULL) {
+				birows_cache[snap_index] = irow_open_previous_state(birows, snap_index);
+				if(birows_cache[snap_index] == NULL) {
+					ret = -1;
+					goto end;
+				}
+			}
+			ret = irow_read_missing_clusters2(bs,
+														birows_cache[snap_index],
+														start_cluster + i - continuous_missing_clusters,
+														continuous_missing_clusters,
+														buf,
+														buf_bitmap,
+														buf_start + i - continuous_missing_clusters);
+
+		} else {
+			if(bs->backing_hd) {
+			    backing_len = bdrv_getlength(bs->backing_hd) / 512;
+			    backing_sector_num = (start_cluster + i - continuous_missing_clusters) * birows->sectors_per_cluster;
+			    backing_nb_sectors = continuous_missing_clusters * birows->sectors_per_cluster;
+			    backing_buf = buf + (buf_start + i - continuous_missing_clusters) * birows->cluster_size;
+			    if(backing_sector_num  < backing_len) {
+			    	if(backing_nb_sectors > backing_len - backing_sector_num) {
+			    		backing_nb_sectors = backing_len - backing_sector_num;
+			    	}
+			    	if(bdrv_read(bs->backing_hd, backing_sector_num, backing_buf, backing_nb_sectors)<0) {
+			    		fprintf(stderr, "failed to read base image: %s\n", bs->backing_file);
+			    		ret = -1;
+						goto end;
+			    	}
+			    }
+			}
+		}
+		continuous_missing_clusters = 0;
+	}
+
+	/* Flush a present run that reaches the end of the range. */
+	if(continuous_appearing_clusters != 0) {
+		if(strcmp(birows->current_btmp_file, birows->opened_btmp_file) != 0) {
+			cluster_index = start_cluster + i - continuous_appearing_clusters;
+			buf_index = buf_start + i - continuous_appearing_clusters;
+			if(cluster_cache != NULL) {
+				if(cluster_cache->cache != NULL) {
+
+					if(cluster_index == cluster_cache->cluster_num) {
+						memcpy(buf + buf_index * birows->cluster_size, cluster_cache->cache, birows->cluster_size);
+						cluster_index += 1;
+						buf_index += 1;
+						continuous_appearing_clusters -= 1;
+						if(continuous_appearing_clusters == 0) {
+							goto end;
+						}
+					}
+				}
+			}
+			drv = birows->irow_irvd->drv;
+			if(bdrv_read(birows->irow_irvd,
+						cluster_index * birows->sectors_per_cluster,
+						buf + buf_index * birows->cluster_size,
+						continuous_appearing_clusters * birows->sectors_per_cluster) < 0) {
+					fprintf(stderr, "Failed to read clusters from %s\n", birows->irvd_file);
+					ret = -1;
+				}
+			/* NOTE(review): unlike the in-loop path there is no goto on read
+			 * failure, so the cache is refreshed even when the read failed. */
+			if(cluster_cache != NULL) {
+				if(cluster_cache->cache != NULL) {
+					memcpy(cluster_cache->cache, buf + (buf_start + i - 1) * birows->cluster_size, birows->cluster_size);
+					cluster_cache->cluster_num = start_cluster + i - 1;
+				}
+			}
+		}
+		continuous_appearing_clusters = 0;
+	}
+
+end:
+	return ret;
+}
+
+/*
+ * Fetch clusters that the current image does not hold, on behalf of a read
+ * or a write.
+ *
+ * is_read != 0: the whole range first_cluster..last_cluster is resolved into
+ *               buf, starting at buffer slot 0.
+ * is_read == 0: only the two edge clusters are resolved - first_cluster into
+ *               slot 0 and, when different, last_cluster into slot 1.
+ *
+ * Returns -1 when either cluster index is not below total_clusters,
+ * otherwise the result of irow_read_missing_clusters2().
+ */
+static int irow_read_missing_clusters(BlockDriverState *bs, int64_t first_cluster, int64_t last_cluster, uint8_t *buf, uint8_t *buf_bitmap, int is_read) {
+	BDRVIrowState *state = bs->opaque;
+	int rc;
+
+	if(first_cluster >= state->total_clusters) {
+		fprintf (stderr, "Invalid first_cluster!\n");
+		return -1;
+	}
+	if(last_cluster >= state->total_clusters) {
+		fprintf (stderr, "Invalid last_cluster!\n");
+		return -1;
+	}
+
+	if(is_read) {
+		int64_t span = last_cluster - first_cluster + 1;
+
+		return irow_read_missing_clusters2(bs, state, first_cluster, span, buf, buf_bitmap, 0);
+	}
+
+	rc = irow_read_missing_clusters2(bs, state, first_cluster, 1, buf, buf_bitmap, 0);
+	if(rc < 0 || first_cluster == last_cluster) {
+		return rc;
+	}
+	return irow_read_missing_clusters2(bs, state, last_cluster, 1, buf, buf_bitmap, 1);
+}
+
+/*
+ * Write nb_clusters whole clusters from buf to the irvd file, starting at
+ * cluster_index. Data is stored at the same offset as its virtual address.
+ *
+ * Returns -1 when the range does not lie inside [0, total_clusters),
+ * otherwise the result of bdrv_write(). The bitmap is not updated here.
+ */
+static int irow_write_clusters(BDRVIrowState *birows, int64_t cluster_index, const uint8_t *buf, int nb_clusters) {
+	int ret = 0;
+
+	/* The explicit sign test keeps a negative index from reaching bdrv_write(). */
+	if(cluster_index < 0 || cluster_index >= birows->total_clusters) {
+			fprintf (stderr, "Invalid cluster_index!\n");
+		ret  = -1;
+		goto end;
+	}
+	if((cluster_index + nb_clusters -1) >= birows->total_clusters) {
+			fprintf (stderr, "Invalid cluster_index or nb_clusters!\n");
+		ret = -1;
+		goto end;
+	}
+	ret = bdrv_write(birows->irow_irvd, birows->sectors_per_cluster * cluster_index, buf, birows->sectors_per_cluster * nb_clusters);
+
+end:
+	return ret;
+}
+
+/* Sector number of the first sector belonging to cluster_index. */
+static int64_t first_sector_in_cluster(BDRVIrowState *birows, int64_t cluster_index) {
+	int64_t first = cluster_index * birows->sectors_per_cluster;
+
+	return first;
+}
+
+/* Sector number of the last sector belonging to cluster_index. */
+static int64_t last_sector_in_cluster(BDRVIrowState *birows, int64_t cluster_index) {
+	/* One sector before the start of the following cluster. */
+	int64_t next_cluster_start = (cluster_index + 1) * birows->sectors_per_cluster;
+
+	return next_cluster_start - 1;
+}
+
+/*
+ * Make sure the clusters touched by a request are usable before the actual
+ * I/O on the current image takes place.
+ *
+ * bs          device whose opaque state is the current iROW image
+ * cbuf        scratch buffer: cbuf->buf receives cluster data fetched from
+ *             ancestors, cbuf->read_from_father[k] is set to 1 for buffer
+ *             slot k when that cluster is missing from the current image
+ * sector_num, nb_sectors  the request range
+ * op_type     IROW_READ / IROW_AIO_READ or IROW_WRITE / IROW_AIO_WRITE;
+ *             any other value does nothing
+ *
+ * Reads: every missing cluster of the range is fetched into cbuf->buf; with
+ * copy_on_demand enabled the fetched clusters are also written to the
+ * current irvd file and their bits are set.
+ * Writes: only clusters that the request covers partially (at its first
+ * and/or last cluster) are fetched, written to the current irvd file and
+ * marked present, so that the following partial write lands on valid data.
+ *
+ * Returns 0 on success and -1 on failure.
+ *
+ * NOTE(review): read_from_father is only ever set to 1 by the helpers, so it
+ * presumably has to be zeroed by the caller - confirm.
+ */
+static int irow_assert_clusters(BlockDriverState *bs, ClusterBuffer *cbuf, int64_t sector_num, int nb_sectors, int op_type) {
+	BDRVIrowState *birows = bs->opaque;
+	int64_t nb_clusters, i, first_cluster, last_cluster, continuous_cluster, cluster_offset;
+	uint8_t *buffer_offset;// *zero_buf = NULL;
+	int ret = 0;
+
+	first_cluster = sector_num / birows->sectors_per_cluster;
+	last_cluster = (sector_num + nb_sectors - 1) / birows->sectors_per_cluster;
+	nb_clusters = last_cluster - first_cluster + 1;
+
+	switch(op_type) {
+	case IROW_READ:
+	case IROW_AIO_READ:
+		if(irow_read_missing_clusters(bs, first_cluster, last_cluster, cbuf->buf, cbuf->read_from_father, 1) < 0) {
+			ret = -1;
+			goto end;
+		}
+
+		if(birows->copy_on_demand) {
+			/* Write each run of fetched clusters to the current image.
+			 * The loop runs one slot past the range so that a run ending at
+			 * the last cluster is flushed by the extra iteration.
+			 * NOTE(review): this reads read_from_father[nb_clusters]; it
+			 * presumably must exist and be 0 - confirm against the allocation. */
+			continuous_cluster = 0;
+			for(i = 0; i < nb_clusters + 1; i++) {
+				if(cbuf->read_from_father[i] == 0) {
+					if(continuous_cluster == 0)
+						continue;
+					cluster_offset = first_cluster + i - continuous_cluster;
+					buffer_offset = cbuf->buf + (i - continuous_cluster) * birows->cluster_size;
+					if(irow_write_clusters(birows, cluster_offset, buffer_offset, continuous_cluster) < 0) {
+						ret = -1;
+						goto end;
+					}
+					continuous_cluster = 0;
+				} else {
+					/* The bit is set before the run is written out above. */
+					continuous_cluster += 1;
+					irow_set_bit(birows, first_cluster + i);
+				}
+			}
+		}
+		break;
+	case IROW_WRITE:
+	case IROW_AIO_WRITE:
+		if(sector_num == first_sector_in_cluster(birows, first_cluster)) {
+			if((sector_num + nb_sectors - 1) == last_sector_in_cluster(birows, last_cluster)) {
+				/* Request is cluster-aligned at both ends: nothing to fetch. */
+				break;
+			} else {
+				/* Aligned start, partial last cluster: fetch the last cluster into slot 0. */
+				if(irow_read_missing_clusters(bs, last_cluster, last_cluster, cbuf->buf, cbuf->read_from_father, 0) < 0) {
+					ret = -1;
+					goto end;
+				}
+				if(cbuf->read_from_father[0] == 1) {
+					if(irow_write_clusters(birows, last_cluster , cbuf->buf, 1) < 0) {
+						ret = -1;
+						goto end;
+					}
+					irow_set_bit(birows, last_cluster);
+				}
+				break;
+			}
+		} else {
+			if((sector_num + nb_sectors - 1) == last_sector_in_cluster(birows, last_cluster)) {
+				/* Partial first cluster, aligned end: fetch the first cluster into slot 0. */
+				if(irow_read_missing_clusters(bs, first_cluster, first_cluster, cbuf->buf, cbuf->read_from_father, 0) < 0) {
+					ret = -1;
+					goto end;
+				}
+				if(cbuf->read_from_father[0] == 1) {
+					if(irow_write_clusters(birows, first_cluster , cbuf->buf, 1) < 0) {
+						ret = -1;
+						goto end;
+					}
+					irow_set_bit(birows, first_cluster);
+				}
+				break;
+			} else {
+				/* Partial at both ends: first cluster goes to slot 0, last cluster to slot 1.
+				 * NOTE(review): when first_cluster == last_cluster only slot 0 is
+				 * filled by the helper, yet slot 1 is still tested below. */
+				if(irow_read_missing_clusters(bs, first_cluster, last_cluster, cbuf->buf, cbuf->read_from_father, 0) < 0) {
+					ret = -1;
+					goto end;
+				}
+				if(cbuf->read_from_father[0] == 1) {
+					if(irow_write_clusters(birows, first_cluster, cbuf->buf, 1) < 0) {
+						ret = -1;
+						goto end;
+					}
+					irow_set_bit(birows, first_cluster);
+				}
+				if(cbuf->read_from_father[1] == 1) {
+					if(irow_write_clusters(birows, last_cluster, cbuf->buf + birows->cluster_size, 1) < 0) {
+						ret = -1;
+						goto end;
+					}
+					irow_set_bit(birows, last_cluster);
+				}
+				break;
+			}
+		}
+	}
+
+end:
+	return ret;
+}
+
+/*
+ * Synchronous read: copy nb_sectors sectors starting at sector_num into buf.
+ *
+ * Whole clusters covering the request are read from the current irvd file into
+ * a bounce buffer; the first cluster is taken from the global cluster_cache
+ * instead when it is the cached one.  If the image is not complete,
+ * irow_assert_clusters() is run in IROW_READ mode and each cluster it flags in
+ * read_from_father is copied from cbuf over the matching part of buf.
+ *
+ * Returns a negative value on failure, otherwise 0 or the bdrv_read() result.
+ */
+static int irow_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) {
+	BDRVIrowState *s = bs->opaque;
+	int64_t first_cluster, last_cluster, nb_clusters, sector_index, cluster_index, buf_offset, temp_buf_offset, temp_buf_index;
+	int first_cluster_copied = 0;
+	ClusterBuffer cbuf;
+	int remain_sectors, cbuf_offset, len, ret = 0;
+	uint8_t *temp_buf = NULL;
+
+	first_cluster = sector_num / s->sectors_per_cluster;
+	last_cluster = (sector_num + nb_sectors - 1) / s->sectors_per_cluster;
+	nb_clusters = last_cluster - first_cluster + 1;
+	temp_buf_offset = (sector_num & (s->sectors_per_cluster - 1)) * BDRV_SECTOR_SIZE;
+	temp_buf_index = 0;
+	cbuf.buf = NULL;
+	cbuf.read_from_father = NULL;
+
+	if(first_cluster >= s->total_clusters) {
+		fprintf (stderr, "Invalid sector_num.\n");
+		ret = -1;
+		goto end;
+	}
+	if(last_cluster >= s->total_clusters) {
+		fprintf (stderr, "Invalid nb_sectors.\n");
+		ret = -1;
+		goto end;
+	}
+
+	temp_buf = qemu_memalign(512, nb_clusters * s->cluster_size);
+	if(temp_buf == NULL) {
+		fprintf (stderr, "Failed to create temp_buf.\n");
+		ret = -1;
+		goto end;
+	}
+	/* Zero the buffer only after the NULL check above. */
+	memset(temp_buf, 0, nb_clusters * s->cluster_size);
+	if(cluster_cache != NULL && cluster_cache->cache != NULL
+			&& first_cluster == cluster_cache->cluster_num) {
+		/* First cluster is cached: take it from there and read only the rest. */
+		memcpy(temp_buf, cluster_cache->cache, s->cluster_size);
+		first_cluster_copied = 1;
+		first_cluster += 1;
+		nb_clusters -= 1;
+		temp_buf_index += 1;
+	}
+
+	if(nb_clusters != 0) {
+		ret = bdrv_read(s->irow_irvd, first_cluster * s->sectors_per_cluster, temp_buf + temp_buf_index * s->cluster_size,  nb_clusters * s->sectors_per_cluster);
+		if(ret < 0) {
+			goto end;
+		}
+	}
+
+	memcpy(buf, temp_buf + temp_buf_offset, nb_sectors * BDRV_SECTOR_SIZE);
+
+	if(nb_clusters != 0) {
+		if(first_cluster_copied) {
+			/* Undo the cache adjustment so the range covers the whole request again. */
+			first_cluster -= 1;
+			nb_clusters += 1;
+		}
+		/* Cache the last cluster of the request when its bitmap bit is set. */
+		if(cluster_cache != NULL && cluster_cache->cache != NULL && irow_get_bit(s, last_cluster)) {
+			memcpy(cluster_cache->cache, temp_buf + (nb_clusters - 1) * s->cluster_size, s->cluster_size);
+			cluster_cache->cluster_num = last_cluster;
+		}
+
+		if(s->complete_image != 1) {
+			cbuf.buf = qemu_memalign(512, nb_clusters * s->cluster_size);
+			memset(cbuf.buf, 0, nb_clusters * s->cluster_size);
+			cbuf.read_from_father = g_malloc0(nb_clusters  + 1);
+
+			if(irow_assert_clusters(bs, &cbuf, first_sector_in_cluster(s, first_cluster), nb_clusters * s->sectors_per_cluster, IROW_READ) < 0) {
+				fprintf (stderr, "irow_assert_clusters() failed.\n");
+				ret = -1;
+				goto end;
+			}
+
+			irow_update_btmp(s);
+
+			sector_index = sector_num;
+			remain_sectors = nb_sectors;
+			buf_offset = 0;
+			/* Walk the request cluster by cluster, patching in the flagged clusters. */
+			while(remain_sectors > 0) {
+				cluster_index = sector_index / s->sectors_per_cluster;
+				len = last_sector_in_cluster(s, cluster_index) - sector_index + 1;
+				if(len > remain_sectors)
+					len = remain_sectors;
+
+				if(cbuf.read_from_father[cluster_index - first_cluster] == 1) {
+					cbuf_offset = (sector_index & (s->sectors_per_cluster - 1)) + (cluster_index - first_cluster) * s->sectors_per_cluster;
+					memcpy(buf + buf_offset, cbuf.buf + cbuf_offset * BDRV_SECTOR_SIZE, len * BDRV_SECTOR_SIZE);
+				}
+				sector_index = first_sector_in_cluster(s, cluster_index + 1);
+				remain_sectors -= len;
+				buf_offset += len * BDRV_SECTOR_SIZE;
+			}
+		}
+	}
+
+end:
+	/* Memory from qemu_memalign() is released with qemu_vfree(), not g_free(). */
+	if(cbuf.buf != NULL) {
+		qemu_vfree(cbuf.buf);
+	}
+	g_free(cbuf.read_from_father);
+	if(temp_buf != NULL) {
+		qemu_vfree(temp_buf);
+	}
+	return ret;
+}
+
+/*
+ * Synchronous write of nb_sectors sectors from buf at sector_num.
+ * On an incomplete image irow_assert_clusters() is first run in IROW_WRITE mode
+ * for the request.  Then the bitmap bit of every touched cluster is set, the
+ * data is written to the current irvd file and the btmp file is updated.
+ * Returns a negative value on failure, otherwise the bdrv_write() result.
+ */
+static int irow_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) {
+	BDRVIrowState *s = bs->opaque;
+	int64_t first_cluster, last_cluster, current_cluster;
+	ClusterBuffer cbuf;
+	int ret = 0;
+
+	/* Set before the first "goto end": the cleanup code inspects both members. */
+	cbuf.buf = NULL;
+	cbuf.read_from_father = NULL;
+	first_cluster = sector_num / s->sectors_per_cluster;
+	last_cluster = (sector_num + nb_sectors - 1) / s->sectors_per_cluster;
+
+	if(first_cluster >= s->total_clusters) {
+		fprintf (stderr, "Invalid sector_num!\n");
+		ret = -1;
+		goto end;
+	}
+	if(last_cluster >= s->total_clusters) {
+		fprintf (stderr, "Invalid nb_sectors!\n");
+		ret = -1;
+		goto end;
+	}
+
+	if(s->complete_image != 1) {
+		cbuf.buf = qemu_memalign(512, 2 * s->cluster_size);
+		memset(cbuf.buf, 0, 2 * s->cluster_size);
+		cbuf.read_from_father = g_malloc0(2);
+		if(irow_assert_clusters(bs, &cbuf, sector_num, nb_sectors, IROW_WRITE) < 0) {
+			ret = -1;
+			goto end;
+		}
+	}
+
+	for(current_cluster = first_cluster; current_cluster <= last_cluster; current_cluster++) {
+		irow_set_bit(s, current_cluster);
+	}
+
+	ret = bdrv_write(s->irow_irvd, sector_num, buf, nb_sectors);
+	if(ret < 0) {
+		goto end;
+	}
+
+	if(irow_update_btmp(s) < 0) {
+		fprintf (stderr, "Failed to update btmp file. (%s)\n", s->opened_btmp_file);
+		ret = -1;
+		goto end;
+	}
+
+end:
+	if(cbuf.buf != NULL) {
+		qemu_vfree(cbuf.buf); /* allocated with qemu_memalign() */
+	}
+	g_free(cbuf.read_from_father);
+	return ret;
+}
+
+/* Build "<prefix>-<body>.<suffix>" in dest; returns 0, or -1 when the result would not fit in MAX_FILE_NAME_LENGTH bytes. */
+static int irow_generate_filename(char *dest, const char *prefix, const char *body, const char *suffix) {
+	size_t needed = strlen(prefix) + strlen(body) + strlen(suffix) + 2; /* +2 for '-' and '.' */
+
+	if(needed >= MAX_FILE_NAME_LENGTH) {
+		fprintf(stderr, "Invalid filename length, max is %d\n", MAX_FILE_NAME_LENGTH);
+		return -1;
+	}
+	/* Length checked above; assumes dest provides MAX_FILE_NAME_LENGTH bytes -- TODO confirm at call sites. */
+	sprintf(dest, "%s-%s.%s", prefix, body, suffix);
+	return 0;
+}
+
+/*
+ * Create the meta file cs->meta_file: an IRowMeta header followed by a single
+ * IRowSnapshotHeader describing the initial "current state" snapshot.
+ *
+ * Validates cs->disk_size and cs->cluster_size, stores the derived
+ * cs->cluster_bits and fills cs->btmp_file / cs->irvd_file with the generated
+ * names of the first bitmap and data files.  Returns 0 on success, -1 on error.
+ */
+static int irow_create_meta(IRowCreateState *cs) {
+	IRowMeta meta;
+	IRowSnapshotHeader snap_header;
+	uint32_t cluster_size, copy_on_demand;
+	uint64_t disk_size;
+	qemu_timeval tv;
+	int fd = -1, cluster_bits, ret = 0; /* fd < 0: nothing to close at "end" */
+
+	if(cs->disk_size == 0) {
+		fprintf(stderr, "Invalid disk_size\n");
+		ret = -1;
+		goto end;
+	}
+	disk_size = cs->disk_size;
+
+	if(cs->cluster_size == 0) {
+		fprintf(stderr, "Invalid cluster_size\n");
+		ret = -1;
+		goto end;
+	}
+	cluster_size = cs->cluster_size;
+
+	cluster_bits = get_bits_from_size(cluster_size);
+	cs->cluster_bits = cluster_bits;
+	if ((cluster_bits < MIN_CLUSTER_BITS) || (cluster_bits > MAX_CLUSTER_BITS)) {
+		fprintf(stderr, "Cluster size must be a power of two between %d and %dk\n",
+			1 << MIN_CLUSTER_BITS,
+			1 << (MAX_CLUSTER_BITS - 10));
+		ret = -1;
+		goto end;
+	}
+	copy_on_demand = cs->copy_on_demand;
+	if(cs->meta_file[0] == '\0') {
+		fprintf(stderr, "Void meta file name\n");
+		ret = -1;
+		goto end;
+	}
+	fd = open(cs->meta_file, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+	if (fd < 0) {
+		fprintf(stderr, "Can not open %s\n", cs->meta_file);
+		ret = -1;
+		goto end;
+	}
+	memset(&meta, 0, sizeof(meta));
+	meta.magic = cpu_to_be32(IROW_MAGIC);
+	meta.version = cpu_to_be32(IROW_VERSION);
+	meta.copy_on_demand = cpu_to_be32(copy_on_demand);
+	meta.cluster_size = cpu_to_be32(cluster_size);
+	meta.cluster_bits = cpu_to_be32(cluster_bits);
+	meta.total_clusters = cpu_to_be64((disk_size + cluster_size -1) >> cluster_bits);
+	meta.sectors_per_cluster = cpu_to_be32(cluster_size >> BDRV_SECTOR_BITS);
+	meta.disk_size = cpu_to_be64(disk_size);
+	meta.nb_snapshots = cpu_to_be32(1);
+
+	if(irow_generate_filename(meta.current_btmp, cs->meta_file, cs->time_value, "btmp") < 0) {
+		ret = -1;
+		goto end;
+	}
+	if(irow_generate_filename(cs->irvd_file, cs->meta_file, cs->time_value, "irvd") < 0) {
+		ret = -1;
+		goto end;
+	}
+	if(cs->backing_file != NULL) {
+		strncpy(meta.backing_file, cs->backing_file, MAX_FILE_NAME_LENGTH);
+	}
+	strncpy(cs->btmp_file, meta.current_btmp, MAX_FILE_NAME_LENGTH);
+
+	memset(&snap_header, 0, sizeof(snap_header));
+	snap_header.snap_magic = cpu_to_be32(IROW_SNAPHEADER_MAGIC);
+	sprintf(snap_header.id_str, "0");
+	sprintf(snap_header.name, "current state");
+	strncpy(snap_header.btmp_file, cs->btmp_file, MAX_FILE_NAME_LENGTH);
+	strncpy(snap_header.irvd_file, cs->irvd_file, MAX_FILE_NAME_LENGTH);
+	qemu_gettimeofday(&tv);
+	snap_header.date_sec = tv.tv_sec;
+	snap_header.date_nsec = tv.tv_usec * 1000;
+	snap_header.nb_children = 0;
+	snap_header.is_deleted = 0;
+
+	/* A short write leaves a corrupt meta file, so it counts as a failure too. */
+	if(write(fd, &meta, sizeof(meta)) != (ssize_t)sizeof(meta)
+			|| write(fd, &snap_header, sizeof(snap_header)) != (ssize_t)sizeof(snap_header)) {
+		ret = -1;
+	}
+end:
+	/* Single exit point: the descriptor is closed on success and on every error path. */
+	if(fd >= 0 && close(fd) != 0) {
+		ret = -1;
+	}
+	return ret;
+}
+
+/*
+ * Create cs->btmp_file holding an all-zero bitmap with one bit per cluster of
+ * the disk (cs->disk_size rounded up to whole clusters).  Returns 0 or -1.
+ */
+static int irow_create_btmp(IRowCreateState *cs) {
+	char *bitmap = NULL;
+	int fd = -1, bitmap_size, ret = 0;
+
+	if(cs->btmp_file[0] == '\0') {
+		fprintf(stderr, "Void btmp file name\n");
+		ret = -1;
+		goto end;
+	}
+	fd = open(cs->btmp_file, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+	if(fd < 0) {
+		fprintf(stderr, "Can not open %s\n", cs->btmp_file);
+		ret = -1;
+		goto end;
+	}
+
+	bitmap_size = (((cs->disk_size + cs->cluster_size - 1) >> cs->cluster_bits) + 7) >> 3;
+	bitmap = g_malloc0(bitmap_size);
+	/* A short write would leave a truncated bitmap, so only a full write counts. */
+	if(write(fd, bitmap, bitmap_size) != bitmap_size) {
+		ret = -1;
+	}
+
+end:
+	/* Closed here so the descriptor is not leaked when the write fails. */
+	if(fd >= 0 && close(fd) != 0) {
+		ret = -1;
+	}
+	g_free(bitmap);
+	return ret;
+}
+
+/*
+ * Create the data file cs->irvd_file and size it to cs->disk_size bytes.
+ * Space reservation with fallocate() is attempted but its failure is ignored;
+ * only open/ftruncate/close failures make the function return -1 (0 otherwise).
+ */
+static int irow_create_vd(IRowCreateState *cs) {
+	int fd, ret = 0;
+
+	if(cs->irvd_file[0] == '\0') {
+		fprintf(stderr, "Void irvd file name\n");
+		return -1;
+	}
+
+	fd = open(cs->irvd_file, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+	if(fd < 0) {
+		fprintf(stderr, "Can not open %s\n", cs->irvd_file);
+		return -1;
+	}
+	/* Best effort only: the outcome of fallocate() is deliberately discarded. */
+	(void) fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, cs->disk_size);
+	if (ftruncate(fd, cs->disk_size) != 0) {
+		fprintf(stderr, "Can not truncate %s to %" PRId64 " bytes\n", cs->irvd_file, cs->disk_size);
+		ret = -1;
+	}
+	if (close(fd) != 0) {
+		ret = -1;
+	}
+
+	return ret;
+}
+
+/* Allocate a zeroed IRowCreateState with five MAX_FILE_NAME_LENGTH-byte name buffers; time_value gets the current time in hex. */
+static IRowCreateState *irow_create_state_new(void) {
+	qemu_timeval now;
+	IRowCreateState *state = g_malloc0(sizeof(*state));
+	state->meta_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+	state->btmp_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+	state->irvd_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+	state->time_value = g_malloc0(MAX_FILE_NAME_LENGTH);
+	state->father_btmp_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+
+	qemu_gettimeofday(&now);
+	sprintf(state->time_value, "%lx%lx", now.tv_sec, now.tv_usec);
+	return state;
+}
+
+/*
+ * Release an IRowCreateState together with the five name buffers it owns.
+ * g_free() ignores NULL, so members that were never allocated need no guard.
+ * cs itself must not be NULL.
+ */
+static void irow_create_state_delete(IRowCreateState *cs) {
+	g_free(cs->meta_file);
+	g_free(cs->btmp_file);
+	g_free(cs->irvd_file);
+	g_free(cs->time_value);
+	g_free(cs->father_btmp_file);
+	g_free(cs);
+}
+
+/*
+ * bdrv_create callback: create a new iROW image whose meta file is filename.
+ * Recognised options: size, cluster size (65536 when absent or 0), backing file
+ * and "copy_on_demand".  Creates the meta file, then the first btmp file, then
+ * the first irvd file.  Returns 0 on success, -1 on failure.
+ */
+static int irow_create(const char *filename, QEMUOptionParameter *options) {
+	IRowCreateState *cs = irow_create_state_new(); /* built with g_malloc0(), which does not return NULL */
+	int ret = 0;
+
+	cs->cluster_size = 65536;
+	cs->copy_on_demand = 0;
+	cs->backing_file = NULL;
+	/* pstrcpy() always NUL-terminates; strncpy() did not for over-long names. */
+	pstrcpy(cs->meta_file, MAX_FILE_NAME_LENGTH, filename);
+	while (options && options->name) {
+		if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+			cs->disk_size = options->value.n;
+		} else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
+			if (options->value.n) {
+				cs->cluster_size = options->value.n;
+			}
+		} else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+			cs->backing_file = options->value.s;
+		} else if(!strcmp(options->name, "copy_on_demand")) {
+			cs->copy_on_demand = options->value.n;
+		}
+		options++;
+	}
+
+	if(irow_create_meta(cs) < 0) {
+		fprintf(stderr, "Fail to create meta file of %s\n", filename);
+		ret = -1;
+		goto end;
+	}
+
+	if(irow_create_btmp(cs) < 0) {
+		fprintf(stderr, "Fail to create bitmap file of %s\n", filename);
+		ret = -1;
+		goto end;
+	}
+
+	if(irow_create_vd(cs) < 0) {
+		fprintf(stderr, "Fail to create virtual machine disk file of %s\n", filename);
+		ret = -1;
+		goto end;
+	}
+end:
+	irow_create_state_delete(cs);
+	return ret;
+}
+
+/* bdrv_co_flush_to_disk callback: flushes the current irvd (data) file only. */
+static int coroutine_fn irow_flush(BlockDriverState *bs) {
+	BDRVIrowState *birows = bs->opaque;
+	return bdrv_flush(birows->irow_irvd);
+}
+
+typedef struct IRowAIOCB { /* per-request state filled in by irow_aio_setup() */
+    BlockDriverAIOCB common; /* irow_aio_cancel() casts a BlockDriverAIOCB * to IRowAIOCB *, so this stays first */
+    int64_t sector_num; /* first sector of the request */
+    QEMUIOVector *qiov; /* the caller's I/O vector */
+    int nb_sectors; /* number of sectors in the request */
+    BlockDriverAIOCB *irvd_aiocb; /* request issued on the irvd file; NULL until one is submitted */
+
+} IRowAIOCB;
+
+/* AIOCBInfo.cancel hook: cancel the pending irvd request, if there is one, then release the ACB. */
+static void irow_aio_cancel(BlockDriverAIOCB *blockacb) {
+    IRowAIOCB *request = (IRowAIOCB *)blockacb;
+    if (request->irvd_aiocb != NULL)
+        bdrv_aio_cancel(request->irvd_aiocb);
+    qemu_aio_release(request);
+}
+
+static AIOCBInfo irow_aio_pool = { /* ACB description passed to qemu_aio_get() by irow_aio_setup() */
+    .aiocb_size         = sizeof(IRowAIOCB), /* each ACB is a full IRowAIOCB */
+    .cancel             = irow_aio_cancel,
+};
+
+
+/* Obtain an IRowAIOCB via qemu_aio_get() and record the request parameters in it; returns NULL if none was obtained. */
+static IRowAIOCB *irow_aio_setup(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    IRowAIOCB *request = qemu_aio_get(&irow_aio_pool, bs, cb, opaque);
+
+    if (request != NULL) {
+        request->sector_num = sector_num;
+        request->qiov = qiov;
+        request->nb_sectors = nb_sectors;
+        request->irvd_aiocb = NULL; /* no irvd request submitted yet */
+    }
+    return request;
+}
+
+/*
+ * Completion callback for the irvd read issued by irow_aio_readv().
+ *
+ * On an incomplete image, irow_assert_clusters() is run in IROW_AIO_READ mode;
+ * the request's qiov is then linearised, every cluster flagged in
+ * read_from_father is overwritten from cbuf, and the result is copied back.
+ * The caller's completion function always runs and the ACB is then released.
+ */
+static void irow_aio_readv_cb(void *opaque, int ret) {
+	IRowAIOCB *acb = opaque;
+	BlockDriverState *bs = acb->common.bs;
+	BDRVIrowState *birows = bs->opaque;
+	int64_t first_cluster, last_cluster, nb_clusters, sector_index, cluster_index, buf_offset;
+	ClusterBuffer cbuf;
+	uint8_t *buf = NULL; /* byte pointer: the code below does arithmetic on it */
+	int remain_sectors, cbuf_offset, len;
+
+	/* Set before the first "goto end": the cleanup code inspects both members. */
+	cbuf.buf = NULL;
+	cbuf.read_from_father = NULL;
+	if(ret < 0) {
+		fprintf(stderr, "aio_readv failed\n");
+		goto end;
+	}
+	first_cluster = acb->sector_num / birows->sectors_per_cluster;
+	last_cluster = (acb->sector_num + acb->nb_sectors - 1) / birows->sectors_per_cluster;
+
+	if(first_cluster >= birows->total_clusters) {
+		fprintf (stderr, "Invalid sector_num.\n");
+		ret = -1;
+		goto end;
+	}
+	if(last_cluster >= birows->total_clusters) {
+		fprintf (stderr, "Invalid nb_sectors.\n");
+		ret = -1;
+		goto end;
+	}
+
+	if(birows->complete_image != 1) {
+		nb_clusters = last_cluster - first_cluster + 1;
+		cbuf.buf = qemu_memalign(512, nb_clusters * birows->cluster_size);
+		memset(cbuf.buf, 0, nb_clusters * birows->cluster_size);
+		cbuf.read_from_father = g_malloc0(nb_clusters  + 1);
+		if(irow_assert_clusters(bs, &cbuf, acb->sector_num, acb->nb_sectors, IROW_AIO_READ) < 0) {
+			fprintf (stderr, "irow_assert_clusters() failed.\n");
+			ret = -1;
+			goto end;
+		}
+		irow_update_btmp(birows);
+		buf = g_malloc(acb->qiov->size); /* linear copy of the scattered request data */
+		qemu_iovec_to_buf(acb->qiov, 0, buf, acb->qiov->size);
+		sector_index = acb->sector_num;
+		remain_sectors = acb->nb_sectors;
+		buf_offset = 0;
+		while(remain_sectors > 0) {
+			cluster_index = sector_index / birows->sectors_per_cluster;
+			len = last_sector_in_cluster(birows, cluster_index) - sector_index + 1;
+			if(len > remain_sectors)
+				len = remain_sectors;
+			if(cbuf.read_from_father[cluster_index - first_cluster] == 1) {
+				cbuf_offset = (sector_index & (birows->sectors_per_cluster - 1)) + (cluster_index - first_cluster) * birows->sectors_per_cluster;
+				memcpy(buf + buf_offset, cbuf.buf + cbuf_offset * BDRV_SECTOR_SIZE, len * BDRV_SECTOR_SIZE);
+			}
+			sector_index = first_sector_in_cluster(birows, cluster_index + 1);
+			remain_sectors -= len;
+			buf_offset += len * BDRV_SECTOR_SIZE;
+		}
+
+		qemu_iovec_from_buf(acb->qiov, 0, buf, acb->qiov->size);
+	}
+
+end:
+	g_free(buf);
+	if(cbuf.buf != NULL) {
+		qemu_vfree(cbuf.buf); /* allocated with qemu_memalign() */
+	}
+	g_free(cbuf.read_from_father);
+	acb->common.cb(acb->common.opaque, ret);
+	qemu_aio_release(acb);
+}
+
+/* bdrv_aio_readv callback: forward the read to the irvd file's driver, with
+ * irow_aio_readv_cb() as the completion handler that post-processes the data.
+ * Returns NULL when no request could be started. */
+static BlockDriverAIOCB *irow_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque) {
+    BDRVIrowState *birows = bs->opaque;
+    BlockDriverState *irvd = birows->irow_irvd;
+    IRowAIOCB *request = irow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
+
+    if (request == NULL)
+        return NULL;
+    request->irvd_aiocb = irvd->drv->bdrv_aio_readv(irvd, sector_num, qiov, nb_sectors, irow_aio_readv_cb, request);
+    if (request->irvd_aiocb != NULL)
+        return &request->common;
+    qemu_aio_release(request);
+    return NULL;
+}
+
+/*
+ * bdrv_aio_writev callback.  On an incomplete image irow_assert_clusters() is
+ * first run in IROW_AIO_WRITE mode; then the bitmap bits of all touched clusters
+ * are set, the write is submitted to the irvd file's driver with the caller's
+ * cb/opaque and the btmp file is updated.  Returns the request, NULL on failure.
+ */
+static BlockDriverAIOCB *irow_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque) {
+	BDRVIrowState *s = bs->opaque;
+	int64_t first_cluster, last_cluster, current_cluster;
+	ClusterBuffer cbuf;
+	BlockDriverAIOCB *ret = NULL;
+
+	/* Set before the first "goto end": the cleanup code inspects both members. */
+	cbuf.buf = NULL;
+	cbuf.read_from_father = NULL;
+	first_cluster = sector_num / s->sectors_per_cluster;
+	last_cluster = (sector_num + nb_sectors - 1) / s->sectors_per_cluster;
+
+	if(first_cluster >= s->total_clusters) {
+		fprintf (stderr, "Invalid sector_num!\n");
+		goto end;
+	}
+	if(last_cluster >= s->total_clusters) {
+		fprintf (stderr, "Invalid nb_sectors!\n");
+		goto end;
+	}
+	if(s->complete_image != 1) {
+		cbuf.buf = qemu_memalign(512, 2  * s->cluster_size);
+		memset(cbuf.buf, 0, 2 * s->cluster_size); /* zeroed, as irow_write() does */
+		cbuf.read_from_father = g_malloc0(2);
+		if(irow_assert_clusters(bs, &cbuf, sector_num, nb_sectors, IROW_AIO_WRITE) < 0) {
+			fprintf (stderr, "irow_assert_clusters() failed.\n");
+			goto end;
+		}
+	}
+
+	for(current_cluster = first_cluster; current_cluster <= last_cluster; current_cluster++) {
+		irow_set_bit(s, current_cluster);
+	}
+	ret = s->irow_irvd->drv->bdrv_aio_writev(s->irow_irvd, sector_num, qiov, nb_sectors, cb, opaque );
+	if(ret == NULL) {
+		goto end;
+	}
+
+	if(irow_update_btmp(s) < 0) {
+		fprintf (stderr, "Failed to update btmp file. (%s)\n", s->opened_btmp_file);
+		ret = NULL; /* NOTE(review): the irvd write was already submitted at this point -- confirm callers cope */
+	}
+
+end:
+	if(cbuf.buf != NULL) {
+		qemu_vfree(cbuf.buf); /* allocated with qemu_memalign() */
+	}
+	g_free(cbuf.read_from_father);
+	return ret;
+}
+
+/*
+ * bdrv_aio_flush callback: hands the flush to bdrv_aio_flush() on the irvd file and returns the request it yields.
+ */
+static BlockDriverAIOCB *irow_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque) {
+	BDRVIrowState *birows = bs->opaque;
+
+	return bdrv_aio_flush(birows->irow_irvd, cb, opaque);
+}
+
+/* Store in id_str (capacity id_str_size) the smallest decimal id >= 1 that does
+ * not equal the numeric value (strtoul, base 10) of any existing snapshot id. */
+static void irow_new_snapshot_id(BDRVIrowState *birows, char *id_str, int id_str_size) {
+	IRowSnapshot *snap_ptr;
+	unsigned int i, id, found;
+
+	for(id = 1; id < 0xffffffff; id++) {
+		found = 1;
+		for(i = 0; i < birows->nb_snapshots; i++) {
+			snap_ptr = birows->snapshots + i;
+			if(snap_ptr->id_str != NULL && id == strtoul(snap_ptr->id_str, NULL, 10)) {
+				found = 0;
+				break;
+			}
+		}
+		if(found)
+			break;
+	}
+	snprintf(id_str, id_str_size, "%u", id); /* id is unsigned, hence %u rather than %d */
+}
+
+/* Return the index of the first snapshot whose id_str equals id_str exactly, or -1 if there is none. */
+static int irow_find_snapshot_by_id(BDRVIrowState *birows, const char *id_str) {
+	int idx = 0;
+	while(idx < birows->nb_snapshots) {
+		const char *candidate = birows->snapshots[idx].id_str;
+		if(candidate != NULL && strcmp(candidate, id_str) == 0) {
+			return idx;
+		}
+		idx++;
+	}
+	return -1;
+}
+
+/* Return the index of the first snapshot whose name equals name exactly, or -1 if there is none. */
+static int irow_find_snapshot_by_name(BDRVIrowState *birows, const char *name) {
+	int idx = 0;
+	while(idx < birows->nb_snapshots) {
+		const char *candidate = birows->snapshots[idx].name;
+		if(candidate != NULL && strcmp(candidate, name) == 0) {
+			return idx;
+		}
+		idx++;
+	}
+	return -1;
+}
+
+/* Return the index of the first snapshot that is marked deleted and has no children, or -1 if none qualifies. */
+static int irow_find_free_snapshot(BDRVIrowState *birows) {
+	int idx;
+	for(idx = 0; idx < birows->nb_snapshots; idx++) {
+		const IRowSnapshot *snap = birows->snapshots + idx;
+		if(snap->is_deleted == 1 && snap->nb_children == 0)
+			return idx;
+	}
+	return -1;
+}
+
+/*
+ * Add value to snap->nb_children.  If snap is then a deleted snapshot without
+ * children, the same adjustment is applied to its father (looked up through
+ * father_btmp_file), recursively.  Returns 0, or -1 if a father lookup fails.
+ */
+static int irow_update_nb_children(BDRVIrowState *birows, IRowSnapshot *snap, int value) {
+	int snap_index;
+
+	snap->nb_children += value;
+	if(snap->nb_children != 0 || snap->is_deleted != 1 || !snap->father_btmp_file) {
+		return 0;
+	}
+	snap_index = irow_find_snapshot_by_btmp(birows, snap->father_btmp_file);
+	if(snap_index < 0) {
+		fprintf(stderr, "Failed to find father snapshot\n");
+		return -1;
+	}
+	/* Propagate the result so a failed lookup further up is not reported as success. */
+	return irow_update_nb_children(birows, birows->snapshots + snap_index, value);
+}
+
+static int irow_snapshot_add(BDRVIrowState *birows, IRowCreateState *cs, QEMUSnapshotInfo *sn_info) {
+	IRowSnapshot *new_snap, *snap;
+	qemu_timeval tv;
+	int snap_index;
+	/* Turn the current-state entry into the snapshot described by sn_info and append a fresh "current state" entry built from cs. Returns 0, or -1 if the current-state entry is not found. */
+	birows->snapshots = g_realloc(birows->snapshots, (birows->nb_snapshots + 1) * sizeof(IRowSnapshot)); /* room for one more entry; done even on the -1 path below */
+	/* The entry whose btmp file is the current one is the state being snapshotted. */
+	snap_index = irow_find_snapshot_by_btmp(birows, birows->current_btmp_file);
+	if(snap_index < 0) {
+		return -1;
+	}
+	snap = birows->snapshots + snap_index;
+
+	new_snap = birows->snapshots + birows->nb_snapshots; /* the slot added by g_realloc() above */
+	memset(new_snap, 0, sizeof(IRowSnapshot));
+	/* The old entry receives the snapshot metadata supplied by the caller. */
+	snap->date_sec = sn_info->date_sec;
+	snap->date_nsec = sn_info->date_nsec;
+	snap->vm_clock_nsec = sn_info->vm_clock_nsec;
+	snap->vm_state_size = sn_info->vm_state_size;
+	irow_update_nb_children(birows, snap, 1); /* return value is not checked */
+
+	if(snap->id_str == NULL) {
+		snap->id_str = g_malloc0(128);
+	} else {
+		memset(snap->id_str, 0, 128);
+	}
+	strncpy(snap->id_str, sn_info->id_str, 128);
+
+	if(snap->name == NULL) {
+		snap->name = g_malloc0(256);
+	} else {
+		memset(snap->name, 0, 256);
+	}
+	strncpy(snap->name, sn_info->name, 256);
+	/* The appended entry becomes the new "current state" (id "0") using the files named in cs. */
+	new_snap->id_str = g_malloc0(128);
+	sprintf(new_snap->id_str, "0");
+	new_snap->name = g_malloc0(256);
+	sprintf(new_snap->name, "current state");
+	new_snap->btmp_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+	strncpy(new_snap->btmp_file, cs->btmp_file, MAX_FILE_NAME_LENGTH);
+	new_snap->irvd_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+	strncpy(new_snap->irvd_file, cs->irvd_file, MAX_FILE_NAME_LENGTH);
+	if(cs->father_btmp_file != NULL) {
+		new_snap->father_btmp_file = g_malloc0(MAX_FILE_NAME_LENGTH);
+		strncpy(new_snap->father_btmp_file, cs->father_btmp_file, MAX_FILE_NAME_LENGTH);
+	}
+	qemu_gettimeofday(&tv);
+	new_snap->date_sec = tv.tv_sec;
+	new_snap->date_nsec = tv.tv_usec * 1000;
+
+	birows->nb_snapshots += 1;
+	birows_cache = g_realloc(birows_cache, sizeof(BDRVIrowState *) * birows->nb_snapshots); /* NOTE(review): the whole array is zeroed on the next line -- confirm previously cached entries need no release */
+	memset(birows_cache, 0, sizeof(BDRVIrowState *) * birows->nb_snapshots);
+	birows->snapshots_is_dirty = 1;
+
+	return 0;
+}
+
+/* Return a zero-filled buffer of size bytes into which text has been copied with strncpy(). */
+static char *irow_snapshot_dup_field(const char *text, size_t size) {
+	char *copy = g_malloc0(size);
+	strncpy(copy, text, size);
+	return copy;
+}
+
+/*
+ * Deep-copy src into dst: each string member set in src is duplicated into a fresh buffer,
+ * scalar members are copied by value.  Members that are NULL in src leave dst untouched.
+ */
+static void irow_snapshot_copy(IRowSnapshot *dst, IRowSnapshot *src) {
+	if(src->id_str)
+		dst->id_str = irow_snapshot_dup_field(src->id_str, 128);
+	if(src->name)
+		dst->name = irow_snapshot_dup_field(src->name, 256);
+	if(src->btmp_file)
+		dst->btmp_file = irow_snapshot_dup_field(src->btmp_file, MAX_FILE_NAME_LENGTH);
+	if(src->irvd_file)
+		dst->irvd_file = irow_snapshot_dup_field(src->irvd_file, MAX_FILE_NAME_LENGTH);
+	if(src->father_btmp_file)
+		dst->father_btmp_file = irow_snapshot_dup_field(src->father_btmp_file, MAX_FILE_NAME_LENGTH);
+	dst->date_sec = src->date_sec;
+	dst->date_nsec = src->date_nsec;
+	dst->vm_clock_nsec = src->vm_clock_nsec;
+	dst->vm_state_size = src->vm_state_size;
+	dst->nb_children = src->nb_children;
+	dst->is_deleted = src->is_deleted;
+}
+
+static int irow_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) {
+	BDRVIrowState *s = bs->opaque;
+	IRowCreateState *cs = NULL;
+	IRowSnapshot *free_snap, *old_snap, *snap;
+	int snap_index, offset, ret = 0;
+	/* bdrv_snapshot_create callback: freeze the current state as snapshot sn_info and continue on a new, empty current state. Returns -1 on failure; on success the irow_open_data() result is returned. */
+	if(sn_info->id_str[0] == '\0') {
+		irow_new_snapshot_id(s, sn_info->id_str, sizeof(sn_info->id_str));
+	}
+	/* Both the id and the name must be unused. */
+	if(irow_find_snapshot_by_id(s, sn_info->id_str) >= 0) {
+		fprintf(stderr, "Duplicated snapshot id\n");
+		ret = -1;
+		goto end;
+	}
+
+	if(irow_find_snapshot_by_name(s, sn_info->name) >= 0) {
+		fprintf(stderr, "Duplicated snapshot name\n");
+		ret = -1;
+		goto end;
+	}
+	/* Describe the files of the new current state; geometry is taken over from the open image. */
+	cs = irow_create_state_new();
+	cs->cluster_bits = s->cluster_bits;
+	cs->cluster_size = s->cluster_size;
+	cs->disk_size = s->disk_size;
+	strncpy(cs->meta_file, s->meta_file, MAX_FILE_NAME_LENGTH);
+	strncpy(cs->father_btmp_file, s->current_btmp_file, MAX_FILE_NAME_LENGTH); // the father of the new current state is the old current image
+	/* Reuse the files of a deleted, childless snapshot when there is one; otherwise create new ones. */
+	snap_index = irow_find_free_snapshot(s);
+	if(snap_index >= 0) {
+		free_snap = s->snapshots + snap_index;
+		strcpy(cs->btmp_file, free_snap->btmp_file);
+		strcpy(cs->irvd_file, free_snap->irvd_file);
+		old_snap = s->snapshots;
+		s->snapshots = g_malloc0((s->nb_snapshots - 1) * sizeof(IRowSnapshot)); /* new array without the recycled entry */
+		offset = 0;
+		for(snap_index = 0; snap_index < s->nb_snapshots; snap_index++) {
+			snap = old_snap + snap_index;
+			if(snap != free_snap) {
+				irow_snapshot_copy(s->snapshots + offset, snap);
+				offset += 1;
+			}
+		}
+		/* NOTE(review): irow_close_snapshots2() presumably releases old_snap, into which free_snap points; the names were copied into cs above -- confirm. */
+		irow_close_snapshots2(old_snap, s->nb_snapshots);
+		s->nb_snapshots -= 1;
+	} else {
+		irow_generate_filename(cs->btmp_file, cs->meta_file, cs->time_value, "btmp"); /* NOTE(review): return value not checked */
+		irow_generate_filename(cs->irvd_file, cs->meta_file, cs->time_value, "irvd"); /* NOTE(review): return value not checked */
+
+		if(irow_create_btmp(cs) < 0) {
+			fprintf(stderr, "Failed to create new btmp file (%s)\n", cs->btmp_file);
+			ret = -1;
+			goto end;
+		}
+
+		if(irow_create_vd(cs) < 0) {
+			fprintf(stderr, "Failed to create new irvd file (%s)\n", cs->irvd_file);
+			ret = -1;
+			goto end;
+		}
+	}
+	/* Register the new current state in memory, then update the meta file. */
+	if(irow_snapshot_add(s, cs, sn_info) < 0) {
+		fprintf(stderr, "Failed to add new snapshot in mem\n");
+		ret = -1;
+		goto end;
+	}
+
+	if(irow_update_meta(s, cs->btmp_file, 0) < 0) {
+		fprintf(stderr, "Failed to update meta file (%s)\n", s->meta_file);
+		ret = -1;
+		goto end;
+	}
+	/* Update the btmp file of the state being frozen, then close its files. */
+	s->vm_state_size = sn_info->vm_state_size;
+	irow_update_btmp(s);
+
+	irow_close_btmp(s);
+	irow_close_irvd(s);
+	/* Switch the driver state over to the new current-state files. */
+	strncpy(s->current_btmp_file, cs->btmp_file, MAX_FILE_NAME_LENGTH);
+	snap_index = irow_find_snapshot_by_btmp(s, s->current_btmp_file); /* NOTE(review): not checked for < 0 before use */
+	if(irow_load_info_from_snapshot(s, snap_index) < 0) {
+		ret = -1;
+		goto end;
+	}
+	ret = irow_open_data(s, s->open_flags); /* NOTE(review): result is not checked before s->bitmap is used below */
+	memset(s->bitmap, 0, s->bitmap_size); /* the new current state starts with an all-zero bitmap */
+	s->bitmap_is_dirty = 1;
+	if(irow_update_btmp(s) < 0) {
+		fprintf(stderr, "Failed to update btmp file\n");
+		ret = -1;
+		goto end;
+	}
+
+end:
+	if(cs != NULL) {
+		irow_create_state_delete(cs);
+		cs = NULL;
+	}
+	return ret;
+}
+
+static int64_t irow_vm_state_offset(BDRVIrowState *birows) { /* offset of the VM state area inside a btmp file */
+	return birows->bitmap_size; /* the VM state is stored directly after the bitmap */
+}
+
+/* Read size bytes of VM state, starting pos bytes into the VM state area of birows' btmp file; returns the bdrv_pread() result. */
+static int irow_load_vmstate2(BDRVIrowState *birows, uint8_t *buf, int64_t pos, int size) {
+	int64_t file_offset = irow_vm_state_offset(birows) + pos;
+	return bdrv_pread(birows->irow_btmp, file_offset, buf, size);
+}
+
+/* Write size bytes of VM state at pos within the VM state area of birows' btmp file; returns the bdrv_pwrite() result. */
+static int irow_save_vmstate2(BDRVIrowState *birows, const uint8_t *buf, int64_t pos, int size) {
+	birows->vmstate_is_saved = 1; /* set whether or not the write below succeeds */
+	return bdrv_pwrite(birows->irow_btmp, pos + irow_vm_state_offset(birows), buf, size);
+}
+
+static int irow_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) {
+	/* bdrv_snapshot_goto callback: make the snapshot named by snapshot_id the father of the current state and clear the current bitmap. Returns 0 on success or when asked for the current state itself, -1 on error. */
+	BDRVIrowState *s = bs->opaque;
+	IRowSnapshot *target_snap, *current_snap, *father_snap;
+	int snap_index, ret = 0;
+	/* "0" and "current state" denote the live state itself: nothing to do. */
+	if(strcmp(snapshot_id, "0") == 0 || strcmp(snapshot_id, "current state") == 0) {
+		fprintf(stderr, "No need to goto current state.\n");
+		goto end;
+	}
+	/* snapshot_id is tried as an id first, then as a name. */
+	snap_index = irow_find_snapshot_by_id(s, snapshot_id);
+	if(snap_index < 0) {
+		snap_index = irow_find_snapshot_by_name(s, snapshot_id);
+		if(snap_index < 0) {
+			fprintf(stderr, "Failed to find snapshot %s\n", snapshot_id);
+			ret = -1;
+			goto end;
+		}
+	}
+	target_snap = s->snapshots + snap_index;
+
+	if(target_snap->is_deleted) {
+		fprintf(stderr, "Can not go to deleted snapshot %s\n", snapshot_id);
+		ret = -1;
+		goto end;
+	}
+	/* Look up the entries of the current state and of its present father. */
+	snap_index = irow_find_snapshot_by_btmp(s, s->current_btmp_file);
+	if(snap_index < 0) {
+		fprintf(stderr, "Failed to find current state.\n");
+		ret = -1;
+		goto end;
+	}
+	current_snap = s->snapshots + snap_index;
+	snap_index = irow_find_snapshot_by_btmp(s, s->father_btmp_file);
+	if(snap_index < 0) {
+		fprintf(stderr, "Failed to find father snapshot.\n");
+		ret = -1;
+		goto end;
+	}
+	father_snap = s->snapshots + snap_index;
+	strncpy(s->father_btmp_file, target_snap->btmp_file, MAX_FILE_NAME_LENGTH);
+	strncpy(current_snap->father_btmp_file, target_snap->btmp_file, MAX_FILE_NAME_LENGTH); /* NOTE(review): assumes father_btmp_file is an allocated MAX_FILE_NAME_LENGTH buffer -- confirm */
+	/* Move one child reference from the old father to the target (return values are not checked). */
+	irow_update_nb_children(s, father_snap, -1);
+	irow_update_nb_children(s, target_snap, 1);
+
+	current_snap->date_sec = target_snap->date_sec;
+	current_snap->date_nsec = target_snap->date_nsec;
+	current_snap->vm_clock_nsec = target_snap->vm_clock_nsec;
+	current_snap->vm_state_size = 0;
+	/* A zeroed bitmap marks every cluster as absent from the current irvd file. */
+	memset(s->bitmap, 0, s->bitmap_size);
+	s->bitmap_is_dirty = 1;
+	if(irow_update_btmp(s) < 0) {
+		fprintf(stderr, "Failed to update btmp file\n");
+		ret = -1;
+		goto end;
+	}
+
+	s->snapshots_is_dirty = 1;
+	if(irow_update_meta(s, NULL, 0) < 0) {
+		fprintf(stderr, "Failed to update meta file\n");
+		ret = -1;
+	}
+
+
+end:
+	return ret;
+}
+
+/*
+ * bdrv_snapshot_delete callback: mark the snapshot named by snapshot_id (id first, then name)
+ * as deleted and append "_del" to its name; a childless snapshot also releases one child
+ * reference of its father.  No file is removed here.  Returns 0 on success, -1 on failure.
+ */
+static int irow_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) {
+	BDRVIrowState *s = bs->opaque;
+	IRowSnapshot *target_snap, *father_snap;
+	int snap_index;
+
+	if(strcmp(snapshot_id, "0") == 0 || strcmp(snapshot_id, "current state") == 0) {
+		fprintf(stderr, "Can not delete current state.\n");
+		return -1; /* a refused request must not be reported as success */
+	}
+
+	snap_index = irow_find_snapshot_by_id(s, snapshot_id);
+	if(snap_index < 0) {
+		snap_index = irow_find_snapshot_by_name(s, snapshot_id);
+	}
+	if(snap_index < 0) {
+		fprintf(stderr, "Failed to find snapshot %s\n", snapshot_id);
+		return -1;
+	}
+	target_snap = s->snapshots + snap_index;
+
+	if(target_snap->is_deleted) {
+		fprintf(stderr, "Can not delete deleted snapshot %s\n", snapshot_id);
+		return -1;
+	}
+	target_snap->is_deleted = 1;
+	strncat(target_snap->name, "_del", 255-strlen(target_snap->name));
+
+	if(target_snap->nb_children == 0 && target_snap->father_btmp_file) {
+		snap_index = irow_find_snapshot_by_btmp(s, target_snap->father_btmp_file);
+		if(snap_index < 0) {
+			fprintf(stderr, "Failed to find father snapshot\n");
+			return -1;
+		}
+		father_snap = s->snapshots + snap_index;
+		irow_update_nb_children(s, father_snap, -1);
+	}
+
+	s->snapshots_is_dirty = 1;
+	if(irow_update_meta(s, NULL, 0) < 0) {
+		fprintf(stderr, "Failed to update meta file\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * bdrv_snapshot_list callback: store in *psn_tab a newly allocated array with
+ * one QEMUSnapshotInfo per snapshot that is not marked deleted, and return the
+ * number of entries.  With no snapshots at all, *psn_tab is NULL and 0 is returned.
+ */
+static int irow_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) {
+	BDRVIrowState *s = bs->opaque;
+	QEMUSnapshotInfo *table, *entry;
+	IRowSnapshot *snap;
+	int idx, live = 0;
+
+	if (s->nb_snapshots == 0) {
+		*psn_tab = NULL;
+		return 0;
+	}
+	/* First pass: count the entries that will be reported. */
+	for(idx = 0; idx < s->nb_snapshots; idx++) {
+		if(!s->snapshots[idx].is_deleted)
+			live += 1;
+	}
+	table = g_malloc0(live * sizeof(QEMUSnapshotInfo));
+	entry = table;
+	for(idx = 0; idx < s->nb_snapshots; idx++) {
+		snap = s->snapshots + idx;
+		if(snap->is_deleted == 1) {
+			continue;
+		}
+		if(snap->id_str != NULL)
+			pstrcpy(entry->id_str, sizeof(entry->id_str), snap->id_str);
+		if(snap->name != NULL)
+			pstrcpy(entry->name, sizeof(entry->name), snap->name);
+		entry->vm_state_size = snap->vm_state_size;
+		entry->date_sec = snap->date_sec;
+		entry->date_nsec = snap->date_nsec;
+		entry->vm_clock_nsec = snap->vm_clock_nsec;
+		entry++;
+	}
+	*psn_tab = table;
+	return live;
+}
+
+static int irow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { /* bdrv_get_info callback; always returns 0 */
+	BDRVIrowState *birows = bs->opaque;
+	bdi->vm_state_offset = irow_vm_state_offset(birows);
+	bdi->cluster_size = birows->cluster_size;
+	return 0;
+}
+
+/*
+ * bdrv_save_vmstate callback: passes the request to irow_save_vmstate2() for the open image and returns its result.
+ */
+static int irow_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) {
+	BDRVIrowState *state = bs->opaque;
+
+	return irow_save_vmstate2(state, buf, pos, size);
+}
+
+/*
+ * bdrv_load_vmstate callback.  The VM state is not read from the current
+ * state's own btmp file but from that of its father snapshot, which is opened
+ * with irow_open_previous_state() for the duration of the read.
+ *
+ * Returns the irow_load_vmstate2() result, or -1 if the father snapshot cannot
+ * be found or opened.
+ */
+static int irow_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) {
+	BDRVIrowState *birows = bs->opaque;
+	BDRVIrowState *father;
+	int father_index, ret;
+
+	father_index = irow_find_snapshot_by_btmp(birows, birows->father_btmp_file);
+	if(father_index < 0) {
+		return -1;
+	}
+
+	father = irow_open_previous_state(birows, father_index);
+	if(father == NULL) {
+		return -1;
+	}
+	ret = irow_load_vmstate2(father, buf, pos, size);
+	irow_close_previous_state(father);
+	return ret;
+}
+
+/* bdrv_check callback.  Performs no consistency check: it prints the current
+ * copy_on_demand setting and interactively offers to toggle it, calling
+ * irow_update_meta() after a change.  Returns 0, or 1 if standard input ends first.
+ */
+static int irow_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix){
+	BDRVIrowState *birows = bs->opaque;
+	char user_input[100];
+	printf("current copy_on_demand state is %s\n", birows->copy_on_demand ? "ON" : "OFF");
+	while(1) {
+		printf("do you want to change copy_on_demand state? (y/n)");
+		/* The field width keeps scanf() from writing past the end of user_input. */
+		if(scanf("%99s", user_input) == EOF) {
+			return 1;
+		}
+		user_input[0] = tolower((unsigned char)user_input[0]); /* tolower() needs an unsigned char value */
+		if(user_input[0] == 'y') {
+			birows->copy_on_demand = birows->copy_on_demand ? 0 : 1;
+			irow_update_meta(birows, NULL, 1);
+			break;
+		}
+		if(user_input[0] == 'n')
+			break;
+	}
+	return 0;
+}
+
+/* bdrv_getlength callback: returns the disk_size recorded in the driver state. */
+static int64_t irow_get_length(BlockDriverState *bs) {
+	BDRVIrowState *state = bs->opaque;
+
+	return state->disk_size;
+}
+
+static QEMUOptionParameter irow_create_options[] = { /* options published as .create_options and parsed by irow_create() */
+    {
+        .name = BLOCK_OPT_SIZE, /* stored in cs->disk_size by irow_create() */
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    {
+        .name = BLOCK_OPT_CLUSTER_SIZE, /* irow_create() keeps its 65536 default when the value is 0 */
+        .type = OPT_SIZE,
+        .help = "irow cluster size"
+    },
+    {
+        .name = BLOCK_OPT_BACKING_FILE, /* stored in cs->backing_file by irow_create() */
+        .type = OPT_STRING,
+        .help = "File name of a base image"
+    },
+    {
+        .name = "copy_on_demand", /* stored in cs->copy_on_demand by irow_create() */
+        .type = OPT_FLAG,
+        .help = "copy clusters to current irvd when needed"
+    },
+    { NULL } /* terminator: irow_create() stops at the first entry without a name */
+};
+
+static BlockDriver bdrv_irow = { /* BlockDriver descriptor of the "irow" format, registered by bdrv_irow_init() */
+    .format_name	= "irow",
+    .instance_size	= sizeof(BDRVIrowState),
+    .bdrv_probe		= irow_probe,
+    .bdrv_open		= irow_open,
+    .bdrv_read		= irow_read,
+    .bdrv_write		= irow_write,
+    .bdrv_close		= irow_close,
+    .bdrv_create	= irow_create,
+    /* flush: irow_flush() flushes the irvd file */
+    .bdrv_co_flush_to_disk = irow_flush,
+    /* asynchronous I/O */
+    .bdrv_aio_readv		= irow_aio_readv,
+    .bdrv_aio_writev	= irow_aio_writev,
+    .bdrv_aio_flush		= irow_aio_flush,
+    /* snapshot management */
+    .bdrv_snapshot_create   = irow_snapshot_create,
+    .bdrv_snapshot_goto     = irow_snapshot_goto,
+    .bdrv_snapshot_delete   = irow_snapshot_delete,
+    .bdrv_snapshot_list     = irow_snapshot_list,
+    /* image information */
+    .bdrv_get_info	= irow_get_info,
+    .bdrv_getlength = irow_get_length,
+    /* VM state save/load; the data is kept in btmp files */
+    .bdrv_save_vmstate    = irow_save_vmstate,
+    .bdrv_load_vmstate    = irow_load_vmstate,
+    /* image creation options and the "check" hook */
+    .create_options = irow_create_options,
+    .bdrv_check = irow_check, /* NOTE(review): irow_check() is an interactive copy_on_demand toggle, not a consistency check */
+};
+
+/* Init hook passed to block_init() below: registers the "irow" BlockDriver. */
+static void bdrv_irow_init(void) {
+    bdrv_register(&bdrv_irow);
+}
+
+block_init(bdrv_irow_init);
diff --git a/block/irow.h b/block/irow.h
new file mode 100644
index 0000000..131b741
--- /dev/null
+++ b/block/irow.h
@@ -0,0 +1,135 @@
+/* IROW (Improved ROW) Disk Format
+ * */
+/*
+ * iROW (improved Redirect-on-Write) is a disk format supporting high-efficiency VM disk snapshots.
+ * iROW uses a bitmap to reduce the amount of metadata, so that the performance of both the key VM disk
+ * snapshot operations and VM disk I/O is improved at the same time.
+ *
+ *The iROW VM disk image consists of a meta file and several snapshots.
+ *
+ *A snapshot consists of 2 files: a bitmap file (btmp file) and a VM disk data file (irvd file).
+ *The current state of the iROW VM disk also occupies a snapshot.
+ *
+ *The meta file consists of the meta header and the snapshot information. The meta header stores
+ *basic information about the VM disk image. The snapshot information sequentially stores every snapshot’s
+ *name, id and other related information.
+ *
+ *The btmp file consists of a bitmap and the VM state data. The bitmap indicates whether each cluster
+ *exists in the corresponding irvd file. Each cluster in the VM disk image is mapped to one bit in the bitmap.
+ *
+ *The irvd file stores the actual data of the VM disk image. The smallest unit of storage is the cluster.
+ *iROW does not choose addresses for the data clusters: each cluster is written to the irvd file at the same
+ *offset as its virtual address in the VM disk image. Because the host machine’s file system supports sparse
+ *files, the VM disk image still grows gradually with the actual disk usage.
+ *
+ */
+#define IROW_MAGIC (('I' << 24) | ('R' << 16) | ('O' << 8) | 'W') /* ASCII "IROW", 'I' in the most significant byte */
+#define IROW_VERSION 1
+/* ASCII "SNAP" packed the same way; presumably the expected IRowSnapshotHeader.snap_magic -- confirm in irow.c */
+#define IROW_SNAPHEADER_MAGIC (('S' << 24) | ('N' << 16) | ('A' << 8) | 'P')
+/* NOTE(review): the two *_CLUSTER_BITS values look like bounds on cluster_bits (2^9 .. 2^21 bytes) -- confirm */
+#define MIN_CLUSTER_BITS 9
+#define MAX_CLUSTER_BITS 21
+#define MAX_FILE_NAME_LENGTH 256 /* size of the fixed file-name arrays in IRowMeta and IRowSnapshotHeader */
+/* Operation codes; NOTE(review): their consumers are not in this header -- presumably used by irow.c */
+#define IROW_READ 1
+#define IROW_WRITE 2
+#define IROW_AIO_READ 3
+#define IROW_AIO_WRITE 4
+
+/* Packed meta header of an iROW image; IROW_SNAPSHOT_OFFSET is defined as sizeof(IRowMeta). */
+typedef struct __attribute__((packed)) IRowMeta {
+    uint32_t magic;               /* presumably IROW_MAGIC -- confirm against irow.c */
+    uint32_t version;             /* presumably IROW_VERSION -- confirm against irow.c */
+    uint32_t copy_on_demand;      /* counterpart of the "copy_on_demand" create option */
+    uint32_t nb_snapshots;        /* number of snapshots (by name; TODO confirm whether deleted ones count) */
+    uint32_t cluster_size;
+    uint32_t cluster_bits;        /* presumably log2(cluster_size) -- TODO confirm */
+    uint32_t sectors_per_cluster;
+    uint64_t total_clusters;
+    uint64_t disk_size;
+    char current_btmp[MAX_FILE_NAME_LENGTH]; /* name of the current btmp file, fixed-size array */
+    char backing_file[MAX_FILE_NAME_LENGTH]; /* name of the base image, fixed-size array */
+} IRowMeta;
+/* Packed per-snapshot record with fixed-size strings; IRowSnapshot carries the same data with char pointers. */
+typedef struct __attribute__((packed)) IRowSnapshotHeader {
+	uint32_t snap_magic;  /* presumably IROW_SNAPHEADER_MAGIC -- confirm against irow.c */
+	char id_str[128];
+	char name[256];
+	char btmp_file[MAX_FILE_NAME_LENGTH];        /* bitmap file of this snapshot */
+	char irvd_file[MAX_FILE_NAME_LENGTH];        /* data file of this snapshot */
+	char father_btmp_file[MAX_FILE_NAME_LENGTH]; /* presumably the parent snapshot's bitmap file -- TODO confirm */
+	uint32_t vm_state_size;
+	uint32_t date_sec;
+	uint32_t date_nsec;
+	uint64_t vm_clock_nsec;
+	uint32_t nb_children; /* presumably the number of snapshots derived from this one -- TODO confirm */
+	uint32_t is_deleted;  /* presumably non-zero once the snapshot has been deleted -- TODO confirm */
+} IRowSnapshotHeader;
+/* Snapshot description with the fields of IRowSnapshotHeader (minus snap_magic), strings held as char pointers. */
+typedef struct IRowSnapshot {
+	char *id_str;           /* NOTE(review): ownership of the string members is not visible here -- confirm */
+	char *name;
+	char *btmp_file;
+	char *irvd_file;
+	char *father_btmp_file;
+	uint32_t vm_state_size;
+	uint32_t date_sec;
+	uint32_t date_nsec;
+	uint64_t vm_clock_nsec;
+	uint32_t nb_children;
+	uint32_t is_deleted;
+} IRowSnapshot;
+/* NOTE(review): presumably the parameters gathered while creating an image (see irow_create) -- confirm. */
+typedef struct IRowCreateState {
+	uint64_t disk_size;
+	uint32_t cluster_size;
+	uint32_t cluster_bits;   /* presumably log2(cluster_size) -- TODO confirm */
+	uint32_t copy_on_demand;
+	char *meta_file;
+	char *father_btmp_file;
+	char *btmp_file;
+	char *irvd_file;
+	char *time_value;        /* NOTE(review): meaning not visible here -- confirm against irow.c */
+	char *backing_file;
+} IRowCreateState;
+/* A byte buffer paired with a cluster number; presumably caches that cluster's data -- TODO confirm. */
+typedef struct ClusterCache {
+	uint8_t *cache;
+	int64_t cluster_num;
+} ClusterCache;
+/* Per-image state of the irow driver; this is the type irow.c reads from bs->opaque. */
+typedef struct BDRVIrowState {
+    BlockDriverState *irow_meta;   /* presumably the opened meta file -- TODO confirm */
+    BlockDriverState *irow_btmp;   /* presumably the opened btmp (bitmap) file -- TODO confirm */
+    BlockDriverState *irow_irvd;   /* presumably the opened irvd (data) file -- TODO confirm */
+    uint64_t disk_size;            /* value returned by irow_get_length() */
+    uint64_t bitmap_size;
+    uint32_t cluster_size;
+    uint32_t cluster_bits;
+    uint64_t total_clusters;
+    uint32_t sectors_per_cluster;
+    uint32_t nb_snapshots;
+    uint32_t vm_state_size;
+    uint32_t copy_on_demand;       /* toggled between 0 and 1 in irow.c and passed to irow_update_meta() */
+    int open_flags;
+    IRowSnapshot *snapshots;       /* presumably an array of nb_snapshots entries -- TODO confirm */
+    uint32_t snapshots_is_dirty;
+    uint8_t *bitmap;               /* presumably bitmap_size bytes, one bit per cluster -- TODO confirm */
+    uint32_t bitmap_is_dirty;
+    uint32_t vmstate_is_saved;
+    uint32_t complete_image;
+    char *meta_file;
+    char *current_btmp_file;
+    char *father_btmp_file;
+    char *opened_btmp_file;
+    char *irvd_file;
+} BDRVIrowState;
+/* Pair of byte buffers; NOTE(review): the exact roles of the two members are not visible here -- confirm. */
+typedef struct ClusterBuffer {
+	uint8_t *buf;
+	uint8_t *read_from_father;
+} ClusterBuffer;
+
+#define IROW_SNAPSHOT_OFFSET sizeof(IRowMeta) /* size of the packed meta header; presumably where snapshot records start */
+#define MAX_MERGE_BUFFER (16 * 1024 * 1024) /* 16 MiB; parenthesized so the macro is safe inside larger expressions */
