@@ -33,6 +33,13 @@ static AIOPool qed_aio_pool = {
.cancel = qed_aio_cancel,
};
+/* Forward declaration: the copy-on-read completion path (qed_copy_on_read_cb)
+ * issues its write through this zero-detecting variant, which is defined
+ * later in the file alongside the other aio entry points.
+ */
+static BlockDriverAIOCB *qed_aio_writev_check(BlockDriverState *bs,
+                                              int64_t sector_num,
+                                              QEMUIOVector *qiov,
+                                              int nb_sectors,
+                                              BlockDriverCompletionFunc *cb,
+                                              void *opaque);
+
static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
const char *filename)
{
@@ -871,9 +878,8 @@ static void qed_aio_write_l1_update(void *opaque, int ret)
/**
* Update L2 table with new cluster offsets and write them out
*/
-static void qed_aio_write_l2_update(void *opaque, int ret)
+static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
{
- QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
int index;
@@ -889,7 +895,7 @@ static void qed_aio_write_l2_update(void *opaque, int ret)
index = qed_l2_index(s, acb->cur_pos);
qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
- acb->cur_cluster);
+ offset);
if (need_alloc) {
/* Write out the whole new L2 table */
@@ -906,6 +912,51 @@ err:
qed_aio_complete(acb, ret);
}
+/**
+ * Completion callback adapter for qed_aio_write_l2_update()
+ *
+ * qed_aio_write_l2_update() now takes the cluster offset explicitly; this
+ * wrapper preserves the BlockDriverCompletionFunc signature expected by
+ * bdrv_aio_flush()/qed_write_header() and supplies the freshly allocated
+ * data cluster offset (acb->cur_cluster) for the common (non-zero) case.
+ */
+static void qed_aio_write_l2_update_cb(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
+}
+
+/**
+ * Determine if we have a zero write to a block of clusters
+ *
+ * We validate that the write is aligned to a cluster boundary, and that it's
+ * a multiple of cluster size with all zeros.  Only such writes can be
+ * represented as "zero cluster" L2 entries instead of allocating data
+ * clusters.
+ */
+static bool qed_is_zero_write(QEDAIOCB *acb)
+{
+    BDRVQEDState *s = acb_to_s(acb);
+    int i;
+
+    /* The write must start on a cluster boundary ... */
+    if (!qed_offset_is_cluster_aligned(s, acb->cur_pos)) {
+        return false;
+    }
+
+    /* ... and cover a whole number of clusters */
+    if (!qed_offset_is_cluster_aligned(s, acb->cur_qiov.size)) {
+        return false;
+    }
+
+    /* Scan every iovec, bailing out on the first non-zero 64-bit word */
+    for (i = 0; i < acb->cur_qiov.niov; i++) {
+        struct iovec *iov = &acb->cur_qiov.iov[i];
+        uint64_t *v;
+        int j;
+
+        /* Reject lengths that are not a multiple of 8 bytes so the
+         * uint64_t scan below can never read past the end of the buffer.
+         */
+        if ((iov->iov_len & 0x07)) {
+            return false;
+        }
+
+        /* NOTE(review): casting iov_base to uint64_t * assumes the buffer
+         * is at least 8-byte aligned -- confirm callers guarantee this,
+         * otherwise this is a misaligned access on strict architectures.
+         */
+        v = iov->iov_base;
+        for (j = 0; j < iov->iov_len; j += sizeof(v[0])) {
+            if (v[j >> 3]) {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
/**
* Flush new data clusters before updating the L2 table
*
@@ -920,7 +971,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update, opaque)) {
+ if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
qed_aio_complete(acb, -EIO);
}
}
@@ -950,7 +1001,7 @@ static void qed_aio_write_main(void *opaque, int ret)
if (s->bs->backing_hd) {
next_fn = qed_aio_write_flush_before_l2_update;
} else {
- next_fn = qed_aio_write_l2_update;
+ next_fn = qed_aio_write_l2_update_cb;
}
}
@@ -1016,6 +1067,18 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
return !(s->header.features & QED_F_NEED_CHECK);
}
+/**
+ * Update the L2 table to point at zero clusters instead of data clusters
+ *
+ * Called after the header (need-check flag) write completes for a write
+ * that qed_is_zero_write() identified as all zeros.  On error the request
+ * is completed with the error code; otherwise the L2 entries are set to
+ * offset 1, which appears to be the QED on-disk magic value for an
+ * unallocated-but-zero cluster -- confirm against the QED format spec.
+ */
+static void qed_aio_write_zero_cluster(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+
+    if (ret) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+
+    qed_aio_write_l2_update(acb, 0, 1);
+}
+
/**
* Write new data cluster
*
@@ -1027,6 +1090,7 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
{
BDRVQEDState *s = acb_to_s(acb);
+ BlockDriverCompletionFunc *cb;
/* Freeze this request if another allocating write is in progress */
if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
@@ -1041,11 +1105,18 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+ cb = qed_aio_write_prefill;
+
+ /* Zero write detection */
+ if (acb->check_zero_write && qed_is_zero_write(acb)) {
+ cb = qed_aio_write_zero_cluster;
+ }
+
if (qed_should_set_need_check(s)) {
s->header.features |= QED_F_NEED_CHECK;
- qed_write_header(s, qed_aio_write_prefill, acb);
+ qed_write_header(s, cb, acb);
} else {
- qed_aio_write_prefill(acb, 0);
+ cb(acb, 0);
}
}
@@ -1116,11 +1187,11 @@ static void qed_copy_on_read_cb(void *opaque, int ret)
BDRVQEDState *s = acb_to_s(acb);
BlockDriverAIOCB *cor_acb;
- cor_acb = bdrv_aio_writev(s->bs,
- acb->cur_pos / BDRV_SECTOR_SIZE,
- &acb->cur_qiov,
- acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- qed_aio_next_io, acb);
+ cor_acb = qed_aio_writev_check(s->bs,
+ acb->cur_pos / BDRV_SECTOR_SIZE,
+ &acb->cur_qiov,
+ acb->cur_qiov.size / BDRV_SECTOR_SIZE,
+ qed_aio_next_io, acb);
if (!cor_acb) {
qed_aio_complete(acb, -EIO);
}
@@ -1226,7 +1297,8 @@ static QEDAIOCB *qed_aio_setup(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb,
- void *opaque, bool is_write)
+ void *opaque, bool is_write,
+ bool check_zero_write)
{
QEDAIOCB *acb = qemu_aio_get(&qed_aio_pool, bs, cb, opaque);
@@ -1235,6 +1307,7 @@ static QEDAIOCB *qed_aio_setup(BlockDriverState *bs,
acb->is_write = is_write;
acb->finished = NULL;
+ acb->check_zero_write = check_zero_write;
acb->qiov = qiov;
acb->qiov_offset = 0;
acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
@@ -1249,12 +1322,13 @@ static BlockDriverAIOCB *bdrv_qed_aio_setup(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb,
- void *opaque, bool is_write)
+ void *opaque, bool is_write,
+ bool check_zero_write)
{
QEDAIOCB *acb;
acb = qed_aio_setup(bs, sector_num, qiov, nb_sectors,
- cb, opaque, is_write);
+ cb, opaque, is_write, check_zero_write);
/* Start request */
qed_aio_next_io(acb, 0);
@@ -1268,7 +1342,7 @@ static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
void *opaque)
{
return bdrv_qed_aio_setup(bs, sector_num, qiov, nb_sectors,
- cb, opaque, false);
+ cb, opaque, false, false);
}
static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
@@ -1278,7 +1352,21 @@ static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
void *opaque)
{
return bdrv_qed_aio_setup(bs, sector_num, qiov, nb_sectors,
- cb, opaque, true);
+ cb, opaque, true, false);
+}
+
+/**
+ * Perform a write with a zero-check.
+ *
+ * Identical to bdrv_qed_aio_writev() except that check_zero_write is set,
+ * so the allocating-write path runs qed_is_zero_write() and can record
+ * all-zero cluster-aligned writes as zero clusters rather than allocating
+ * and writing data clusters.  Used by the copy-on-read path, where the
+ * data read from the backing file is frequently all zeros.
+ */
+static BlockDriverAIOCB *qed_aio_writev_check(BlockDriverState *bs,
+                                              int64_t sector_num,
+                                              QEMUIOVector *qiov,
+                                              int nb_sectors,
+                                              BlockDriverCompletionFunc *cb,
+                                              void *opaque)
+{
+    return bdrv_qed_aio_setup(bs, sector_num, qiov, nb_sectors,
+                              cb, opaque, true, true);
+}
typedef struct QEDStreamData {
@@ -1405,7 +1493,7 @@ static BlockDriverAIOCB *bdrv_qed_aio_stream(BlockDriverState *bs,
acb = qed_aio_setup(bs, sector_num, qiov,
cluster_size / BDRV_SECTOR_SIZE,
- qed_aio_stream_cb, stream_data, false);
+ qed_aio_stream_cb, stream_data, false, false);
stream_data->acb = acb;
qed_find_cluster(s, &acb->request, acb->cur_pos,
@@ -135,6 +135,7 @@ typedef struct QEDAIOCB {
bool is_write; /* false - read, true - write */
bool *finished; /* signal for cancel completion */
uint64_t end_pos; /* request end on block device, in bytes */
+ bool check_zero_write; /* true - check blocks for zero write */
/* User scatter-gather list */
QEMUIOVector *qiov;