diff mbox

[v2,07/22] block: Add statistics for failed and invalid I/O operations

Message ID 84322c385b597a29ec217efd7fe99560a0bbc5d8.1443793122.git.berto@igalia.com
State New
Headers show

Commit Message

Alberto Garcia Oct. 2, 2015, 2:26 p.m. UTC
This patch adds the block_acct_failed() and block_acct_invalid()
functions to allow keeping track of failed and invalid I/O operations.

The number of failed and invalid operations is exposed in
BlockDeviceStats.

We don't keep track of the time spent on invalid operations because
they are cancelled immediately when they are started.

Signed-off-by: Alberto Garcia <berto@igalia.com>
---
 block/accounting.c         | 18 ++++++++++++++++++
 block/qapi.c               | 10 ++++++++++
 include/block/accounting.h |  4 ++++
 qapi/block-core.json       | 23 ++++++++++++++++++++++-
 qmp-commands.hx            | 12 ++++++++++++
 5 files changed, 66 insertions(+), 1 deletion(-)

Comments

Stefan Hajnoczi Oct. 13, 2015, 3:42 p.m. UTC | #1
On Fri, Oct 02, 2015 at 05:26:17PM +0300, Alberto Garcia wrote:
> +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
> +{
> +    int64_t time_ns = qemu_clock_get_ns(clock_type);
> +
> +    assert(cookie->type < BLOCK_MAX_IOTYPE);
> +
> +    stats->failed_ops[cookie->type]++;
> +    stats->total_time_ns[cookie->type] += time_ns - cookie->start_time_ns;
> +    stats->last_access_time_ns = time_ns;
> +}
> +
> +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
> +{
> +    assert(type < BLOCK_MAX_IOTYPE);
> +
> +    stats->invalid_ops[type]++;
> +    stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
> +}

block_acct_failed() updates total_time_ns[] but block_acct_invalid()
does not.  I guess that's because block_acct_invalid() is expected to
happen during request submission and has effectively 0 duration?

This deserves a comment.
Alberto Garcia Oct. 13, 2015, 3:51 p.m. UTC | #2
On Tue 13 Oct 2015 05:42:33 PM CEST, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> On Fri, Oct 02, 2015 at 05:26:17PM +0300, Alberto Garcia wrote:
>> +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
>> +{
>> +    int64_t time_ns = qemu_clock_get_ns(clock_type);
>> +
>> +    assert(cookie->type < BLOCK_MAX_IOTYPE);
>> +
>> +    stats->failed_ops[cookie->type]++;
>> +    stats->total_time_ns[cookie->type] += time_ns - cookie->start_time_ns;
>> +    stats->last_access_time_ns = time_ns;
>> +}
>> +
>> +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
>> +{
>> +    assert(type < BLOCK_MAX_IOTYPE);
>> +
>> +    stats->invalid_ops[type]++;
>> +    stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
>> +}
>
> block_acct_failed() updates total_time_ns[] but block_acct_invalid()
> does not.  I guess that's because block_acct_invalid() is expected to
> happen during request submission and has effectively 0 duration?

That's right, I put it in the commit message but I guess it's a good
idea to mention it here as well.

I was actually updating total_time_ns[] in block_acct_invalid() as well,
but then when I was adding the block_acct_invalid() calls to the device
models I realized that most/all of them happen before we even call
block_acct_start(). So I decided to leave it like it is now, first
because it makes the code simpler and second because as you say there
would be no I/O to measure.

Berto
diff mbox

Patch

diff --git a/block/accounting.c b/block/accounting.c
index d427fa8..eb86a47 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -51,6 +51,24 @@  void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
     stats->last_access_time_ns = time_ns;
 }
 
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+    int64_t time_ns = qemu_clock_get_ns(clock_type);
+
+    assert(cookie->type < BLOCK_MAX_IOTYPE);
+
+    stats->failed_ops[cookie->type]++;
+    stats->total_time_ns[cookie->type] += time_ns - cookie->start_time_ns;
+    stats->last_access_time_ns = time_ns;
+}
+
+void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
+{
+    assert(type < BLOCK_MAX_IOTYPE);
+
+    stats->invalid_ops[type]++;
+    stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
+}
 
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                       int num_requests)
diff --git a/block/qapi.c b/block/qapi.c
index 518b658..1b787ba 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -353,6 +353,16 @@  static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
         s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
         s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
         s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
+
+        s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
+        s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
+        s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
+
+        s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
+        s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
+        s->stats->invalid_flush_operations =
+            stats->invalid_ops[BLOCK_ACCT_FLUSH];
+
         s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
         s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
         s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 4b2b999..b50e3cc 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -38,6 +38,8 @@  enum BlockAcctType {
 typedef struct BlockAcctStats {
     uint64_t nr_bytes[BLOCK_MAX_IOTYPE];
     uint64_t nr_ops[BLOCK_MAX_IOTYPE];
+    uint64_t invalid_ops[BLOCK_MAX_IOTYPE];
+    uint64_t failed_ops[BLOCK_MAX_IOTYPE];
     uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
     uint64_t merged[BLOCK_MAX_IOTYPE];
     int64_t last_access_time_ns;
@@ -52,6 +54,8 @@  typedef struct BlockAcctCookie {
 void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
                       int64_t bytes, enum BlockAcctType type);
 void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie);
+void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type);
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                            int num_requests);
 int64_t block_acct_idle_time_ns(BlockAcctStats *stats);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 903884b..f1c7277 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -452,6 +452,24 @@ 
 #                miliseconds. If the field is absent it means that
 #                there haven't been any operations yet (Since 2.5).
 #
+# @failed_rd_operations: The number of failed read operations
+#                        performed by the device (Since 2.5)
+#
+# @failed_wr_operations: The number of failed write operations
+#                        performed by the device (Since 2.5)
+#
+# @failed_flush_operations: The number of failed flush operations
+#                           performed by the device (Since 2.5)
+#
+# @invalid_rd_operations: The number of invalid read operations
+#                          performed by the device (Since 2.5)
+#
+# @invalid_wr_operations: The number of invalid write operations
+#                         performed by the device (Since 2.5)
+#
+# @invalid_flush_operations: The number of invalid flush operations
+#                            performed by the device (Since 2.5)
+#
 # Since: 0.14.0
 ##
 { 'struct': 'BlockDeviceStats',
@@ -459,7 +477,10 @@ 
            'wr_operations': 'int', 'flush_operations': 'int',
            'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int',
            'rd_total_time_ns': 'int', 'wr_highest_offset': 'int',
-           'rd_merged': 'int', 'wr_merged': 'int', '*idle_time_ns': 'int' } }
+           'rd_merged': 'int', 'wr_merged': 'int', '*idle_time_ns': 'int',
+           'failed_rd_operations': 'int', 'failed_wr_operations': 'int',
+           'failed_flush_operations': 'int', 'invalid_rd_operations': 'int',
+           'invalid_wr_operations': 'int', 'invalid_flush_operations': 'int'  } }
 
 ##
 # @BlockStats:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 2f48bb6..4985e3b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2516,6 +2516,18 @@  Each json-object contain the following:
                       miliseconds. If the field is absent it means
                       that there haven't been any operations yet
                       (json-int, optional)
+    - "failed_rd_operations": number of failed read operations
+                              (json-int)
+    - "failed_wr_operations": number of failed write operations
+                              (json-int)
+    - "failed_flush_operations": number of failed flush operations
+                               (json-int)
+    - "invalid_rd_operations": number of invalid read operations
+                               (json-int)
+    - "invalid_wr_operations": number of invalid write operations
+                               (json-int)
+    - "invalid_flush_operations": number of invalid flush operations
+                                  (json-int)
 - "parent": Contains recursively the statistics of the underlying
             protocol (e.g. the host file for a qcow2 image). If there is
             no underlying protocol, this field is omitted