diff mbox series

[RFC,1/2] qdev: add debug interface to kick/call eventfd

Message ID 20210115002730.1279-2-dongli.zhang@oracle.com
State New
Headers show
Series Add debug interface to kick/call on purpose | expand

Commit Message

Dongli Zhang Jan. 15, 2021, 12:27 a.m. UTC
The virtio device/driver (e.g., vhost-scsi) may hang due to the loss of IRQ
or the loss of doorbell register kick, e.g.,

https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg01711.html

This patch adds a new debug interface 'DeviceEvent' to DeviceClass to help
narrow down whether the issue is due to loss of irq/kick. So far the new
interface handles only two events: 'call' and 'kick'. Any device (e.g.,
e1000e or vhost-scsi) may implement it (e.g., via eventfd, MSI-X or legacy
IRQ).

The 'call' event lets the admin deliberately inject an irq for a specific
device (e.g., vhost-scsi) from QEMU/host into the VM, while the 'kick' event
lets the admin deliberately kick the doorbell of a specific device at the
QEMU/host side.

Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
---
 hmp-commands.hx        | 14 ++++++++++++++
 include/hw/qdev-core.h |  6 ++++++
 include/monitor/hmp.h  |  1 +
 qapi/qdev.json         | 30 ++++++++++++++++++++++++++++++
 softmmu/qdev-monitor.c | 41 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 92 insertions(+)

Comments

Eric Blake Jan. 19, 2021, 10:20 p.m. UTC | #1
On 1/14/21 6:27 PM, Dongli Zhang wrote:
> The virtio device/driver (e.g., vhost-scsi) may hang due to the lost of IRQ

s/lost/loss/

> or the lost of doorbell register kick, e.g.,

and again

> 
> https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg01711.html
> 
> This patch adds a new debug interface 'DeviceEvent' to DeviceClass to help
> narrow down if the issue is due to lost of irq/kick. So far the new

and again

> interface handles only two events: 'call' and 'kick'. Any device (e.g.,
> e1000e or vhost-scsi) may implement (e.g., via eventfd, MSI-X or legacy
> IRQ).
> 
> The 'call' is to inject irq on purpose by admin for a specific device (e.g.,
> vhost-scsi) from QEMU/host to VM, while the 'kick' is to kick the doorbell
> on purpose by admin at QEMU/host side for a specific device.
> 
> Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
> ---

> +++ b/qapi/qdev.json
> @@ -124,3 +124,33 @@
>  ##
>  { 'event': 'DEVICE_DELETED',
>    'data': { '*device': 'str', 'path': 'str' } }
> +
> +##
> +# @x-debug-device-event:
> +#
> +# Generate device event for a specific device queue
> +#
> +# @dev: device path
> +#
> +# @event: event (e.g., kick or call) to trigger
> +#
> +# @queue: queue id
> +#
> +# Returns: Nothing on success
> +#
> +# Since: 5.3

The next release is named 6.0, not 5.3.

> +#
> +# Notes: This is used to debug VM driver hang issue. The 'kick' event is to
> +#        send notification to QEMU/vhost while the 'call' event is to
> +#        interrupt VM on purpose.
> +#
> +# Example:
> +#
> +# -> { "execute": "x-debug-device_event",
> +#      "arguments": { "dev": "/machine/peripheral/vscsi0", "event": "kick",
> +#                     "queue": "1" } }

Your example has queue typed as a string...

> +# <- { "return": {} }
> +#
> +##
> +{ 'command': 'x-debug-device-event',
> +  'data': {'dev': 'str', 'event': 'str', 'queue': 'int'} }

...which does not match its actual type as an integer.

event should be an enum type (the finite choice of 'kick' or 'call', and
introspectible if we add new choices in the future) rather than an
open-coded str.
diff mbox series

Patch

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 73e0832ea1..0fbb72568f 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1867,3 +1867,17 @@  ERST
         .flags      = "p",
     },
 
+    {
+        .name       = "x-debug-device-event",
+        .args_type  = "dev:s,event:s,queue:l",
+        .params     = "dev event queue",
+        .help       = "generate device event for a specific device queue",
+        .cmd        = hmp_x_debug_device_event,
+        .flags      = "p",
+    },
+
+SRST
+``x-debug-device-event`` *dev* *event* *queue*
+  Generate device event *event* for specific *queue* of *dev*
+ERST
+
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index bafc311bfa..83df3bab89 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -29,9 +29,14 @@  typedef enum DeviceCategory {
     DEVICE_CATEGORY_MAX
 } DeviceCategory;
 
+#define DEVICE_EVENT_CALL 1
+#define DEVICE_EVENT_KICK 2
+
 typedef void (*DeviceRealize)(DeviceState *dev, Error **errp);
 typedef void (*DeviceUnrealize)(DeviceState *dev);
 typedef void (*DeviceReset)(DeviceState *dev);
+typedef void (*DeviceEvent)(DeviceState *dev, int event, int queue,
+                            Error **errp);
 typedef void (*BusRealize)(BusState *bus, Error **errp);
 typedef void (*BusUnrealize)(BusState *bus);
 
@@ -132,6 +137,7 @@  struct DeviceClass {
     DeviceReset reset;
     DeviceRealize realize;
     DeviceUnrealize unrealize;
+    DeviceEvent event;
 
     /* device state */
     const VMStateDescription *vmsd;
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index ed2913fd18..ffb48fce06 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -133,5 +133,6 @@  void hmp_info_replay(Monitor *mon, const QDict *qdict);
 void hmp_replay_break(Monitor *mon, const QDict *qdict);
 void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
 void hmp_replay_seek(Monitor *mon, const QDict *qdict);
+void hmp_x_debug_device_event(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/qapi/qdev.json b/qapi/qdev.json
index b83178220b..6fc7a5bfc1 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -124,3 +124,33 @@ 
 ##
 { 'event': 'DEVICE_DELETED',
   'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @x-debug-device-event:
+#
+# Generate device event for a specific device queue
+#
+# @dev: device path
+#
+# @event: event (e.g., kick or call) to trigger
+#
+# @queue: queue id
+#
+# Returns: Nothing on success
+#
+# Since: 6.0
+#
+# Notes: This is used to debug VM driver hang issue. The 'kick' event is to
+#        send notification to QEMU/vhost while the 'call' event is to
+#        interrupt VM on purpose.
+#
+# Example:
+#
+# -> { "execute": "x-debug-device-event",
+#      "arguments": { "dev": "/machine/peripheral/vscsi0", "event": "kick",
+#                     "queue": 1 } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'x-debug-device-event',
+  'data': {'dev': 'str', 'event': 'str', 'queue': 'int'} }
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index 8dc656becc..63dee5f1a6 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -915,6 +915,47 @@  void hmp_device_del(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, err);
 }
 
+void qmp_x_debug_device_event(const char *dev, const char *event,
+                              int64_t queue, Error **errp)
+{
+    DeviceState *device = find_device_state(dev, NULL);
+    DeviceClass *dc;
+    int evt;
+
+    if (!device) {
+        error_setg(errp, "Device %s not found", dev);
+        return;
+    }
+
+    dc = DEVICE_GET_CLASS(device);
+    if (!dc->event) {
+        error_setg(errp, "device_event is not supported");
+        return;
+    }
+
+    if (!strcmp(event, "kick"))
+        evt = DEVICE_EVENT_KICK;
+    else if (!strcmp(event, "call"))
+        evt = DEVICE_EVENT_CALL;
+    else {
+        error_setg(errp, "Unsupported event %s", event);
+        return;
+    }
+
+    dc->event(device, evt, queue, errp);
+}
+
+void hmp_x_debug_device_event(Monitor *mon, const QDict *qdict)
+{
+    const char *dev = qdict_get_str(qdict, "dev");
+    const char *event = qdict_get_str(qdict, "event");
+    int queue = qdict_get_try_int(qdict, "queue", -1);
+    Error *err = NULL;
+
+    qmp_x_debug_device_event(dev, event, queue, &err);
+    hmp_handle_error(mon, err);
+}
+
 BlockBackend *blk_by_qdev_id(const char *id, Error **errp)
 {
     DeviceState *dev;