diff mbox

[v18,3/7] introduce a new qom device to deal with panicked event

Message ID da2dd07b7332d797eb2882372910c440a7ced80e.1365564298.git.hutao@cn.fujitsu.com
State New
Headers show

Commit Message

Hu Tao April 10, 2013, 3:33 a.m. UTC
pvpanic device is used to send guest panic event from guest to qemu.

When guest panic happens, pvpanic device driver will write a event
number to IO port 0x505(which is the IO port occupied by pvpanic device,
by default). On receiving the event, pvpanic device will pause guest
cpu(s), and send a qmp event QEVENT_GUEST_PANICKED.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 hw/misc/Makefile.objs |   2 +
 hw/misc/pvpanic.c     | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 hw/misc/pvpanic.c

Comments

Markus Armbruster April 10, 2013, 11:54 a.m. UTC | #1
Hu Tao <hutao@cn.fujitsu.com> writes:

> pvpanic device is used to send guest panic event from guest to qemu.
>
> When guest panic happens, pvpanic device driver will write a event
> number to IO port 0x505(which is the IO port occupied by pvpanic device,
> by default). On receiving the event, pvpanic device will pause guest
> cpu(s), and send a qmp event QEVENT_GUEST_PANICKED.
>
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  hw/misc/Makefile.objs |   2 +
>  hw/misc/pvpanic.c     | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 118 insertions(+)
>  create mode 100644 hw/misc/pvpanic.c
>
> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
> index 03699c3..d72ea83 100644
> --- a/hw/misc/Makefile.objs
> +++ b/hw/misc/Makefile.objs
> @@ -38,3 +38,5 @@ obj-$(CONFIG_OMAP) += omap_tap.o
>  obj-$(CONFIG_PXA2XX) += pxa2xx_pcmcia.o
>  obj-$(CONFIG_SLAVIO) += slavio_misc.o
>  obj-$(CONFIG_ZYNQ) += zynq_slcr.o
> +
> +common-obj-y += pvpanic.o
> diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
> new file mode 100644
> index 0000000..5118fd7
> --- /dev/null
> +++ b/hw/misc/pvpanic.c
> @@ -0,0 +1,116 @@
> +/*
> + * QEMU simulated pvpanic device.
> + *
> + * Copyright Fujitsu, Corp. 2013
> + *
> + * Authors:
> + *     Wen Congyang <wency@cn.fujitsu.com>
> + *     Hu Tao <hutao@cn.fujitsu.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include <qapi/qmp/qobject.h>
> +#include <qapi/qmp/qjson.h>
> +#include <monitor/monitor.h>
> +#include <sysemu/sysemu.h>
> +#include <sysemu/kvm.h>
> +
> +/* The bit of supported pv event */
> +#define PVPANIC_F_PANICKED      0
> +
> +/* The pv event value */
> +#define PVPANIC_PANICKED        (1 << PVPANIC_F_PANICKED)
> +
> +#define TYPE_ISA_PVPANIC_DEVICE    "pvpanic"
> +#define ISA_PVPANIC_DEVICE(obj)    \
> +    OBJECT_CHECK(PVPanicState, (obj), TYPE_ISA_PVPANIC_DEVICE)
> +
> +static void panicked_mon_event(const char *action)
> +{
> +    QObject *data;
> +
> +    data = qobject_from_jsonf("{ 'action': %s }", action);
> +    monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
> +    qobject_decref(data);
> +}
> +
> +static void handle_event(int event)
> +{
> +    if (event == PVPANIC_PANICKED) {
> +        panicked_mon_event("pause");
> +        vm_stop(RUN_STATE_GUEST_PANICKED);
> +        return;
> +    }
> +}

I've asked these questions before, if you answered them, I missed it:

1. Your event value looks like it encodes multiple events as bits.  Only
one bit is defined so far (PVEVENT_F_PANICKED).  But you recognize this
bit only when the other bits are all off.  Why?  Won't we regret this if
we ever want to define additional bits?

2. You silently ignore unrecognized event values.  Shouldn't we log
them?

> +
> +#include "hw/isa/isa.h"
> +
> +typedef struct PVPanicState {
> +    ISADevice parent_obj;
> +
> +    MemoryRegion io;
> +    uint16_t ioport;
> +} PVPanicState;
> +
> +/* return supported events on read */
> +static uint64_t pvpanic_ioport_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +    return PVPANIC_PANICKED;
> +}
> +
> +static void pvpanic_ioport_write(void *opaque, hwaddr addr, uint64_t val,
> +                                 unsigned size)
> +{
> +    handle_event(val);
> +}
> +
> +static const MemoryRegionOps pvpanic_ops = {
> +    .read = pvpanic_ioport_read,
> +    .write = pvpanic_ioport_write,
> +    .impl = {
> +        .min_access_size = 1,
> +        .max_access_size = 1,
> +    },
> +};
> +
> +static int pvpanic_isa_initfn(ISADevice *dev)
> +{
> +    PVPanicState *s = ISA_PVPANIC_DEVICE(dev);
> +
> +    memory_region_init_io(&s->io, &pvpanic_ops, s, "pvpanic", 1);
> +    isa_register_ioport(dev, &s->io, s->ioport);
> +
> +    return 0;
> +}
> +
> +static Property pvpanic_isa_properties[] = {
> +    DEFINE_PROP_UINT16("ioport", PVPanicState, ioport, 0x505),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void pvpanic_isa_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +    ISADeviceClass *ic = ISA_DEVICE_CLASS(klass);
> +
> +    ic->init = pvpanic_isa_initfn;
> +    dc->no_user = 1;
> +    dc->props = pvpanic_isa_properties;
> +}
> +
> +static TypeInfo pvpanic_isa_info = {
> +    .name          = TYPE_ISA_PVPANIC_DEVICE,
> +    .parent        = TYPE_ISA_DEVICE,
> +    .instance_size = sizeof(PVPanicState),
> +    .class_init    = pvpanic_isa_class_init,
> +};
> +
> +static void pvpanic_register_types(void)
> +{
> +    type_register_static(&pvpanic_isa_info);
> +}
> +
> +type_init(pvpanic_register_types)
Hu Tao April 11, 2013, 6:39 a.m. UTC | #2
Hi,

On Wed, Apr 10, 2013 at 01:54:04PM +0200, Markus Armbruster wrote:
> Hu Tao <hutao@cn.fujitsu.com> writes:
> 
> > pvpanic device is used to send guest panic event from guest to qemu.
> >
> > When guest panic happens, pvpanic device driver will write a event
> > number to IO port 0x505(which is the IO port occupied by pvpanic device,
> > by default). On receiving the event, pvpanic device will pause guest
> > cpu(s), and send a qmp event QEVENT_GUEST_PANICKED.
> >
> > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > ---
> >  hw/misc/Makefile.objs |   2 +
> >  hw/misc/pvpanic.c     | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 118 insertions(+)
> >  create mode 100644 hw/misc/pvpanic.c
> >
> > diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
> > index 03699c3..d72ea83 100644
> > --- a/hw/misc/Makefile.objs
> > +++ b/hw/misc/Makefile.objs
> > @@ -38,3 +38,5 @@ obj-$(CONFIG_OMAP) += omap_tap.o
> >  obj-$(CONFIG_PXA2XX) += pxa2xx_pcmcia.o
> >  obj-$(CONFIG_SLAVIO) += slavio_misc.o
> >  obj-$(CONFIG_ZYNQ) += zynq_slcr.o
> > +
> > +common-obj-y += pvpanic.o
> > diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
> > new file mode 100644
> > index 0000000..5118fd7
> > --- /dev/null
> > +++ b/hw/misc/pvpanic.c
> > @@ -0,0 +1,116 @@
> > +/*
> > + * QEMU simulated pvpanic device.
> > + *
> > + * Copyright Fujitsu, Corp. 2013
> > + *
> > + * Authors:
> > + *     Wen Congyang <wency@cn.fujitsu.com>
> > + *     Hu Tao <hutao@cn.fujitsu.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include <qapi/qmp/qobject.h>
> > +#include <qapi/qmp/qjson.h>
> > +#include <monitor/monitor.h>
> > +#include <sysemu/sysemu.h>
> > +#include <sysemu/kvm.h>
> > +
> > +/* The bit of supported pv event */
> > +#define PVPANIC_F_PANICKED      0
> > +
> > +/* The pv event value */
> > +#define PVPANIC_PANICKED        (1 << PVPANIC_F_PANICKED)
> > +
> > +#define TYPE_ISA_PVPANIC_DEVICE    "pvpanic"
> > +#define ISA_PVPANIC_DEVICE(obj)    \
> > +    OBJECT_CHECK(PVPanicState, (obj), TYPE_ISA_PVPANIC_DEVICE)
> > +
> > +static void panicked_mon_event(const char *action)
> > +{
> > +    QObject *data;
> > +
> > +    data = qobject_from_jsonf("{ 'action': %s }", action);
> > +    monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
> > +    qobject_decref(data);
> > +}
> > +
> > +static void handle_event(int event)
> > +{
> > +    if (event == PVPANIC_PANICKED) {
> > +        panicked_mon_event("pause");
> > +        vm_stop(RUN_STATE_GUEST_PANICKED);
> > +        return;
> > +    }
> > +}
> 
> I've asked these questions before, if you answered them, I missed it:

Sorry, I must have missed them.

> 
> 1. Your event value looks like it encodes multiple events as bits.  Only
> one bit is defined so far (PVEVENT_F_PANICKED).  But you recognize this

It was the intention to support multiple events, but Gleb suggested to do
only panic event. So pvpanic device supports only panic event.

> bit only when the other bits are all off.  Why?  Won't we regret this if
> we ever want to define additional bits?

Other bits are reserved now, and must be written as 0. (see patch 5/7)
If we define these bits in the further for whatever purposes, we have to
update code here.

> 
> 2. You silently ignore unrecognized event values.  Shouldn't we log
> them?

See above.
Markus Armbruster April 11, 2013, 8:52 a.m. UTC | #3
Hu Tao <hutao@cn.fujitsu.com> writes:

> Hi,
>
> On Wed, Apr 10, 2013 at 01:54:04PM +0200, Markus Armbruster wrote:
>> Hu Tao <hutao@cn.fujitsu.com> writes:
>> 
>> > pvpanic device is used to send guest panic event from guest to qemu.
>> >
>> > When guest panic happens, pvpanic device driver will write a event
>> > number to IO port 0x505(which is the IO port occupied by pvpanic device,
>> > by default). On receiving the event, pvpanic device will pause guest
>> > cpu(s), and send a qmp event QEVENT_GUEST_PANICKED.
>> >
>> > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
>> > ---
>> >  hw/misc/Makefile.objs |   2 +
>> >  hw/misc/pvpanic.c | 116
>> > ++++++++++++++++++++++++++++++++++++++++++++++++++
>> >  2 files changed, 118 insertions(+)
>> >  create mode 100644 hw/misc/pvpanic.c
>> >
>> > diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
>> > index 03699c3..d72ea83 100644
>> > --- a/hw/misc/Makefile.objs
>> > +++ b/hw/misc/Makefile.objs
>> > @@ -38,3 +38,5 @@ obj-$(CONFIG_OMAP) += omap_tap.o
>> >  obj-$(CONFIG_PXA2XX) += pxa2xx_pcmcia.o
>> >  obj-$(CONFIG_SLAVIO) += slavio_misc.o
>> >  obj-$(CONFIG_ZYNQ) += zynq_slcr.o
>> > +
>> > +common-obj-y += pvpanic.o
>> > diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
>> > new file mode 100644
>> > index 0000000..5118fd7
>> > --- /dev/null
>> > +++ b/hw/misc/pvpanic.c
>> > @@ -0,0 +1,116 @@
>> > +/*
>> > + * QEMU simulated pvpanic device.
>> > + *
>> > + * Copyright Fujitsu, Corp. 2013
>> > + *
>> > + * Authors:
>> > + *     Wen Congyang <wency@cn.fujitsu.com>
>> > + *     Hu Tao <hutao@cn.fujitsu.com>
>> > + *
>> > + * This work is licensed under the terms of the GNU GPL, version
>> > 2 or later.
>> > + * See the COPYING file in the top-level directory.
>> > + *
>> > + */
>> > +
>> > +#include <qapi/qmp/qobject.h>
>> > +#include <qapi/qmp/qjson.h>
>> > +#include <monitor/monitor.h>
>> > +#include <sysemu/sysemu.h>
>> > +#include <sysemu/kvm.h>
>> > +
>> > +/* The bit of supported pv event */
>> > +#define PVPANIC_F_PANICKED      0
>> > +
>> > +/* The pv event value */
>> > +#define PVPANIC_PANICKED        (1 << PVPANIC_F_PANICKED)
>> > +
>> > +#define TYPE_ISA_PVPANIC_DEVICE    "pvpanic"
>> > +#define ISA_PVPANIC_DEVICE(obj)    \
>> > +    OBJECT_CHECK(PVPanicState, (obj), TYPE_ISA_PVPANIC_DEVICE)
>> > +
>> > +static void panicked_mon_event(const char *action)
>> > +{
>> > +    QObject *data;
>> > +
>> > +    data = qobject_from_jsonf("{ 'action': %s }", action);
>> > +    monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
>> > +    qobject_decref(data);
>> > +}
>> > +
>> > +static void handle_event(int event)
>> > +{
>> > +    if (event == PVPANIC_PANICKED) {
>> > +        panicked_mon_event("pause");
>> > +        vm_stop(RUN_STATE_GUEST_PANICKED);
>> > +        return;
>> > +    }
>> > +}
>> 
>> I've asked these questions before, if you answered them, I missed it:
>
> Sorry, I must have missed them.
>
>> 
>> 1. Your event value looks like it encodes multiple events as bits.  Only
>> one bit is defined so far (PVEVENT_F_PANICKED).  But you recognize this
>
> It was the intention to support multiple events, but Gleb suggested to do
> only panic event. So pvpanic device supports only panic event.
>
>> bit only when the other bits are all off.  Why?  Won't we regret this if
>> we ever want to define additional bits?
>
> Other bits are reserved now, and must be written as 0. (see patch 5/7)
> If we define these bits in the further for whatever purposes, we have to
> update code here.

Not only that, we have to make sure all combinations of old/new device
and old/new guest device drivers work.

>> 2. You silently ignore unrecognized event values.  Shouldn't we log
>> them?
>
> See above.

PATCH 5/7 describes your your device's guest ABI:

    pvpanic uses port 0x505 to receive a panic event from the guest. On
    write, bit 0 is set to indicate guest panic has happened. On read, bit
    0 is set to indicate guest panic notification is supported. Remaining
    bits are reserved, and should be written as 0, and ignored on read.

For what it's worth, real hardware usually doesn't work that way.  It
usually ignores the bits it doesn't implement, and recognizes the bits
it implements regardless of what's written to the others.

Let's explore extending the device: add another event, and encode it as
bit 1.  Then read returns 3, and the guest may write 0, 1, 2 or 3.

New device, old guest device driver: driver reads 3.  A scrupulously
correct driver masks out the bits it doesn't understand (3 & 1), gets 1
as it expects, and continues to work as before.  A sloppy driver may not
check the bits; also continues to work as before.  A confused driver may
choke on the value 3, and fail, most probably in a way that's visible in
dmesg.

Old device, new guest device driver: driver reads 1, masks out the bits
it doesn't understand (1 & 3), gets 1.  Say the driver wants to report
both events.  A well-behaved driver recognizes that the device doesn't
understand the new event, and writes 1.  Works.  A sloppy driver may
write 3.  Your device ignores that write silently.

I don't like that.  I'd prefer it to work like real hardware usually
does: recognize bit 0 even when other bits are set.

ABI description becomes:

    pvpanic exposes a single I/O port, by default 0x505.  Each bit of
    the port corresponds to an event.

    On read, the bits recognized by the device are set.  Software should
    ignore bits it doesn't recognize.

    On write, the bits not recognized by the device are ignored.
    Software should set only bits both itself and the device recognize.

    Currently, only bit 0 is recognized.  Setting it indicates a guest
    panic has happened.

Regardless of whether you make the device ignore unrecognized bits or
not, please consider logging when a guest sets unrecognized bits.  That
way we can catch misbehaving guest device drivers much more easily.
Recommend to log only the first occurence, to prevent denial of service.
diff mbox

Patch

diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index 03699c3..d72ea83 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -38,3 +38,5 @@  obj-$(CONFIG_OMAP) += omap_tap.o
 obj-$(CONFIG_PXA2XX) += pxa2xx_pcmcia.o
 obj-$(CONFIG_SLAVIO) += slavio_misc.o
 obj-$(CONFIG_ZYNQ) += zynq_slcr.o
+
+common-obj-y += pvpanic.o
diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
new file mode 100644
index 0000000..5118fd7
--- /dev/null
+++ b/hw/misc/pvpanic.c
@@ -0,0 +1,116 @@ 
+/*
+ * QEMU simulated pvpanic device.
+ *
+ * Copyright Fujitsu, Corp. 2013
+ *
+ * Authors:
+ *     Wen Congyang <wency@cn.fujitsu.com>
+ *     Hu Tao <hutao@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <qapi/qmp/qobject.h>
+#include <qapi/qmp/qjson.h>
+#include <monitor/monitor.h>
+#include <sysemu/sysemu.h>
+#include <sysemu/kvm.h>
+
+/* The bit of supported pv event */
+#define PVPANIC_F_PANICKED      0
+
+/* The pv event value */
+#define PVPANIC_PANICKED        (1 << PVPANIC_F_PANICKED)
+
+#define TYPE_ISA_PVPANIC_DEVICE    "pvpanic"
+#define ISA_PVPANIC_DEVICE(obj)    \
+    OBJECT_CHECK(PVPanicState, (obj), TYPE_ISA_PVPANIC_DEVICE)
+
+static void panicked_mon_event(const char *action)
+{
+    QObject *data;
+
+    data = qobject_from_jsonf("{ 'action': %s }", action);
+    monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
+    qobject_decref(data);
+}
+
+static void handle_event(int event)
+{
+    if (event == PVPANIC_PANICKED) {
+        panicked_mon_event("pause");
+        vm_stop(RUN_STATE_GUEST_PANICKED);
+        return;
+    }
+}
+
+#include "hw/isa/isa.h"
+
+typedef struct PVPanicState {
+    ISADevice parent_obj;
+
+    MemoryRegion io;
+    uint16_t ioport;
+} PVPanicState;
+
+/* return supported events on read */
+static uint64_t pvpanic_ioport_read(void *opaque, hwaddr addr, unsigned size)
+{
+    return PVPANIC_PANICKED;
+}
+
+static void pvpanic_ioport_write(void *opaque, hwaddr addr, uint64_t val,
+                                 unsigned size)
+{
+    handle_event(val);
+}
+
+static const MemoryRegionOps pvpanic_ops = {
+    .read = pvpanic_ioport_read,
+    .write = pvpanic_ioport_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+};
+
+static int pvpanic_isa_initfn(ISADevice *dev)
+{
+    PVPanicState *s = ISA_PVPANIC_DEVICE(dev);
+
+    memory_region_init_io(&s->io, &pvpanic_ops, s, "pvpanic", 1);
+    isa_register_ioport(dev, &s->io, s->ioport);
+
+    return 0;
+}
+
+static Property pvpanic_isa_properties[] = {
+    DEFINE_PROP_UINT16("ioport", PVPanicState, ioport, 0x505),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pvpanic_isa_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ISADeviceClass *ic = ISA_DEVICE_CLASS(klass);
+
+    ic->init = pvpanic_isa_initfn;
+    dc->no_user = 1;
+    dc->props = pvpanic_isa_properties;
+}
+
+static TypeInfo pvpanic_isa_info = {
+    .name          = TYPE_ISA_PVPANIC_DEVICE,
+    .parent        = TYPE_ISA_DEVICE,
+    .instance_size = sizeof(PVPanicState),
+    .class_init    = pvpanic_isa_class_init,
+};
+
+static void pvpanic_register_types(void)
+{
+    type_register_static(&pvpanic_isa_info);
+}
+
+type_init(pvpanic_register_types)