diff mbox series

[qemu,v2,2/2] ppc/spapr: Implement H_WATCHDOG

Message ID 20220617060703.951747-3-aik@ozlabs.ru
State New
Headers show
Series ppc/spapr: Implement H_WATCHDOG | expand

Commit Message

Alexey Kardashevskiy June 17, 2022, 6:07 a.m. UTC
The new PAPR 2.12 defines a watchdog facility managed via the new
H_WATCHDOG hypercall.

This adds H_WATCHDOG support which a proposed driver for pseries uses:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=303120

This was tested by running QEMU with a debug kernel and command line:
-append \
 "pseries-wdt.timeout=60 pseries-wdt.nowayout=1 pseries-wdt.action=2"

and running "echo V > /dev/watchdog0" inside the VM.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v2:
* QOM'ed timers, "action" and "expire" are available via QMP
* removed @timeout from SpaprWatchdog
* moved the driver to hw/watchdog
* fixed error handling in the hcall handler
* used new SETFIELD/GETFIELD
---
 include/hw/ppc/spapr.h       |  29 +++-
 hw/ppc/spapr.c               |   4 +
 hw/watchdog/spapr_watchdog.c | 248 +++++++++++++++++++++++++++++++++++
 hw/watchdog/meson.build      |   1 +
 hw/watchdog/trace-events     |   7 +
 5 files changed, 288 insertions(+), 1 deletion(-)
 create mode 100644 hw/watchdog/spapr_watchdog.c

Comments

Daniel Henrique Barboza June 17, 2022, 4:49 p.m. UTC | #1
On 6/17/22 03:07, Alexey Kardashevskiy wrote:
> The new PAPR 2.12 defines a watchdog facility managed via the new
> H_WATCHDOG hypercall.
> 
> This adds H_WATCHDOG support which a proposed driver for pseries uses:
> https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=303120
> 
> This was tested by running QEMU with a debug kernel and command line:
> -append \
>   "pseries-wdt.timeout=60 pseries-wdt.nowayout=1 pseries-wdt.action=2"
> 
> and running "echo V > /dev/watchdog0" inside the VM.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---

LGTM. The watchdogs can be found under /machines/wdtN:

(qemu) info qom-tree /machine/wdt1
/wdt1 (spapr-wdt)
(qemu) info qom-tree /machine/wdt2
/wdt2 (spapr-wdt)
(qemu) info qom-tree /machine/wdt3
/wdt3 (spapr-wdt)
(qemu) info qom-tree /machine/wdt4
/wdt4 (spapr-wdt)


Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>

> Changes:
> v2:
> * QOM'ed timers, "action" and "expire" are available via QMP
> * removed @timeout from SpaprWatchdog
> * moved the driver to hw/watchdog
> * fixed error handling in the hcall handler
> * used new SETFIELD/GETFIELD
> ---
>   include/hw/ppc/spapr.h       |  29 +++-
>   hw/ppc/spapr.c               |   4 +
>   hw/watchdog/spapr_watchdog.c | 248 +++++++++++++++++++++++++++++++++++
>   hw/watchdog/meson.build      |   1 +
>   hw/watchdog/trace-events     |   7 +
>   5 files changed, 288 insertions(+), 1 deletion(-)
>   create mode 100644 hw/watchdog/spapr_watchdog.c
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 072dda2c7265..ef1e38abd5c7 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -164,6 +164,25 @@ struct SpaprMachineClass {
>       SpaprIrq *irq;
>   };
>   
> +#define WDT_MAX_WATCHDOGS       4      /* Maximum number of watchdog devices */
> +
> +#define WDT_HARD_POWER_OFF      0
> +#define WDT_HARD_RESTART        1
> +#define WDT_DUMP_RESTART        2
> +
> +#define TYPE_SPAPR_WDT "spapr-wdt"
> +OBJECT_DECLARE_SIMPLE_TYPE(SpaprWatchdog, SPAPR_WDT)
> +
> +typedef struct SpaprWatchdog {
> +    /*< private >*/
> +    DeviceState parent_obj;
> +    /*< public >*/
> +
> +    unsigned num;
> +    QEMUTimer timer;
> +    uint8_t action;
> +} SpaprWatchdog;
> +
>   /**
>    * SpaprMachineState:
>    */
> @@ -264,6 +283,8 @@ struct SpaprMachineState {
>       uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
>   
>       Error *fwnmi_migration_blocker;
> +
> +    SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
>   };
>   
>   #define H_SUCCESS         0
> @@ -344,6 +365,7 @@ struct SpaprMachineState {
>   #define H_P7              -60
>   #define H_P8              -61
>   #define H_P9              -62
> +#define H_NOOP            -63
>   #define H_UNSUPPORTED     -67
>   #define H_OVERLAP         -68
>   #define H_UNSUPPORTED_FLAG -256
> @@ -564,8 +586,9 @@ struct SpaprMachineState {
>   #define H_SCM_HEALTH            0x400
>   #define H_RPT_INVALIDATE        0x448
>   #define H_SCM_FLUSH             0x44C
> +#define H_WATCHDOG              0x45C
>   
> -#define MAX_HCALL_OPCODE        H_SCM_FLUSH
> +#define MAX_HCALL_OPCODE        H_WATCHDOG
>   
>   /* The hcalls above are standardized in PAPR and implemented by pHyp
>    * as well.
> @@ -1027,6 +1050,7 @@ extern const VMStateDescription vmstate_spapr_cap_large_decr;
>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
>   extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
> +extern const VMStateDescription vmstate_spapr_wdt;
>   
>   static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
>   {
> @@ -1063,4 +1087,7 @@ target_ulong spapr_vof_client_architecture_support(MachineState *ms,
>                                                      target_ulong ovec_addr);
>   void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
>   
> +/* H_WATCHDOG */
> +void spapr_watchdog_init(SpaprMachineState *spapr);
> +
>   #endif /* HW_SPAPR_H */
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index fd4942e8813c..9a5382d5270f 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -898,6 +898,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
>           add_str(hypertas, "hcall-hpt-resize");
>       }
>   
> +    add_str(hypertas, "hcall-watchdog");
> +
>       _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
>                        hypertas->str, hypertas->len));
>       g_string_free(hypertas, TRUE);
> @@ -3051,6 +3053,8 @@ static void spapr_machine_init(MachineState *machine)
>           spapr->vof->fw_size = fw_size; /* for claim() on itself */
>           spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
>       }
> +
> +    spapr_watchdog_init(spapr);
>   }
>   
>   #define DEFAULT_KVM_TYPE "auto"
> diff --git a/hw/watchdog/spapr_watchdog.c b/hw/watchdog/spapr_watchdog.c
> new file mode 100644
> index 000000000000..aeaf7c52cbad
> --- /dev/null
> +++ b/hw/watchdog/spapr_watchdog.c
> @@ -0,0 +1,248 @@
> +/*
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "target/ppc/cpu.h"
> +#include "migration/vmstate.h"
> +#include "trace.h"
> +
> +#include "hw/ppc/spapr.h"
> +
> +/*
> + * Bits 47: "leaveOtherWatchdogsRunningOnTimeout", specified on
> + * the "Start watchdog" operation,
> + * 0 - stop out-standing watchdogs on timeout,
> + * 1 - leave outstanding watchdogs running on timeout
> + */
> +#define PSERIES_WDTF_LEAVE_OTHER    PPC_BIT(47)
> +
> +/*    Bits 48-55: "operation" */
> +#define PSERIES_WDTF_OP(op)             SETFIELD(PPC_BITMASK(48, 55), 0, (op))
> +#define PSERIES_WDTF_OP_START           PSERIES_WDTF_OP(0x1)
> +#define PSERIES_WDTF_OP_STOP            PSERIES_WDTF_OP(0x2)
> +#define PSERIES_WDTF_OP_QUERY           PSERIES_WDTF_OP(0x3)
> +#define PSERIES_WDTF_OP_QUERY_LPM       PSERIES_WDTF_OP(0x4)
> +
> +/*    Bits 56-63: "timeoutAction" */
> +#define PSERIES_WDTF_ACTION(ac)         SETFIELD(PPC_BITMASK(56, 63), 0, (ac))
> +#define PSERIES_WDTF_ACTION_HARD_POWER_OFF  PSERIES_WDTF_ACTION(0x1)
> +#define PSERIES_WDTF_ACTION_HARD_RESTART    PSERIES_WDTF_ACTION(0x2)
> +#define PSERIES_WDTF_ACTION_DUMP_RESTART    PSERIES_WDTF_ACTION(0x3)
> +#define PSERIES_WDTF_RESERVED           PPC_BITMASK(0, 46)
> +
> +/*
> + * For the "Query watchdog capabilities" operation, a uint64 structure
> + * defined as:
> + * Bits 0-15: The minimum supported timeout in milliseconds
> + * Bits 16-31: The number of watchdogs supported
> + * Bits 32-63: Reserved
> + */
> +#define PSERIES_WDTQ_MIN_TIMEOUT(ms)    SETFIELD(PPC_BITMASK(0, 15), 0, (ms))
> +#define PSERIES_WDTQ_NUM(n)             SETFIELD(PPC_BITMASK(16, 31), 0, (n))
> +
> +/*
> + * For the "Query watchdog LPM requirement" operation:
> + * 1 = The given "watchdogNumber" must be stopped prior to suspending
> + * 2 = The given "watchdogNumber" does not have to be stopped prior to
> + * suspending
> + */
> +#define PSERIES_WDTQL_STOPPED               1
> +#define PSERIES_WDTQL_QUERY_NOT_STOPPED     2
> +
> +#define WDT_MIN_TIMEOUT 1 /* 1ms */
> +
> +static void watchdog_expired(void *pw)
> +{
> +    struct SpaprWatchdog *w = pw;
> +    CPUState *cs;
> +
> +    trace_spapr_watchdog_expired(w->num, w->action);
> +    switch (w->action) {
> +    case WDT_HARD_POWER_OFF:
> +        qemu_system_vmstop_request(RUN_STATE_SHUTDOWN);
> +        break;
> +    case WDT_HARD_RESTART:
> +        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
> +        break;
> +    case WDT_DUMP_RESTART:
> +        CPU_FOREACH(cs) {
> +            async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
> +        }
> +        break;
> +    }
> +}
> +
> +static target_ulong watchdog_stop(unsigned watchdogNumber,
> +                                  struct SpaprWatchdog *w)
> +{
> +    target_ulong ret = H_NOOP;
> +
> +    if (timer_pending(&w->timer)) {
> +        timer_del(&w->timer);
> +        ret = H_SUCCESS;
> +    }
> +    trace_spapr_watchdog_stop(watchdogNumber, ret);
> +
> +    return ret;
> +}
> +
> +static target_ulong h_watchdog(PowerPCCPU *cpu,
> +                               SpaprMachineState *spapr,
> +                               target_ulong opcode, target_ulong *args)
> +{
> +    target_ulong flags = args[0];
> +    target_ulong watchdogNumber = args[1];
> +    target_ulong timeoutInMs = args[2];
> +    unsigned operation = flags & PSERIES_WDTF_OP(~0);
> +    unsigned timeoutAction = flags & PSERIES_WDTF_ACTION(~0);
> +    struct SpaprWatchdog *w;
> +
> +    if (flags & PSERIES_WDTF_RESERVED) {
> +        return H_PARAMETER;
> +    }
> +
> +    switch (operation) {
> +    case PSERIES_WDTF_OP_START:
> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
> +            return H_P2;
> +        }
> +        if (timeoutInMs <= WDT_MIN_TIMEOUT) {
> +            return H_P3;
> +        }
> +
> +        w = &spapr->wds[watchdogNumber - 1];
> +        switch (timeoutAction) {
> +        case PSERIES_WDTF_ACTION_HARD_POWER_OFF:
> +            w->action = WDT_HARD_POWER_OFF;
> +            break;
> +        case PSERIES_WDTF_ACTION_HARD_RESTART:
> +            w->action = WDT_HARD_RESTART;
> +            break;
> +        case PSERIES_WDTF_ACTION_DUMP_RESTART:
> +            w->action = WDT_DUMP_RESTART;
> +            break;
> +        default:
> +            return H_PARAMETER;
> +        }
> +        timer_mod(&w->timer,
> +                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + timeoutInMs);
> +        trace_spapr_watchdog_start(flags, watchdogNumber, timeoutInMs);
> +        break;
> +    case PSERIES_WDTF_OP_STOP:
> +        if (watchdogNumber == (uint64_t) ~0) {
> +            int i;
> +
> +            for (i = 1; i <= ARRAY_SIZE(spapr->wds); ++i) {
> +                watchdog_stop(i, &spapr->wds[i - 1]);
> +            }
> +        } else if (watchdogNumber <= ARRAY_SIZE(spapr->wds)) {
> +            watchdog_stop(watchdogNumber, &spapr->wds[watchdogNumber - 1]);
> +        } else {
> +            return H_P2;
> +        }
> +        break;
> +    case PSERIES_WDTF_OP_QUERY:
> +        args[0] = PSERIES_WDTQ_MIN_TIMEOUT(WDT_MIN_TIMEOUT) |
> +            PSERIES_WDTQ_NUM(ARRAY_SIZE(spapr->wds));
> +        trace_spapr_watchdog_query(args[0]);
> +        break;
> +    case PSERIES_WDTF_OP_QUERY_LPM:
> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
> +            return H_P2;
> +        }
> +        args[0] = PSERIES_WDTQL_QUERY_NOT_STOPPED;
> +        trace_spapr_watchdog_query_lpm(args[0]);
> +        break;
> +    default:
> +        return H_PARAMETER;
> +    }
> +
> +    return H_SUCCESS;
> +}
> +
> +void spapr_watchdog_init(SpaprMachineState *spapr)
> +{
> +    int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(spapr->wds); ++i) {
> +        char name[16];
> +        SpaprWatchdog *w = &spapr->wds[i];
> +
> +        w->num = i + 1;
> +        snprintf(name, sizeof(name) - 1, "wdt%d", i + 1);
> +        object_initialize_child_with_props(OBJECT(spapr), name, w,
> +                                           sizeof(SpaprWatchdog),
> +                                           TYPE_SPAPR_WDT,
> +                                           &error_fatal, NULL);
> +        qdev_realize(DEVICE(w), NULL, &error_fatal);
> +    }
> +}
> +
> +static bool watchdog_needed(void *opaque)
> +{
> +    SpaprWatchdog *w = opaque;
> +
> +    return timer_pending(&w->timer);
> +}
> +
> +static const VMStateDescription vmstate_wdt = {
> +    .name = "spapr_watchdog",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = watchdog_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT8(action, SpaprWatchdog),
> +        VMSTATE_TIMER(timer, SpaprWatchdog),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static void spapr_wdt_realize(DeviceState *dev, Error **errp)
> +{
> +    SpaprWatchdog *w = SPAPR_WDT(dev);
> +
> +    timer_init_ms(&w->timer, QEMU_CLOCK_VIRTUAL, watchdog_expired, w);
> +
> +    object_property_add_uint64_ptr(OBJECT(dev), "expire",
> +                                   (uint64_t *)&w->timer.expire_time,
> +                                   OBJ_PROP_FLAG_READ);
> +    object_property_add_uint8_ptr(OBJECT(dev), "action", &w->action,
> +                                  OBJ_PROP_FLAG_READ);
> +}
> +
> +static void spapr_wdt_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +
> +    dc->realize = spapr_wdt_realize;
> +    dc->vmsd = &vmstate_wdt;
> +    dc->user_creatable = false;
> +}
> +
> +static const TypeInfo spapr_wdt_info = {
> +    .name          = TYPE_SPAPR_WDT,
> +    .parent        = TYPE_DEVICE,
> +    .instance_size = sizeof(SpaprWatchdog),
> +    .class_init    = spapr_wdt_class_init,
> +};
> +
> +static void spapr_watchdog_register_types(void)
> +{
> +    spapr_register_hypercall(H_WATCHDOG, h_watchdog);
> +    type_register_static(&spapr_wdt_info);
> +}
> +
> +type_init(spapr_watchdog_register_types)
> diff --git a/hw/watchdog/meson.build b/hw/watchdog/meson.build
> index 054c403dea7c..8974b5cf4c8a 100644
> --- a/hw/watchdog/meson.build
> +++ b/hw/watchdog/meson.build
> @@ -6,3 +6,4 @@ softmmu_ss.add(when: 'CONFIG_WDT_DIAG288', if_true: files('wdt_diag288.c'))
>   softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('wdt_aspeed.c'))
>   softmmu_ss.add(when: 'CONFIG_WDT_IMX2', if_true: files('wdt_imx2.c'))
>   softmmu_ss.add(when: 'CONFIG_WDT_SBSA', if_true: files('sbsa_gwdt.c'))
> +specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_watchdog.c'))
> diff --git a/hw/watchdog/trace-events b/hw/watchdog/trace-events
> index e7523e22aaf2..89ccbcfdfd20 100644
> --- a/hw/watchdog/trace-events
> +++ b/hw/watchdog/trace-events
> @@ -9,3 +9,10 @@ cmsdk_apb_watchdog_lock(uint32_t lock) "CMSDK APB watchdog: lock %" PRIu32
>   # wdt-aspeed.c
>   aspeed_wdt_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size=%d"
>   aspeed_wdt_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size=%d value=0x%"PRIx64
> +
> +# spapr_watchdog.c
> +spapr_watchdog_start(uint64_t flags, uint64_t num, uint64_t timeout) "Flags 0x%" PRIx64 " num=%" PRId64 " %" PRIu64 "ms"
> +spapr_watchdog_stop(uint64_t num, uint64_t ret) "num=%" PRIu64 " ret=%" PRId64
> +spapr_watchdog_query(uint64_t caps) "caps=0x%" PRIx64
> +spapr_watchdog_query_lpm(uint64_t caps) "caps=0x%" PRIx64
> +spapr_watchdog_expired(uint64_t num, unsigned action) "num=%" PRIu64 " action=%u"
Cédric Le Goater June 18, 2022, 11:01 a.m. UTC | #2
On 6/17/22 08:07, Alexey Kardashevskiy wrote:
> The new PAPR 2.12 defines a watchdog facility managed via the new
> H_WATCHDOG hypercall.
> 
> This adds H_WATCHDOG support which a proposed driver for pseries uses:
> https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=303120
> 
> This was tested by running QEMU with a debug kernel and command line:
> -append \
>   "pseries-wdt.timeout=60 pseries-wdt.nowayout=1 pseries-wdt.action=2"
> 
> and running "echo V > /dev/watchdog0" inside the VM.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
> Changes:
> v2:
> * QOM'ed timers, "action" and "expire" are available via QMP
> * removed @timeout from SpaprWatchdog
> * moved the driver to hw/watchdog
> * fixed error handling in the hcall handler
> * used new SETFIELD/GETFIELD
> ---
>   include/hw/ppc/spapr.h       |  29 +++-
>   hw/ppc/spapr.c               |   4 +
>   hw/watchdog/spapr_watchdog.c | 248 +++++++++++++++++++++++++++++++++++
>   hw/watchdog/meson.build      |   1 +
>   hw/watchdog/trace-events     |   7 +
>   5 files changed, 288 insertions(+), 1 deletion(-)
>   create mode 100644 hw/watchdog/spapr_watchdog.c
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 072dda2c7265..ef1e38abd5c7 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -164,6 +164,25 @@ struct SpaprMachineClass {
>       SpaprIrq *irq;
>   };
>   
> +#define WDT_MAX_WATCHDOGS       4      /* Maximum number of watchdog devices */
> +
> +#define WDT_HARD_POWER_OFF      0
> +#define WDT_HARD_RESTART        1
> +#define WDT_DUMP_RESTART        2
> +
> +#define TYPE_SPAPR_WDT "spapr-wdt"
> +OBJECT_DECLARE_SIMPLE_TYPE(SpaprWatchdog, SPAPR_WDT)
> +
> +typedef struct SpaprWatchdog {
> +    /*< private >*/
> +    DeviceState parent_obj;
> +    /*< public >*/
> +
> +    unsigned num;

uint8_t should be enough no ? I see num is only used for trace events.

> +    QEMUTimer timer;
> +    uint8_t action;
> +} SpaprWatchdog;
> +
>   /**
>    * SpaprMachineState:
>    */
> @@ -264,6 +283,8 @@ struct SpaprMachineState {
>       uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
>   
>       Error *fwnmi_migration_blocker;
> +
> +    SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
>   };
>   
>   #define H_SUCCESS         0
> @@ -344,6 +365,7 @@ struct SpaprMachineState {
>   #define H_P7              -60
>   #define H_P8              -61
>   #define H_P9              -62
> +#define H_NOOP            -63
>   #define H_UNSUPPORTED     -67
>   #define H_OVERLAP         -68
>   #define H_UNSUPPORTED_FLAG -256
> @@ -564,8 +586,9 @@ struct SpaprMachineState {
>   #define H_SCM_HEALTH            0x400
>   #define H_RPT_INVALIDATE        0x448
>   #define H_SCM_FLUSH             0x44C
> +#define H_WATCHDOG              0x45C
>   
> -#define MAX_HCALL_OPCODE        H_SCM_FLUSH
> +#define MAX_HCALL_OPCODE        H_WATCHDOG
>   
>   /* The hcalls above are standardized in PAPR and implemented by pHyp
>    * as well.
> @@ -1027,6 +1050,7 @@ extern const VMStateDescription vmstate_spapr_cap_large_decr;
>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
>   extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
> +extern const VMStateDescription vmstate_spapr_wdt;
>   
>   static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
>   {
> @@ -1063,4 +1087,7 @@ target_ulong spapr_vof_client_architecture_support(MachineState *ms,
>                                                      target_ulong ovec_addr);
>   void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
>   
> +/* H_WATCHDOG */
> +void spapr_watchdog_init(SpaprMachineState *spapr);
> +
>   #endif /* HW_SPAPR_H */
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index fd4942e8813c..9a5382d5270f 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -898,6 +898,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
>           add_str(hypertas, "hcall-hpt-resize");
>       }
>   
> +    add_str(hypertas, "hcall-watchdog");
> +
>       _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
>                        hypertas->str, hypertas->len));
>       g_string_free(hypertas, TRUE);
> @@ -3051,6 +3053,8 @@ static void spapr_machine_init(MachineState *machine)
>           spapr->vof->fw_size = fw_size; /* for claim() on itself */
>           spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
>       }
> +
> +    spapr_watchdog_init(spapr);
>   }
>   
>   #define DEFAULT_KVM_TYPE "auto"
> diff --git a/hw/watchdog/spapr_watchdog.c b/hw/watchdog/spapr_watchdog.c
> new file mode 100644
> index 000000000000..aeaf7c52cbad
> --- /dev/null
> +++ b/hw/watchdog/spapr_watchdog.c
> @@ -0,0 +1,248 @@
> +/*
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "target/ppc/cpu.h"
> +#include "migration/vmstate.h"
> +#include "trace.h"
> +
> +#include "hw/ppc/spapr.h"
> +
> +/*
> + * Bits 47: "leaveOtherWatchdogsRunningOnTimeout", specified on
> + * the "Start watchdog" operation,
> + * 0 - stop out-standing watchdogs on timeout,
> + * 1 - leave outstanding watchdogs running on timeout
> + */
> +#define PSERIES_WDTF_LEAVE_OTHER    PPC_BIT(47)
> +
> +/*    Bits 48-55: "operation" */
> +#define PSERIES_WDTF_OP(op)             SETFIELD(PPC_BITMASK(48, 55), 0, (op))
> +#define PSERIES_WDTF_OP_START           PSERIES_WDTF_OP(0x1)
> +#define PSERIES_WDTF_OP_STOP            PSERIES_WDTF_OP(0x2)
> +#define PSERIES_WDTF_OP_QUERY           PSERIES_WDTF_OP(0x3)
> +#define PSERIES_WDTF_OP_QUERY_LPM       PSERIES_WDTF_OP(0x4)
> +
> +/*    Bits 56-63: "timeoutAction" */
> +#define PSERIES_WDTF_ACTION(ac)         SETFIELD(PPC_BITMASK(56, 63), 0, (ac))
> +#define PSERIES_WDTF_ACTION_HARD_POWER_OFF  PSERIES_WDTF_ACTION(0x1)
> +#define PSERIES_WDTF_ACTION_HARD_RESTART    PSERIES_WDTF_ACTION(0x2)
> +#define PSERIES_WDTF_ACTION_DUMP_RESTART    PSERIES_WDTF_ACTION(0x3)
> +#define PSERIES_WDTF_RESERVED           PPC_BITMASK(0, 46)
> +
> +/*
> + * For the "Query watchdog capabilities" operation, a uint64 structure
> + * defined as:
> + * Bits 0-15: The minimum supported timeout in milliseconds
> + * Bits 16-31: The number of watchdogs supported
> + * Bits 32-63: Reserved
> + */
> +#define PSERIES_WDTQ_MIN_TIMEOUT(ms)    SETFIELD(PPC_BITMASK(0, 15), 0, (ms))
> +#define PSERIES_WDTQ_NUM(n)             SETFIELD(PPC_BITMASK(16, 31), 0, (n))
> +
> +/*
> + * For the "Query watchdog LPM requirement" operation:
> + * 1 = The given "watchdogNumber" must be stopped prior to suspending
> + * 2 = The given "watchdogNumber" does not have to be stopped prior to
> + * suspending
> + */
> +#define PSERIES_WDTQL_STOPPED               1
> +#define PSERIES_WDTQL_QUERY_NOT_STOPPED     2
> +
> +#define WDT_MIN_TIMEOUT 1 /* 1ms */
> +
> +static void watchdog_expired(void *pw)
> +{
> +    struct SpaprWatchdog *w = pw;

s/struct//

> +    CPUState *cs;
> +
> +    trace_spapr_watchdog_expired(w->num, w->action);
> +    switch (w->action) {
> +    case WDT_HARD_POWER_OFF:
> +        qemu_system_vmstop_request(RUN_STATE_SHUTDOWN);
> +        break;
> +    case WDT_HARD_RESTART:
> +        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
> +        break;
> +    case WDT_DUMP_RESTART:
> +        CPU_FOREACH(cs) {
> +            async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
> +        }
> +        break;
> +    }
> +}
> +
> +static target_ulong watchdog_stop(unsigned watchdogNumber,
> +                                  struct SpaprWatchdog *w)
> +{
> +    target_ulong ret = H_NOOP;
> +
> +    if (timer_pending(&w->timer)) {
> +        timer_del(&w->timer);
> +        ret = H_SUCCESS;
> +    }
> +    trace_spapr_watchdog_stop(watchdogNumber, ret);
> +
> +    return ret;
> +}
> +
> +static target_ulong h_watchdog(PowerPCCPU *cpu,
> +                               SpaprMachineState *spapr,
> +                               target_ulong opcode, target_ulong *args)
> +{
> +    target_ulong flags = args[0];
> +    target_ulong watchdogNumber = args[1];
> +    target_ulong timeoutInMs = args[2];
> +    unsigned operation = flags & PSERIES_WDTF_OP(~0);
> +    unsigned timeoutAction = flags & PSERIES_WDTF_ACTION(~0);
> +    struct SpaprWatchdog *w;
> +
> +    if (flags & PSERIES_WDTF_RESERVED) {
> +        return H_PARAMETER;
> +    }
> +
> +    switch (operation) {
> +    case PSERIES_WDTF_OP_START:
> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
> +            return H_P2;
> +        }
> +        if (timeoutInMs <= WDT_MIN_TIMEOUT) {
> +            return H_P3;
> +        }
> +
> +        w = &spapr->wds[watchdogNumber - 1];

So first index is 1 in PAPR ...

> +        switch (timeoutAction) {
> +        case PSERIES_WDTF_ACTION_HARD_POWER_OFF:
> +            w->action = WDT_HARD_POWER_OFF;
> +            break;
> +        case PSERIES_WDTF_ACTION_HARD_RESTART:
> +            w->action = WDT_HARD_RESTART;
> +            break;
> +        case PSERIES_WDTF_ACTION_DUMP_RESTART:
> +            w->action = WDT_DUMP_RESTART;
> +            break;
> +        default:
> +            return H_PARAMETER;
> +        }
> +        timer_mod(&w->timer,
> +                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + timeoutInMs);
> +        trace_spapr_watchdog_start(flags, watchdogNumber, timeoutInMs);
> +        break;
> +    case PSERIES_WDTF_OP_STOP:
> +        if (watchdogNumber == (uint64_t) ~0) {

May be add a define for this special value. It's better for readability.

> +            int i;
> +
> +            for (i = 1; i <= ARRAY_SIZE(spapr->wds); ++i) {
> +                watchdog_stop(i, &spapr->wds[i - 1]);
> +            }
> +        } else if (watchdogNumber <= ARRAY_SIZE(spapr->wds)) {
> +            watchdog_stop(watchdogNumber, &spapr->wds[watchdogNumber - 1]);
> +        } else {
> +            return H_P2;
> +        }
> +        break;
> +    case PSERIES_WDTF_OP_QUERY:
> +        args[0] = PSERIES_WDTQ_MIN_TIMEOUT(WDT_MIN_TIMEOUT) |
> +            PSERIES_WDTQ_NUM(ARRAY_SIZE(spapr->wds));
> +        trace_spapr_watchdog_query(args[0]);
> +        break;
> +    case PSERIES_WDTF_OP_QUERY_LPM:
> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
> +            return H_P2;
> +        }
> +        args[0] = PSERIES_WDTQL_QUERY_NOT_STOPPED;
> +        trace_spapr_watchdog_query_lpm(args[0]);
> +        break;
> +    default:
> +        return H_PARAMETER;
> +    }
> +
> +    return H_SUCCESS;
> +}
> +
> +void spapr_watchdog_init(SpaprMachineState *spapr)

This could have a 'Error **errp' parameter.

> +{
> +    int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(spapr->wds); ++i) {
> +        char name[16];g_autofree char *name = g_strdup_printf("wdt%d", i + 1);

> +        SpaprWatchdog *w = &spapr->wds[i];
> +
> +        w->num = i + 1;

it should be a property.

Thanks,

C.

> +        snprintf(name, sizeof(name) - 1, "wdt%d", i + 1);
> +        object_initialize_child_with_props(OBJECT(spapr), name, w,
> +                                           sizeof(SpaprWatchdog),
> +                                           TYPE_SPAPR_WDT,
> +                                           &error_fatal, NULL);
> +        qdev_realize(DEVICE(w), NULL, &error_fatal);
> +    }
> +}
> +
> +static bool watchdog_needed(void *opaque)
> +{
> +    SpaprWatchdog *w = opaque;
> +
> +    return timer_pending(&w->timer);
> +}
> +
> +static const VMStateDescription vmstate_wdt = {
> +    .name = "spapr_watchdog",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = watchdog_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT8(action, SpaprWatchdog),
> +        VMSTATE_TIMER(timer, SpaprWatchdog),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static void spapr_wdt_realize(DeviceState *dev, Error **errp)
> +{
> +    SpaprWatchdog *w = SPAPR_WDT(dev);
> +
> +    timer_init_ms(&w->timer, QEMU_CLOCK_VIRTUAL, watchdog_expired, w);
> +
> +    object_property_add_uint64_ptr(OBJECT(dev), "expire",
> +                                   (uint64_t *)&w->timer.expire_time,
> +                                   OBJ_PROP_FLAG_READ);
> +    object_property_add_uint8_ptr(OBJECT(dev), "action", &w->action,
> +                                  OBJ_PROP_FLAG_READ);
> +}
> +
> +static void spapr_wdt_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +
> +    dc->renualize = spapr_wdt_realize;
> +    dc->vmsd = &vmstate_wdt;
> +    dc->user_creatable = false;
> +}
> +
> +static const TypeInfo spapr_wdt_info = {
> +    .name          = TYPE_SPAPR_WDT,
> +    .parent        = TYPE_DEVICE,
> +    .instance_size = sizeof(SpaprWatchdog),
> +    .class_init    = spapr_wdt_class_init,
> +};
> +
> +static void spapr_watchdog_register_types(void)
> +{
> +    spapr_register_hypercall(H_WATCHDOG, h_watchdog);
> +    type_register_static(&spapr_wdt_info);
> +}
> +
> +type_init(spapr_watchdog_register_types)
> diff --git a/hw/watchdog/meson.build b/hw/watchdog/meson.build
> index 054c403dea7c..8974b5cf4c8a 100644
> --- a/hw/watchdog/meson.build
> +++ b/hw/watchdog/meson.build
> @@ -6,3 +6,4 @@ softmmu_ss.add(when: 'CONFIG_WDT_DIAG288', if_true: files('wdt_diag288.c'))
>   softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('wdt_aspeed.c'))
>   softmmu_ss.add(when: 'CONFIG_WDT_IMX2', if_true: files('wdt_imx2.c'))
>   softmmu_ss.add(when: 'CONFIG_WDT_SBSA', if_true: files('sbsa_gwdt.c'))
> +specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_watchdog.c'))
> diff --git a/hw/watchdog/trace-events b/hw/watchdog/trace-events
> index e7523e22aaf2..89ccbcfdfd20 100644
> --- a/hw/watchdog/trace-events
> +++ b/hw/watchdog/trace-events
> @@ -9,3 +9,10 @@ cmsdk_apb_watchdog_lock(uint32_t lock) "CMSDK APB watchdog: lock %" PRIu32
>   # wdt-aspeed.c
>   aspeed_wdt_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size=%d"
>   aspeed_wdt_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size=%d value=0x%"PRIx64
> +
> +# spapr_watchdog.c
> +spapr_watchdog_start(uint64_t flags, uint64_t num, uint64_t timeout) "Flags 0x%" PRIx64 " num=%" PRId64 " %" PRIu64 "ms"
> +spapr_watchdog_stop(uint64_t num, uint64_t ret) "num=%" PRIu64 " ret=%" PRId64
> +spapr_watchdog_query(uint64_t caps) "caps=0x%" PRIx64
> +spapr_watchdog_query_lpm(uint64_t caps) "caps=0x%" PRIx64
> +spapr_watchdog_expired(uint64_t num, unsigned action) "num=%" PRIu64 " action=%u"
Alexey Kardashevskiy June 20, 2022, 3:13 a.m. UTC | #3
On 6/18/22 21:01, Cédric Le Goater wrote:
> On 6/17/22 08:07, Alexey Kardashevskiy wrote:
>> The new PAPR 2.12 defines a watchdog facility managed via the new
>> H_WATCHDOG hypercall.
>>
>> This adds H_WATCHDOG support which a proposed driver for pseries uses:
>> https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=303120
>>
>> This was tested by running QEMU with a debug kernel and command line:
>> -append \
>>   "pseries-wdt.timeout=60 pseries-wdt.nowayout=1 pseries-wdt.action=2"
>>
>> and running "echo V > /dev/watchdog0" inside the VM.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>> Changes:
>> v2:
>> * QOM'ed timers, "action" and "expire" are available via QMP
>> * removed @timeout from SpaprWatchdog
>> * moved the driver to hw/watchdog
>> * fixed error handling in the hcall handler
>> * used new SETFIELD/GETFIELD
>> ---
>>   include/hw/ppc/spapr.h       |  29 +++-
>>   hw/ppc/spapr.c               |   4 +
>>   hw/watchdog/spapr_watchdog.c | 248 +++++++++++++++++++++++++++++++++++
>>   hw/watchdog/meson.build      |   1 +
>>   hw/watchdog/trace-events     |   7 +
>>   5 files changed, 288 insertions(+), 1 deletion(-)
>>   create mode 100644 hw/watchdog/spapr_watchdog.c
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 072dda2c7265..ef1e38abd5c7 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -164,6 +164,25 @@ struct SpaprMachineClass {
>>       SpaprIrq *irq;
>>   };
>> +#define WDT_MAX_WATCHDOGS       4      /* Maximum number of watchdog 
>> devices */
>> +
>> +#define WDT_HARD_POWER_OFF      0
>> +#define WDT_HARD_RESTART        1
>> +#define WDT_DUMP_RESTART        2
>> +
>> +#define TYPE_SPAPR_WDT "spapr-wdt"
>> +OBJECT_DECLARE_SIMPLE_TYPE(SpaprWatchdog, SPAPR_WDT)
>> +
>> +typedef struct SpaprWatchdog {
>> +    /*< private >*/
>> +    DeviceState parent_obj;
>> +    /*< public >*/
>> +
>> +    unsigned num;
> 
> uint8_t should be enough no ? I see num is only used for trace events.


It should but why? It is not migrating, and using uint8_t creates 
alignment gap here, and no benefit :) And I am removing it anyway, see 
below.

> 
>> +    QEMUTimer timer;
>> +    uint8_t action;
>> +} SpaprWatchdog;
>> +
>>   /**
>>    * SpaprMachineState:
>>    */
>> @@ -264,6 +283,8 @@ struct SpaprMachineState {
>>       uint32_t 
>> FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
>>       Error *fwnmi_migration_blocker;
>> +
>> +    SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
>>   };
>>   #define H_SUCCESS         0
>> @@ -344,6 +365,7 @@ struct SpaprMachineState {
>>   #define H_P7              -60
>>   #define H_P8              -61
>>   #define H_P9              -62
>> +#define H_NOOP            -63
>>   #define H_UNSUPPORTED     -67
>>   #define H_OVERLAP         -68
>>   #define H_UNSUPPORTED_FLAG -256
>> @@ -564,8 +586,9 @@ struct SpaprMachineState {
>>   #define H_SCM_HEALTH            0x400
>>   #define H_RPT_INVALIDATE        0x448
>>   #define H_SCM_FLUSH             0x44C
>> +#define H_WATCHDOG              0x45C
>> -#define MAX_HCALL_OPCODE        H_SCM_FLUSH
>> +#define MAX_HCALL_OPCODE        H_WATCHDOG
>>   /* The hcalls above are standardized in PAPR and implemented by pHyp
>>    * as well.
>> @@ -1027,6 +1050,7 @@ extern const VMStateDescription 
>> vmstate_spapr_cap_large_decr;
>>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
>>   extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
>> +extern const VMStateDescription vmstate_spapr_wdt;
>>   static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
>>   {
>> @@ -1063,4 +1087,7 @@ target_ulong 
>> spapr_vof_client_architecture_support(MachineState *ms,
>>                                                      target_ulong 
>> ovec_addr);
>>   void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
>> +/* H_WATCHDOG */
>> +void spapr_watchdog_init(SpaprMachineState *spapr);
>> +
>>   #endif /* HW_SPAPR_H */
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index fd4942e8813c..9a5382d5270f 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -898,6 +898,8 @@ static void spapr_dt_rtas(SpaprMachineState 
>> *spapr, void *fdt)
>>           add_str(hypertas, "hcall-hpt-resize");
>>       }
>> +    add_str(hypertas, "hcall-watchdog");
>> +
>>       _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
>>                        hypertas->str, hypertas->len));
>>       g_string_free(hypertas, TRUE);
>> @@ -3051,6 +3053,8 @@ static void spapr_machine_init(MachineState 
>> *machine)
>>           spapr->vof->fw_size = fw_size; /* for claim() on itself */
>>           spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, 
>> spapr_h_vof_client);
>>       }
>> +
>> +    spapr_watchdog_init(spapr);
>>   }
>>   #define DEFAULT_KVM_TYPE "auto"
>> diff --git a/hw/watchdog/spapr_watchdog.c b/hw/watchdog/spapr_watchdog.c
>> new file mode 100644
>> index 000000000000..aeaf7c52cbad
>> --- /dev/null
>> +++ b/hw/watchdog/spapr_watchdog.c
>> @@ -0,0 +1,248 @@
>> +/*
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library; if not, see 
>> <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qapi/error.h"
>> +#include "target/ppc/cpu.h"
>> +#include "migration/vmstate.h"
>> +#include "trace.h"
>> +
>> +#include "hw/ppc/spapr.h"
>> +
>> +/*
>> + * Bits 47: "leaveOtherWatchdogsRunningOnTimeout", specified on
>> + * the "Start watchdog" operation,
>> + * 0 - stop out-standing watchdogs on timeout,
>> + * 1 - leave outstanding watchdogs running on timeout
>> + */
>> +#define PSERIES_WDTF_LEAVE_OTHER    PPC_BIT(47)
>> +
>> +/*    Bits 48-55: "operation" */
>> +#define PSERIES_WDTF_OP(op)             SETFIELD(PPC_BITMASK(48, 55), 
>> 0, (op))
>> +#define PSERIES_WDTF_OP_START           PSERIES_WDTF_OP(0x1)
>> +#define PSERIES_WDTF_OP_STOP            PSERIES_WDTF_OP(0x2)
>> +#define PSERIES_WDTF_OP_QUERY           PSERIES_WDTF_OP(0x3)
>> +#define PSERIES_WDTF_OP_QUERY_LPM       PSERIES_WDTF_OP(0x4)
>> +
>> +/*    Bits 56-63: "timeoutAction" */
>> +#define PSERIES_WDTF_ACTION(ac)         SETFIELD(PPC_BITMASK(56, 63), 
>> 0, (ac))
>> +#define PSERIES_WDTF_ACTION_HARD_POWER_OFF  PSERIES_WDTF_ACTION(0x1)
>> +#define PSERIES_WDTF_ACTION_HARD_RESTART    PSERIES_WDTF_ACTION(0x2)
>> +#define PSERIES_WDTF_ACTION_DUMP_RESTART    PSERIES_WDTF_ACTION(0x3)
>> +#define PSERIES_WDTF_RESERVED           PPC_BITMASK(0, 46)
>> +
>> +/*
>> + * For the "Query watchdog capabilities" operation, a uint64 structure
>> + * defined as:
>> + * Bits 0-15: The minimum supported timeout in milliseconds
>> + * Bits 16-31: The number of watchdogs supported
>> + * Bits 32-63: Reserved
>> + */
>> +#define PSERIES_WDTQ_MIN_TIMEOUT(ms)    SETFIELD(PPC_BITMASK(0, 15), 
>> 0, (ms))
>> +#define PSERIES_WDTQ_NUM(n)             SETFIELD(PPC_BITMASK(16, 31), 
>> 0, (n))
>> +
>> +/*
>> + * For the "Query watchdog LPM requirement" operation:
>> + * 1 = The given "watchdogNumber" must be stopped prior to suspending
>> + * 2 = The given "watchdogNumber" does not have to be stopped prior to
>> + * suspending
>> + */
>> +#define PSERIES_WDTQL_STOPPED               1
>> +#define PSERIES_WDTQL_QUERY_NOT_STOPPED     2
>> +
>> +#define WDT_MIN_TIMEOUT 1 /* 1ms */
>> +
>> +static void watchdog_expired(void *pw)
>> +{
>> +    struct SpaprWatchdog *w = pw;
> 
> s/struct//
> 
>> +    CPUState *cs;
>> +
>> +    trace_spapr_watchdog_expired(w->num, w->action);
>> +    switch (w->action) {
>> +    case WDT_HARD_POWER_OFF:
>> +        qemu_system_vmstop_request(RUN_STATE_SHUTDOWN);
>> +        break;
>> +    case WDT_HARD_RESTART:
>> +        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
>> +        break;
>> +    case WDT_DUMP_RESTART:
>> +        CPU_FOREACH(cs) {
>> +            async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, 
>> RUN_ON_CPU_NULL);
>> +        }
>> +        break;
>> +    }
>> +}
>> +
>> +static target_ulong watchdog_stop(unsigned watchdogNumber,
>> +                                  struct SpaprWatchdog *w)
>> +{
>> +    target_ulong ret = H_NOOP;
>> +
>> +    if (timer_pending(&w->timer)) {
>> +        timer_del(&w->timer);
>> +        ret = H_SUCCESS;
>> +    }
>> +    trace_spapr_watchdog_stop(watchdogNumber, ret);
>> +
>> +    return ret;
>> +}
>> +
>> +static target_ulong h_watchdog(PowerPCCPU *cpu,
>> +                               SpaprMachineState *spapr,
>> +                               target_ulong opcode, target_ulong *args)
>> +{
>> +    target_ulong flags = args[0];
>> +    target_ulong watchdogNumber = args[1];
>> +    target_ulong timeoutInMs = args[2];
>> +    unsigned operation = flags & PSERIES_WDTF_OP(~0);
>> +    unsigned timeoutAction = flags & PSERIES_WDTF_ACTION(~0);
>> +    struct SpaprWatchdog *w;
>> +
>> +    if (flags & PSERIES_WDTF_RESERVED) {
>> +        return H_PARAMETER;
>> +    }
>> +
>> +    switch (operation) {
>> +    case PSERIES_WDTF_OP_START:
>> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
>> +            return H_P2;
>> +        }
>> +        if (timeoutInMs <= WDT_MIN_TIMEOUT) {
>> +            return H_P3;
>> +        }
>> +
>> +        w = &spapr->wds[watchdogNumber - 1];
> 
> So first index is 1 in PAPR ...


Yes, I thought I commented on this somewhere but lost in rebases.


> 
>> +        switch (timeoutAction) {
>> +        case PSERIES_WDTF_ACTION_HARD_POWER_OFF:
>> +            w->action = WDT_HARD_POWER_OFF;
>> +            break;
>> +        case PSERIES_WDTF_ACTION_HARD_RESTART:
>> +            w->action = WDT_HARD_RESTART;
>> +            break;
>> +        case PSERIES_WDTF_ACTION_DUMP_RESTART:
>> +            w->action = WDT_DUMP_RESTART;
>> +            break;
>> +        default:
>> +            return H_PARAMETER;
>> +        }
>> +        timer_mod(&w->timer,
>> +                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + timeoutInMs);
>> +        trace_spapr_watchdog_start(flags, watchdogNumber, timeoutInMs);
>> +        break;
>> +    case PSERIES_WDTF_OP_STOP:
>> +        if (watchdogNumber == (uint64_t) ~0) {
> 
> May be add a define for this special value. It's better for readability.

Will do.


>> +            int i;
>> +
>> +            for (i = 1; i <= ARRAY_SIZE(spapr->wds); ++i) {
>> +                watchdog_stop(i, &spapr->wds[i - 1]);
>> +            }
>> +        } else if (watchdogNumber <= ARRAY_SIZE(spapr->wds)) {
>> +            watchdog_stop(watchdogNumber, &spapr->wds[watchdogNumber 
>> - 1]);
>> +        } else {
>> +            return H_P2;
>> +        }
>> +        break;
>> +    case PSERIES_WDTF_OP_QUERY:
>> +        args[0] = PSERIES_WDTQ_MIN_TIMEOUT(WDT_MIN_TIMEOUT) |
>> +            PSERIES_WDTQ_NUM(ARRAY_SIZE(spapr->wds));
>> +        trace_spapr_watchdog_query(args[0]);
>> +        break;
>> +    case PSERIES_WDTF_OP_QUERY_LPM:
>> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
>> +            return H_P2;
>> +        }
>> +        args[0] = PSERIES_WDTQL_QUERY_NOT_STOPPED;
>> +        trace_spapr_watchdog_query_lpm(args[0]);
>> +        break;
>> +    default:
>> +        return H_PARAMETER;
>> +    }
>> +
>> +    return H_SUCCESS;
>> +}
>> +
>> +void spapr_watchdog_init(SpaprMachineState *spapr)
> 
> This could have a 'Error **errp' parameter.


I was repeating somewhat similar spapr_rtc_create(), and the called - 
spapr_machine_init() - does not have *errp. Seems pointless as it fails 
- something is horrendously broken.


> 
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < ARRAY_SIZE(spapr->wds); ++i) {
>> +        char name[16];g_autofree char *name = 
>> g_strdup_printf("wdt%d", i + 1);
> 
>> +        SpaprWatchdog *w = &spapr->wds[i];
>> +
>> +        w->num = i + 1;
> 
> it should be a property.

This cannot change and used only for tracing, and the QOM name has the 
number as well. I am replacing it with

SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
unsigned num = w - spapr->wds;

and removing the @num from the struct.

Thanks,

> 
> Thanks,
> 
> C.
> 
>> +        snprintf(name, sizeof(name) - 1, "wdt%d", i + 1);
>> +        object_initialize_child_with_props(OBJECT(spapr), name, w,
>> +                                           sizeof(SpaprWatchdog),
>> +                                           TYPE_SPAPR_WDT,
>> +                                           &error_fatal, NULL);
>> +        qdev_realize(DEVICE(w), NULL, &error_fatal);
>> +    }
>> +}
>> +
>> +static bool watchdog_needed(void *opaque)
>> +{
>> +    SpaprWatchdog *w = opaque;
>> +
>> +    return timer_pending(&w->timer);
>> +}
>> +
>> +static const VMStateDescription vmstate_wdt = {
>> +    .name = "spapr_watchdog",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .needed = watchdog_needed,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT8(action, SpaprWatchdog),
>> +        VMSTATE_TIMER(timer, SpaprWatchdog),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>> +static void spapr_wdt_realize(DeviceState *dev, Error **errp)
>> +{
>> +    SpaprWatchdog *w = SPAPR_WDT(dev);
>> +
>> +    timer_init_ms(&w->timer, QEMU_CLOCK_VIRTUAL, watchdog_expired, w);
>> +
>> +    object_property_add_uint64_ptr(OBJECT(dev), "expire",
>> +                                   (uint64_t *)&w->timer.expire_time,
>> +                                   OBJ_PROP_FLAG_READ);
>> +    object_property_add_uint8_ptr(OBJECT(dev), "action", &w->action,
>> +                                  OBJ_PROP_FLAG_READ);
>> +}
>> +
>> +static void spapr_wdt_class_init(ObjectClass *oc, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>> +
>> +    dc->renualize = spapr_wdt_realize;
>> +    dc->vmsd = &vmstate_wdt;
>> +    dc->user_creatable = false;
>> +}
>> +
>> +static const TypeInfo spapr_wdt_info = {
>> +    .name          = TYPE_SPAPR_WDT,
>> +    .parent        = TYPE_DEVICE,
>> +    .instance_size = sizeof(SpaprWatchdog),
>> +    .class_init    = spapr_wdt_class_init,
>> +};
>> +
>> +static void spapr_watchdog_register_types(void)
>> +{
>> +    spapr_register_hypercall(H_WATCHDOG, h_watchdog);
>> +    type_register_static(&spapr_wdt_info);
>> +}
>> +
>> +type_init(spapr_watchdog_register_types)
>> diff --git a/hw/watchdog/meson.build b/hw/watchdog/meson.build
>> index 054c403dea7c..8974b5cf4c8a 100644
>> --- a/hw/watchdog/meson.build
>> +++ b/hw/watchdog/meson.build
>> @@ -6,3 +6,4 @@ softmmu_ss.add(when: 'CONFIG_WDT_DIAG288', if_true: 
>> files('wdt_diag288.c'))
>>   softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: 
>> files('wdt_aspeed.c'))
>>   softmmu_ss.add(when: 'CONFIG_WDT_IMX2', if_true: files('wdt_imx2.c'))
>>   softmmu_ss.add(when: 'CONFIG_WDT_SBSA', if_true: files('sbsa_gwdt.c'))
>> +specific_ss.add(when: 'CONFIG_PSERIES', if_true: 
>> files('spapr_watchdog.c'))
>> diff --git a/hw/watchdog/trace-events b/hw/watchdog/trace-events
>> index e7523e22aaf2..89ccbcfdfd20 100644
>> --- a/hw/watchdog/trace-events
>> +++ b/hw/watchdog/trace-events
>> @@ -9,3 +9,10 @@ cmsdk_apb_watchdog_lock(uint32_t lock) "CMSDK APB 
>> watchdog: lock %" PRIu32
>>   # wdt-aspeed.c
>>   aspeed_wdt_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size=%d"
>>   aspeed_wdt_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" 
>> PRIx64 " size=%d value=0x%"PRIx64
>> +
>> +# spapr_watchdog.c
>> +spapr_watchdog_start(uint64_t flags, uint64_t num, uint64_t timeout) 
>> "Flags 0x%" PRIx64 " num=%" PRId64 " %" PRIu64 "ms"
>> +spapr_watchdog_stop(uint64_t num, uint64_t ret) "num=%" PRIu64 " 
>> ret=%" PRId64
>> +spapr_watchdog_query(uint64_t caps) "caps=0x%" PRIx64
>> +spapr_watchdog_query_lpm(uint64_t caps) "caps=0x%" PRIx64
>> +spapr_watchdog_expired(uint64_t num, unsigned action) "num=%" PRIu64 
>> " action=%u"
Cédric Le Goater June 20, 2022, 6:23 a.m. UTC | #4
On 6/20/22 05:13, Alexey Kardashevskiy wrote:
> 
> 
> On 6/18/22 21:01, Cédric Le Goater wrote:
>> On 6/17/22 08:07, Alexey Kardashevskiy wrote:
>>> The new PAPR 2.12 defines a watchdog facility managed via the new
>>> H_WATCHDOG hypercall.
>>>
>>> This adds H_WATCHDOG support which a proposed driver for pseries uses:
>>> https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=303120
>>>
>>> This was tested by running QEMU with a debug kernel and command line:
>>> -append \
>>>   "pseries-wdt.timeout=60 pseries-wdt.nowayout=1 pseries-wdt.action=2"
>>>
>>> and running "echo V > /dev/watchdog0" inside the VM.
>>>
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> ---
>>> Changes:
>>> v2:
>>> * QOM'ed timers, "action" and "expire" are available via QMP
>>> * removed @timeout from SpaprWatchdog
>>> * moved the driver to hw/watchdog
>>> * fixed error handling in the hcall handler
>>> * used new SETFIELD/GETFIELD
>>> ---
>>>   include/hw/ppc/spapr.h       |  29 +++-
>>>   hw/ppc/spapr.c               |   4 +
>>>   hw/watchdog/spapr_watchdog.c | 248 +++++++++++++++++++++++++++++++++++
>>>   hw/watchdog/meson.build      |   1 +
>>>   hw/watchdog/trace-events     |   7 +
>>>   5 files changed, 288 insertions(+), 1 deletion(-)
>>>   create mode 100644 hw/watchdog/spapr_watchdog.c
>>>
>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>> index 072dda2c7265..ef1e38abd5c7 100644
>>> --- a/include/hw/ppc/spapr.h
>>> +++ b/include/hw/ppc/spapr.h
>>> @@ -164,6 +164,25 @@ struct SpaprMachineClass {
>>>       SpaprIrq *irq;
>>>   };
>>> +#define WDT_MAX_WATCHDOGS       4      /* Maximum number of watchdog devices */
>>> +
>>> +#define WDT_HARD_POWER_OFF      0
>>> +#define WDT_HARD_RESTART        1
>>> +#define WDT_DUMP_RESTART        2
>>> +
>>> +#define TYPE_SPAPR_WDT "spapr-wdt"
>>> +OBJECT_DECLARE_SIMPLE_TYPE(SpaprWatchdog, SPAPR_WDT)
>>> +
>>> +typedef struct SpaprWatchdog {
>>> +    /*< private >*/
>>> +    DeviceState parent_obj;
>>> +    /*< public >*/
>>> +
>>> +    unsigned num;
>>
>> uint8_t should be enough no ? I see num is only used for trace events.
> 
> 
> It should but why? It is not migrating, and using uint8_t creates alignment gap here, and no benefit :) And I am removing it anyway, see below.
> 
>>
>>> +    QEMUTimer timer;
>>> +    uint8_t action;
>>> +} SpaprWatchdog;
>>> +
>>>   /**
>>>    * SpaprMachineState:
>>>    */
>>> @@ -264,6 +283,8 @@ struct SpaprMachineState {
>>>       uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
>>>       Error *fwnmi_migration_blocker;
>>> +
>>> +    SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
>>>   };
>>>   #define H_SUCCESS         0
>>> @@ -344,6 +365,7 @@ struct SpaprMachineState {
>>>   #define H_P7              -60
>>>   #define H_P8              -61
>>>   #define H_P9              -62
>>> +#define H_NOOP            -63
>>>   #define H_UNSUPPORTED     -67
>>>   #define H_OVERLAP         -68
>>>   #define H_UNSUPPORTED_FLAG -256
>>> @@ -564,8 +586,9 @@ struct SpaprMachineState {
>>>   #define H_SCM_HEALTH            0x400
>>>   #define H_RPT_INVALIDATE        0x448
>>>   #define H_SCM_FLUSH             0x44C
>>> +#define H_WATCHDOG              0x45C
>>> -#define MAX_HCALL_OPCODE        H_SCM_FLUSH
>>> +#define MAX_HCALL_OPCODE        H_WATCHDOG
>>>   /* The hcalls above are standardized in PAPR and implemented by pHyp
>>>    * as well.
>>> @@ -1027,6 +1050,7 @@ extern const VMStateDescription vmstate_spapr_cap_large_decr;
>>>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>>>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
>>>   extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
>>> +extern const VMStateDescription vmstate_spapr_wdt;
>>>   static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
>>>   {
>>> @@ -1063,4 +1087,7 @@ target_ulong spapr_vof_client_architecture_support(MachineState *ms,
>>>                                                      target_ulong ovec_addr);
>>>   void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
>>> +/* H_WATCHDOG */
>>> +void spapr_watchdog_init(SpaprMachineState *spapr);
>>> +
>>>   #endif /* HW_SPAPR_H */
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index fd4942e8813c..9a5382d5270f 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -898,6 +898,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
>>>           add_str(hypertas, "hcall-hpt-resize");
>>>       }
>>> +    add_str(hypertas, "hcall-watchdog");
>>> +
>>>       _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
>>>                        hypertas->str, hypertas->len));
>>>       g_string_free(hypertas, TRUE);
>>> @@ -3051,6 +3053,8 @@ static void spapr_machine_init(MachineState *machine)
>>>           spapr->vof->fw_size = fw_size; /* for claim() on itself */
>>>           spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
>>>       }
>>> +
>>> +    spapr_watchdog_init(spapr);
>>>   }
>>>   #define DEFAULT_KVM_TYPE "auto"
>>> diff --git a/hw/watchdog/spapr_watchdog.c b/hw/watchdog/spapr_watchdog.c
>>> new file mode 100644
>>> index 000000000000..aeaf7c52cbad
>>> --- /dev/null
>>> +++ b/hw/watchdog/spapr_watchdog.c
>>> @@ -0,0 +1,248 @@
>>> +/*
>>> + * This library is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU Lesser General Public
>>> + * License as published by the Free Software Foundation; either
>>> + * version 2.1 of the License, or (at your option) any later version.
>>> + *
>>> + * This library is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> + * Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public
>>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "qapi/error.h"
>>> +#include "target/ppc/cpu.h"
>>> +#include "migration/vmstate.h"
>>> +#include "trace.h"
>>> +
>>> +#include "hw/ppc/spapr.h"
>>> +
>>> +/*
>>> + * Bits 47: "leaveOtherWatchdogsRunningOnTimeout", specified on
>>> + * the "Start watchdog" operation,
>>> + * 0 - stop out-standing watchdogs on timeout,
>>> + * 1 - leave outstanding watchdogs running on timeout
>>> + */
>>> +#define PSERIES_WDTF_LEAVE_OTHER    PPC_BIT(47)
>>> +
>>> +/*    Bits 48-55: "operation" */
>>> +#define PSERIES_WDTF_OP(op)             SETFIELD(PPC_BITMASK(48, 55), 0, (op))
>>> +#define PSERIES_WDTF_OP_START           PSERIES_WDTF_OP(0x1)
>>> +#define PSERIES_WDTF_OP_STOP            PSERIES_WDTF_OP(0x2)
>>> +#define PSERIES_WDTF_OP_QUERY           PSERIES_WDTF_OP(0x3)
>>> +#define PSERIES_WDTF_OP_QUERY_LPM       PSERIES_WDTF_OP(0x4)
>>> +
>>> +/*    Bits 56-63: "timeoutAction" */
>>> +#define PSERIES_WDTF_ACTION(ac)         SETFIELD(PPC_BITMASK(56, 63), 0, (ac))
>>> +#define PSERIES_WDTF_ACTION_HARD_POWER_OFF  PSERIES_WDTF_ACTION(0x1)
>>> +#define PSERIES_WDTF_ACTION_HARD_RESTART    PSERIES_WDTF_ACTION(0x2)
>>> +#define PSERIES_WDTF_ACTION_DUMP_RESTART    PSERIES_WDTF_ACTION(0x3)
>>> +#define PSERIES_WDTF_RESERVED           PPC_BITMASK(0, 46)
>>> +
>>> +/*
>>> + * For the "Query watchdog capabilities" operation, a uint64 structure
>>> + * defined as:
>>> + * Bits 0-15: The minimum supported timeout in milliseconds
>>> + * Bits 16-31: The number of watchdogs supported
>>> + * Bits 32-63: Reserved
>>> + */
>>> +#define PSERIES_WDTQ_MIN_TIMEOUT(ms)    SETFIELD(PPC_BITMASK(0, 15), 0, (ms))
>>> +#define PSERIES_WDTQ_NUM(n)             SETFIELD(PPC_BITMASK(16, 31), 0, (n))
>>> +
>>> +/*
>>> + * For the "Query watchdog LPM requirement" operation:
>>> + * 1 = The given "watchdogNumber" must be stopped prior to suspending
>>> + * 2 = The given "watchdogNumber" does not have to be stopped prior to
>>> + * suspending
>>> + */
>>> +#define PSERIES_WDTQL_STOPPED               1
>>> +#define PSERIES_WDTQL_QUERY_NOT_STOPPED     2
>>> +
>>> +#define WDT_MIN_TIMEOUT 1 /* 1ms */
>>> +
>>> +static void watchdog_expired(void *pw)
>>> +{
>>> +    struct SpaprWatchdog *w = pw;
>>
>> s/struct//
>>
>>> +    CPUState *cs;
>>> +
>>> +    trace_spapr_watchdog_expired(w->num, w->action);
>>> +    switch (w->action) {
>>> +    case WDT_HARD_POWER_OFF:
>>> +        qemu_system_vmstop_request(RUN_STATE_SHUTDOWN);
>>> +        break;
>>> +    case WDT_HARD_RESTART:
>>> +        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
>>> +        break;
>>> +    case WDT_DUMP_RESTART:
>>> +        CPU_FOREACH(cs) {
>>> +            async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
>>> +        }
>>> +        break;
>>> +    }
>>> +}
>>> +
>>> +static target_ulong watchdog_stop(unsigned watchdogNumber,
>>> +                                  struct SpaprWatchdog *w)
>>> +{
>>> +    target_ulong ret = H_NOOP;
>>> +
>>> +    if (timer_pending(&w->timer)) {
>>> +        timer_del(&w->timer);
>>> +        ret = H_SUCCESS;
>>> +    }
>>> +    trace_spapr_watchdog_stop(watchdogNumber, ret);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static target_ulong h_watchdog(PowerPCCPU *cpu,
>>> +                               SpaprMachineState *spapr,
>>> +                               target_ulong opcode, target_ulong *args)
>>> +{
>>> +    target_ulong flags = args[0];
>>> +    target_ulong watchdogNumber = args[1];
>>> +    target_ulong timeoutInMs = args[2];
>>> +    unsigned operation = flags & PSERIES_WDTF_OP(~0);
>>> +    unsigned timeoutAction = flags & PSERIES_WDTF_ACTION(~0);
>>> +    struct SpaprWatchdog *w;
>>> +
>>> +    if (flags & PSERIES_WDTF_RESERVED) {
>>> +        return H_PARAMETER;
>>> +    }
>>> +
>>> +    switch (operation) {
>>> +    case PSERIES_WDTF_OP_START:
>>> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
>>> +            return H_P2;
>>> +        }
>>> +        if (timeoutInMs <= WDT_MIN_TIMEOUT) {
>>> +            return H_P3;
>>> +        }
>>> +
>>> +        w = &spapr->wds[watchdogNumber - 1];
>>
>> So first index is 1 in PAPR ...
> 
> 
> Yes, I thought I commented on this somewhere but lost in rebases.
> 
> 
>>
>>> +        switch (timeoutAction) {
>>> +        case PSERIES_WDTF_ACTION_HARD_POWER_OFF:
>>> +            w->action = WDT_HARD_POWER_OFF;
>>> +            break;
>>> +        case PSERIES_WDTF_ACTION_HARD_RESTART:
>>> +            w->action = WDT_HARD_RESTART;
>>> +            break;
>>> +        case PSERIES_WDTF_ACTION_DUMP_RESTART:
>>> +            w->action = WDT_DUMP_RESTART;
>>> +            break;
>>> +        default:
>>> +            return H_PARAMETER;
>>> +        }
>>> +        timer_mod(&w->timer,
>>> +                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + timeoutInMs);
>>> +        trace_spapr_watchdog_start(flags, watchdogNumber, timeoutInMs);
>>> +        break;
>>> +    case PSERIES_WDTF_OP_STOP:
>>> +        if (watchdogNumber == (uint64_t) ~0) {
>>
>> May be add a define for this special value. It's better for readability.
> 
> Will do.
> 
> 
>>> +            int i;
>>> +
>>> +            for (i = 1; i <= ARRAY_SIZE(spapr->wds); ++i) {
>>> +                watchdog_stop(i, &spapr->wds[i - 1]);
>>> +            }
>>> +        } else if (watchdogNumber <= ARRAY_SIZE(spapr->wds)) {
>>> +            watchdog_stop(watchdogNumber, &spapr->wds[watchdogNumber - 1]);
>>> +        } else {
>>> +            return H_P2;
>>> +        }
>>> +        break;
>>> +    case PSERIES_WDTF_OP_QUERY:
>>> +        args[0] = PSERIES_WDTQ_MIN_TIMEOUT(WDT_MIN_TIMEOUT) |
>>> +            PSERIES_WDTQ_NUM(ARRAY_SIZE(spapr->wds));
>>> +        trace_spapr_watchdog_query(args[0]);
>>> +        break;
>>> +    case PSERIES_WDTF_OP_QUERY_LPM:
>>> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
>>> +            return H_P2;
>>> +        }
>>> +        args[0] = PSERIES_WDTQL_QUERY_NOT_STOPPED;
>>> +        trace_spapr_watchdog_query_lpm(args[0]);
>>> +        break;
>>> +    default:
>>> +        return H_PARAMETER;
>>> +    }
>>> +
>>> +    return H_SUCCESS;
>>> +}
>>> +
>>> +void spapr_watchdog_init(SpaprMachineState *spapr)
>>
>> This could have a 'Error **errp' parameter.
> 
> 
> I was repeating somewhat similar spapr_rtc_create(), and the called - spapr_machine_init() - does not have *errp. Seems pointless as it fails - something is horrendously broken.


Well, it's up to the caller to decide what to do in case of
errors. If in this case, it is "log an error and exit",
I would simply :

   spapr_watchdog_init(spapr, &error_fatal);

But it is not necessarily fatal to fail to initialize some
device.

> 
> 
>>
>>> +{
>>> +    int i;
>>> +
>>> +    for (i = 0; i < ARRAY_SIZE(spapr->wds); ++i) {
>>> +        char name[16];g_autofree char *name = g_strdup_printf("wdt%d", i + 1);
>>
>>> +        SpaprWatchdog *w = &spapr->wds[i];
>>> +
>>> +        w->num = i + 1;
>>
>> it should be a property.
> 
> This cannot change and used only for tracing, and the QOM name has the number as well. I am replacing it with
> 
> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> unsigned num = w - spapr->wds;
> 
> and removing the @num from the struct.

Nice !

Thanks

C.


> 
> Thanks,
> 
>>
>> Thanks,
>>
>> C.
>>
>>> +        snprintf(name, sizeof(name) - 1, "wdt%d", i + 1);
>>> +        object_initialize_child_with_props(OBJECT(spapr), name, w,
>>> +                                           sizeof(SpaprWatchdog),
>>> +                                           TYPE_SPAPR_WDT,
>>> +                                           &error_fatal, NULL);
>>> +        qdev_realize(DEVICE(w), NULL, &error_fatal);
>>> +    }
>>> +}
>>> +
>>> +static bool watchdog_needed(void *opaque)
>>> +{
>>> +    SpaprWatchdog *w = opaque;
>>> +
>>> +    return timer_pending(&w->timer);
>>> +}
>>> +
>>> +static const VMStateDescription vmstate_wdt = {
>>> +    .name = "spapr_watchdog",
>>> +    .version_id = 1,
>>> +    .minimum_version_id = 1,
>>> +    .needed = watchdog_needed,
>>> +    .fields = (VMStateField[]) {
>>> +        VMSTATE_UINT8(action, SpaprWatchdog),
>>> +        VMSTATE_TIMER(timer, SpaprWatchdog),
>>> +        VMSTATE_END_OF_LIST()
>>> +    }
>>> +};
>>> +
>>> +static void spapr_wdt_realize(DeviceState *dev, Error **errp)
>>> +{
>>> +    SpaprWatchdog *w = SPAPR_WDT(dev);
>>> +
>>> +    timer_init_ms(&w->timer, QEMU_CLOCK_VIRTUAL, watchdog_expired, w);
>>> +
>>> +    object_property_add_uint64_ptr(OBJECT(dev), "expire",
>>> +                                   (uint64_t *)&w->timer.expire_time,
>>> +                                   OBJ_PROP_FLAG_READ);
>>> +    object_property_add_uint8_ptr(OBJECT(dev), "action", &w->action,
>>> +                                  OBJ_PROP_FLAG_READ);
>>> +}
>>> +
>>> +static void spapr_wdt_class_init(ObjectClass *oc, void *data)
>>> +{
>>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>>> +
>>> +    dc->renualize = spapr_wdt_realize;
>>> +    dc->vmsd = &vmstate_wdt;
>>> +    dc->user_creatable = false;
>>> +}
>>> +
>>> +static const TypeInfo spapr_wdt_info = {
>>> +    .name          = TYPE_SPAPR_WDT,
>>> +    .parent        = TYPE_DEVICE,
>>> +    .instance_size = sizeof(SpaprWatchdog),
>>> +    .class_init    = spapr_wdt_class_init,
>>> +};
>>> +
>>> +static void spapr_watchdog_register_types(void)
>>> +{
>>> +    spapr_register_hypercall(H_WATCHDOG, h_watchdog);
>>> +    type_register_static(&spapr_wdt_info);
>>> +}
>>> +
>>> +type_init(spapr_watchdog_register_types)
>>> diff --git a/hw/watchdog/meson.build b/hw/watchdog/meson.build
>>> index 054c403dea7c..8974b5cf4c8a 100644
>>> --- a/hw/watchdog/meson.build
>>> +++ b/hw/watchdog/meson.build
>>> @@ -6,3 +6,4 @@ softmmu_ss.add(when: 'CONFIG_WDT_DIAG288', if_true: files('wdt_diag288.c'))
>>>   softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('wdt_aspeed.c'))
>>>   softmmu_ss.add(when: 'CONFIG_WDT_IMX2', if_true: files('wdt_imx2.c'))
>>>   softmmu_ss.add(when: 'CONFIG_WDT_SBSA', if_true: files('sbsa_gwdt.c'))
>>> +specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_watchdog.c'))
>>> diff --git a/hw/watchdog/trace-events b/hw/watchdog/trace-events
>>> index e7523e22aaf2..89ccbcfdfd20 100644
>>> --- a/hw/watchdog/trace-events
>>> +++ b/hw/watchdog/trace-events
>>> @@ -9,3 +9,10 @@ cmsdk_apb_watchdog_lock(uint32_t lock) "CMSDK APB watchdog: lock %" PRIu32
>>>   # wdt-aspeed.c
>>>   aspeed_wdt_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size=%d"
>>>   aspeed_wdt_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size=%d value=0x%"PRIx64
>>> +
>>> +# spapr_watchdog.c
>>> +spapr_watchdog_start(uint64_t flags, uint64_t num, uint64_t timeout) "Flags 0x%" PRIx64 " num=%" PRId64 " %" PRIu64 "ms"
>>> +spapr_watchdog_stop(uint64_t num, uint64_t ret) "num=%" PRIu64 " ret=%" PRId64
>>> +spapr_watchdog_query(uint64_t caps) "caps=0x%" PRIx64
>>> +spapr_watchdog_query_lpm(uint64_t caps) "caps=0x%" PRIx64
>>> +spapr_watchdog_expired(uint64_t num, unsigned action) "num=%" PRIu64 " action=%u"
>
Alexey Kardashevskiy June 20, 2022, 8:28 a.m. UTC | #5
On 6/20/22 16:23, Cédric Le Goater wrote:
> On 6/20/22 05:13, Alexey Kardashevskiy wrote:
>>
>>
>> On 6/18/22 21:01, Cédric Le Goater wrote:
>>> On 6/17/22 08:07, Alexey Kardashevskiy wrote:
>>>> The new PAPR 2.12 defines a watchdog facility managed via the new
>>>> H_WATCHDOG hypercall.
>>>>
>>>> This adds H_WATCHDOG support which a proposed driver for pseries uses:
>>>> https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=303120
>>>>
>>>> This was tested by running QEMU with a debug kernel and command line:
>>>> -append \
>>>>   "pseries-wdt.timeout=60 pseries-wdt.nowayout=1 pseries-wdt.action=2"
>>>>
>>>> and running "echo V > /dev/watchdog0" inside the VM.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>> ---
>>>> Changes:
>>>> v2:
>>>> * QOM'ed timers, "action" and "expire" are available via QMP
>>>> * removed @timeout from SpaprWatchdog
>>>> * moved the driver to hw/watchdog
>>>> * fixed error handling in the hcall handler
>>>> * used new SETFIELD/GETFIELD
>>>> ---
>>>>   include/hw/ppc/spapr.h       |  29 +++-
>>>>   hw/ppc/spapr.c               |   4 +
>>>>   hw/watchdog/spapr_watchdog.c | 248 
>>>> +++++++++++++++++++++++++++++++++++
>>>>   hw/watchdog/meson.build      |   1 +
>>>>   hw/watchdog/trace-events     |   7 +
>>>>   5 files changed, 288 insertions(+), 1 deletion(-)
>>>>   create mode 100644 hw/watchdog/spapr_watchdog.c
>>>>
>>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>>> index 072dda2c7265..ef1e38abd5c7 100644
>>>> --- a/include/hw/ppc/spapr.h
>>>> +++ b/include/hw/ppc/spapr.h
>>>> @@ -164,6 +164,25 @@ struct SpaprMachineClass {
>>>>       SpaprIrq *irq;
>>>>   };
>>>> +#define WDT_MAX_WATCHDOGS       4      /* Maximum number of 
>>>> watchdog devices */
>>>> +
>>>> +#define WDT_HARD_POWER_OFF      0
>>>> +#define WDT_HARD_RESTART        1
>>>> +#define WDT_DUMP_RESTART        2
>>>> +
>>>> +#define TYPE_SPAPR_WDT "spapr-wdt"
>>>> +OBJECT_DECLARE_SIMPLE_TYPE(SpaprWatchdog, SPAPR_WDT)
>>>> +
>>>> +typedef struct SpaprWatchdog {
>>>> +    /*< private >*/
>>>> +    DeviceState parent_obj;
>>>> +    /*< public >*/
>>>> +
>>>> +    unsigned num;
>>>
>>> uint8_t should be enough no ? I see num is only used for trace events.
>>
>>
>> It should but why? It is not migrating, and using uint8_t creates 
>> alignment gap here, and no benefit :) And I am removing it anyway, see 
>> below.
>>
>>>
>>>> +    QEMUTimer timer;
>>>> +    uint8_t action;
>>>> +} SpaprWatchdog;
>>>> +
>>>>   /**
>>>>    * SpaprMachineState:
>>>>    */
>>>> @@ -264,6 +283,8 @@ struct SpaprMachineState {
>>>>       uint32_t 
>>>> FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
>>>>       Error *fwnmi_migration_blocker;
>>>> +
>>>> +    SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
>>>>   };
>>>>   #define H_SUCCESS         0
>>>> @@ -344,6 +365,7 @@ struct SpaprMachineState {
>>>>   #define H_P7              -60
>>>>   #define H_P8              -61
>>>>   #define H_P9              -62
>>>> +#define H_NOOP            -63
>>>>   #define H_UNSUPPORTED     -67
>>>>   #define H_OVERLAP         -68
>>>>   #define H_UNSUPPORTED_FLAG -256
>>>> @@ -564,8 +586,9 @@ struct SpaprMachineState {
>>>>   #define H_SCM_HEALTH            0x400
>>>>   #define H_RPT_INVALIDATE        0x448
>>>>   #define H_SCM_FLUSH             0x44C
>>>> +#define H_WATCHDOG              0x45C
>>>> -#define MAX_HCALL_OPCODE        H_SCM_FLUSH
>>>> +#define MAX_HCALL_OPCODE        H_WATCHDOG
>>>>   /* The hcalls above are standardized in PAPR and implemented by pHyp
>>>>    * as well.
>>>> @@ -1027,6 +1050,7 @@ extern const VMStateDescription 
>>>> vmstate_spapr_cap_large_decr;
>>>>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>>>>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
>>>>   extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
>>>> +extern const VMStateDescription vmstate_spapr_wdt;
>>>>   static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int 
>>>> cap)
>>>>   {
>>>> @@ -1063,4 +1087,7 @@ target_ulong 
>>>> spapr_vof_client_architecture_support(MachineState *ms,
>>>>                                                      target_ulong 
>>>> ovec_addr);
>>>>   void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void 
>>>> *fdt);
>>>> +/* H_WATCHDOG */
>>>> +void spapr_watchdog_init(SpaprMachineState *spapr);
>>>> +
>>>>   #endif /* HW_SPAPR_H */
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index fd4942e8813c..9a5382d5270f 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -898,6 +898,8 @@ static void spapr_dt_rtas(SpaprMachineState 
>>>> *spapr, void *fdt)
>>>>           add_str(hypertas, "hcall-hpt-resize");
>>>>       }
>>>> +    add_str(hypertas, "hcall-watchdog");
>>>> +
>>>>       _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
>>>>                        hypertas->str, hypertas->len));
>>>>       g_string_free(hypertas, TRUE);
>>>> @@ -3051,6 +3053,8 @@ static void spapr_machine_init(MachineState 
>>>> *machine)
>>>>           spapr->vof->fw_size = fw_size; /* for claim() on itself */
>>>>           spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, 
>>>> spapr_h_vof_client);
>>>>       }
>>>> +
>>>> +    spapr_watchdog_init(spapr);
>>>>   }
>>>>   #define DEFAULT_KVM_TYPE "auto"
>>>> diff --git a/hw/watchdog/spapr_watchdog.c 
>>>> b/hw/watchdog/spapr_watchdog.c
>>>> new file mode 100644
>>>> index 000000000000..aeaf7c52cbad
>>>> --- /dev/null
>>>> +++ b/hw/watchdog/spapr_watchdog.c
>>>> @@ -0,0 +1,248 @@
>>>> +/*
>>>> + * This library is free software; you can redistribute it and/or
>>>> + * modify it under the terms of the GNU Lesser General Public
>>>> + * License as published by the Free Software Foundation; either
>>>> + * version 2.1 of the License, or (at your option) any later version.
>>>> + *
>>>> + * This library is distributed in the hope that it will be useful,
>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> + * Lesser General Public License for more details.
>>>> + *
>>>> + * You should have received a copy of the GNU Lesser General Public
>>>> + * License along with this library; if not, see 
>>>> <http://www.gnu.org/licenses/>.
>>>> + */
>>>> +
>>>> +#include "qemu/osdep.h"
>>>> +#include "qapi/error.h"
>>>> +#include "target/ppc/cpu.h"
>>>> +#include "migration/vmstate.h"
>>>> +#include "trace.h"
>>>> +
>>>> +#include "hw/ppc/spapr.h"
>>>> +
>>>> +/*
>>>> + * Bits 47: "leaveOtherWatchdogsRunningOnTimeout", specified on
>>>> + * the "Start watchdog" operation,
>>>> + * 0 - stop out-standing watchdogs on timeout,
>>>> + * 1 - leave outstanding watchdogs running on timeout
>>>> + */
>>>> +#define PSERIES_WDTF_LEAVE_OTHER    PPC_BIT(47)
>>>> +
>>>> +/*    Bits 48-55: "operation" */
>>>> +#define PSERIES_WDTF_OP(op)             SETFIELD(PPC_BITMASK(48, 
>>>> 55), 0, (op))
>>>> +#define PSERIES_WDTF_OP_START           PSERIES_WDTF_OP(0x1)
>>>> +#define PSERIES_WDTF_OP_STOP            PSERIES_WDTF_OP(0x2)
>>>> +#define PSERIES_WDTF_OP_QUERY           PSERIES_WDTF_OP(0x3)
>>>> +#define PSERIES_WDTF_OP_QUERY_LPM       PSERIES_WDTF_OP(0x4)
>>>> +
>>>> +/*    Bits 56-63: "timeoutAction" */
>>>> +#define PSERIES_WDTF_ACTION(ac)         SETFIELD(PPC_BITMASK(56, 
>>>> 63), 0, (ac))
>>>> +#define PSERIES_WDTF_ACTION_HARD_POWER_OFF  PSERIES_WDTF_ACTION(0x1)
>>>> +#define PSERIES_WDTF_ACTION_HARD_RESTART    PSERIES_WDTF_ACTION(0x2)
>>>> +#define PSERIES_WDTF_ACTION_DUMP_RESTART    PSERIES_WDTF_ACTION(0x3)
>>>> +#define PSERIES_WDTF_RESERVED           PPC_BITMASK(0, 46)
>>>> +
>>>> +/*
>>>> + * For the "Query watchdog capabilities" operation, a uint64 structure
>>>> + * defined as:
>>>> + * Bits 0-15: The minimum supported timeout in milliseconds
>>>> + * Bits 16-31: The number of watchdogs supported
>>>> + * Bits 32-63: Reserved
>>>> + */
>>>> +#define PSERIES_WDTQ_MIN_TIMEOUT(ms)    SETFIELD(PPC_BITMASK(0, 
>>>> 15), 0, (ms))
>>>> +#define PSERIES_WDTQ_NUM(n)             SETFIELD(PPC_BITMASK(16, 
>>>> 31), 0, (n))
>>>> +
>>>> +/*
>>>> + * For the "Query watchdog LPM requirement" operation:
>>>> + * 1 = The given "watchdogNumber" must be stopped prior to suspending
>>>> + * 2 = The given "watchdogNumber" does not have to be stopped prior to
>>>> + * suspending
>>>> + */
>>>> +#define PSERIES_WDTQL_STOPPED               1
>>>> +#define PSERIES_WDTQL_QUERY_NOT_STOPPED     2
>>>> +
>>>> +#define WDT_MIN_TIMEOUT 1 /* 1ms */
>>>> +
>>>> +static void watchdog_expired(void *pw)
>>>> +{
>>>> +    struct SpaprWatchdog *w = pw;
>>>
>>> s/struct//
>>>
>>>> +    CPUState *cs;
>>>> +
>>>> +    trace_spapr_watchdog_expired(w->num, w->action);
>>>> +    switch (w->action) {
>>>> +    case WDT_HARD_POWER_OFF:
>>>> +        qemu_system_vmstop_request(RUN_STATE_SHUTDOWN);
>>>> +        break;
>>>> +    case WDT_HARD_RESTART:
>>>> +        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
>>>> +        break;
>>>> +    case WDT_DUMP_RESTART:
>>>> +        CPU_FOREACH(cs) {
>>>> +            async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, 
>>>> RUN_ON_CPU_NULL);
>>>> +        }
>>>> +        break;
>>>> +    }
>>>> +}
>>>> +
>>>> +static target_ulong watchdog_stop(unsigned watchdogNumber,
>>>> +                                  struct SpaprWatchdog *w)
>>>> +{
>>>> +    target_ulong ret = H_NOOP;
>>>> +
>>>> +    if (timer_pending(&w->timer)) {
>>>> +        timer_del(&w->timer);
>>>> +        ret = H_SUCCESS;
>>>> +    }
>>>> +    trace_spapr_watchdog_stop(watchdogNumber, ret);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static target_ulong h_watchdog(PowerPCCPU *cpu,
>>>> +                               SpaprMachineState *spapr,
>>>> +                               target_ulong opcode, target_ulong 
>>>> *args)
>>>> +{
>>>> +    target_ulong flags = args[0];
>>>> +    target_ulong watchdogNumber = args[1];
>>>> +    target_ulong timeoutInMs = args[2];
>>>> +    unsigned operation = flags & PSERIES_WDTF_OP(~0);
>>>> +    unsigned timeoutAction = flags & PSERIES_WDTF_ACTION(~0);
>>>> +    struct SpaprWatchdog *w;
>>>> +
>>>> +    if (flags & PSERIES_WDTF_RESERVED) {
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    switch (operation) {
>>>> +    case PSERIES_WDTF_OP_START:
>>>> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
>>>> +            return H_P2;
>>>> +        }
>>>> +        if (timeoutInMs <= WDT_MIN_TIMEOUT) {
>>>> +            return H_P3;
>>>> +        }
>>>> +
>>>> +        w = &spapr->wds[watchdogNumber - 1];
>>>
>>> So first index is 1 in PAPR ...
>>
>>
>> Yes, I thought I commented on this somewhere but lost in rebases.
>>
>>
>>>
>>>> +        switch (timeoutAction) {
>>>> +        case PSERIES_WDTF_ACTION_HARD_POWER_OFF:
>>>> +            w->action = WDT_HARD_POWER_OFF;
>>>> +            break;
>>>> +        case PSERIES_WDTF_ACTION_HARD_RESTART:
>>>> +            w->action = WDT_HARD_RESTART;
>>>> +            break;
>>>> +        case PSERIES_WDTF_ACTION_DUMP_RESTART:
>>>> +            w->action = WDT_DUMP_RESTART;
>>>> +            break;
>>>> +        default:
>>>> +            return H_PARAMETER;
>>>> +        }
>>>> +        timer_mod(&w->timer,
>>>> +                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 
>>>> timeoutInMs);
>>>> +        trace_spapr_watchdog_start(flags, watchdogNumber, 
>>>> timeoutInMs);
>>>> +        break;
>>>> +    case PSERIES_WDTF_OP_STOP:
>>>> +        if (watchdogNumber == (uint64_t) ~0) {
>>>
>>> May be add a define for this special value. It's better for readability.
>>
>> Will do.
>>
>>
>>>> +            int i;
>>>> +
>>>> +            for (i = 1; i <= ARRAY_SIZE(spapr->wds); ++i) {
>>>> +                watchdog_stop(i, &spapr->wds[i - 1]);
>>>> +            }
>>>> +        } else if (watchdogNumber <= ARRAY_SIZE(spapr->wds)) {
>>>> +            watchdog_stop(watchdogNumber, 
>>>> &spapr->wds[watchdogNumber - 1]);
>>>> +        } else {
>>>> +            return H_P2;
>>>> +        }
>>>> +        break;
>>>> +    case PSERIES_WDTF_OP_QUERY:
>>>> +        args[0] = PSERIES_WDTQ_MIN_TIMEOUT(WDT_MIN_TIMEOUT) |
>>>> +            PSERIES_WDTQ_NUM(ARRAY_SIZE(spapr->wds));
>>>> +        trace_spapr_watchdog_query(args[0]);
>>>> +        break;
>>>> +    case PSERIES_WDTF_OP_QUERY_LPM:
>>>> +        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
>>>> +            return H_P2;
>>>> +        }
>>>> +        args[0] = PSERIES_WDTQL_QUERY_NOT_STOPPED;
>>>> +        trace_spapr_watchdog_query_lpm(args[0]);
>>>> +        break;
>>>> +    default:
>>>> +        return H_PARAMETER;
>>>> +    }
>>>> +
>>>> +    return H_SUCCESS;
>>>> +}
>>>> +
>>>> +void spapr_watchdog_init(SpaprMachineState *spapr)
>>>
>>> This could have a 'Error **errp' parameter.
>>
>>
>> I was repeating somewhat similar spapr_rtc_create(), and the called - 
>> spapr_machine_init() - does not have *errp. Seems pointless as it 
>> fails - something is horrendously broken.
> 
> 
> Well, it's up to the caller to decide what to do in case of
> errors. If in this case, it is "log an error and exit",
> I would simply :
> 
>    spapr_watchdog_init(spapr, &error_fatal);
> 
> But it is not necessarily fatal to fail to initialize some
> device.

The device is so simple and not configurable via the command line that 
it is fatal. And it does not touch *errp anyway.

Passing errp also means spapr_watchdog_init() should do clean up of 
whatever it did in a loop before hitting the error, for example in this 
case - unrealize() previously realized watchdogs. This adds useless code 
as it is going to be fatal anyway. Thanks,


> 
>>
>>
>>>
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < ARRAY_SIZE(spapr->wds); ++i) {
>>>> +        char name[16];g_autofree char *name = 
>>>> g_strdup_printf("wdt%d", i + 1);
>>>
>>>> +        SpaprWatchdog *w = &spapr->wds[i];
>>>> +
>>>> +        w->num = i + 1;
>>>
>>> it should be a property.
>>
>> This cannot change and used only for tracing, and the QOM name has the 
>> number as well. I am replacing it with
>>
>> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> unsigned num = w - spapr->wds;
>>
>> and removing the @num from the struct.
> 
> Nice !
> 
> Thanks
> 
> C.
> 
> 
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>
>>> C.
>>>
>>>> +        snprintf(name, sizeof(name) - 1, "wdt%d", i + 1);
>>>> +        object_initialize_child_with_props(OBJECT(spapr), name, w,
>>>> +                                           sizeof(SpaprWatchdog),
>>>> +                                           TYPE_SPAPR_WDT,
>>>> +                                           &error_fatal, NULL);
>>>> +        qdev_realize(DEVICE(w), NULL, &error_fatal);
>>>> +    }
>>>> +}
>>>> +
>>>> +static bool watchdog_needed(void *opaque)
>>>> +{
>>>> +    SpaprWatchdog *w = opaque;
>>>> +
>>>> +    return timer_pending(&w->timer);
>>>> +}
>>>> +
>>>> +static const VMStateDescription vmstate_wdt = {
>>>> +    .name = "spapr_watchdog",
>>>> +    .version_id = 1,
>>>> +    .minimum_version_id = 1,
>>>> +    .needed = watchdog_needed,
>>>> +    .fields = (VMStateField[]) {
>>>> +        VMSTATE_UINT8(action, SpaprWatchdog),
>>>> +        VMSTATE_TIMER(timer, SpaprWatchdog),
>>>> +        VMSTATE_END_OF_LIST()
>>>> +    }
>>>> +};
>>>> +
>>>> +static void spapr_wdt_realize(DeviceState *dev, Error **errp)
>>>> +{
>>>> +    SpaprWatchdog *w = SPAPR_WDT(dev);
>>>> +
>>>> +    timer_init_ms(&w->timer, QEMU_CLOCK_VIRTUAL, watchdog_expired, w);
>>>> +
>>>> +    object_property_add_uint64_ptr(OBJECT(dev), "expire",
>>>> +                                   (uint64_t *)&w->timer.expire_time,
>>>> +                                   OBJ_PROP_FLAG_READ);
>>>> +    object_property_add_uint8_ptr(OBJECT(dev), "action", &w->action,
>>>> +                                  OBJ_PROP_FLAG_READ);
>>>> +}
>>>> +
>>>> +static void spapr_wdt_class_init(ObjectClass *oc, void *data)
>>>> +{
>>>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>>>> +
>>>> +    dc->renualize = spapr_wdt_realize;
>>>> +    dc->vmsd = &vmstate_wdt;
>>>> +    dc->user_creatable = false;
>>>> +}
>>>> +
>>>> +static const TypeInfo spapr_wdt_info = {
>>>> +    .name          = TYPE_SPAPR_WDT,
>>>> +    .parent        = TYPE_DEVICE,
>>>> +    .instance_size = sizeof(SpaprWatchdog),
>>>> +    .class_init    = spapr_wdt_class_init,
>>>> +};
>>>> +
>>>> +static void spapr_watchdog_register_types(void)
>>>> +{
>>>> +    spapr_register_hypercall(H_WATCHDOG, h_watchdog);
>>>> +    type_register_static(&spapr_wdt_info);
>>>> +}
>>>> +
>>>> +type_init(spapr_watchdog_register_types)
>>>> diff --git a/hw/watchdog/meson.build b/hw/watchdog/meson.build
>>>> index 054c403dea7c..8974b5cf4c8a 100644
>>>> --- a/hw/watchdog/meson.build
>>>> +++ b/hw/watchdog/meson.build
>>>> @@ -6,3 +6,4 @@ softmmu_ss.add(when: 'CONFIG_WDT_DIAG288', if_true: 
>>>> files('wdt_diag288.c'))
>>>>   softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: 
>>>> files('wdt_aspeed.c'))
>>>>   softmmu_ss.add(when: 'CONFIG_WDT_IMX2', if_true: files('wdt_imx2.c'))
>>>>   softmmu_ss.add(when: 'CONFIG_WDT_SBSA', if_true: 
>>>> files('sbsa_gwdt.c'))
>>>> +specific_ss.add(when: 'CONFIG_PSERIES', if_true: 
>>>> files('spapr_watchdog.c'))
>>>> diff --git a/hw/watchdog/trace-events b/hw/watchdog/trace-events
>>>> index e7523e22aaf2..89ccbcfdfd20 100644
>>>> --- a/hw/watchdog/trace-events
>>>> +++ b/hw/watchdog/trace-events
>>>> @@ -9,3 +9,10 @@ cmsdk_apb_watchdog_lock(uint32_t lock) "CMSDK APB 
>>>> watchdog: lock %" PRIu32
>>>>   # wdt-aspeed.c
>>>>   aspeed_wdt_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " 
>>>> size=%d"
>>>>   aspeed_wdt_write(uint64_t addr, uint32_t size, uint64_t data) 
>>>> "@0x%" PRIx64 " size=%d value=0x%"PRIx64
>>>> +
>>>> +# spapr_watchdog.c
>>>> +spapr_watchdog_start(uint64_t flags, uint64_t num, uint64_t 
>>>> timeout) "Flags 0x%" PRIx64 " num=%" PRId64 " %" PRIu64 "ms"
>>>> +spapr_watchdog_stop(uint64_t num, uint64_t ret) "num=%" PRIu64 " 
>>>> ret=%" PRId64
>>>> +spapr_watchdog_query(uint64_t caps) "caps=0x%" PRIx64
>>>> +spapr_watchdog_query_lpm(uint64_t caps) "caps=0x%" PRIx64
>>>> +spapr_watchdog_expired(uint64_t num, unsigned action) "num=%" 
>>>> PRIu64 " action=%u"
>>
>
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 072dda2c7265..ef1e38abd5c7 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -164,6 +164,25 @@  struct SpaprMachineClass {
     SpaprIrq *irq;
 };
 
+#define WDT_MAX_WATCHDOGS       4      /* Maximum number of watchdog devices */
+
+#define WDT_HARD_POWER_OFF      0
+#define WDT_HARD_RESTART        1
+#define WDT_DUMP_RESTART        2
+
+#define TYPE_SPAPR_WDT "spapr-wdt"
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprWatchdog, SPAPR_WDT)
+
+typedef struct SpaprWatchdog {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    unsigned num;
+    QEMUTimer timer;
+    uint8_t action;
+} SpaprWatchdog;
+
 /**
  * SpaprMachineState:
  */
@@ -264,6 +283,8 @@  struct SpaprMachineState {
     uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
 
     Error *fwnmi_migration_blocker;
+
+    SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
 };
 
 #define H_SUCCESS         0
@@ -344,6 +365,7 @@  struct SpaprMachineState {
 #define H_P7              -60
 #define H_P8              -61
 #define H_P9              -62
+#define H_NOOP            -63
 #define H_UNSUPPORTED     -67
 #define H_OVERLAP         -68
 #define H_UNSUPPORTED_FLAG -256
@@ -564,8 +586,9 @@  struct SpaprMachineState {
 #define H_SCM_HEALTH            0x400
 #define H_RPT_INVALIDATE        0x448
 #define H_SCM_FLUSH             0x44C
+#define H_WATCHDOG              0x45C
 
-#define MAX_HCALL_OPCODE        H_SCM_FLUSH
+#define MAX_HCALL_OPCODE        H_WATCHDOG
 
 /* The hcalls above are standardized in PAPR and implemented by pHyp
  * as well.
@@ -1027,6 +1050,7 @@  extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
 extern const VMStateDescription vmstate_spapr_cap_fwnmi;
 extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
+extern const VMStateDescription vmstate_spapr_wdt;
 
 static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
 {
@@ -1063,4 +1087,7 @@  target_ulong spapr_vof_client_architecture_support(MachineState *ms,
                                                    target_ulong ovec_addr);
 void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
 
+/* H_WATCHDOG */
+void spapr_watchdog_init(SpaprMachineState *spapr);
+
 #endif /* HW_SPAPR_H */
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index fd4942e8813c..9a5382d5270f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -898,6 +898,8 @@  static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
         add_str(hypertas, "hcall-hpt-resize");
     }
 
+    add_str(hypertas, "hcall-watchdog");
+
     _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
                      hypertas->str, hypertas->len));
     g_string_free(hypertas, TRUE);
@@ -3051,6 +3053,8 @@  static void spapr_machine_init(MachineState *machine)
         spapr->vof->fw_size = fw_size; /* for claim() on itself */
         spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
     }
+
+    spapr_watchdog_init(spapr);
 }
 
 #define DEFAULT_KVM_TYPE "auto"
diff --git a/hw/watchdog/spapr_watchdog.c b/hw/watchdog/spapr_watchdog.c
new file mode 100644
index 000000000000..aeaf7c52cbad
--- /dev/null
+++ b/hw/watchdog/spapr_watchdog.c
@@ -0,0 +1,248 @@ 
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+
+#include "hw/ppc/spapr.h"
+
+/*
+ * Bits 47: "leaveOtherWatchdogsRunningOnTimeout", specified on
+ * the "Start watchdog" operation,
+ * 0 - stop out-standing watchdogs on timeout,
+ * 1 - leave outstanding watchdogs running on timeout
+ */
+#define PSERIES_WDTF_LEAVE_OTHER    PPC_BIT(47)
+
+/*    Bits 48-55: "operation" */
+#define PSERIES_WDTF_OP(op)             SETFIELD(PPC_BITMASK(48, 55), 0, (op))
+#define PSERIES_WDTF_OP_START           PSERIES_WDTF_OP(0x1)
+#define PSERIES_WDTF_OP_STOP            PSERIES_WDTF_OP(0x2)
+#define PSERIES_WDTF_OP_QUERY           PSERIES_WDTF_OP(0x3)
+#define PSERIES_WDTF_OP_QUERY_LPM       PSERIES_WDTF_OP(0x4)
+
+/*    Bits 56-63: "timeoutAction" */
+#define PSERIES_WDTF_ACTION(ac)         SETFIELD(PPC_BITMASK(56, 63), 0, (ac))
+#define PSERIES_WDTF_ACTION_HARD_POWER_OFF  PSERIES_WDTF_ACTION(0x1)
+#define PSERIES_WDTF_ACTION_HARD_RESTART    PSERIES_WDTF_ACTION(0x2)
+#define PSERIES_WDTF_ACTION_DUMP_RESTART    PSERIES_WDTF_ACTION(0x3)
+#define PSERIES_WDTF_RESERVED           PPC_BITMASK(0, 46)
+
+/*
+ * For the "Query watchdog capabilities" operation, a uint64 structure
+ * defined as:
+ * Bits 0-15: The minimum supported timeout in milliseconds
+ * Bits 16-31: The number of watchdogs supported
+ * Bits 32-63: Reserved
+ */
+#define PSERIES_WDTQ_MIN_TIMEOUT(ms)    SETFIELD(PPC_BITMASK(0, 15), 0, (ms))
+#define PSERIES_WDTQ_NUM(n)             SETFIELD(PPC_BITMASK(16, 31), 0, (n))
+
+/*
+ * For the "Query watchdog LPM requirement" operation:
+ * 1 = The given "watchdogNumber" must be stopped prior to suspending
+ * 2 = The given "watchdogNumber" does not have to be stopped prior to
+ * suspending
+ */
+#define PSERIES_WDTQL_STOPPED               1
+#define PSERIES_WDTQL_QUERY_NOT_STOPPED     2
+
+#define WDT_MIN_TIMEOUT 1 /* 1ms */
+
+static void watchdog_expired(void *pw)
+{
+    struct SpaprWatchdog *w = pw;
+    CPUState *cs;
+
+    trace_spapr_watchdog_expired(w->num, w->action);
+    switch (w->action) {
+    case WDT_HARD_POWER_OFF:
+        qemu_system_vmstop_request(RUN_STATE_SHUTDOWN);
+        break;
+    case WDT_HARD_RESTART:
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+        break;
+    case WDT_DUMP_RESTART:
+        CPU_FOREACH(cs) {
+            async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
+        }
+        break;
+    }
+}
+
+static target_ulong watchdog_stop(unsigned watchdogNumber,
+                                  struct SpaprWatchdog *w)
+{
+    target_ulong ret = H_NOOP;
+
+    if (timer_pending(&w->timer)) {
+        timer_del(&w->timer);
+        ret = H_SUCCESS;
+    }
+    trace_spapr_watchdog_stop(watchdogNumber, ret);
+
+    return ret;
+}
+
+static target_ulong h_watchdog(PowerPCCPU *cpu,
+                               SpaprMachineState *spapr,
+                               target_ulong opcode, target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong watchdogNumber = args[1];
+    target_ulong timeoutInMs = args[2];
+    unsigned operation = flags & PSERIES_WDTF_OP(~0);
+    unsigned timeoutAction = flags & PSERIES_WDTF_ACTION(~0);
+    struct SpaprWatchdog *w;
+
+    if (flags & PSERIES_WDTF_RESERVED) {
+        return H_PARAMETER;
+    }
+
+    switch (operation) {
+    case PSERIES_WDTF_OP_START:
+        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
+            return H_P2;
+        }
+        if (timeoutInMs <= WDT_MIN_TIMEOUT) {
+            return H_P3;
+        }
+
+        w = &spapr->wds[watchdogNumber - 1];
+        switch (timeoutAction) {
+        case PSERIES_WDTF_ACTION_HARD_POWER_OFF:
+            w->action = WDT_HARD_POWER_OFF;
+            break;
+        case PSERIES_WDTF_ACTION_HARD_RESTART:
+            w->action = WDT_HARD_RESTART;
+            break;
+        case PSERIES_WDTF_ACTION_DUMP_RESTART:
+            w->action = WDT_DUMP_RESTART;
+            break;
+        default:
+            return H_PARAMETER;
+        }
+        timer_mod(&w->timer,
+                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + timeoutInMs);
+        trace_spapr_watchdog_start(flags, watchdogNumber, timeoutInMs);
+        break;
+    case PSERIES_WDTF_OP_STOP:
+        if (watchdogNumber == (uint64_t) ~0) {
+            int i;
+
+            for (i = 1; i <= ARRAY_SIZE(spapr->wds); ++i) {
+                watchdog_stop(i, &spapr->wds[i - 1]);
+            }
+        } else if (watchdogNumber <= ARRAY_SIZE(spapr->wds)) {
+            watchdog_stop(watchdogNumber, &spapr->wds[watchdogNumber - 1]);
+        } else {
+            return H_P2;
+        }
+        break;
+    case PSERIES_WDTF_OP_QUERY:
+        args[0] = PSERIES_WDTQ_MIN_TIMEOUT(WDT_MIN_TIMEOUT) |
+            PSERIES_WDTQ_NUM(ARRAY_SIZE(spapr->wds));
+        trace_spapr_watchdog_query(args[0]);
+        break;
+    case PSERIES_WDTF_OP_QUERY_LPM:
+        if (watchdogNumber > ARRAY_SIZE(spapr->wds)) {
+            return H_P2;
+        }
+        args[0] = PSERIES_WDTQL_QUERY_NOT_STOPPED;
+        trace_spapr_watchdog_query_lpm(args[0]);
+        break;
+    default:
+        return H_PARAMETER;
+    }
+
+    return H_SUCCESS;
+}
+
+void spapr_watchdog_init(SpaprMachineState *spapr)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(spapr->wds); ++i) {
+        char name[16];
+        SpaprWatchdog *w = &spapr->wds[i];
+
+        w->num = i + 1;
+        snprintf(name, sizeof(name) - 1, "wdt%d", i + 1);
+        object_initialize_child_with_props(OBJECT(spapr), name, w,
+                                           sizeof(SpaprWatchdog),
+                                           TYPE_SPAPR_WDT,
+                                           &error_fatal, NULL);
+        qdev_realize(DEVICE(w), NULL, &error_fatal);
+    }
+}
+
+static bool watchdog_needed(void *opaque)
+{
+    SpaprWatchdog *w = opaque;
+
+    return timer_pending(&w->timer);
+}
+
+static const VMStateDescription vmstate_wdt = {
+    .name = "spapr_watchdog",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = watchdog_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(action, SpaprWatchdog),
+        VMSTATE_TIMER(timer, SpaprWatchdog),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void spapr_wdt_realize(DeviceState *dev, Error **errp)
+{
+    SpaprWatchdog *w = SPAPR_WDT(dev);
+
+    timer_init_ms(&w->timer, QEMU_CLOCK_VIRTUAL, watchdog_expired, w);
+
+    object_property_add_uint64_ptr(OBJECT(dev), "expire",
+                                   (uint64_t *)&w->timer.expire_time,
+                                   OBJ_PROP_FLAG_READ);
+    object_property_add_uint8_ptr(OBJECT(dev), "action", &w->action,
+                                  OBJ_PROP_FLAG_READ);
+}
+
+static void spapr_wdt_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    dc->realize = spapr_wdt_realize;
+    dc->vmsd = &vmstate_wdt;
+    dc->user_creatable = false;
+}
+
+static const TypeInfo spapr_wdt_info = {
+    .name          = TYPE_SPAPR_WDT,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(SpaprWatchdog),
+    .class_init    = spapr_wdt_class_init,
+};
+
+static void spapr_watchdog_register_types(void)
+{
+    spapr_register_hypercall(H_WATCHDOG, h_watchdog);
+    type_register_static(&spapr_wdt_info);
+}
+
+type_init(spapr_watchdog_register_types)
diff --git a/hw/watchdog/meson.build b/hw/watchdog/meson.build
index 054c403dea7c..8974b5cf4c8a 100644
--- a/hw/watchdog/meson.build
+++ b/hw/watchdog/meson.build
@@ -6,3 +6,4 @@  softmmu_ss.add(when: 'CONFIG_WDT_DIAG288', if_true: files('wdt_diag288.c'))
 softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('wdt_aspeed.c'))
 softmmu_ss.add(when: 'CONFIG_WDT_IMX2', if_true: files('wdt_imx2.c'))
 softmmu_ss.add(when: 'CONFIG_WDT_SBSA', if_true: files('sbsa_gwdt.c'))
+specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_watchdog.c'))
diff --git a/hw/watchdog/trace-events b/hw/watchdog/trace-events
index e7523e22aaf2..89ccbcfdfd20 100644
--- a/hw/watchdog/trace-events
+++ b/hw/watchdog/trace-events
@@ -9,3 +9,10 @@  cmsdk_apb_watchdog_lock(uint32_t lock) "CMSDK APB watchdog: lock %" PRIu32
 # wdt-aspeed.c
 aspeed_wdt_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size=%d"
 aspeed_wdt_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size=%d value=0x%"PRIx64
+
+# spapr_watchdog.c
+spapr_watchdog_start(uint64_t flags, uint64_t num, uint64_t timeout) "Flags 0x%" PRIx64 " num=%" PRId64 " %" PRIu64 "ms"
+spapr_watchdog_stop(uint64_t num, uint64_t ret) "num=%" PRIu64 " ret=%" PRId64
+spapr_watchdog_query(uint64_t caps) "caps=0x%" PRIx64
+spapr_watchdog_query_lpm(uint64_t caps) "caps=0x%" PRIx64
+spapr_watchdog_expired(uint64_t num, unsigned action) "num=%" PRIu64 " action=%u"