diff mbox

core/fast-reboot.c: Add sreset opal call

Message ID 1479178153-19982-1-git-send-email-alistair@popple.id.au
State Deferred
Headers show

Commit Message

Alistair Popple Nov. 15, 2016, 2:49 a.m. UTC
Sending an NMI to other CPUs regardless of their current state requires
a way to reset them. POWER hardware has a method of directly injecting
resets via direct thread control, however this only works if the
thread is not active (eg. in a sleep or nap state).

Resetting an active thread can be performed either via forcing the
threads to an inactive state (as fast reboot does) or by ramming an
instruction sequence that simulates an sreset. This patch implements
the latter as forcing a thread to the inactive state is not ideal for
debug purposes as the threads lose state.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---

Currently active threads on the currently executing core cannot be
sreset as a thread cannot ram other threads on the same core. This
means that, to reset these threads, the caller will need to make the
call from a different core.

core/fast-reboot.c                            | 349 +++++++++++++++++++++++++-
 doc/opal-api/opal-signal-system-reset-128.txt |  30 +++
 include/opal-api.h                            |   3 +-
 include/skiboot.h                             |   1 +
 platforms/astbmc/common.c                     |   2 +
 5 files changed, 375 insertions(+), 10 deletions(-)
 create mode 100644 doc/opal-api/opal-signal-system-reset-128.txt

--
2.1.4

Comments

Joel Stanley Nov. 15, 2016, 3:27 a.m. UTC | #1
On Tue, Nov 15, 2016 at 1:19 PM, Alistair Popple <alistair@popple.id.au> wrote:
> Sending a NMI to other CPUs regardless of their current state requires
> a way to reset them. POWER hardware has a method of directly injecting
> resets via direct thread control, however this only works if the
> thread is not active (eg. in a sleep or nap state).
>
> Resetting an active thread can be performed either via forcing the
> threads to an inactive state (as fast reboot does) or by ramming an
> instruction sequence that simulates an sreset. This patch implements
> the latter as forcing a thread to the inactive state is not ideal for
> debug purposes as the threads loose state.

Lose.

>
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
>
> Currently active threads on the currently executing core cannot be
> sreset as a thread cannot ram other threads on the same core. This
> means the caller will need to reset these threads to make the call
> from a different core.
>
> core/fast-reboot.c                            | 349 +++++++++++++++++++++++++-
>  doc/opal-api/opal-signal-system-reset-128.txt |  30 +++
>  include/opal-api.h                            |   3 +-
>  include/skiboot.h                             |   1 +
>  platforms/astbmc/common.c                     |   2 +
>  5 files changed, 375 insertions(+), 10 deletions(-)
>  create mode 100644 doc/opal-api/opal-signal-system-reset-128.txt
>
> diff --git a/core/fast-reboot.c b/core/fast-reboot.c
> index 66b3182..08d7c25 100644
> --- a/core/fast-reboot.c
> +++ b/core/fast-reboot.c
> @@ -28,10 +28,33 @@
>  #include <chiptod.h>
>
>  #define P8_EX_TCTL_DIRECT_CONTROLS(t)  (0x10013000 + (t) * 0x10)
> -#define P8_DIRECT_CTL_STOP             PPC_BIT(63)
>  #define P8_DIRECT_CTL_PRENAP           PPC_BIT(47)
>  #define P8_DIRECT_CTL_SRESET           PPC_BIT(60)
> -
> +#define P8_DIRECT_CTL_START            PPC_BIT(62)
> +#define P8_DIRECT_CTL_STOP             PPC_BIT(63)
> +#define P8_EX_TCTL_RAS_STATUS(t)       (0x10013002 + (t) * 0x10)
> +#define RAS_STATUS_SRQ_EMPTY           PPC_BIT(8)
> +#define RAS_STATUS_LSU_QUIESCED        PPC_BIT(9)
> +#define RAS_STATUS_INST_COMPLETE       PPC_BIT(12)
> +#define RAS_STATUS_THREAD_ACTIVE       PPC_BIT(48)
> +#define RAS_STATUS_TS_QUIESCE          PPC_BIT(49)
> +#define P8_EX_THREAD_ACTIVE            0x1001310e
> +#define P8_EX_SPRC_REG                 0x10013280
> +#define SPRC_REG_SCRATCH_SPR           PPC_BIT(57)
> +#define P8_EX_SPR_MODE_REG             0x10013281
> +#define SPR_MODE_SPRC_WR_EN            PPC_BIT(3)
> +#define SPR_MODE_SPRC_SEL              PPC_BITMASK(16, 19)
> +#define SPR_MODE_SPRC_T_SEL            PPC_BITMASK(20, 27)
> +#define P8_EX_SCR0_REG                 0x10013283
> +#define P8_EX_RAM_MODE_REG             0x10013c00
> +#define RAM_MODE_ENABLE                PPC_BIT(0)
> +#define P8_EX_RAM_CTRL_REG             0x10013c01
> +#define RAM_CTRL_THREAD_SELECT         PPC_BITMASK(0, 2)
> +#define RAM_CTRL_INSTR                 PPC_BITMASK(3, 34)
> +#define P8_EX_RAM_STATUS_REG           0x10013c02
> +#define RAM_STATUS                     PPC_BIT(1)
> +
> +#define RAS_STATUS_TIMEOUT             10
>
>  /* Flag tested by the OPAL entry code */
>  uint8_t reboot_in_progress;
> @@ -39,6 +62,14 @@ static volatile bool fast_boot_release;
>  static struct cpu_thread *last_man_standing;
>  static struct lock reset_lock = LOCK_UNLOCKED;
>
> +/* opcodes for instruction ramming */
> +#define MFNIA(r) (0x00000004UL | ((r) << 21))
> +#define MTNIA(r) (0x00000002UL | ((r) << 21))
> +#define MFMSR(r) (0x7c0000a6UL | ((r) << 21))
> +#define MTMSR(r) (0x7c000124UL | ((r) << 21))
> +#define MFSPR(r, s) (0x7c0002a6UL | ((r) << 21) | (((s) & 0x1f) << 16) | (((s) & 0x3e0) << 6))
> +#define MTSPR(s, r) (0x7c0003a6UL | ((r) << 21) | (((s) & 0x1f) << 16) | (((s) & 0x3e0) << 6))
> +
>  static int set_special_wakeup(struct cpu_thread *cpu)
>  {
>         uint64_t val, poll_target, stamp;
> @@ -203,11 +234,10 @@ static int clr_special_wakeup(struct cpu_thread *cpu)
>
>  extern unsigned long callthru_tcl(const char *str, int len);
>
> -static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
> +static void set_direct_ctl(struct cpu_thread *cpu, uint32_t thread_id, uint64_t bits)
>  {
>         uint32_t core_id = pir_to_core_id(cpu->pir);
>         uint32_t chip_id = pir_to_chip_id(cpu->pir);
> -       uint32_t thread_id = pir_to_thread_id(cpu->pir);
>         uint32_t xscom_addr;
>         char tcl_cmd[50];
>
> @@ -225,6 +255,35 @@ static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
>         xscom_write(chip_id, xscom_addr, bits);
>  }
>
> +static uint64_t get_ras_status(struct cpu_thread *cpu, uint32_t thread_id)
> +{
> +       uint32_t core_id = pir_to_core_id(cpu->pir);
> +       uint32_t chip_id = pir_to_chip_id(cpu->pir);
> +       uint32_t xscom_addr;
> +       uint64_t ras_status;
> +
> +       xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_TCTL_RAS_STATUS(thread_id));
> +       xscom_read(chip_id, xscom_addr, &ras_status);
> +
> +       return ras_status;
> +}
> +
> +static void set_ram_thread_active(struct cpu_thread *cpu, uint32_t thread_id, bool active)
> +{
> +       uint32_t core_id = pir_to_core_id(cpu->pir);
> +       uint32_t chip_id = pir_to_chip_id(cpu->pir);
> +       uint32_t xscom_addr;
> +       uint64_t thread_active;
> +
> +       xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_THREAD_ACTIVE);
> +       xscom_read(chip_id, xscom_addr, &thread_active);
> +       if (active)
> +               thread_active |= PPC_BIT(8) >> thread_id;
> +       else
> +               thread_active &= ~(PPC_BIT(8) >> thread_id);
> +       xscom_write(chip_id, xscom_addr, thread_active);
> +}
> +
>  static bool fast_reset_p8(void)
>  {
>         struct cpu_thread *cpu;
> @@ -247,7 +306,7 @@ static bool fast_reset_p8(void)
>         /* Put everybody in stop except myself */
>         for_each_cpu(cpu) {
>                 if (cpu != this_cpu())
> -                       set_direct_ctl(cpu, P8_DIRECT_CTL_STOP);
> +                       set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_STOP);
>
>                 /* Also make sure that saved_r1 is 0 ! That's what will
>                  * make our reset vector jump to fast_reboot_entry
> @@ -264,7 +323,7 @@ static bool fast_reset_p8(void)
>         /* Put everybody in pre-nap except myself */
>         for_each_cpu(cpu) {
>                 if (cpu != this_cpu())
> -                       set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP);
> +                       set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_PRENAP);
>         }
>
>         prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n");
> @@ -272,7 +331,7 @@ static bool fast_reset_p8(void)
>         /* Reset everybody except my own core threads */
>         for_each_cpu(cpu) {
>                 if (cpu != this_cpu())
> -                       set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET);
> +                       set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_SRESET);
>         }
>
>         return true;
> @@ -441,8 +500,8 @@ void __noreturn fast_reboot_entry(void)
>         lock(&reset_lock);
>         if (last_man_standing && next_cpu(first_cpu())) {
>                 prlog(PR_DEBUG, "RESET: last man standing fixup...\n");
> -               set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP);
> -               set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET);
> +               set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_PRENAP);
> +               set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_SRESET);
>         }
>         last_man_standing = NULL;
>         unlock(&reset_lock);
> @@ -544,3 +603,275 @@ void __noreturn fast_reboot_entry(void)
>         /* Load and boot payload */
>         load_and_boot_kernel(true);
>  }
> +
> +/*
> + * RAMs the opcodes in *opcodes and store the results of each opcode
> + * into *results. *results must point to an array the same size as
> + * *opcodes. Each entry from *results is put into SCR0 prior to
> + * executing an opcode so that it may also be used to pass in
> + * data. Note that only register r0 is saved and restored so opcodes
> + * must not touch other registers.
> + */
> +static int ram_instructions(struct cpu_thread *cpu, uint32_t thread_id, uint64_t *opcodes,
> +                           uint64_t *results, int len, unsigned int lpar)
> +{
> +       int i, rc = OPAL_SUCCESS;
> +       uint64_t ram_mode, val, opcode, r0 = 0;
> +       uint32_t chip_id = pir_to_chip_id(cpu->pir);
> +       uint32_t core_id = pir_to_core_id(cpu->pir);
> +
> +
> +       /* Activate RAM mode */
> +       xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), &ram_mode);
> +       ram_mode |= RAM_MODE_ENABLE;
> +
> +       /* Enable HV mode on thread */
> +       ram_mode |= PPC_BIT(2) >> thread_id*2;
> +       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), ram_mode);
> +
> +       /* Setup SPRC to use SPRD */
> +       val = SPR_MODE_SPRC_WR_EN;
> +       val = SETFIELD(SPR_MODE_SPRC_SEL, val, 1 << (3 - lpar));
> +       val = SETFIELD(SPR_MODE_SPRC_T_SEL, val, 1 << (7 - thread_id));
> +       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPR_MODE_REG), val);
> +       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPRC_REG), SPRC_REG_SCRATCH_SPR);
> +
> +       for (i = -1; i <= len; i++) {
> +               if (i < 0)
> +                       /* Save r0 (assumes opcodes don't touch other registers) */
> +                       opcode = MTSPR(277, 0);

This looked like a bug. Perhaps add { }.

> +               else if (i < len) {
> +                       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), results[i]);
> +                       opcode = opcodes[i];
> +               } else if (i >= len) {
> +                       /* Restore r0 */
> +                       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), r0);
> +                       opcode = MFSPR(0, 277);
> +               }
> +
> +               /* ram instruction */
> +               val = SETFIELD(RAM_CTRL_THREAD_SELECT, 0ULL, thread_id);
> +               val = SETFIELD(RAM_CTRL_INSTR, val, opcode);
> +               xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_CTRL_REG), val);
> +
> +               /* wait for completion */
> +               do {
> +                       xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_STATUS_REG), &val);
> +               } while (!val);
> +
> +               if (!(val & RAM_STATUS)) {
> +                       prlog(PR_ERR, "Instruction ramming failed with status 0x%llx\n", val);
> +                       rc = OPAL_HARDWARE;
> +               }
> +
> +               /* Save the results */
> +               xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), &val);
> +               if (i < 0)
> +                       r0 = val;
> +               else if (i < len)
> +                       results[i] = val;
> +       }
> +
> +       /* Disable RAM mode */
> +       ram_mode &= ~RAM_MODE_ENABLE;
> +
> +       /* Disable HV mode on thread */
> +       ram_mode &= ~(PPC_BIT(2) >> thread_id*2);
> +       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), ram_mode);
> +
> +       return rc;
> +}
> +
> +#define SRESET_SRR1 0x9000000000101001
> +static int emulate_sreset(struct cpu_thread *cpu, int thread_id)
> +{
> +       int rc = OPAL_SUCCESS;
> +       uint64_t opcodes[] = {
> +               MFMSR(0), MTSPR(277, 0),                /* Get MSR */
> +               MFNIA(0), MTSPR(277, 0),                /* Get NIA */
> +               MFSPR(0, 277), MTMSR(0),                /* Put modified MSR back */
> +               MFSPR(0, 277), MTSPR(SPR_SRR0, 0),      /* Set SRR0 to NIA */
> +               MFSPR(0, 277), MTSPR(SPR_SRR1, 0),      /* Set SRR1 to SRESET value */
> +               MFSPR(0, 277), MTNIA(0),                /* Set NIA */
> +       };
> +       uint64_t results[] = {

Does it make sense to put a size in both of these so the compiler
warns when the lengths are different?

> +               0, 0,
> +               0, 0,
> +               0, 0,
> +               0, 0,                                   /* SRR0 */
> +               SRESET_SRR1, 0,                         /* SRR1 SRESET value */
> +               0x100, 0,                               /* Set NIA = 0x100 */
> +       };
> +
> +       BUILD_ASSERT(ARRAY_SIZE(opcodes) == ARRAY_SIZE(results));
> +
> +       set_ram_thread_active(cpu, thread_id, true);
> +
> +       prlog(PR_ERR, "SRESET: About to ram cpu 0x%x thread 0x%x\n",
> +             cpu->pir, thread_id);
> +
> +       /* Ram the first 4 instructions to get MSR and NIA */
> +       rc = ram_instructions(cpu, thread_id, opcodes, results, 4, 0);
> +
> +       /* Set MSR, SRR0 = NIA and ram remaining instructions*/
> +       results[4] = (results[1] & ~(MSR_IR | MSR_DR | MSR_FE0 | MSR_FE1 | MSR_EE | MSR_RI)) | MSR_HV;
> +       results[6] = results[3];
> +       rc |= ram_instructions(cpu, thread_id, &opcodes[4], &results[4], ARRAY_SIZE(opcodes) - 4, 0);
> +
> +       set_ram_thread_active(cpu, thread_id, false);
> +
> +       return rc;
> +}
> +
> +/*
> + * Apply an sreset to the given threads in a core. When ramming
> + * instructions the whole core must be quiesced so we can't apply an
> + * sreset to active threads on the same core as we're running
> + */
> +static int64_t sreset_core(struct cpu_thread *cpu, unsigned int thread_mask)
> +{
> +       uint32_t thread_id;
> +       uint32_t sreset_mask = 0, ram_mask = 0;
> +       uint64_t ras_status;
> +       int timeout;
> +       int64_t rc = 0;
> +       unsigned int max_thread_id = pir_to_thread_id(-1U);
> +       unsigned int max_thread_mask = (1 << (max_thread_id + 1)) - 1;
> +
> +       assert(cpu == cpu->primary);
> +       thread_mask &= max_thread_mask;
> +       if (this_cpu() == cpu) {
> +               prlog(PR_WARNING, "SRESET: Unable to reset threads on self\n");
> +               return OPAL_PARAMETER;
> +       }
> +
> +       prlog(PR_INFO, "SRESET: Start reset for cpu 0x%x thread_mask 0x%x\n",
> +             cpu->pir, thread_mask);
> +
> +       /* cpu is the primary thread */
> +       if (set_special_wakeup(cpu) != OPAL_SUCCESS)
> +               return OPAL_BUSY;
> +
> +       /* Stop threads selected for sreset */
> +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +               if (!((1 << thread_id) & thread_mask))
> +                       continue;
> +
> +               set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> +               ras_status = get_ras_status(cpu, thread_id);
> +               if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> +                       sreset_mask |= 1 << thread_id;
> +       }
> +
> +       if (thread_mask == sreset_mask)
> +               /* All threads selected for sreset can be sreset directly */
> +               prlog(PR_INFO, "SRESET: All threads inactive, doing direct sreset\n");
> +               for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> +                       if ((1 << thread_id) & thread_mask) {
> +                               set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> +                               rc = OPAL_SUCCESS;
> +                               goto out;
> +                       }
> +
> +       /* Need to emulate sreset so stop all other threads */
> +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> +               if (!((1 << thread_id) & thread_mask))
> +                       set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> +
> +       /* Work out which threads to sreset and which need sreset emulation */
> +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +               ras_status = get_ras_status(cpu, thread_id);
> +               if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> +                       sreset_mask |= 1 << thread_id;
> +               else {
> +                       for (timeout = 0; timeout < RAS_STATUS_TIMEOUT; timeout++) {
> +                               ras_status = get_ras_status(cpu, thread_id);
> +                               if ((ras_status & RAS_STATUS_SRQ_EMPTY)
> +                                   && (ras_status & RAS_STATUS_LSU_QUIESCED)
> +                                   && (ras_status & RAS_STATUS_TS_QUIESCE)) {
> +                                       ram_mask |= 1 << thread_id;
> +                                       break;
> +                               }
> +                       }
> +               }
> +       }
> +
> +       /*
> +        * To emulate sreset we need to make sure all threads on a core are either:
> +        *  a) Quiesced
> +        *  b) Not active (recorded in sreset_mask)
> +        * So skip ramming threads if we're not in the right state.
> +        */
> +       if ((ram_mask & thread_mask) && (ram_mask | sreset_mask) != max_thread_mask) {
> +               prlog(PR_ERR, "SRESET: Unable to quiesce all threads for ramming (sreset_mask 0x%08x / ram_mask 0x%08x)\n",
> +                     sreset_mask, ram_mask);
> +               ram_mask = 0;
> +               rc = OPAL_PARTIAL;
> +       }
> +
> +       /* We need to ram threads before doing the direct sresets as
> +        * that makes the threads active */
> +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +               if (!((1 << thread_id) & thread_mask))
> +                       continue;
> +
> +               if ((1 << thread_id) & ram_mask)
> +                       emulate_sreset(cpu, thread_id);
> +       }
> +
> +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +               if (!((1 << thread_id) & thread_mask))
> +                       continue;
> +
> +               if ((1 << thread_id) & sreset_mask) {
> +                       prlog(PR_ERR, "SRESET: cpu 0x%x thread 0x%x not active, applying SRESET directly\n",
> +                             cpu->pir, thread_id);
> +                       set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> +               }
> +       }
> +
> +out:
> +       /* Start all threads */
> +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> +               set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_START);
> +
> +       clr_special_wakeup(cpu);
> +
> +       return rc;
> +}
> +
> +#define SYS_RESET_ALLBUTSELF -2
> +int64_t signal_system_reset(int cpu_nr)
> +{
> +       int64_t rc = 0;
> +       struct cpu_thread *cpu;
> +       uint32_t thread_id;
> +
> +       if (proc_gen != proc_gen_p8)
> +               return OPAL_UNSUPPORTED;
> +
> +       /* Reset a single CPU */
> +       if (cpu_nr >= 0) {

Did you mean cpu_nr == SYS_RESET_ALLBUTSELF?

> +               cpu = find_cpu_by_server(cpu_nr);
> +               if (!cpu)
> +                       return OPAL_PARAMETER;
> +
> +               thread_id = pir_to_thread_id(cpu->pir);
> +               cpu = cpu->primary;
> +               return sreset_core(cpu, 1 << thread_id);
> +       }
> +
> +       /* Otherwise reset all CPUs */
> +       for_each_cpu(cpu) {
> +               if (cpu->primary == this_cpu()->primary)
> +                       continue;
> +
> +               if (cpu->primary != cpu)
> +                       continue;
> +
> +               /* sreset all threads on a core */
> +               rc |= sreset_core(cpu, -1);
> +       }
> +
> +       return rc;
> +}
> diff --git a/doc/opal-api/opal-signal-system-reset-128.txt b/doc/opal-api/opal-signal-system-reset-128.txt
> new file mode 100644
> index 0000000..bb1b869
> --- /dev/null
> +++ b/doc/opal-api/opal-signal-system-reset-128.txt
> @@ -0,0 +1,30 @@
> +OPAL_SIGNAL_SYSTEM_RESET
> +-------------------
> +
> +#define OPAL_SIGNAL_SYSTEM_RESET                       128
> +
> +int64_t signal_system_reset(int cpu_nr)
> +
> +Arguments:
> +
> +  int cpu_nr
> +    Either the cpu server number of the target cpu to reset or
> +    SYS_RESET_ALLBUTSELF (-2) to indicate all but the current cpu
> +    should be reset.
> +
> +This OPAL call causes the specified cpu(s) to be reset to the system
> +reset exception handler (0x100). Sleeping cpus will be woken with
> +SRR1[42:45] = 0b0100 indicating an interrupt caused by SCOM when in
> +power saving mode. Active cpus will also indicate interrupt caused by
> +SCOM but will have SRR1[46:47] = 0 as the interrupt did not occur
> +during a power saving mode.
> +
> +Resetting active threads on the same core as this call is run is
> +currently not supported.
> +
> +Return Values:
> +OPAL_SUCCESS: the requested cpu(s) were reset successfully
> +OPAL_PARAMETER: a parameter was incorrect
> +OPAL_HARDWARE: hardware indicated failure during instruction ramming
> +OPAL_PARTIAL: not all requested cpus could be reset at this time
> +OPAL_UNSUPPORTED: this processor generation is not supported
> diff --git a/include/opal-api.h b/include/opal-api.h
> index 05ff51d..be9a534 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -181,7 +181,8 @@
>  #define OPAL_INT_SET_MFRR                      125
>  #define OPAL_PCI_TCE_KILL                      126
>  #define OPAL_NMMU_SET_PTCR                     127
> -#define OPAL_LAST                              127
> +#define OPAL_SIGNAL_SYSTEM_RESET               128
> +#define OPAL_LAST                              128
>
>  /* Device tree flags */
>
> diff --git a/include/skiboot.h b/include/skiboot.h
> index 2ef7677..b796a2c 100644
> --- a/include/skiboot.h
> +++ b/include/skiboot.h
> @@ -191,6 +191,7 @@ extern unsigned long get_symbol(unsigned long addr,
>
>  /* Fast reboot support */
>  extern void fast_reboot(void);
> +extern int64_t signal_system_reset(int __unused targets);
>  extern void __noreturn __secondary_cpu_entry(void);
>  extern void __noreturn load_and_boot_kernel(bool is_reboot);
>  extern void cleanup_tlb(void);
> diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
> index e1a8a4d..e4761ee 100644
> --- a/platforms/astbmc/common.c
> +++ b/platforms/astbmc/common.c
> @@ -356,4 +356,6 @@ void astbmc_early_init(void)
>         uart_init();
>
>         prd_init();
> +
> +       opal_register(OPAL_SIGNAL_SYSTEM_RESET, signal_system_reset, 1);
>  }
> --
> 2.1.4
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
Alistair Popple Nov. 15, 2016, 3:49 a.m. UTC | #2
On Tue, 15 Nov 2016 01:57:07 PM Joel Stanley wrote:
> On Tue, Nov 15, 2016 at 1:19 PM, Alistair Popple <alistair@popple.id.au> wrote:
> > Sending a NMI to other CPUs regardless of their current state requires
> > a way to reset them. POWER hardware has a method of directly injecting
> > resets via direct thread control, however this only works if the
> > thread is not active (eg. in a sleep or nap state).
> >
> > Resetting an active thread can be performed either via forcing the
> > threads to an inactive state (as fast reboot does) or by ramming an
> > instruction sequence that simulates an sreset. This patch implements
> > the latter as forcing a thread to the inactive state is not ideal for
> > debug purposes as the threads loose state.
> 
> Lose.
> 
> >
> > Signed-off-by: Alistair Popple <alistair@popple.id.au>
> > ---
> >
> > Currently active threads on the currently executing core cannot be
> > sreset as a thread cannot ram other threads on the same core. This
> > means the caller will need to reset these threads to make the call
> > from a different core.
> >
> > core/fast-reboot.c                            | 349 +++++++++++++++++++++++++-
> >  doc/opal-api/opal-signal-system-reset-128.txt |  30 +++
> >  include/opal-api.h                            |   3 +-
> >  include/skiboot.h                             |   1 +
> >  platforms/astbmc/common.c                     |   2 +
> >  5 files changed, 375 insertions(+), 10 deletions(-)
> >  create mode 100644 doc/opal-api/opal-signal-system-reset-128.txt
> >
> > diff --git a/core/fast-reboot.c b/core/fast-reboot.c
> > index 66b3182..08d7c25 100644
> > --- a/core/fast-reboot.c
> > +++ b/core/fast-reboot.c
> > @@ -28,10 +28,33 @@
> >  #include <chiptod.h>
> >
> >  #define P8_EX_TCTL_DIRECT_CONTROLS(t)  (0x10013000 + (t) * 0x10)
> > -#define P8_DIRECT_CTL_STOP             PPC_BIT(63)
> >  #define P8_DIRECT_CTL_PRENAP           PPC_BIT(47)
> >  #define P8_DIRECT_CTL_SRESET           PPC_BIT(60)
> > -
> > +#define P8_DIRECT_CTL_START            PPC_BIT(62)
> > +#define P8_DIRECT_CTL_STOP             PPC_BIT(63)
> > +#define P8_EX_TCTL_RAS_STATUS(t)       (0x10013002 + (t) * 0x10)
> > +#define RAS_STATUS_SRQ_EMPTY           PPC_BIT(8)
> > +#define RAS_STATUS_LSU_QUIESCED        PPC_BIT(9)
> > +#define RAS_STATUS_INST_COMPLETE       PPC_BIT(12)
> > +#define RAS_STATUS_THREAD_ACTIVE       PPC_BIT(48)
> > +#define RAS_STATUS_TS_QUIESCE          PPC_BIT(49)
> > +#define P8_EX_THREAD_ACTIVE            0x1001310e
> > +#define P8_EX_SPRC_REG                 0x10013280
> > +#define SPRC_REG_SCRATCH_SPR           PPC_BIT(57)
> > +#define P8_EX_SPR_MODE_REG             0x10013281
> > +#define SPR_MODE_SPRC_WR_EN            PPC_BIT(3)
> > +#define SPR_MODE_SPRC_SEL              PPC_BITMASK(16, 19)
> > +#define SPR_MODE_SPRC_T_SEL            PPC_BITMASK(20, 27)
> > +#define P8_EX_SCR0_REG                 0x10013283
> > +#define P8_EX_RAM_MODE_REG             0x10013c00
> > +#define RAM_MODE_ENABLE                PPC_BIT(0)
> > +#define P8_EX_RAM_CTRL_REG             0x10013c01
> > +#define RAM_CTRL_THREAD_SELECT         PPC_BITMASK(0, 2)
> > +#define RAM_CTRL_INSTR                 PPC_BITMASK(3, 34)
> > +#define P8_EX_RAM_STATUS_REG           0x10013c02
> > +#define RAM_STATUS                     PPC_BIT(1)
> > +
> > +#define RAS_STATUS_TIMEOUT             10
> >
> >  /* Flag tested by the OPAL entry code */
> >  uint8_t reboot_in_progress;
> > @@ -39,6 +62,14 @@ static volatile bool fast_boot_release;
> >  static struct cpu_thread *last_man_standing;
> >  static struct lock reset_lock = LOCK_UNLOCKED;
> >
> > +/* opcodes for instruction ramming */
> > +#define MFNIA(r) (0x00000004UL | ((r) << 21))
> > +#define MTNIA(r) (0x00000002UL | ((r) << 21))
> > +#define MFMSR(r) (0x7c0000a6UL | ((r) << 21))
> > +#define MTMSR(r) (0x7c000124UL | ((r) << 21))
> > +#define MFSPR(r, s) (0x7c0002a6UL | ((r) << 21) | (((s) & 0x1f) << 16) | (((s) & 0x3e0) << 6))
> > +#define MTSPR(s, r) (0x7c0003a6UL | ((r) << 21) | (((s) & 0x1f) << 16) | (((s) & 0x3e0) << 6))
> > +
> >  static int set_special_wakeup(struct cpu_thread *cpu)
> >  {
> >         uint64_t val, poll_target, stamp;
> > @@ -203,11 +234,10 @@ static int clr_special_wakeup(struct cpu_thread *cpu)
> >
> >  extern unsigned long callthru_tcl(const char *str, int len);
> >
> > -static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
> > +static void set_direct_ctl(struct cpu_thread *cpu, uint32_t thread_id, uint64_t bits)
> >  {
> >         uint32_t core_id = pir_to_core_id(cpu->pir);
> >         uint32_t chip_id = pir_to_chip_id(cpu->pir);
> > -       uint32_t thread_id = pir_to_thread_id(cpu->pir);
> >         uint32_t xscom_addr;
> >         char tcl_cmd[50];
> >
> > @@ -225,6 +255,35 @@ static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
> >         xscom_write(chip_id, xscom_addr, bits);
> >  }
> >
> > +static uint64_t get_ras_status(struct cpu_thread *cpu, uint32_t thread_id)
> > +{
> > +       uint32_t core_id = pir_to_core_id(cpu->pir);
> > +       uint32_t chip_id = pir_to_chip_id(cpu->pir);
> > +       uint32_t xscom_addr;
> > +       uint64_t ras_status;
> > +
> > +       xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_TCTL_RAS_STATUS(thread_id));
> > +       xscom_read(chip_id, xscom_addr, &ras_status);
> > +
> > +       return ras_status;
> > +}
> > +
> > +static void set_ram_thread_active(struct cpu_thread *cpu, uint32_t thread_id, bool active)
> > +{
> > +       uint32_t core_id = pir_to_core_id(cpu->pir);
> > +       uint32_t chip_id = pir_to_chip_id(cpu->pir);
> > +       uint32_t xscom_addr;
> > +       uint64_t thread_active;
> > +
> > +       xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_THREAD_ACTIVE);
> > +       xscom_read(chip_id, xscom_addr, &thread_active);
> > +       if (active)
> > +               thread_active |= PPC_BIT(8) >> thread_id;
> > +       else
> > +               thread_active &= ~(PPC_BIT(8) >> thread_id);
> > +       xscom_write(chip_id, xscom_addr, thread_active);
> > +}
> > +
> >  static bool fast_reset_p8(void)
> >  {
> >         struct cpu_thread *cpu;
> > @@ -247,7 +306,7 @@ static bool fast_reset_p8(void)
> >         /* Put everybody in stop except myself */
> >         for_each_cpu(cpu) {
> >                 if (cpu != this_cpu())
> > -                       set_direct_ctl(cpu, P8_DIRECT_CTL_STOP);
> > +                       set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_STOP);
> >
> >                 /* Also make sure that saved_r1 is 0 ! That's what will
> >                  * make our reset vector jump to fast_reboot_entry
> > @@ -264,7 +323,7 @@ static bool fast_reset_p8(void)
> >         /* Put everybody in pre-nap except myself */
> >         for_each_cpu(cpu) {
> >                 if (cpu != this_cpu())
> > -                       set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP);
> > +                       set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_PRENAP);
> >         }
> >
> >         prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n");
> > @@ -272,7 +331,7 @@ static bool fast_reset_p8(void)
> >         /* Reset everybody except my own core threads */
> >         for_each_cpu(cpu) {
> >                 if (cpu != this_cpu())
> > -                       set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET);
> > +                       set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_SRESET);
> >         }
> >
> >         return true;
> > @@ -441,8 +500,8 @@ void __noreturn fast_reboot_entry(void)
> >         lock(&reset_lock);
> >         if (last_man_standing && next_cpu(first_cpu())) {
> >                 prlog(PR_DEBUG, "RESET: last man standing fixup...\n");
> > -               set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP);
> > -               set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET);
> > +               set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_PRENAP);
> > +               set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_SRESET);
> >         }
> >         last_man_standing = NULL;
> >         unlock(&reset_lock);
> > @@ -544,3 +603,275 @@ void __noreturn fast_reboot_entry(void)
> >         /* Load and boot payload */
> >         load_and_boot_kernel(true);
> >  }
> > +
> > +/*
> > + * RAMs the opcodes in *opcodes and store the results of each opcode
> > + * into *results. *results must point to an array the same size as
> > + * *opcodes. Each entry from *results is put into SCR0 prior to
> > + * executing an opcode so that it may also be used to pass in
> > + * data. Note that only register r0 is saved and restored so opcodes
> > + * must not touch other registers.
> > + */
> > +static int ram_instructions(struct cpu_thread *cpu, uint32_t thread_id, uint64_t *opcodes,
> > +                           uint64_t *results, int len, unsigned int lpar)
> > +{
> > +       int i, rc = OPAL_SUCCESS;
> > +       uint64_t ram_mode, val, opcode, r0 = 0;
> > +       uint32_t chip_id = pir_to_chip_id(cpu->pir);
> > +       uint32_t core_id = pir_to_core_id(cpu->pir);
> > +
> > +
> > +       /* Activate RAM mode */
> > +       xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), &ram_mode);
> > +       ram_mode |= RAM_MODE_ENABLE;
> > +
> > +       /* Enable HV mode on thread */
> > +       ram_mode |= PPC_BIT(2) >> thread_id*2;
> > +       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), ram_mode);
> > +
> > +       /* Setup SPRC to use SPRD */
> > +       val = SPR_MODE_SPRC_WR_EN;
> > +       val = SETFIELD(SPR_MODE_SPRC_SEL, val, 1 << (3 - lpar));
> > +       val = SETFIELD(SPR_MODE_SPRC_T_SEL, val, 1 << (7 - thread_id));
> > +       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPR_MODE_REG), val);
> > +       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPRC_REG), SPRC_REG_SCRATCH_SPR);
> > +
> > +       for (i = -1; i <= len; i++) {
> > +               if (i < 0)
> > +                       /* Save r0 (assumes opcodes don't touch other registers) */
> > +                       opcode = MTSPR(277, 0);
> 
> This looked like a bug. Perhaps add { }.

Fair point, will add.

> > +               else if (i < len) {
> > +                       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), results[i]);
> > +                       opcode = opcodes[i];
> > +               } else if (i >= len) {
> > +                       /* Restore r0 */
> > +                       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), r0);
> > +                       opcode = MFSPR(0, 277);
> > +               }
> > +
> > +               /* ram instruction */
> > +               val = SETFIELD(RAM_CTRL_THREAD_SELECT, 0ULL, thread_id);
> > +               val = SETFIELD(RAM_CTRL_INSTR, val, opcode);
> > +               xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_CTRL_REG), val);
> > +
> > +               /* wait for completion */
> > +               do {
> > +                       xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_STATUS_REG), &val);
> > +               } while (!val);
> > +
> > +               if (!(val & RAM_STATUS)) {
> > +                       prlog(PR_ERR, "Instruction ramming failed with status 0x%llx\n", val);
> > +                       rc = OPAL_HARDWARE;
> > +               }
> > +
> > +               /* Save the results */
> > +               xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), &val);
> > +               if (i < 0)
> > +                       r0 = val;
> > +               else if (i < len)
> > +                       results[i] = val;
> > +       }
> > +
> > +       /* Disable RAM mode */
> > +       ram_mode &= ~RAM_MODE_ENABLE;
> > +
> > +       /* Disable HV mode on thread */
> > +       ram_mode &= ~(PPC_BIT(2) >> thread_id*2);
> > +       xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), ram_mode);
> > +
> > +       return rc;
> > +}
> > +
> > +#define SRESET_SRR1 0x9000000000101001
> > +static int emulate_sreset(struct cpu_thread *cpu, int thread_id)
> > +{
> > +       int rc = OPAL_SUCCESS;
> > +       uint64_t opcodes[] = {
> > +               MFMSR(0), MTSPR(277, 0),                /* Get MSR */
> > +               MFNIA(0), MTSPR(277, 0),                /* Get NIA */
> > +               MFSPR(0, 277), MTMSR(0),                /* Put modified MSR back */
> > +               MFSPR(0, 277), MTSPR(SPR_SRR0, 0),      /* Set SRR0 to NIA */
> > +               MFSPR(0, 277), MTSPR(SPR_SRR1, 0),      /* Set SRR1 to SRESET value */
> > +               MFSPR(0, 277), MTNIA(0),                /* Set NIA */
> > +       };
> > +       uint64_t results[] = {
> 
> Does it make sense to put a size in both of these so the compiler
> warns when the lengths are different?

The BUILD_ASSERT below should cause a compiler error if the sizes are different.
 
> > +               0, 0,
> > +               0, 0,
> > +               0, 0,
> > +               0, 0,                                   /* SRR0 */
> > +               SRESET_SRR1, 0,                         /* SRR1 SRESET value */
> > +               0x100, 0,                               /* Set NIA = 0x100 */
> > +       };
> > +
> > +       BUILD_ASSERT(ARRAY_SIZE(opcodes) == ARRAY_SIZE(results));
> > +
> > +       set_ram_thread_active(cpu, thread_id, true);
> > +
> > +       prlog(PR_ERR, "SRESET: About to ram cpu 0x%x thread 0x%x\n",
> > +             cpu->pir, thread_id);
> > +
> > +       /* Ram the first 4 instructions to get MSR and NIA */
> > +       rc = ram_instructions(cpu, thread_id, opcodes, results, 4, 0);
> > +
> > +       /* Set MSR, SRR0 = NIA and ram remaining instructions*/
> > +       results[4] = (results[1] & ~(MSR_IR | MSR_DR | MSR_FE0 | MSR_FE1 | MSR_EE | MSR_RI)) | MSR_HV;
> > +       results[6] = results[3];
> > +       rc |= ram_instructions(cpu, thread_id, &opcodes[4], &results[4], ARRAY_SIZE(opcodes) - 4, 0);
> > +
> > +       set_ram_thread_active(cpu, thread_id, false);
> > +
> > +       return rc;
> > +}
> > +
> > +/*
> > + * Apply an sreset to the given threads in a core. When ramming
> > + * instructions the whole core must be quiesced so we can't apply an
> > + * sreset to active threads on the same core as we're running
> > + */
> > +static int64_t sreset_core(struct cpu_thread *cpu, unsigned int thread_mask)
> > +{
> > +       uint32_t thread_id;
> > +       uint32_t sreset_mask = 0, ram_mask = 0;
> > +       uint64_t ras_status;
> > +       int timeout;
> > +       int64_t rc = 0;
> > +       unsigned int max_thread_id = pir_to_thread_id(-1U);
> > +       unsigned int max_thread_mask = (1 << (max_thread_id + 1)) - 1;
> > +
> > +       assert(cpu == cpu->primary);
> > +       thread_mask &= max_thread_mask;
> > +       if (this_cpu() == cpu) {
> > +               prlog(PR_WARNING, "SRESET: Unable to reset threads on self\n");
> > +               return OPAL_PARAMETER;
> > +       }
> > +
> > +       prlog(PR_INFO, "SRESET: Start reset for cpu 0x%x thread_mask 0x%x\n",
> > +             cpu->pir, thread_mask);
> > +
> > +       /* cpu is the primary thread */
> > +       if (set_special_wakeup(cpu) != OPAL_SUCCESS)
> > +               return OPAL_BUSY;
> > +
> > +       /* Stop threads selected for sreset */
> > +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> > +               if (!((1 << thread_id) & thread_mask))
> > +                       continue;
> > +
> > +               set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> > +               ras_status = get_ras_status(cpu, thread_id);
> > +               if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> > +                       sreset_mask |= 1 << thread_id;
> > +       }
> > +
> > +       if (thread_mask == sreset_mask)
> > +               /* All threads selected for sreset can be sreset directly */
> > +               prlog(PR_INFO, "SRESET: All threads inactive, doing direct sreset\n");
> > +               for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> > +                       if ((1 << thread_id) & thread_mask) {
> > +                               set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> > +                               rc = OPAL_SUCCESS;
> > +                               goto out;
> > +                       }
> > +
> > +       /* Need to emulate sreset so stop all other threads */
> > +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> > +               if (!((1 << thread_id) & thread_mask))
> > +                       set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> > +
> > +       /* Work out which threads to sreset and which need sreset emulation */
> > +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> > +               ras_status = get_ras_status(cpu, thread_id);
> > +               if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> > +                       sreset_mask |= 1 << thread_id;
> > +               else {
> > +                       for (timeout = 0; timeout < RAS_STATUS_TIMEOUT; timeout++) {
> > +                               ras_status = get_ras_status(cpu, thread_id);
> > +                               if ((ras_status & RAS_STATUS_SRQ_EMPTY)
> > +                                   && (ras_status & RAS_STATUS_LSU_QUIESCED)
> > +                                   && (ras_status & RAS_STATUS_TS_QUIESCE)) {
> > +                                       ram_mask |= 1 << thread_id;
> > +                                       break;
> > +                               }
> > +                       }
> > +               }
> > +       }
> > +
> > +       /*
> > +        * To emulate sreset we need to make sure all threads on a core are either:
> > +        *  a) Quiesced
> > +        *  b) Not active (recorded in sreset_mask)
> > +        * So skip ramming threads if we're not in the right state.
> > +        */
> > +       if ((ram_mask & thread_mask) && (ram_mask | sreset_mask) != max_thread_mask) {
> > +               prlog(PR_ERR, "SRESET: Unable to quiesce all threads for ramming (sreset_mask 0x%08x / ram_mask 0x%08x)\n",
> > +                     sreset_mask, ram_mask);
> > +               ram_mask = 0;
> > +               rc = OPAL_PARTIAL;
> > +       }
> > +
> > +       /* We need to ram threads before doing the direct sresets as
> > +        * that makes the threads active */
> > +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> > +               if (!((1 << thread_id) & thread_mask))
> > +                       continue;
> > +
> > +               if ((1 << thread_id) & ram_mask)
> > +                       emulate_sreset(cpu, thread_id);
> > +       }
> > +
> > +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> > +               if (!((1 << thread_id) & thread_mask))
> > +                       continue;
> > +
> > +               if ((1 << thread_id) & sreset_mask) {
> > +                       prlog(PR_ERR, "SRESET: cpu 0x%x thread 0x%x not active, applying SRESET directly\n",
> > +                             cpu->pir, thread_id);
> > +                       set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> > +               }
> > +       }
> > +
> > +out:
> > +       /* Start all threads */
> > +       for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> > +               set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_START);
> > +
> > +       clr_special_wakeup(cpu);
> > +
> > +       return rc;
> > +}
> > +
> > +#define SYS_RESET_ALLBUTSELF -2
> > +int64_t signal_system_reset(int cpu_nr)
> > +{
> > +       int64_t rc = 0;
> > +       struct cpu_thread *cpu;
> > +       uint32_t thread_id;
> > +
> > +       if (proc_gen != proc_gen_p8)
> > +               return OPAL_UNSUPPORTED;
> > +
> > +       /* Reset a single CPU */
> > +       if (cpu_nr >= 0) {
> 
> Did you mean cpu_nr == SYS_RESET_ALLBUTSELF?

No, the next case handles SYS_RESET_ALLBUTSELF although you're right, I
should add a check for cpu_nr == SYS_RESET_ALLBUTSELF somewhere.

> > +               cpu = find_cpu_by_server(cpu_nr);
> > +               if (!cpu)
> > +                       return OPAL_PARAMETER;
> > +
> > +               thread_id = pir_to_thread_id(cpu->pir);
> > +               cpu = cpu->primary;
> > +               return sreset_core(cpu, 1 << thread_id);
> > +       }
> > +
> > +       /* Otherwise reset all CPUs */
> > +       for_each_cpu(cpu) {
> > +               if (cpu->primary == this_cpu()->primary)
> > +                       continue;
> > +
> > +               if (cpu->primary != cpu)
> > +                       continue;
> > +
> > +               /* sreset all threads on a core */
> > +               rc |= sreset_core(cpu, -1);
> > +       }
> > +
> > +       return rc;
> > +}
> > diff --git a/doc/opal-api/opal-signal-system-reset-128.txt b/doc/opal-api/opal-signal-system-reset-128.txt
> > new file mode 100644
> > index 0000000..bb1b869
> > --- /dev/null
> > +++ b/doc/opal-api/opal-signal-system-reset-128.txt
> > @@ -0,0 +1,30 @@
> > +OPAL_SIGNAL_SYSTEM_RESET
> > +-------------------
> > +
> > +#define OPAL_SIGNAL_SYSTEM_RESET                       128
> > +
> > +int64_t signal_system_reset(int cpu_nr)
> > +
> > +Arguments:
> > +
> > +  int cpu_nr
> > +    Either the cpu server number of the target cpu to reset or
> > +    SYS_RESET_ALLBUTSELF (-2) to indicate all but the current cpu
> > +    should be reset.
> > +
> > +This OPAL call causes the specified cpu(s) to be reset to the system
> > +reset exception handler (0x100). Sleeping cpus will be woken with
> > +SRR1[42:45] = 0x0100 indicating an interrupt caused by SCOM when in
> > +power saving mode. Active cpus will also indicate interrupt caused by
> > +SCOM but will have SRR1[46:47] = 0 as the interrupt did not occur
> > +during a power saving mode.
> > +
> > +Resetting active threads on the same core as this call is run is
> > +currently not supported.
> > +
> > +Return Values:
> > +OPAL_SUCCESS: the requested cpu(s) were reset successfully
> > +OPAL_PARAMETER: a parameter was incorrect
> > +OPAL_HARDWARE: hardware indicated failure during instruction ramming
> > +OPAL_PARTIAL: not all requested cpus could be reset at this time
> > +OPAL_UNSUPPORTED: this processor generation is not supported
> > diff --git a/include/opal-api.h b/include/opal-api.h
> > index 05ff51d..be9a534 100644
> > --- a/include/opal-api.h
> > +++ b/include/opal-api.h
> > @@ -181,7 +181,8 @@
> >  #define OPAL_INT_SET_MFRR                      125
> >  #define OPAL_PCI_TCE_KILL                      126
> >  #define OPAL_NMMU_SET_PTCR                     127
> > -#define OPAL_LAST                              127
> > +#define OPAL_SIGNAL_SYSTEM_RESET               128
> > +#define OPAL_LAST                              128
> >
> >  /* Device tree flags */
> >
> > diff --git a/include/skiboot.h b/include/skiboot.h
> > index 2ef7677..b796a2c 100644
> > --- a/include/skiboot.h
> > +++ b/include/skiboot.h
> > @@ -191,6 +191,7 @@ extern unsigned long get_symbol(unsigned long addr,
> >
> >  /* Fast reboot support */
> >  extern void fast_reboot(void);
> > +extern int64_t signal_system_reset(int __unused targets);
> >  extern void __noreturn __secondary_cpu_entry(void);
> >  extern void __noreturn load_and_boot_kernel(bool is_reboot);
> >  extern void cleanup_tlb(void);
> > diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
> > index e1a8a4d..e4761ee 100644
> > --- a/platforms/astbmc/common.c
> > +++ b/platforms/astbmc/common.c
> > @@ -356,4 +356,6 @@ void astbmc_early_init(void)
> >         uart_init();
> >
> >         prd_init();
> > +
> > +       opal_register(OPAL_SIGNAL_SYSTEM_RESET, signal_system_reset, 1);
> >  }
> > --
> > 2.1.4
> > _______________________________________________
> > Skiboot mailing list
> > Skiboot@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/skiboot
Nicholas Piggin Nov. 21, 2016, 8:01 a.m. UTC | #3
On Tue, 15 Nov 2016 13:49:13 +1100
Alistair Popple <alistair@popple.id.au> wrote:

> Sending a NMI to other CPUs regardless of their current state requires
> a way to reset them. POWER hardware has a method of directly injecting
> resets via direct thread control, however this only works if the
> thread is not active (eg. in a sleep or nap state).
> 
> Resetting an active thread can be performed either via forcing the
> threads to an inactive state (as fast reboot does) or by ramming an
> instruction sequence that simulates an sreset. This patch implements
> the latter as forcing a thread to the inactive state is not ideal for
> debug purposes as the threads lose state.

For reference, this is part of an effort to make NMI interrupts usable
on powerpc platforms, and use that in Linux for crash dumping and debugging.

https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-November/150684.html


> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
> 
> Currently active threads on the currently executing core cannot be
> sreset as a thread cannot ram other threads on the same core. This
> means that, to reset these threads, the caller will need to make the
> call from a different core.

Hi Alistair,

Great stuff. Do you see any way to lift this restriction in future,
or do we need to make this part of the API? We should be able to make
the Linux powernv platform code have NMI'ed cores bounce the NMI
back to our sibling threads without too much work or changes to the
platform independent NMI code.

So this should be fine, but we should ensure the API has a way to
communicate this type of failure (that requires an NMI bounce from
another core, as opposed to some other failure). Did you have any
thoughts there?

Thanks,
Nick
Alistair Popple Nov. 21, 2016, 11:59 p.m. UTC | #4
On Mon, 21 Nov 2016 07:01:42 PM Nicholas Piggin wrote:

<snip>

> > 
> > Currently active threads on the currently executing core cannot be
> > sreset as a thread cannot ram other threads on the same core. This
> > > means that, to reset these threads, the caller will need to make the
> > > call from a different core.
> 
> Hi Alistair,
> 
> Great stuff. Do you see any way to lift this restriction in future,
> or do we need to make this part of the API? We should be able to make
> the Linux powernv platform code have NMI'ed cores bounce the NMI
> back to our sibling threads without too much work or changes to the
> platform independent NMI code.

The restriction comes from the hardware, which means one way or another we 
need to bounce around to different cores to do a complete reset, either in 
skiboot or Linux.

It would be possible to do it in skiboot by ramming one of the other cores to 
a trampoline in skiboot which would then go and reset the remaining cores 
before returning them to Linux, however this would create a couple of 
complications (eg. when all of the cores are sleeping) so I think it would be 
simpler to deal with this restriction in Linux.

> So this should be fine, but we should ensure the API has a way to
> communicate this type of failure (that requires an NMI bounce from
> another core, as opposed to some other failure). Did you have any
> thoughts there?

Right. Currently the call will return OPAL_PARTIAL when it couldn't reset all 
of the requested cores which I admit may be somewhat vague. We could I suppose 
have an API where we ask OPAL to reset all the cores and it returns a list of 
the ones which were successfully reset. This would be slightly more efficient 
than resetting one core at a time as we wouldn't need to continually quiesce 
other threads but I'd be interested in your thoughts here as well.

Regards,

Alistair

> Thanks,
> Nick
Nicholas Piggin Nov. 22, 2016, 1:29 a.m. UTC | #5
On Tue, 22 Nov 2016 10:59:58 +1100
Alistair Popple <alistair@popple.id.au> wrote:

> On Mon, 21 Nov 2016 07:01:42 PM Nicholas Piggin wrote:
> 
> <snip>
> 
> > > 
> > > Currently active threads on the currently executing core cannot be
> > > sreset as a thread cannot ram other threads on the same core. This
> > > > means that, to reset these threads, the caller will need to make the
> > > > call from a different core.
> > 
> > Hi Alistair,
> > 
> > Great stuff. Do you see any way to lift this restriction in future,
> > or do we need to make this part of the API? We should be able to make
> > the Linux powernv platform code have NMI'ed cores bounce the NMI
> > back to our sibling threads without too much work or changes to the
> > platform independent NMI code.  
> 
> The restriction comes from the hardware, which means one way or another we 
> need to bounce around to different cores to do a complete reset, either in 
> skiboot or Linux.
> 
> It would be possible to do it in skiboot by ramming one of the other cores to 
> a trampoline in skiboot which would then go and reset the remaining cores 
> before returning them to Linux, however this would create a couple of 
> complications (eg. when all of the cores are sleeping) so I think it would be 
> simpler to deal with this restriction in Linux.

That sounds reasonable.


> > So this should be fine, but we should ensure the API has a way to
> > communicate this type of failure (that requires an NMI bounce from
> > another core, as opposed to some other failure). Did you have any
> > thoughts there?  
> 
> Right. Currently the call will return OPAL_PARTIAL when it couldn't reset all 
> of the requested cores which I admit may be somewhat vague. We could I suppose 
> have an API where we ask OPAL to reset all the cores and it returns a list of 
> the ones which were successfully reset. This would be slightly more efficient 
> than resetting one core at a time as we wouldn't need to continually quiesce 
> other threads but I'd be interested in your thoughts here as well.

I'm not sure what's going to work best here. Unicast would make it easy for
Linux to build the list.

So perhaps if the platform does not support true broadcast, then a broadcast
request can just fail without resetting any CPUs, and the caller can go to
one at a time.

Then if we always proceed by signaling sibling threads first, we can detect
hardware/firmware that requires NMI bounce before any NMIs actually get sent,
which makes it a bit easier for Linux side to set up bouncing.

Thanks,
Nick
Stewart Smith Dec. 23, 2016, 12:32 a.m. UTC | #6
Alistair Popple <alistair@popple.id.au> writes:
> Sending a NMI to other CPUs regardless of their current state requires
> a way to reset them. POWER hardware has a method of directly injecting
> resets via direct thread control, however this only works if the
> thread is not active (eg. in a sleep or nap state).
>
> Resetting an active thread can be performed either via forcing the
> threads to an inactive state (as fast reboot does) or by ramming an
> instruction sequence that simulates an sreset. This patch implements
> the latter as forcing a thread to the inactive state is not ideal for
> debug purposes as the threads lose state.

do we have an idea about what's going to be involved with it on P9 ?

> @@ -225,6 +255,35 @@ static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
>  	xscom_write(chip_id, xscom_addr, bits);
>  }
>
> +static uint64_t get_ras_status(struct cpu_thread *cpu, uint32_t thread_id)
> +{
> +	uint32_t core_id = pir_to_core_id(cpu->pir);
> +	uint32_t chip_id = pir_to_chip_id(cpu->pir);
> +	uint32_t xscom_addr;
> +	uint64_t ras_status;
> +
> +	xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_TCTL_RAS_STATUS(thread_id));
> +	xscom_read(chip_id, xscom_addr, &ras_status);

check return val?

> +
> +	return ras_status;
> +}
> +
> +static void set_ram_thread_active(struct cpu_thread *cpu, uint32_t thread_id, bool active)
> +{
> +	uint32_t core_id = pir_to_core_id(cpu->pir);
> +	uint32_t chip_id = pir_to_chip_id(cpu->pir);
> +	uint32_t xscom_addr;
> +	uint64_t thread_active;
> +
> +	xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_THREAD_ACTIVE);
> +	xscom_read(chip_id, xscom_addr, &thread_active);

check result?

> +	if (active)
> +		thread_active |= PPC_BIT(8) >> thread_id;
> +	else
> +		thread_active &= ~(PPC_BIT(8) >> thread_id);
> +	xscom_write(chip_id, xscom_addr, thread_active);

same here, check result?

> +}
> +
>  static bool fast_reset_p8(void)
>  {
>  	struct cpu_thread *cpu;
> @@ -247,7 +306,7 @@ static bool fast_reset_p8(void)
>  	/* Put everybody in stop except myself */
>  	for_each_cpu(cpu) {
>  		if (cpu != this_cpu())
> -			set_direct_ctl(cpu, P8_DIRECT_CTL_STOP);
> +			set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_STOP);
>
>  		/* Also make sure that saved_r1 is 0 ! That's what will
>  		 * make our reset vector jump to fast_reboot_entry
> @@ -264,7 +323,7 @@ static bool fast_reset_p8(void)
>  	/* Put everybody in pre-nap except myself */
>  	for_each_cpu(cpu) {
>  		if (cpu != this_cpu())
> -			set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP);
> +			set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_PRENAP);
>  	}
>
>  	prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n");
> @@ -272,7 +331,7 @@ static bool fast_reset_p8(void)
>  	/* Reset everybody except my own core threads */
>  	for_each_cpu(cpu) {
>  		if (cpu != this_cpu())
> -			set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET);
> +			set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_SRESET);
>  	}
>
>  	return true;
> @@ -441,8 +500,8 @@ void __noreturn fast_reboot_entry(void)
>  	lock(&reset_lock);
>  	if (last_man_standing && next_cpu(first_cpu())) {
>  		prlog(PR_DEBUG, "RESET: last man standing fixup...\n");
> -		set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP);
> -		set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET);
> +		set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_PRENAP);
> +		set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_SRESET);
>  	}
>  	last_man_standing = NULL;
>  	unlock(&reset_lock);
> @@ -544,3 +603,275 @@ void __noreturn fast_reboot_entry(void)
>  	/* Load and boot payload */
>  	load_and_boot_kernel(true);
>  }
> +
> +/*
> + * RAMs the opcodes in *opcodes and store the results of each opcode
> + * into *results. *results must point to an array the same size as
> + * *opcodes. Each entry from *results is put into SCR0 prior to
> + * executing an opcode so that it may also be used to pass in
> + * data. Note that only register r0 is saved and restored so opcodes
> + * must not touch other registers.
> + */
> +static int ram_instructions(struct cpu_thread *cpu, uint32_t thread_id, uint64_t *opcodes,
> +			    uint64_t *results, int len, unsigned int lpar)
> +{
> +	int i, rc = OPAL_SUCCESS;
> +	uint64_t ram_mode, val, opcode, r0 = 0;
> +	uint32_t chip_id = pir_to_chip_id(cpu->pir);
> +	uint32_t core_id = pir_to_core_id(cpu->pir);
> +
> +
> +	/* Activate RAM mode */
> +	xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id,
> P8_EX_RAM_MODE_REG), &ram_mode);

check return value.

> +	ram_mode |= RAM_MODE_ENABLE;
> +
> +	/* Enable HV mode on thread */
> +	ram_mode |= PPC_BIT(2) >> thread_id*2;
> +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), ram_mode);
> +
> +	/* Setup SPRC to use SPRD */
> +	val = SPR_MODE_SPRC_WR_EN;
> +	val = SETFIELD(SPR_MODE_SPRC_SEL, val, 1 << (3 - lpar));
> +	val = SETFIELD(SPR_MODE_SPRC_T_SEL, val, 1 << (7 - thread_id));
> +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPR_MODE_REG), val);
> +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPRC_REG),
> SPRC_REG_SCRATCH_SPR);

check return values?

> +
> +	for (i = -1; i <= len; i++) {
> +		if (i < 0)
> +			/* Save r0 (assumes opcodes don't touch other registers) */
> +			opcode = MTSPR(277, 0);
> +		else if (i < len) {
> +			xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), results[i]);
> +			opcode = opcodes[i];
> +		} else if (i >= len) {
> +			/* Restore r0 */
> +			xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), r0);
> +			opcode = MFSPR(0, 277);
> +		}
> +
> +		/* ram instruction */
> +		val = SETFIELD(RAM_CTRL_THREAD_SELECT, 0ULL, thread_id);
> +		val = SETFIELD(RAM_CTRL_INSTR, val, opcode);
> +		xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_CTRL_REG), val);
> +
> +		/* wait for completion */
> +		do {
> +			xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_STATUS_REG), &val);
> +		} while (!val);
> +
> +		if (!(val & RAM_STATUS)) {
> +			prlog(PR_ERR, "Instruction ramming failed with status 0x%llx\n", val);
> +			rc = OPAL_HARDWARE;
> +		}
> +
> +		/* Save the results */
> +		xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), &val);
> +		if (i < 0)
> +			r0 = val;
> +		else if (i < len)
> +			results[i] = val;
> +	}
> +
> +	/* Disable RAM mode */
> +	ram_mode &= ~RAM_MODE_ENABLE;
> +
> +	/* Disable HV mode on thread */
> +	ram_mode &= ~(PPC_BIT(2) >> thread_id*2);
> +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id,
> P8_EX_RAM_MODE_REG), ram_mode);

I'm guessing the only real *sensible* way to deal with any of these
xscoms failing is to bail out and return a "good luck with that" error
code back to the OS so it can then... just cry really.


> +/*
> + * Apply an sreset to the given threads in a core. When ramming
> + * instructions the whole core must be quiesced so we can't apply an
> + * sreset to active threads on the same core as we're running
> + */
> +static int64_t sreset_core(struct cpu_thread *cpu, unsigned int thread_mask)
> +{
> +	uint32_t thread_id;
> +	uint32_t sreset_mask = 0, ram_mask = 0;
> +	uint64_t ras_status;
> +	int timeout;
> +	int64_t rc = 0;
> +	unsigned int max_thread_id = pir_to_thread_id(-1U);
> +	unsigned int max_thread_mask = (1 << (max_thread_id + 1)) - 1;
> +
> +	assert(cpu == cpu->primary);
> +	thread_mask &= max_thread_mask;
> +	if (this_cpu() == cpu) {
> +		prlog(PR_WARNING, "SRESET: Unable to reset threads on self\n");
> +		return OPAL_PARAMETER;
> +	}
> +
> +	prlog(PR_INFO, "SRESET: Start reset for cpu 0x%x thread_mask 0x%x\n",
> +	      cpu->pir, thread_mask);
> +
> +	/* cpu is the primary thread */
> +	if (set_special_wakeup(cpu) != OPAL_SUCCESS)
> +		return OPAL_BUSY;
> +
> +	/* Stop threads selected for sreset */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +		if (!((1 << thread_id) & thread_mask))
> +			continue;
> +
> +		set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> +		ras_status = get_ras_status(cpu, thread_id);
> +		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> +			sreset_mask |= 1 << thread_id;
> +	}
> +
> +	if (thread_mask == sreset_mask)
> +		/* All threads selected for sreset can be sreset directly */
> +		prlog(PR_INFO, "SRESET: All threads inactive, doing direct sreset\n");
> +		for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> +			if ((1 << thread_id) & thread_mask) {
> +				set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> +				rc = OPAL_SUCCESS;
> +				goto out;
> +			}
> +
> +	/* Need to emulate sreset so stop all other threads */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> +		if (!((1 << thread_id) & thread_mask))
> +			set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> +
> +	/* Work out which threads to sreset and which need sreset emulation */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +		ras_status = get_ras_status(cpu, thread_id);
> +		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> +			sreset_mask |= 1 << thread_id;
> +		else {
> +			for (timeout = 0; timeout < RAS_STATUS_TIMEOUT; timeout++) {
> +				ras_status = get_ras_status(cpu, thread_id);
> +				if ((ras_status & RAS_STATUS_SRQ_EMPTY)
> +				    && (ras_status & RAS_STATUS_LSU_QUIESCED)
> +				    && (ras_status & RAS_STATUS_TS_QUIESCE)) {
> +					ram_mask |= 1 << thread_id;
> +					break;
> +				}
> +			}
> +		}
> +	}
> +
> +	/*
> +	 * To emulate sreset we need to make sure all threads on a core are either:
> +	 *  a) Quiesced
> +	 *  b) Not active (recorded in sreset_mask)
> +	 * So skip ramming threads if we're not in the right state.
> +	 */
> +	if ((ram_mask & thread_mask) && (ram_mask | sreset_mask) != max_thread_mask) {
> +		prlog(PR_ERR, "SRESET: Unable to quiesce all threads for ramming (sreset_mask 0x%08x / ram_mask 0x%08x)\n",
> +		      sreset_mask, ram_mask);
> +		ram_mask = 0;
> +		rc = OPAL_PARTIAL;
> +	}
> +
> +	/* We need to ram threads before doing the direct sresets as
> +	 * that makes the threads active */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +		if (!((1 << thread_id) & thread_mask))
> +			continue;
> +
> +		if ((1 << thread_id) & ram_mask)
> +			emulate_sreset(cpu, thread_id);
> +	}
> +
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +		if (!((1 << thread_id) & thread_mask))
> +			continue;
> +
> +		if ((1 << thread_id) & sreset_mask) {
> +			prlog(PR_ERR, "SRESET: cpu 0x%x thread 0x%x not active, applying SRESET directly\n",
> +			      cpu->pir, thread_id);
> +			set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> +		}
> +	}
> +
> +out:
> +	/* Start all threads */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> +		set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_START);
> +
> +	clr_special_wakeup(cpu);
> +
> +	return rc;
> +}
> +
> +#define SYS_RESET_ALLBUTSELF -2

should be in opal-api.h ?

> +int64_t signal_system_reset(int cpu_nr)
> +{
> +	int64_t rc = 0;
> +	struct cpu_thread *cpu;
> +	uint32_t thread_id;
> +
> +	if (proc_gen != proc_gen_p8)
> +		return OPAL_UNSUPPORTED;
> +
> +	/* Reset a single CPU */
> +	if (cpu_nr >= 0) {
> +		cpu = find_cpu_by_server(cpu_nr);
> +		if (!cpu)
> +			return OPAL_PARAMETER;
> +
> +		thread_id = pir_to_thread_id(cpu->pir);
> +		cpu = cpu->primary;
> +		return sreset_core(cpu, 1 << thread_id);
> +	}
> +
> +	/* Otherwise reset all CPUs */
> +	for_each_cpu(cpu) {
> +		if (cpu->primary == this_cpu()->primary)
> +			continue;
> +
> +		if (cpu->primary != cpu)
> +			continue;
> +
> +		/* sreset all threads on a core */
> +		rc |= sreset_core(cpu, -1);
> +	}
> +
> +	return rc;
> +}
> diff --git a/doc/opal-api/opal-signal-system-reset-128.txt b/doc/opal-api/opal-signal-system-reset-128.txt
> new file mode 100644
> index 0000000..bb1b869
> --- /dev/null
> +++ b/doc/opal-api/opal-signal-system-reset-128.txt
> @@ -0,0 +1,30 @@
> +OPAL_SIGNAL_SYSTEM_RESET
> +-------------------
> +
> +#define OPAL_SIGNAL_SYSTEM_RESET			128
> +
> +int64_t signal_system_reset(int cpu_nr)
> +
> +Arguments:
> +
> +  int cpu_nr
> +    Either the cpu server number of the target cpu to reset or
> +    SYS_RESET_ALLBUTSELF (-2) to indicate all but the current cpu
> +    should be reset.

I'm thinking we should just copy what's going on in PAPR and also accept
-1 = target all online threads including the caller

even if we just return OPAL_UNSUPPORTED or OPAL_PARTIAL or something for
the -1 case.

> +This OPAL call causes the specified cpu(s) to be reset to the system
> +reset exception handler (0x100). Sleeping cpus will be woken with
> +SRR1[42:45] = 0x0100 indicating an interrupt caused by SCOM when in
> +power saving mode. Active cpus will also indicate interrupt caused by
> +SCOM but will have SRR1[46:47] = 0 as the interrupt did not occur
> +during a power saving mode.
> +
> +Resetting active threads on the same core as this call is run is
> +currently not supported.
> +
> +Return Values:
> +OPAL_SUCCESS: the power down was updated successful
> +OPAL_PARAMETER: a parameter was incorrect
> +OPAL_HARDWARE: hardware indicated failure during instruction ramming
> +OPAL_PARTIAL: not all requested cpus could be reset at this time
> +OPAL_UNSUPPORTED: this processor generation is not supported

                     or requested operation is not supported?
                     or should that be OPAL_PARAMETER?
                     (for -1)


> index e1a8a4d..e4761ee 100644
> --- a/platforms/astbmc/common.c
> +++ b/platforms/astbmc/common.c
> @@ -356,4 +356,6 @@ void astbmc_early_init(void)
>  	uart_init();
>
>  	prd_init();
> +
> +	opal_register(OPAL_SIGNAL_SYSTEM_RESET, signal_system_reset, 1);
>  }

Anything that makes this specific to astbmc systems?
Benjamin Herrenschmidt Dec. 29, 2016, 3:56 a.m. UTC | #7
On Fri, 2016-12-23 at 11:32 +1100, Stewart Smith wrote:
> Alistair Popple <alistair@popple.id.au> writes:
> > Sending a NMI to other CPUs regardless of their current state
> > requires
> > a way to reset them. POWER hardware has a method of directly
> > injecting
> > resets via direct thread control, however this only works if the
> > thread is not active (eg. in a sleep or nap state).
> > 
> > Resetting an active thread can be performed either via forcing the
> > threads to an inactive state (as fast reboot does) or by ramming an
> > instruction sequence that simulates an sreset. This patch
> > implements
> > the latter as forcing a thread to the inactive state is not ideal
> > for
> > debug purposes as the threads loose state.
> 
> do we have an idea about what's going to be involved with it on P9 ?

It should be simpler, allegedly the sreset direct control works ...
well except they already found bugs in DD1, so we'll see...

> > @@ -225,6 +255,35 @@ static void set_direct_ctl(struct cpu_thread
> > *cpu, uint64_t bits)
> >  	xscom_write(chip_id, xscom_addr, bits);
> >  }
> > 
> > +static uint64_t get_ras_status(struct cpu_thread *cpu, uint32_t
> > thread_id)
> > +{
> > +	uint32_t core_id = pir_to_core_id(cpu->pir);
> > +	uint32_t chip_id = pir_to_chip_id(cpu->pir);
> > +	uint32_t xscom_addr;
> > +	uint64_t ras_status;
> > +
> > +	xscom_addr = XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_TCTL_RAS_STATUS(thread_id));
> > +	xscom_read(chip_id, xscom_addr, &ras_status);
> 
> check return val?
> 
> > +
> > +	return ras_status;
> > +}
> > +
> > +static void set_ram_thread_active(struct cpu_thread *cpu, uint32_t
> > thread_id, bool active)
> > +{
> > +	uint32_t core_id = pir_to_core_id(cpu->pir);
> > +	uint32_t chip_id = pir_to_chip_id(cpu->pir);
> > +	uint32_t xscom_addr;
> > +	uint64_t thread_active;
> > +
> > +	xscom_addr = XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_THREAD_ACTIVE);
> > +	xscom_read(chip_id, xscom_addr, &thread_active);
> 
> check result?
> 
> > +	if (active)
> > +		thread_active |= PPC_BIT(8) >> thread_id;
> > +	else
> > +		thread_active &= ~(PPC_BIT(8) >> thread_id);
> > +	xscom_write(chip_id, xscom_addr, thread_active);
> 
> same here, check result?
> 
> > +}
> > +
> >  static bool fast_reset_p8(void)
> >  {
> >  	struct cpu_thread *cpu;
> > @@ -247,7 +306,7 @@ static bool fast_reset_p8(void)
> >  	/* Put everybody in stop except myself */
> >  	for_each_cpu(cpu) {
> >  		if (cpu != this_cpu())
> > -			set_direct_ctl(cpu, P8_DIRECT_CTL_STOP);
> > +			set_direct_ctl(cpu, pir_to_thread_id(cpu-
> > >pir), P8_DIRECT_CTL_STOP);
> > 
> >  		/* Also make sure that saved_r1 is 0 ! That's what
> > will
> >  		 * make our reset vector jump to fast_reboot_entry
> > @@ -264,7 +323,7 @@ static bool fast_reset_p8(void)
> >  	/* Put everybody in pre-nap except myself */
> >  	for_each_cpu(cpu) {
> >  		if (cpu != this_cpu())
> > -			set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP);
> > +			set_direct_ctl(cpu, pir_to_thread_id(cpu-
> > >pir), P8_DIRECT_CTL_PRENAP);
> >  	}
> > 
> >  	prlog(PR_DEBUG, "RESET: Resetting all threads but
> > one...\n");
> > @@ -272,7 +331,7 @@ static bool fast_reset_p8(void)
> >  	/* Reset everybody except my own core threads */
> >  	for_each_cpu(cpu) {
> >  		if (cpu != this_cpu())
> > -			set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET);
> > +			set_direct_ctl(cpu, pir_to_thread_id(cpu-
> > >pir), P8_DIRECT_CTL_SRESET);
> >  	}
> > 
> >  	return true;
> > @@ -441,8 +500,8 @@ void __noreturn fast_reboot_entry(void)
> >  	lock(&reset_lock);
> >  	if (last_man_standing && next_cpu(first_cpu())) {
> >  		prlog(PR_DEBUG, "RESET: last man standing
> > fixup...\n");
> > -		set_direct_ctl(last_man_standing,
> > P8_DIRECT_CTL_PRENAP);
> > -		set_direct_ctl(last_man_standing,
> > P8_DIRECT_CTL_SRESET);
> > +		set_direct_ctl(last_man_standing,
> > pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_PRENAP);
> > +		set_direct_ctl(last_man_standing,
> > pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_SRESET);
> >  	}
> >  	last_man_standing = NULL;
> >  	unlock(&reset_lock);
> > @@ -544,3 +603,275 @@ void __noreturn fast_reboot_entry(void)
> >  	/* Load and boot payload */
> >  	load_and_boot_kernel(true);
> >  }
> > +
> > +/*
> > + * RAMs the opcodes in *opcodes and store the results of each
> > opcode
> > + * into *results. *results must point to an array the same size as
> > + * *opcodes. Each entry from *results is put into SCR0 prior to
> > + * executing an opcode so that it may also be used to pass in
> > + * data. Note that only register r0 is saved and restored so
> > opcodes
> > + * must not touch other registers.
> > + */
> > +static int ram_instructions(struct cpu_thread *cpu, uint32_t
> > thread_id, uint64_t *opcodes,
> > +			    uint64_t *results, int len, unsigned
> > int lpar)
> > +{
> > +	int i, rc = OPAL_SUCCESS;
> > +	uint64_t ram_mode, val, opcode, r0 = 0;
> > +	uint32_t chip_id = pir_to_chip_id(cpu->pir);
> > +	uint32_t core_id = pir_to_core_id(cpu->pir);
> > +
> > +
> > +	/* Activate RAM mode */
> > +	xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_RAM_MODE_REG), &ram_mode);
> 
> check return value.
> 
> > +	ram_mode |= RAM_MODE_ENABLE;
> > +
> > +	/* Enable HV mode on thread */
> > +	ram_mode |= PPC_BIT(2) >> thread_id*2;
> > +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_RAM_MODE_REG), ram_mode);
> > +
> > +	/* Setup SPRC to use SPRD */
> > +	val = SPR_MODE_SPRC_WR_EN;
> > +	val = SETFIELD(SPR_MODE_SPRC_SEL, val, 1 << (3 - lpar));
> > +	val = SETFIELD(SPR_MODE_SPRC_T_SEL, val, 1 << (7 -
> > thread_id));
> > +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_SPR_MODE_REG), val);
> > +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_SPRC_REG),
> > SPRC_REG_SCRATCH_SPR);
> 
> check return values?
> 
> > +
> > +	for (i = -1; i <= len; i++) {
> > +		if (i < 0)
> > +			/* Save r0 (assumes opcodes don't touch
> > other registers) */
> > +			opcode = MTSPR(277, 0);
> > +		else if (i < len) {
> > +			xscom_write(chip_id,
> > XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), results[i]);
> > +			opcode = opcodes[i];
> > +		} else if (i >= len) {
> > +			/* Restore r0 */
> > +			xscom_write(chip_id,
> > XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), r0);
> > +			opcode = MFSPR(0, 277);
> > +		}
> > +
> > +		/* ram instruction */
> > +		val = SETFIELD(RAM_CTRL_THREAD_SELECT, 0ULL,
> > thread_id);
> > +		val = SETFIELD(RAM_CTRL_INSTR, val, opcode);
> > +		xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_RAM_CTRL_REG), val);
> > +
> > +		/* wait for completion */
> > +		do {
> > +			xscom_read(chip_id,
> > XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_STATUS_REG), &val);
> > +		} while (!val);
> > +
> > +		if (!(val & RAM_STATUS)) {
> > +			prlog(PR_ERR, "Instruction ramming failed
> > with status 0x%llx\n", val);
> > +			rc = OPAL_HARDWARE;
> > +		}
> > +
> > +		/* Save the results */
> > +		xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_SCR0_REG), &val);
> > +		if (i < 0)
> > +			r0 = val;
> > +		else if (i < len)
> > +			results[i] = val;
> > +	}
> > +
> > +	/* Disable RAM mode */
> > +	ram_mode &= ~RAM_MODE_ENABLE;
> > +
> > +	/* Disable HV mode on thread */
> > +	ram_mode &= ~(PPC_BIT(2) >> thread_id*2);
> > +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_RAM_MODE_REG), ram_mode);
> 
> I'm guessing the only real *sensible* way to deal with any of these
> xscoms failing is to bail out and return a "good luck with that"
> error
> code back to the OS so it can then... just cry really.
> 
> 
> > +/*
> > + * Apply an sreset to the given threads in a core. When ramming
> > + * instructions the whole core must be quiesced so we can't apply
> > an
> > + * sreset to active threads on the same core as we're running
> > + */
> > +static int64_t sreset_core(struct cpu_thread *cpu, unsigned int
> > thread_mask)
> > +{
> > +	uint32_t thread_id;
> > +	uint32_t sreset_mask = 0, ram_mask = 0;
> > +	uint64_t ras_status;
> > +	int timeout;
> > +	int64_t rc = 0;
> > +	unsigned int max_thread_id = pir_to_thread_id(-1U);
> > +	unsigned int max_thread_mask = (1 << (max_thread_id + 1))
> > - 1;
> > +
> > +	assert(cpu == cpu->primary);
> > +	thread_mask &= max_thread_mask;
> > +	if (this_cpu() == cpu) {
> > +		prlog(PR_WARNING, "SRESET: Unable to reset threads
> > on self\n");
> > +		return OPAL_PARAMETER;
> > +	}
> > +
> > +	prlog(PR_INFO, "SRESET: Start reset for cpu 0x%x
> > thread_mask 0x%x\n",
> > +	      cpu->pir, thread_mask);
> > +
> > +	/* cpu is the primary thread */
> > +	if (set_special_wakeup(cpu) != OPAL_SUCCESS)
> > +		return OPAL_BUSY;
> > +
> > +	/* Stop threads selected for sreset */
> > +	for (thread_id = 0; thread_id <= max_thread_id;
> > thread_id++) {
> > +		if (!((1 << thread_id) & thread_mask))
> > +			continue;
> > +
> > +		set_direct_ctl(cpu, thread_id,
> > P8_DIRECT_CTL_STOP);
> > +		ras_status = get_ras_status(cpu, thread_id);
> > +		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> > +			sreset_mask |= 1 << thread_id;
> > +	}
> > +
> > +	if (thread_mask == sreset_mask)
> > +		/* All threads selected for sreset can be sreset
> > directly */
> > +		prlog(PR_INFO, "SRESET: All threads inactive,
> > doing direct sreset\n");
> > +		for (thread_id = 0; thread_id <= max_thread_id;
> > thread_id++)
> > +			if ((1 << thread_id) & thread_mask) {
> > +				set_direct_ctl(cpu, thread_id,
> > P8_DIRECT_CTL_SRESET);
> > +				rc = OPAL_SUCCESS;
> > +				goto out;
> > +			}
> > +
> > +	/* Need to emulate sreset so stop all other threads */
> > +	for (thread_id = 0; thread_id <= max_thread_id;
> > thread_id++)
> > +		if (!((1 << thread_id) & thread_mask))
> > +			set_direct_ctl(cpu, thread_id,
> > P8_DIRECT_CTL_STOP);
> > +
> > +	/* Work out which threads to sreset and which need sreset
> > emulation */
> > +	for (thread_id = 0; thread_id <= max_thread_id;
> > thread_id++) {
> > +		ras_status = get_ras_status(cpu, thread_id);
> > +		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> > +			sreset_mask |= 1 << thread_id;
> > +		else {
> > +			for (timeout = 0; timeout <
> > RAS_STATUS_TIMEOUT; timeout++) {
> > +				ras_status = get_ras_status(cpu,
> > thread_id);
> > +				if ((ras_status &
> > RAS_STATUS_SRQ_EMPTY)
> > +				    && (ras_status &
> > RAS_STATUS_LSU_QUIESCED)
> > +				    && (ras_status &
> > RAS_STATUS_TS_QUIESCE)) {
> > +					ram_mask |= 1 <<
> > thread_id;
> > +					break;
> > +				}
> > +			}
> > +		}
> > +	}
> > +
> > +	/*
> > +	 * To emulate sreset we need to make sure all threads on a
> > core are either:
> > +	 *  a) Quiesced
> > +	 *  b) Not active (recorded in sreset_mask)
> > +	 * So skip ramming threads if we're not in the right
> > state.
> > +	 */
> > +	if ((ram_mask & thread_mask) && (ram_mask | sreset_mask)
> > != max_thread_mask) {
> > +		prlog(PR_ERR, "SRESET: Unable to quiesce all
> > threads for ramming (sreset_mask 0x%08x / ram_mask 0x%08x)\n",
> > +		      sreset_mask, ram_mask);
> > +		ram_mask = 0;
> > +		rc = OPAL_PARTIAL;
> > +	}
> > +
> > +	/* We need to ram threads before doing the direct sresets
> > as
> > +	 * that makes the threads active */
> > +	for (thread_id = 0; thread_id <= max_thread_id;
> > thread_id++) {
> > +		if (!((1 << thread_id) & thread_mask))
> > +			continue;
> > +
> > +		if ((1 << thread_id) & ram_mask)
> > +			emulate_sreset(cpu, thread_id);
> > +	}
> > +
> > +	for (thread_id = 0; thread_id <= max_thread_id;
> > thread_id++) {
> > +		if (!((1 << thread_id) & thread_mask))
> > +			continue;
> > +
> > +		if ((1 << thread_id) & sreset_mask) {
> > +			prlog(PR_ERR, "SRESET: cpu 0x%x thread
> > 0x%x not active, applying SRESET directly\n",
> > +			      cpu->pir, thread_id);
> > +			set_direct_ctl(cpu, thread_id,
> > P8_DIRECT_CTL_SRESET);
> > +		}
> > +	}
> > +
> > +out:
> > +	/* Start all threads */
> > +	for (thread_id = 0; thread_id <= max_thread_id;
> > thread_id++)
> > +		set_direct_ctl(cpu, thread_id,
> > P8_DIRECT_CTL_START);
> > +
> > +	clr_special_wakeup(cpu);
> > +
> > +	return rc;
> > +}
> > +
> > +#define SYS_RESET_ALLBUTSELF -2
> 
> should be in opal-api.h ?
> 
> > +int64_t signal_system_reset(int cpu_nr)
> > +{
> > +	int64_t rc = 0;
> > +	struct cpu_thread *cpu;
> > +	uint32_t thread_id;
> > +
> > +	if (proc_gen != proc_gen_p8)
> > +		return OPAL_UNSUPPORTED;
> > +
> > +	/* Reset a single CPU */
> > +	if (cpu_nr >= 0) {
> > +		cpu = find_cpu_by_server(cpu_nr);
> > +		if (!cpu)
> > +			return OPAL_PARAMETER;
> > +
> > +		thread_id = pir_to_thread_id(cpu->pir);
> > +		cpu = cpu->primary;
> > +		return sreset_core(cpu, 1 << thread_id);
> > +	}
> > +
> > +	/* Otherwise reset all CPUs */
> > +	for_each_cpu(cpu) {
> > +		if (cpu->primary == this_cpu()->primary)
> > +			continue;
> > +
> > +		if (cpu->primary != cpu)
> > +			continue;
> > +
> > +		/* sreset all threads on a core */
> > +		rc |= sreset_core(cpu, -1);
> > +	}
> > +
> > +	return rc;
> > +}
> > diff --git a/doc/opal-api/opal-signal-system-reset-128.txt
> > b/doc/opal-api/opal-signal-system-reset-128.txt
> > new file mode 100644
> > index 0000000..bb1b869
> > --- /dev/null
> > +++ b/doc/opal-api/opal-signal-system-reset-128.txt
> > @@ -0,0 +1,30 @@
> > +OPAL_SIGNAL_SYSTEM_RESET
> > +-------------------
> > +
> > +#define OPAL_SIGNAL_SYSTEM_RESET			128
> > +
> > +int64_t signal_system_reset(int cpu_nr)
> > +
> > +Arguments:
> > +
> > +  int cpu_nr
> > +    Either the cpu server number of the target cpu to reset or
> > +    SYS_RESET_ALLBUTSELF (-2) to indicate all but the current cpu
> > +    should be reset.
> 
> I'm thinking we should just copy what's going on in papr and also
> accept
> -1 = target all online threads including the caller
> 
> even if we just return OPAL_UNSUPPORTED or OPAL_PARTIAL or something
> for
> the -1 case.
> 
> > +This OPAL call causes the specified cpu(s) to be reset to the
> > system
> > +reset exception handler (0x100). Sleeping cpus will be woken with
> > +SRR1[42:45] = 0x0100 indicating an interrupt caused by SCOM when
> > in
> > +power saving mode. Active cpus will also indicate interrupt caused
> > by
> > +SCOM but will have SRR1[46:47] = 0 as the interrupt did not occur
> > +during a power saving mode.
> > +
> > +Resetting active threads on the same core as this call is run is
> 
>  > +currently not supported.
> > +
> > +Return Values:
> > +OPAL_SUCCESS: the power down was updated successful
> > +OPAL_PARAMETER: a parameter was incorrect
> > +OPAL_HARDWARE: hardware indicated failure during instruction
> > ramming
> > +OPAL_PARTIAL: not all requested cpus could be reset at this time
> > +OPAL_UNSUPPORTED: this processor generation is not supported
> 
>                      or requested operation is not supported?
>                      or should that be OPAL_PARAMETER?
>                      (for -1)
> 
> 
> > index e1a8a4d..e4761ee 100644
> > --- a/platforms/astbmc/common.c
> > +++ b/platforms/astbmc/common.c
> > @@ -356,4 +356,6 @@ void astbmc_early_init(void)
> >  	uart_init();
> > 
> >  	prd_init();
> > +
> > +	opal_register(OPAL_SIGNAL_SYSTEM_RESET,
> > signal_system_reset, 1);
> >  }
> 
> Anything that makes this specific to astbmc systems?
>
Alistair Popple Jan. 12, 2017, 5:36 a.m. UTC | #8
On Fri, 23 Dec 2016 11:32:02 AM Stewart Smith wrote:
> Alistair Popple <alistair@popple.id.au> writes:
> > Sending a NMI to other CPUs regardless of their current state requires
> > a way to reset them. POWER hardware has a method of directly injecting
> > resets via direct thread control, however this only works if the
> > thread is not active (eg. in a sleep or nap state).
> >
> > Resetting an active thread can be performed either via forcing the
> > threads to an inactive state (as fast reboot does) or by ramming an
> > instruction sequence that simulates an sreset. This patch implements
> > the latter as forcing a thread to the inactive state is not ideal for
> > debug purposes as the threads loose state.
> 
> do we have an idea about what's going to be involved with it on P9 ?

As Ben mentioned, not yet. I have been hacking around on pdbg though, and one of
the aims is to make it easier to integrate code from that into skiboot. That
way the instruction ramming code could be the same between pdbg and skiboot,
although practically I think that's a little way off yet.

<snip>

> > +
> > +	/* Disable HV mode on thread */
> > +	ram_mode &= ~(PPC_BIT(2) >> thread_id*2);
> > +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id,
> > P8_EX_RAM_MODE_REG), ram_mode);
> 
> I'm guessing the only real *sensible* way to deal with any of these
> xscoms failing is to bail out and return a "good luck with that" error
> code back to the OS so it can then... just cry really.

I did kind of ignore all the return codes because I didn't really know what to
do if a scom access failed. There are plenty of other places that also don't
check scom access return codes, so perhaps we need a version of
xscom_read/write that either panics or prints a warning in case of xscom
failure?

In the case of instruction ramming the only real thing we could do to recover
is print tears and die as the box will be totally hosed or about to checkstop.

I could add an assert(xscom_write(...)) I guess? Or would you rather return
to the OS? That said the OS itself is probably somewhat sick which is why this
function was called in the first place, so it might be better to just do the
simple thing and die.

> 
> 
> > +/*
> > + * Apply an sreset to the given threads in a core. When ramming
> > + * instructions the whole core must be quiesced so we can't apply an
> > + * sreset to active threads on the same core as we're running
> > + */
> > +static int64_t sreset_core(struct cpu_thread *cpu, unsigned int thread_mask)
> > +{
> > +	uint32_t thread_id;
> > +	uint32_t sreset_mask = 0, ram_mask = 0;
> > +	uint64_t ras_status;
> > +	int timeout;
> > +	int64_t rc = 0;
> > +	unsigned int max_thread_id = pir_to_thread_id(-1U);
> > +	unsigned int max_thread_mask = (1 << (max_thread_id + 1)) - 1;
> > +
> > +	assert(cpu == cpu->primary);
> > +	thread_mask &= max_thread_mask;
> > +	if (this_cpu() == cpu) {
> > +		prlog(PR_WARNING, "SRESET: Unable to reset threads on self\n");
> > +		return OPAL_PARAMETER;
> > +	}
> > +
> > +	prlog(PR_INFO, "SRESET: Start reset for cpu 0x%x thread_mask 0x%x\n",
> > +	      cpu->pir, thread_mask);
> > +
> > +	/* cpu is the primary thread */
> > +	if (set_special_wakeup(cpu) != OPAL_SUCCESS)
> > +		return OPAL_BUSY;
> > +
> > +	/* Stop threads selected for sreset */
> > +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> > +		if (!((1 << thread_id) & thread_mask))
> > +			continue;
> > +
> > +		set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> > +		ras_status = get_ras_status(cpu, thread_id);
> > +		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> > +			sreset_mask |= 1 << thread_id;
> > +	}
> > +
> > +	if (thread_mask == sreset_mask)
> > +		/* All threads selected for sreset can be sreset directly */
> > +		prlog(PR_INFO, "SRESET: All threads inactive, doing direct sreset\n");
> > +		for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> > +			if ((1 << thread_id) & thread_mask) {
> > +				set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> > +				rc = OPAL_SUCCESS;
> > +				goto out;
> > +			}
> > +
> > +	/* Need to emulate sreset so stop all other threads */
> > +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> > +		if (!((1 << thread_id) & thread_mask))
> > +			set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> > +
> > +	/* Work out which threads to sreset and which need sreset emulation */
> > +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> > +		ras_status = get_ras_status(cpu, thread_id);
> > +		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> > +			sreset_mask |= 1 << thread_id;
> > +		else {
> > +			for (timeout = 0; timeout < RAS_STATUS_TIMEOUT; timeout++) {
> > +				ras_status = get_ras_status(cpu, thread_id);
> > +				if ((ras_status & RAS_STATUS_SRQ_EMPTY)
> > +				    && (ras_status & RAS_STATUS_LSU_QUIESCED)
> > +				    && (ras_status & RAS_STATUS_TS_QUIESCE)) {
> > +					ram_mask |= 1 << thread_id;
> > +					break;
> > +				}
> > +			}
> > +		}
> > +	}
> > +
> > +	/*
> > +	 * To emulate sreset we need to make sure all threads on a core are either:
> > +	 *  a) Quiesced
> > +	 *  b) Not active (recorded in sreset_mask)
> > +	 * So skip ramming threads if we're not in the right state.
> > +	 */
> > +	if ((ram_mask & thread_mask) && (ram_mask | sreset_mask) != max_thread_mask) {
> > +		prlog(PR_ERR, "SRESET: Unable to quiesce all threads for ramming (sreset_mask 0x%08x / ram_mask 0x%08x)\n",
> > +		      sreset_mask, ram_mask);
> > +		ram_mask = 0;
> > +		rc = OPAL_PARTIAL;
> > +	}
> > +
> > +	/* We need to ram threads before doing the direct sresets as
> > +	 * that makes the threads active */
> > +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> > +		if (!((1 << thread_id) & thread_mask))
> > +			continue;
> > +
> > +		if ((1 << thread_id) & ram_mask)
> > +			emulate_sreset(cpu, thread_id);
> > +	}
> > +
> > +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> > +		if (!((1 << thread_id) & thread_mask))
> > +			continue;
> > +
> > +		if ((1 << thread_id) & sreset_mask) {
> > +			prlog(PR_ERR, "SRESET: cpu 0x%x thread 0x%x not active, applying SRESET directly\n",
> > +			      cpu->pir, thread_id);
> > +			set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> > +		}
> > +	}
> > +
> > +out:
> > +	/* Start all threads */
> > +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> > +		set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_START);
> > +
> > +	clr_special_wakeup(cpu);
> > +
> > +	return rc;
> > +}
> > +
> > +#define SYS_RESET_ALLBUTSELF -2
> 
> should be in opal-api.h ?

It sure should. Will fix.

> > +int64_t signal_system_reset(int cpu_nr)
> > +{
> > +	int64_t rc = 0;
> > +	struct cpu_thread *cpu;
> > +	uint32_t thread_id;
> > +
> > +	if (proc_gen != proc_gen_p8)
> > +		return OPAL_UNSUPPORTED;
> > +
> > +	/* Reset a single CPU */
> > +	if (cpu_nr >= 0) {
> > +		cpu = find_cpu_by_server(cpu_nr);
> > +		if (!cpu)
> > +			return OPAL_PARAMETER;
> > +
> > +		thread_id = pir_to_thread_id(cpu->pir);
> > +		cpu = cpu->primary;
> > +		return sreset_core(cpu, 1 << thread_id);
> > +	}
> > +
> > +	/* Otherwise reset all CPUs */
> > +	for_each_cpu(cpu) {
> > +		if (cpu->primary == this_cpu()->primary)
> > +			continue;
> > +
> > +		if (cpu->primary != cpu)
> > +			continue;
> > +
> > +		/* sreset all threads on a core */
> > +		rc |= sreset_core(cpu, -1);
> > +	}
> > +
> > +	return rc;
> > +}
> > diff --git a/doc/opal-api/opal-signal-system-reset-128.txt b/doc/opal-api/opal-signal-system-reset-128.txt
> > new file mode 100644
> > index 0000000..bb1b869
> > --- /dev/null
> > +++ b/doc/opal-api/opal-signal-system-reset-128.txt
> > @@ -0,0 +1,30 @@
> > +OPAL_SIGNAL_SYSTEM_RESET
> > +-------------------
> > +
> > +#define OPAL_SIGNAL_SYSTEM_RESET			128
> > +
> > +int64_t signal_system_reset(int cpu_nr)
> > +
> > +Arguments:
> > +
> > +  int cpu_nr
> > +    Either the cpu server number of the target cpu to reset or
> > +    SYS_RESET_ALLBUTSELF (-2) to indicate all but the current cpu
> > +    should be reset.
> 
> I'm thinking we should just copy what's going on in papr and also accept
> -1 = target all online threads including the caller
> 
> even if we just return OPAL_UNSUPPORTED or OPAL_PARTIAL or something for
> the -1 case.

Good idea. We could in theory add support to do all online threads in future
so being able to detect that with OPAL_UNSUPPORTED is probably good. For the
moment though Nick and I concluded it would be easier to do the equivalent in
Linux with SYS_RESET_ALLBUTSELF and another call to reset SELF from another
processor.

> > +This OPAL call causes the specified cpu(s) to be reset to the system
> > +reset exception handler (0x100). Sleeping cpus will be woken with
> > +SRR1[42:45] = 0x0100 indicating an interrupt caused by SCOM when in
> > +power saving mode. Active cpus will also indicate interrupt caused by
> > +SCOM but will have SRR1[46:47] = 0 as the interrupt did not occur
> > +during a power saving mode.
> > +
> > +Resetting active threads on the same core as this call is run is
> > +currently not supported.
> > +
> > +Return Values:
> > +OPAL_SUCCESS: the power down was updated successful
> > +OPAL_PARAMETER: a parameter was incorrect
> > +OPAL_HARDWARE: hardware indicated failure during instruction ramming
> > +OPAL_PARTIAL: not all requested cpus could be reset at this time
> > +OPAL_UNSUPPORTED: this processor generation is not supported
> 
>                      or requested operation is not supported?
>                      or should that be OPAL_PARAMETER?
>                      (for -1)

OPAL_UNSUPPORTED for -1 makes sense to me. Will update it.

> 
> > index e1a8a4d..e4761ee 100644
> > --- a/platforms/astbmc/common.c
> > +++ b/platforms/astbmc/common.c
> > @@ -356,4 +356,6 @@ void astbmc_early_init(void)
> >  	uart_init();
> >
> >  	prd_init();
> > +
> > +	opal_register(OPAL_SIGNAL_SYSTEM_RESET, signal_system_reset, 1);
> >  }
> 
> Anything that makes this specific to astbmc systems?

I don't think so, other than I've only tested it on astbmc systems.
diff mbox

Patch

diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 66b3182..08d7c25 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -28,10 +28,33 @@ 
 #include <chiptod.h>

 #define P8_EX_TCTL_DIRECT_CONTROLS(t)	(0x10013000 + (t) * 0x10)
-#define P8_DIRECT_CTL_STOP		PPC_BIT(63)
 #define P8_DIRECT_CTL_PRENAP		PPC_BIT(47)
 #define P8_DIRECT_CTL_SRESET		PPC_BIT(60)
-
+#define P8_DIRECT_CTL_START		PPC_BIT(62)
+#define P8_DIRECT_CTL_STOP		PPC_BIT(63)
+#define P8_EX_TCTL_RAS_STATUS(t)	(0x10013002 + (t) * 0x10)
+#define RAS_STATUS_SRQ_EMPTY		PPC_BIT(8)
+#define RAS_STATUS_LSU_QUIESCED	PPC_BIT(9)
+#define RAS_STATUS_INST_COMPLETE	PPC_BIT(12)
+#define RAS_STATUS_THREAD_ACTIVE	PPC_BIT(48)
+#define RAS_STATUS_TS_QUIESCE		PPC_BIT(49)
+#define P8_EX_THREAD_ACTIVE		0x1001310e
+#define P8_EX_SPRC_REG			0x10013280
+#define SPRC_REG_SCRATCH_SPR		PPC_BIT(57)
+#define P8_EX_SPR_MODE_REG		0x10013281
+#define SPR_MODE_SPRC_WR_EN		PPC_BIT(3)
+#define SPR_MODE_SPRC_SEL		PPC_BITMASK(16, 19)
+#define SPR_MODE_SPRC_T_SEL		PPC_BITMASK(20, 27)
+#define P8_EX_SCR0_REG			0x10013283
+#define P8_EX_RAM_MODE_REG		0x10013c00
+#define RAM_MODE_ENABLE		PPC_BIT(0)
+#define P8_EX_RAM_CTRL_REG		0x10013c01
+#define RAM_CTRL_THREAD_SELECT		PPC_BITMASK(0, 2)
+#define RAM_CTRL_INSTR			PPC_BITMASK(3, 34)
+#define P8_EX_RAM_STATUS_REG		0x10013c02
+#define RAM_STATUS			PPC_BIT(1)
+
+#define RAS_STATUS_TIMEOUT		10

 /* Flag tested by the OPAL entry code */
 uint8_t reboot_in_progress;
@@ -39,6 +62,14 @@  static volatile bool fast_boot_release;
 static struct cpu_thread *last_man_standing;
 static struct lock reset_lock = LOCK_UNLOCKED;

+/* opcodes for instruction ramming */
+#define MFNIA(r) (0x00000004UL | ((r) << 21))
+#define MTNIA(r) (0x00000002UL | ((r) << 21))
+#define MFMSR(r) (0x7c0000a6UL | ((r) << 21))
+#define MTMSR(r) (0x7c000124UL | ((r) << 21))
+#define MFSPR(r, s) (0x7c0002a6UL | ((r) << 21) | (((s) & 0x1f) << 16) | (((s) & 0x3e0) << 6))
+#define MTSPR(s, r) (0x7c0003a6UL | ((r) << 21) | (((s) & 0x1f) << 16) | (((s) & 0x3e0) << 6))
+
 static int set_special_wakeup(struct cpu_thread *cpu)
 {
 	uint64_t val, poll_target, stamp;
@@ -203,11 +234,10 @@  static int clr_special_wakeup(struct cpu_thread *cpu)

 extern unsigned long callthru_tcl(const char *str, int len);

-static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
+static void set_direct_ctl(struct cpu_thread *cpu, uint32_t thread_id, uint64_t bits)
 {
 	uint32_t core_id = pir_to_core_id(cpu->pir);
 	uint32_t chip_id = pir_to_chip_id(cpu->pir);
-	uint32_t thread_id = pir_to_thread_id(cpu->pir);
 	uint32_t xscom_addr;
 	char tcl_cmd[50];

@@ -225,6 +255,35 @@  static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
 	xscom_write(chip_id, xscom_addr, bits);
 }

+/*
+ * Read the TCTL RAS status register of one hardware thread.
+ *
+ * cpu is only used to derive the chip and core ids from its PIR;
+ * thread_id selects which thread of that core is queried.
+ *
+ * If the XSCOM read fails the register contents are unknown, so the
+ * thread is reported as active and not quiesced (only
+ * RAS_STATUS_THREAD_ACTIVE set) rather than returning an uninitialised
+ * value.
+ */
+static uint64_t get_ras_status(struct cpu_thread *cpu, uint32_t thread_id)
+{
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t xscom_addr;
+	uint64_t ras_status = 0;
+	int rc;
+
+	xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_TCTL_RAS_STATUS(thread_id));
+	rc = xscom_read(chip_id, xscom_addr, &ras_status);
+	if (rc) {
+		prlog(PR_ERR, "SRESET: Failed to read RAS status of cpu 0x%x thread 0x%x (rc %d)\n",
+		      cpu->pir, thread_id, rc);
+		return RAS_STATUS_THREAD_ACTIVE;
+	}
+
+	return ras_status;
+}
+
+/*
+ * Set or clear the bit for thread_id in the core's THREAD_ACTIVE
+ * register (read-modify-write over XSCOM).
+ *
+ * cpu is only used to derive the chip and core ids from its PIR.
+ * If the current register value cannot be read nothing is written
+ * back, so the bits of the other threads are never clobbered with
+ * garbage.
+ */
+static void set_ram_thread_active(struct cpu_thread *cpu, uint32_t thread_id, bool active)
+{
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t xscom_addr;
+	uint64_t thread_active = 0;
+	uint64_t thread_bit = PPC_BIT(8) >> thread_id;
+	int rc;
+
+	xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_THREAD_ACTIVE);
+	rc = xscom_read(chip_id, xscom_addr, &thread_active);
+	if (rc) {
+		prlog(PR_ERR, "SRESET: Failed to read thread active state of cpu 0x%x (rc %d)\n",
+		      cpu->pir, rc);
+		return;
+	}
+
+	if (active)
+		thread_active |= thread_bit;
+	else
+		thread_active &= ~thread_bit;
+	xscom_write(chip_id, xscom_addr, thread_active);
+}
+
 static bool fast_reset_p8(void)
 {
 	struct cpu_thread *cpu;
@@ -247,7 +306,7 @@  static bool fast_reset_p8(void)
 	/* Put everybody in stop except myself */
 	for_each_cpu(cpu) {
 		if (cpu != this_cpu())
-			set_direct_ctl(cpu, P8_DIRECT_CTL_STOP);
+			set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_STOP);

 		/* Also make sure that saved_r1 is 0 ! That's what will
 		 * make our reset vector jump to fast_reboot_entry
@@ -264,7 +323,7 @@  static bool fast_reset_p8(void)
 	/* Put everybody in pre-nap except myself */
 	for_each_cpu(cpu) {
 		if (cpu != this_cpu())
-			set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP);
+			set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_PRENAP);
 	}

 	prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n");
@@ -272,7 +331,7 @@  static bool fast_reset_p8(void)
 	/* Reset everybody except my own core threads */
 	for_each_cpu(cpu) {
 		if (cpu != this_cpu())
-			set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET);
+			set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_SRESET);
 	}

 	return true;
@@ -441,8 +500,8 @@  void __noreturn fast_reboot_entry(void)
 	lock(&reset_lock);
 	if (last_man_standing && next_cpu(first_cpu())) {
 		prlog(PR_DEBUG, "RESET: last man standing fixup...\n");
-		set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP);
-		set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET);
+		set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_PRENAP);
+		set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_SRESET);
 	}
 	last_man_standing = NULL;
 	unlock(&reset_lock);
@@ -544,3 +603,275 @@  void __noreturn fast_reboot_entry(void)
 	/* Load and boot payload */
 	load_and_boot_kernel(true);
 }
+
+/*
+ * RAMs the opcodes in *opcodes and store the results of each opcode
+ * into *results. *results must point to an array the same size as
+ * *opcodes. Each entry from *results is put into SCR0 prior to
+ * executing an opcode so that it may also be used to pass in
+ * data. Note that only register r0 is saved and restored so opcodes
+ * must not touch other registers.
+ */
+static int ram_instructions(struct cpu_thread *cpu, uint32_t thread_id, uint64_t *opcodes,
+			    uint64_t *results, int len, unsigned int lpar)
+{
+	int i, rc = OPAL_SUCCESS;
+	uint64_t ram_mode, val, opcode, r0 = 0;
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+
+
+	/* Activate RAM mode */
+	xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), &ram_mode);
+	ram_mode |= RAM_MODE_ENABLE;
+
+	/* Enable HV mode on thread */
+	ram_mode |= PPC_BIT(2) >> thread_id*2;
+	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), ram_mode);
+
+	/* Setup SPRC to use SPRD */
+	val = SPR_MODE_SPRC_WR_EN;
+	val = SETFIELD(SPR_MODE_SPRC_SEL, val, 1 << (3 - lpar));
+	val = SETFIELD(SPR_MODE_SPRC_T_SEL, val, 1 << (7 - thread_id));
+	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPR_MODE_REG), val);
+	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPRC_REG), SPRC_REG_SCRATCH_SPR);
+
+	for (i = -1; i <= len; i++) {
+		if (i < 0)
+			/* Save r0 (assumes opcodes don't touch other registers) */
+			opcode = MTSPR(277, 0);
+		else if (i < len) {
+			xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), results[i]);
+			opcode = opcodes[i];
+		} else if (i >= len) {
+			/* Restore r0 */
+			xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), r0);
+			opcode = MFSPR(0, 277);
+		}
+
+		/* ram instruction */
+		val = SETFIELD(RAM_CTRL_THREAD_SELECT, 0ULL, thread_id);
+		val = SETFIELD(RAM_CTRL_INSTR, val, opcode);
+		xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_CTRL_REG), val);
+
+		/* wait for completion */
+		do {
+			xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_STATUS_REG), &val);
+		} while (!val);
+
+		if (!(val & RAM_STATUS)) {
+			prlog(PR_ERR, "Instruction ramming failed with status 0x%llx\n", val);
+			rc = OPAL_HARDWARE;
+		}
+
+		/* Save the results */
+		xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), &val);
+		if (i < 0)
+			r0 = val;
+		else if (i < len)
+			results[i] = val;
+	}
+
+	/* Disable RAM mode */
+	ram_mode &= ~RAM_MODE_ENABLE;
+
+	/* Disable HV mode on thread */
+	ram_mode &= ~(PPC_BIT(2) >> thread_id*2);
+	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), ram_mode);
+
+	return rc;
+}
+
+#define SRESET_SRR1 0x9000000000101001ULL
+/*
+ * Emulate a system reset on one thread by ramming instructions into it.
+ *
+ * The sequence runs in two passes. The first reads the thread's MSR
+ * and NIA. The second writes back an MSR with IR, DR, FE0, FE1, EE and
+ * RI cleared and HV set, puts the old NIA into SRR0 and SRESET_SRR1
+ * into SRR1, and finally points NIA at 0x100.
+ *
+ * If the first pass fails the second is skipped: its inputs would be
+ * the unread placeholder values, and ramming them would corrupt the
+ * thread's MSR/SRR0 and redirect it anyway.
+ *
+ * Returns OPAL_SUCCESS or the error from ram_instructions().
+ */
+static int emulate_sreset(struct cpu_thread *cpu, int thread_id)
+{
+	int rc;
+	uint64_t opcodes[] = {
+		MFMSR(0), MTSPR(277, 0),		/* Get MSR */
+		MFNIA(0), MTSPR(277, 0),		/* Get NIA */
+		MFSPR(0, 277), MTMSR(0),		/* Put modified MSR back */
+		MFSPR(0, 277), MTSPR(SPR_SRR0, 0),	/* Set SRR0 to NIA */
+		MFSPR(0, 277), MTSPR(SPR_SRR1, 0),	/* Set SRR1 to SRESET value */
+		MFSPR(0, 277), MTNIA(0),		/* Set NIA */
+	};
+	uint64_t results[] = {
+		0, 0,					/* [1] receives MSR */
+		0, 0,					/* [3] receives NIA */
+		0, 0,					/* [4] modified MSR, filled in below */
+		0, 0,					/* [6] SRR0, filled in below */
+		SRESET_SRR1, 0,				/* SRR1 SRESET value */
+		0x100, 0,				/* Set NIA = 0x100 */
+	};
+
+	BUILD_ASSERT(ARRAY_SIZE(opcodes) == ARRAY_SIZE(results));
+
+	set_ram_thread_active(cpu, thread_id, true);
+
+	prlog(PR_INFO, "SRESET: About to ram cpu 0x%x thread 0x%x\n",
+	      cpu->pir, thread_id);
+
+	/* Ram the first 4 instructions to get MSR and NIA */
+	rc = ram_instructions(cpu, thread_id, opcodes, results, 4, 0);
+	if (rc != OPAL_SUCCESS) {
+		prlog(PR_ERR, "SRESET: Unable to read MSR/NIA of cpu 0x%x thread 0x%x\n",
+		      cpu->pir, thread_id);
+		goto out;
+	}
+
+	/* Set MSR, SRR0 = NIA and ram remaining instructions */
+	results[4] = (results[1] & ~(MSR_IR | MSR_DR | MSR_FE0 | MSR_FE1 | MSR_EE | MSR_RI)) | MSR_HV;
+	results[6] = results[3];
+	rc = ram_instructions(cpu, thread_id, &opcodes[4], &results[4], ARRAY_SIZE(opcodes) - 4, 0);
+
+out:
+	set_ram_thread_active(cpu, thread_id, false);
+
+	return rc;
+}
+
+/*
+ * Apply an sreset to the given threads in a core. When ramming
+ * instructions the whole core must be quiesced so we can't apply an
+ * sreset to active threads on the same core as we're running.
+ *
+ * cpu must be the primary thread of the target core. thread_mask has
+ * bit n set for every thread n of that core to be reset.
+ *
+ * Returns:
+ *  OPAL_SUCCESS   - all requested threads were reset
+ *  OPAL_PARAMETER - the target core is the one we are running on
+ *  OPAL_BUSY      - special wakeup could not be set on the core
+ *  OPAL_PARTIAL   - the core could not be quiesced for ramming, only
+ *                   inactive threads were reset
+ *  OPAL_HARDWARE  - ramming the sreset sequence failed on a thread
+ */
+static int64_t sreset_core(struct cpu_thread *cpu, unsigned int thread_mask)
+{
+	uint32_t thread_id;
+	uint32_t sreset_mask = 0, ram_mask = 0;
+	uint64_t ras_status;
+	int timeout;
+	int64_t rc = OPAL_SUCCESS;
+	unsigned int max_thread_id = pir_to_thread_id(-1U);
+	unsigned int max_thread_mask = (1 << (max_thread_id + 1)) - 1;
+
+	assert(cpu == cpu->primary);
+	thread_mask &= max_thread_mask;
+
+	/*
+	 * Compare cores, not threads: the caller may be running on a
+	 * secondary thread of the target core, which must be refused too
+	 * as we would end up stopping ourselves.
+	 */
+	if (this_cpu()->primary == cpu) {
+		prlog(PR_WARNING, "SRESET: Unable to reset threads on self\n");
+		return OPAL_PARAMETER;
+	}
+
+	prlog(PR_INFO, "SRESET: Start reset for cpu 0x%x thread_mask 0x%x\n",
+	      cpu->pir, thread_mask);
+
+	/* cpu is the primary thread */
+	if (set_special_wakeup(cpu) != OPAL_SUCCESS)
+		return OPAL_BUSY;
+
+	/* Stop threads selected for sreset */
+	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
+		if (!((1 << thread_id) & thread_mask))
+			continue;
+
+		set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
+		ras_status = get_ras_status(cpu, thread_id);
+		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
+			sreset_mask |= 1 << thread_id;
+	}
+
+	if (thread_mask == sreset_mask) {
+		/* All threads selected for sreset can be sreset directly */
+		prlog(PR_INFO, "SRESET: All threads inactive, doing direct sreset\n");
+		for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
+			if ((1 << thread_id) & thread_mask)
+				set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
+		}
+		rc = OPAL_SUCCESS;
+		goto out;
+	}
+
+	/* Need to emulate sreset so stop all other threads */
+	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
+		if (!((1 << thread_id) & thread_mask))
+			set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
+	}
+
+	/* Work out which threads to sreset and which need sreset emulation */
+	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
+		ras_status = get_ras_status(cpu, thread_id);
+		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE)) {
+			sreset_mask |= 1 << thread_id;
+			continue;
+		}
+
+		/* Active thread: give it a bounded number of polls to quiesce */
+		for (timeout = 0; timeout < RAS_STATUS_TIMEOUT; timeout++) {
+			ras_status = get_ras_status(cpu, thread_id);
+			if ((ras_status & RAS_STATUS_SRQ_EMPTY)
+			    && (ras_status & RAS_STATUS_LSU_QUIESCED)
+			    && (ras_status & RAS_STATUS_TS_QUIESCE)) {
+				ram_mask |= 1 << thread_id;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * To emulate sreset we need to make sure all threads on a core are either:
+	 *  a) Quiesced
+	 *  b) Not active (recorded in sreset_mask)
+	 * So skip ramming threads if we're not in the right state.
+	 */
+	if ((ram_mask & thread_mask) && (ram_mask | sreset_mask) != max_thread_mask) {
+		prlog(PR_ERR, "SRESET: Unable to quiesce all threads for ramming (sreset_mask 0x%08x / ram_mask 0x%08x)\n",
+		      sreset_mask, ram_mask);
+		ram_mask = 0;
+		rc = OPAL_PARTIAL;
+	}
+
+	/* We need to ram threads before doing the direct sresets as
+	 * that makes the threads active */
+	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
+		if (!((1 << thread_id) & thread_mask))
+			continue;
+
+		if (!((1 << thread_id) & ram_mask))
+			continue;
+
+		/* Don't lose a ramming failure, the thread was not reset */
+		if (emulate_sreset(cpu, thread_id) != OPAL_SUCCESS)
+			rc = OPAL_HARDWARE;
+	}
+
+	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
+		if (!((1 << thread_id) & thread_mask))
+			continue;
+
+		if ((1 << thread_id) & sreset_mask) {
+			prlog(PR_INFO, "SRESET: cpu 0x%x thread 0x%x not active, applying SRESET directly\n",
+			      cpu->pir, thread_id);
+			set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
+		}
+	}
+
+out:
+	/* Start all threads */
+	for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
+		set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_START);
+
+	clr_special_wakeup(cpu);
+
+	return rc;
+}
+
+#define SYS_RESET_ALLBUTSELF (-2)
+/*
+ * OPAL_SIGNAL_SYSTEM_RESET handler.
+ *
+ * cpu_nr is either the server number of the single cpu to reset, or
+ * SYS_RESET_ALLBUTSELF to reset every thread of every core other than
+ * the core this call runs on. Any other negative value is rejected.
+ *
+ * Returns OPAL_UNSUPPORTED on anything but P8, OPAL_PARAMETER for an
+ * unknown cpu_nr, otherwise the result of sreset_core(). When several
+ * cores are reset the first failure seen is returned; the remaining
+ * cores are still attempted.
+ */
+int64_t signal_system_reset(int cpu_nr)
+{
+	int64_t rc = OPAL_SUCCESS;
+	int64_t core_rc;
+	struct cpu_thread *cpu;
+	uint32_t thread_id;
+
+	if (proc_gen != proc_gen_p8)
+		return OPAL_UNSUPPORTED;
+
+	/* Reset a single CPU */
+	if (cpu_nr >= 0) {
+		cpu = find_cpu_by_server(cpu_nr);
+		if (!cpu)
+			return OPAL_PARAMETER;
+
+		thread_id = pir_to_thread_id(cpu->pir);
+		cpu = cpu->primary;
+		return sreset_core(cpu, 1 << thread_id);
+	}
+
+	/* The only negative value with a defined meaning */
+	if (cpu_nr != SYS_RESET_ALLBUTSELF)
+		return OPAL_PARAMETER;
+
+	/* Otherwise reset all CPUs, one core (primary thread) at a time */
+	for_each_cpu(cpu) {
+		if (cpu->primary != cpu)
+			continue;
+
+		/* We cannot ram threads on the core we are running on */
+		if (cpu == this_cpu()->primary)
+			continue;
+
+		/*
+		 * sreset all threads on a core. OPAL error codes are
+		 * negative integers, so they must not be OR'ed together:
+		 * keep the first failure instead.
+		 */
+		core_rc = sreset_core(cpu, -1U);
+		if (core_rc != OPAL_SUCCESS && rc == OPAL_SUCCESS)
+			rc = core_rc;
+	}
+
+	return rc;
+}
diff --git a/doc/opal-api/opal-signal-system-reset-128.txt b/doc/opal-api/opal-signal-system-reset-128.txt
new file mode 100644
index 0000000..bb1b869
--- /dev/null
+++ b/doc/opal-api/opal-signal-system-reset-128.txt
@@ -0,0 +1,30 @@ 
+OPAL_SIGNAL_SYSTEM_RESET
+-------------------
+
+#define OPAL_SIGNAL_SYSTEM_RESET			128
+
+int64_t signal_system_reset(int cpu_nr)
+
+Arguments:
+
+  int cpu_nr
+    Either the cpu server number of the target cpu to reset or
+    SYS_RESET_ALLBUTSELF (-2) to indicate all but the current cpu
+    should be reset.
+
+This OPAL call causes the specified cpu(s) to be reset to the system
+reset exception handler (0x100). Sleeping cpus will be woken with
+SRR1[42:45] = 0b0100 indicating an interrupt caused by SCOM when in
+power saving mode. Active cpus will also indicate interrupt caused by
+SCOM but will have SRR1[46:47] = 0 as the interrupt did not occur
+during a power saving mode.
+
+Resetting active threads on the same core as this call is run is
+currently not supported.
+
+Return Values:
+OPAL_SUCCESS: the requested cpu(s) were reset successfully
+OPAL_PARAMETER: a parameter was incorrect
+OPAL_HARDWARE: hardware indicated failure during instruction ramming
+OPAL_PARTIAL: not all requested cpus could be reset at this time
+OPAL_BUSY: special wakeup could not be set on the target core
+OPAL_UNSUPPORTED: this processor generation is not supported
diff --git a/include/opal-api.h b/include/opal-api.h
index 05ff51d..be9a534 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -181,7 +181,8 @@ 
 #define OPAL_INT_SET_MFRR			125
 #define OPAL_PCI_TCE_KILL			126
 #define OPAL_NMMU_SET_PTCR			127
-#define OPAL_LAST				127
+#define OPAL_SIGNAL_SYSTEM_RESET		128
+#define OPAL_LAST				128

 /* Device tree flags */

diff --git a/include/skiboot.h b/include/skiboot.h
index 2ef7677..b796a2c 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -191,6 +191,7 @@  extern unsigned long get_symbol(unsigned long addr,

 /* Fast reboot support */
 extern void fast_reboot(void);
+extern int64_t signal_system_reset(int cpu_nr);
 extern void __noreturn __secondary_cpu_entry(void);
 extern void __noreturn load_and_boot_kernel(bool is_reboot);
 extern void cleanup_tlb(void);
diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
index e1a8a4d..e4761ee 100644
--- a/platforms/astbmc/common.c
+++ b/platforms/astbmc/common.c
@@ -356,4 +356,6 @@  void astbmc_early_init(void)
 	uart_init();

 	prd_init();
+
+	opal_register(OPAL_SIGNAL_SYSTEM_RESET, signal_system_reset, 1);
 }