[RFC,2/2] Add SBE driver support

Submitted by Vasant Hegde on Feb. 15, 2017, 3:14 p.m.

Details

Message ID 1487171655-23729-2-git-send-email-hegdevasant@linux.vnet.ibm.com
State New
Headers show

Commit Message

Vasant Hegde Feb. 15, 2017, 3:14 p.m.
SBE (Self Boot Engine) on P9 has two different jobs:
  - Boot the chip-up till core is functional
  - Provide various services like timer, scom, etc., at runtime

OPAL can communicate to SBE via a set of data and control registers provided
by the PSU block in P9 chip.
 - Four 8 byte registers for Host to send command packets to SBE.
 - Four 8 byte registers for SBE to send response packets to Host
 - Two doorbell registers (1 on each side) to alert either party
   when data is placed in above mentioned data register.

Protocol constraints:
  Only one command is accepted in the command buffer until the response for the
  command is enqueued in the response buffer by SBE.

Usage:
  We will use SBE for various purposes like timer, sreset, MPIPL, etc.

This patch implements the SBE MBOX spec for OPAL to communicate with
SBE.

Design consideration:
  - SBE works on request - response basis. It will not send any notification
    without request.
  - Each chip has SBE. We need to track SBE messages per chip. Hence added
    per chip sbe structure and list of messages to that chip.
  - SBE accepts only one command at a time. Hence serialized MBOX commands.
  - OPAL gets interrupted once SBE sets doorbell register
  - OPAL has to clear doorbell register after reading response.
  - Every command class has timeout option. Timed out messages are discarded.
  - SBE MBOX commands can be classified into four types :
    - Those that must be sent to the master only (ex: sending MDST/MDDT info)
    - Those that must be sent to slaves only (ex: continue MPIPL)
    - Those that must be sent to all chips (ex: close unsecure window)
    - Those that can be sent to any chip (ex: timer)

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
---
 core/init.c     |   4 +
 hw/Makefile.inc |   2 +-
 hw/psi.c        |   3 +-
 hw/sbe.c        | 556 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/chip.h  |   4 +
 include/sbe.h   | 235 ++++++++++++++++++++++++
 6 files changed, 802 insertions(+), 2 deletions(-)
 create mode 100644 hw/sbe.c
 create mode 100644 include/sbe.h

Comments

Oliver O'Halloran Feb. 16, 2017, 6:26 a.m.
On Wed, 2017-02-15 at 20:44 +0530, Vasant Hegde wrote:
> SBE (Self Boot Engine) on P9 has two different jobs:
>   - Boot the chip-up till core is functional
>   - Provide various services like timer, scom, etc., at runtime
> 
> OPAL can communicate to SBE via a set of data and control registers
> provided
> by the PSU block in P9 chip.
>  - Four 8 byte registers for Host to send command packets to SBE.
>  - Four 8 byte registers for SBE to send response packets to Host
>  - Two doorbell registers (1 on each side) to alert either party
>    when data is placed in above mentioned data register.
> 
> Protocol constraints:
>   Only one command is accepted in the command buffer until the
> response for the
>   command is enqueued in the response buffer by SBE.
> 
> Usage:
>   We will use SBE for various purposes like timer, sreset, MPIPL,
> etc.
> 
> This patch implements the SBE MBOX spec for OPAL to communicate with
> SBE.
> 
> Design consideration:
>   - SBE works on request - response basis. It will not send any
> notification
>     without request.
>   - Each chip has SBE. We need to track SBE messages per chip. Hence
> added
>     per chip sbe structure and list of messages to that chip.
>   - SBE accepts only one command at a time. Hence serialized MBOX
> commands.
>   - OPAL gets interrupted once SBE sets doorbell register
>   - OPAL has to clear doorbell register after reading response.
>   - Every command class has timeout option. Timed out messages are
> discarded.
>   - SBE MBOX commands can be classified into four types :
>     - Those that must be sent to the master only (ex: sending
> MDST/MDDT info)
>     - Those that must be sent to slaves only (ex: continue MPIPL)
>     - Those that must be sent to all chips (ex: close unsecure
> window)
>     - Those that can be sent to any chip (ex: timer)
> 
> Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
> ---
>  core/init.c     |   4 +
>  hw/Makefile.inc |   2 +-
>  hw/psi.c        |   3 +-
>  hw/sbe.c        | 556
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/chip.h  |   4 +
>  include/sbe.h   | 235 ++++++++++++++++++++++++
>  6 files changed, 802 insertions(+), 2 deletions(-)
>  create mode 100644 hw/sbe.c
>  create mode 100644 include/sbe.h
> 
> diff --git a/core/init.c b/core/init.c
> index c753b61..c7ca130 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -47,6 +47,7 @@
>  #include <nvram.h>
>  #include <libstb/stb.h>
>  #include <libstb/container.h>
> +#include <sbe.h>
>  
>  enum proc_gen proc_gen;
>  
> @@ -796,6 +797,9 @@ void __noreturn __nomcount main_cpu_entry(const
> void *fdt)
>  	xscom_init();
>  	mfsi_init();
>  
> +	/* SBE init */
> +	sbe_init();
> +
>  	/*
>  	 * Put various bits & pieces in device-tree that might not
>  	 * already be there such as the /chosen node if not there
> yet,
> diff --git a/hw/Makefile.inc b/hw/Makefile.inc
> index f2dc328..f5b0fdc 100644
> --- a/hw/Makefile.inc
> +++ b/hw/Makefile.inc
> @@ -6,7 +6,7 @@ HW_OBJS += nx.o nx-rng.o nx-crypto.o nx-842.o
>  HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o
>  HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o
>  HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o
> -HW_OBJS += fake-nvram.o
> +HW_OBJS += fake-nvram.o sbe.o
>  HW=hw/built-in.o
>  
>  # FIXME hack this for now
> diff --git a/hw/psi.c b/hw/psi.c
> index 5d97fbb..a061179 100644
> --- a/hw/psi.c
> +++ b/hw/psi.c
> @@ -33,6 +33,7 @@
>  #include <platform.h>
>  #include <errorlog.h>
>  #include <xive.h>
> +#include <sbe.h>
>  
>  static LIST_HEAD(psis);
>  static u64 psi_link_timer;
> @@ -603,7 +604,7 @@ static void psihb_p9_interrupt(struct irq_source
> *is, uint32_t isn)
>  		printf("PSI: DIO irq received\n");
>  		break;
>  	case P9_PSI_IRQ_PSU:
> -		printf("PSI: PSU irq received\n");
> +		sbe_interrupt(psi->chip_id);
>  		break;
>  	}
>  }
> diff --git a/hw/sbe.c b/hw/sbe.c
> new file mode 100644
> index 0000000..112f840
> --- /dev/null
> +++ b/hw/sbe.c
> @@ -0,0 +1,556 @@
> +/* Copyright 2017 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + *	http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions
> and
> + * limitations under the License.
> + */
> +
> +#define pr_fmt(fmt) "SBE: " fmt
> +
> +#include <chip.h>
> +#include <errorlog.h>
> +#include <lock.h>
> +#include <opal.h>
> +#include <sbe.h>
> +#include <skiboot.h>
> +#include <timebase.h>
> +#include <trace.h>
> +#include <xscom.h>
> +
> +struct sbe {
> +	/* Chip ID to send message */
> +	u32	chip_id;
> +
> +	/* List to hold SBE queue messages */
> +	struct list_head	msg_list;
> +
> +	struct lock lock;
> +
> +	/* SBE MBOX message sequence number */
> +	u16	cur_seq;
> +};
> +
> +/* Default SBE chip ID */
> +static u32 sbe_default_chip_id = -1;
> +
> +
> +static u64 sbe_rreg(u32 chip_id, u64 reg)
> +{
> +	u64 data = 0;
> +
> +	xscom_read(chip_id, reg, &data);
> +	return be64_to_cpu(data);
> +}
> +
> +static void sbe_reg_dump(u32 chip_id)
> +{
> +#define SBE_DUMP_REG_ONE(chip_id, x) \
> +	prlog(PR_DEBUG, "  %20s: %llx\n", #x, sbe_rreg(chip_id, x));
> +
> +	prlog(PR_DEBUG, "MBOX register dump for chip : %x\n",
> chip_id);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG0);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG1);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG2);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG3);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG4);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG5);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG6);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG7);
> +}
> +
> +static u64 sbe_get_cmdclass_timeout(u8 cmdclass)
> +{
> +	switch (cmdclass) {
> +	case SBE_MCLASS_CORE_STATE:
> +	case SBE_MCLASS_TIMER:
> +	case SBE_MCLASS_MPIPL:
> +	case SBE_MCLASS_SECURITY:
> +	case SBE_MCLASS_GENERIC:
> +		return SBE_CMD_TIMEOUT_SHORT_MS;
> +	case SBE_MCLASS_SCOM:
> +	case SBE_MCLASS_RING:
> +		return SBE_CMD_TIMEOUT_LONG_MS;
> +	default: /* Shouldn't happen. assert? */
> +		prlog(PR_ERR, "Invalid command class\n");
> +		return 0;
> +	}
> +}
> +

> +static inline struct sbe_msg *__sbe_allocmsg(void)
> +{
> +	return zalloc(sizeof(struct sbe_msg));
> +}
> +
> +struct sbe_msg *sbe_allocmsg(bool alloc_response)
> +{
> +	struct sbe_msg *msg;
> +
> +	msg = __sbe_allocmsg();
> +	if (!msg)
> +		return NULL;
> +
> +	if (alloc_response) {
> +		msg->resp = __sbe_allocmsg();
> +		if (!msg->resp) {
> +			free(msg);
> +			return NULL;
> +		}
Given that we're limited to one outstanding response per SBE we could
have a sbe_msg inside the sbe structure itself and skip this extra
allocation. Up to you.

> +	}
> +
> +	return msg;
> +}
> +
> +static inline void __sbe_freemsg(struct sbe_msg *msg)
> +{
> +	free(msg);
> +}
> +
> +void sbe_freemsg(struct sbe_msg *msg)
> +{
> +	if (msg && msg->resp)
> +		__sbe_freemsg(msg->resp);
> +	__sbe_freemsg(msg);
> +}

Is there any reason we need to wrap zalloc() and free() with
__sbe_allocmsg() and __sbe_freemsg()?

> +
> +void sbe_fillmsg(struct sbe_msg *msg, u16 cmd,
> +		 u16 ctrl_flag, u64 reg1, u64 reg2, u64 reg3)
> +{
> +	bool response = !!(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
> +
> +	/* Seqence ID is filled by sbe_queue_msg() */
> +	msg->reg0 = ((u64)ctrl_flag << 32) | cmd;
> +	msg->reg1 = reg1;
> +	msg->reg2 = reg2;
> +	msg->reg3 = reg3;
> +
> +	prlog(PR_TRACE, "MBOX message :\n\t Reg0 : %llx\n\t "
> +	      "Reg1 : %llx\n\t Reg2 : %llx\n\t Reg3 : %lld\n",
> +	      msg->reg0, msg->reg1, msg->reg2, msg->reg3);
> +
> +	msg->response = response;
> +}
> +
> +/*
> + * Handles "command with direct data" format only.
> + *
> + * Note: All mbox messages of our interest uses direct data format.
> + *       If we need indirect data format then we have to enhance
> + *       this function.
> + */
> +struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag,
> +			  u64 reg1, u64 reg2, u64 reg3)
> +{
> +	struct sbe_msg *msg;
> +
> +	msg = sbe_allocmsg(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
> +	if (!msg) {
> +		prlog(PR_ERR, "Failed to allocate memory for SBE
> message\n");
> +		return NULL;
> +	}
> +
> +	sbe_fillmsg(msg, cmd, ctrl_flag, reg1, reg2, reg3);
> +	return msg;
> +}
> +

What's the use case for making fillmsg public? Given this is an async
protocol you will need to allocate the structure anyway. Using static
storage will require extraneous synchronisation or just be straight up
broken.

Having a single entry point (mkmsg) would also allow you to
transparently implement a freelist (if needed) for messages similar to
how the opal message API works.

> +static inline bool sbe_busy(struct sbe_msg *msg)
> +{
> +	switch (msg->state) {
> +	case sbe_msg_unused:
> +	case sbe_msg_queued:
> +	case sbe_msg_response:
> +	case sbe_msg_done:
> +	case sbe_msg_timeout:
> +	case sbe_msg_cancelled:
> +		return false;
> +	default:	/* + sbe_msg_sent, sbe_msg_wresp */
> +		break;
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * Read SBE doorbell to confirm its ready to accept command. Since
> the device
> + * driver single threads the requests, we should never see not being
> ready to
> + * send a request.
> + */
>
> +static bool sbe_ready(u32 chip_id)
> +{
> +	int rc;
> +	u64 data;
> +
> +	rc = xscom_read(chip_id, PSU_SBE_DOORBELL_REG_RW, &data);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to read Host to SBE doorbell
> register "
> +		      "[chip id = %x]\n", chip_id);
> +		return false;
> +	}
> +
> +	if (data & HOST_SBE_MSG_WAITING) {
> +		prlog(PR_ERR, "Not ready to accept command [chip id
> = %x]."
> +		      " Doorbell reg : %llx\n", chip_id, data);
> +		/* Dump register content */
> +		sbe_reg_dump(chip_id);
> +		return false;
> +	}
> +
> +	return true;
> +}

Maybe rename these to sbe_msg_finished() and sbe_hw_ready(). From the
names you'd expect them to be implemented similarly, but the when you
look at them the relationship really isn't obvious.

> +
> +static int sbe_send_msg(u32 chip_id, struct sbe_msg *msg)
> +{
> +	int rc, i;
> +	u64 addr, *data;
> +
> +	addr = PSU_HOST_SBE_MBOX_REG0;
> +	data = &msg->reg0;
> +
> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
> +		rc = xscom_write(chip_id, addr, *data);
> +		if (rc)
> +			return rc;
> +
> +		addr++;
> +		data++;
> +	}
> +
> +	rc = xscom_write(chip_id, PSU_SBE_DOORBELL_REG_OR,
> +			 HOST_SBE_MSG_WAITING);
> +	if (rc == OPAL_SUCCESS)
> +		msg->state = sbe_msg_sent;
> +
> +	return rc;
> +}
> +
> +static int sbe_recv_msg(u32 chip_id, struct sbe_msg *resp)
> +{
> +	int i;
> +	int rc = OPAL_SUCCESS;
> +	u64 addr, *data;
> +
> +	addr = PSU_HOST_SBE_MBOX_REG4;
> +	data = &resp->reg0;
> +
> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
> +		rc = xscom_read(chip_id, addr, data);
> +		if (rc)
> +			return rc;
> +
> +		addr++;
> +		data++;
> +	}
> +
> +	resp->state = sbe_msg_response;
> +	return rc;
> +}
> +

> +/* This is called with sbe->lock */
> +static void sbe_complete_msg(struct sbe *sbe, struct sbe_msg *msg)
> +{
> +	void (*comp)(struct sbe_msg *msg);
> +
> +	prlog(PR_INSANE, "Completing msg [chip id = %x], reg0 :
> 0x%llx\n",
> +	      sbe->chip_id, msg->reg0);
> +
> +	comp = msg->complete;
> +	msg->state = sbe_msg_done;
> +	list_del_from(&sbe->msg_list, &msg->link);
> +
> +	if (comp) {
> +		unlock(&sbe->lock);
> +		(*comp)(msg);
> +		lock(&sbe->lock);
> +	} else {
> +		sbe_freemsg(msg);
> +	}
> +}

The extra function pointer variable is kinda weird. Can you just use
msg->complete directly?

> +
> +static void sbe_complete_send(struct sbe *sbe, struct sbe_msg *msg)
> +{
> +	u8 cmdclass;
> +	u64 timeout;
> +
> +	prlog(PR_INSANE, "Completing send [chip id = %x], reg0 :
> 0x%llx\n",
> +	      sbe->chip_id, msg->reg0);
> +
> +	/* Need response */
> +	if (msg->response) {
> +		cmdclass = (u8)(msg->reg0 >> 8) & 0xff;
> +		timeout = sbe_get_cmdclass_timeout(cmdclass);
> +		msg->timeout = mftb() + msecs_to_tb(timeout);
> +		msg->state = sbe_msg_wresp;
> +	} else {
> +		sbe_complete_msg(sbe, msg);
> +	}
> +}

I don't really like how the logic of sending a message is spread across
 
sbe_complete_send(), sbe_poke_queue() and sbe_send_msg(). It just seems
to make the flow control convoluted especially when compared to the
recieve path. Matter of preference I suppose...

> +
> +static void sbe_poke_queue(struct sbe *sbe)
> +{
> +	int rc;
> +	struct sbe_msg *msg;
> +
> +	if (list_empty(&sbe->msg_list))
> +		return;
> +
> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
> +	if (sbe_busy(msg))
> +		return;
> +
> +	if (!sbe_ready(sbe->chip_id))
> +		return;
> +
> +	/* Send message */
> +	rc = sbe_send_msg(sbe->chip_id, msg);
> +	if (rc) {
> +		/* XXX commit error log */
> +		prlog(PR_ERR, "Failed to send message to SBE "
> +		      "[chip id = %x]\n", sbe->chip_id);
> +
> +		if (msg->resp)
> +			msg->resp->reg0 |=
> ((u64)SBE_STATUS_PRI_GENERIC_ERR << 48);
Should we make these unsigned long literals so they don't need to be
casted? You could also bake the shift into the constant itself.

> +
> +		sbe_complete_msg(sbe, msg);
> +	} else {
> +		sbe_complete_send(sbe, msg);
> +	}
> +
> +	/* Recursive call to send next message */
> +	sbe_poke_queue(sbe);

Maybe document that this is limited to a single recursion due to how
the send/recv logic works. IIRC Stewart would prefer we didn't use
recursion at all in skiboot, but it is safe here.

> +}
> +
> +/*
> + * WARNING:
> + *         Only one command is accepted in the command buffer until
> the response
> + *         for the command is en-queued in the response buffer by
> SBE.
> + *
> + *         Head of msg_list contains inflight message. Hence we
> should always
> + *         add new message to tail of the list.
> + */
> +int sbe_queue_msg(u32 chip_id, struct sbe_msg *msg,
> +		  void (*comp)(struct sbe_msg *msg))
> +{
> +	struct proc_chip *chip;
> +	struct sbe *sbe;
> +
> +	if (!msg)
> +		return OPAL_PARAMETER;
> +
> +	if (sbe_default_chip_id == -1)
> +		return OPAL_INTERNAL_ERROR;
> +
> +	/* Default to master chip ID */
> +	if (chip_id == -1)
> +		chip = get_chip(sbe_default_chip_id);
> +	else
> +		chip = get_chip(chip_id);
> +
> +	if (chip == NULL || chip->sbe == NULL)
> +		return OPAL_PARAMETER;
> +
> +	sbe = chip->sbe;
> +
> +	/* Set completion */
> +	msg->complete = comp;
> +	msg->state = sbe_msg_queued;
> +
> +	/* Clear response state */
> +	if (msg->resp)
> +		msg->resp->state = sbe_msg_unused;
> +
> +	lock(&sbe->lock);
> +	/* Update sequence number */
> +	msg->reg0 = msg->reg0 | (sbe->cur_seq << 16);
> +	sbe->cur_seq++;
> +
> +	/* Reset sequence number */
> +	if (sbe->cur_seq == 0xffff)
> +		sbe->cur_seq = 1;
> +
> +	/* Add message to queue */
> +	list_add_tail(&sbe->msg_list, &msg->link);
> +	sbe_poke_queue(sbe);
> +	unlock(&sbe->lock);
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +static void sbe_handle_response(struct sbe *sbe)
> +{
> +	u16 send_seq, resp_seq;
> +	int rc;
> +	struct sbe_msg *msg;
> +
> +	if (list_empty(&sbe->msg_list))
> +		return;
> +
> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
> +	rc = sbe_recv_msg(sbe->chip_id, msg->resp);
> +	if (rc != OPAL_SUCCESS) {
> +		msg->resp->reg0 |= ((u64)SBE_STATUS_PRI_GENERIC_ERR
> << 48);
> +		/* XXX commit error log */
> +		prlog(PR_ERR, "Failed to read response message "
> +		      "[chip id = %x]\n", sbe->chip_id);
> +	}
> +
> +	/* Validate sequence number */
> +	send_seq = (msg->reg0 >> 16) && 0xffff;
> +	resp_seq = (msg->resp->reg0 >> 16) && 0xffff;

That should probably be a bitwise AND.

> +	if (send_seq != resp_seq) {
> +		/* XXX commit error log */
> +		prlog(PR_ERR, "Invalid sequence id [chip id =
> %x]\n",
> +		      sbe->chip_id);
> +		return;
If sbe_msg_recv() fails above the sequence numbers might be wrong and
it'll return without calling sbe_complete_msg(). Should it be doing
that?

> +	}
> +
> +	sbe_complete_msg(sbe, msg);
> +}
> +
> +static void sbe_timeout_poll(void *user_data __unused)
> +{
> +	u64 now;
> +	struct sbe *sbe;
> +	struct sbe_msg *msg;
> +	struct proc_chip *chip;
> +
> +	for_each_chip(chip) {
> +		if (chip->sbe == NULL)
> +			continue;
> +
> +		sbe = chip->sbe;
> +		lock(&sbe->lock);
> +		if (list_empty(&sbe->msg_list)) {
> +			unlock(&sbe->lock);
> +			continue;
> +		}
> +
> +		msg = list_top(&sbe->msg_list, struct sbe_msg,
> link);
> +		now = mftb();
> +		if (tb_compare(now, msg->timeout) != TB_AAFTERB) {
> +			unlock(&sbe->lock);
> +			continue;
> +		}
> +
> +		/* Update response message */
> +		if (msg->resp) {
> +			msg->resp->state = sbe_msg_timeout;
> +			msg->resp->reg0 |=
> ((u64)SBE_STATUS_PRI_GENERIC_ERR << 48);
> +
> +			prlog(PR_ERR, "Message timeout [chip id =
> %x], "
> +			      "Reg0 = %llx\n", sbe->chip_id, msg-
> >reg0);
> +			sbe_reg_dump(sbe->chip_id);
> +		}
> +
> +		sbe_complete_msg(sbe, msg);

What's the time taken for a SBE reset? The logic in sbe_interrupt()
reposts messages while we complete them here. IIRC the P9 SBE boot time
is fairly long so resets might be the main source of timeouts. Should
we also be reposting the message here?

> +		sbe_poke_queue(sbe);
> +		unlock(&sbe->lock);
> +	}
> +}
> +
> +static void sbe_repost_msg(struct sbe *sbe)
> +{
> +	struct sbe_msg *msg;
> +
> +	if (list_empty(&sbe->msg_list))
> +		return;
> +
> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
> +	msg->state = sbe_msg_queued;
> +	sbe_poke_queue(sbe);
> +}
This function can be folded into the SBE reset case in sbe_interrupt().
below. When you take out the empty list handling (also done in
sbe_interrupt()) it's only two lines of code.

> +
> +void sbe_interrupt(uint32_t chip_id)
> +{
> +	int rc;
> +	u64 data;
> +	struct proc_chip *chip;
> +	struct sbe *sbe;
> +
> +	chip = get_chip(chip_id);
> +	if (chip == NULL || chip->sbe == NULL)
> +		return;
> +
> +	sbe = chip->sbe;
> +
> +	lock(&sbe->lock);
> +	/* Empty queue */
> +	if (list_empty(&sbe->msg_list))
> +		goto clr_interrupt;
> +
> +	/* Read doorbell register */
> +	rc = xscom_read(sbe->chip_id, PSU_HOST_DOORBELL_REG_RW,
> &data);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to read SBE to Host doorbell
> register "
> +		      "[chip id = %x]\n", chip_id);
> +		sbe_reg_dump(chip_id);
> +		goto clr_interrupt;
> +	}
> +
> +	/* SBE came back from reset */
> +	if (data & SBE_HOST_RESET) {
> +		prlog(PR_NOTICE, "Came back from reset [chip id =
> %x]\n",
> +		      chip_id);
> +		/* Repost message */
> +		sbe_repost_msg(sbe);
> +		goto clr_interrupt;
> +	}
> +
> +	/* Handle SBE response */
> +	if (data & SBE_HOST_RESPONSE_WAITING) {
> +		sbe_handle_response(sbe);
> +		sbe_poke_queue(sbe);
> +	}
> +

> +clr_interrupt:
> +	unlock(&sbe->lock);
> +
> +	rc = xscom_write(chip_id, PSU_HOST_DOORBELL_REG_AND,
> +			 SBE_HOST_RESPONSE_CLEAR);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to clear SBE to host doorbell
> "
> +		      "register [chip id = %x]\n", chip_id);
> +	}
> +}
> +
> +void sbe_init(void)
> +{
> +	struct dt_node *xn;
> +	struct proc_chip *chip;
> +	struct sbe *sbe;
> +
> +	if (proc_gen < proc_gen_p9)
> +		return;
> +
> +	dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
> +		sbe = zalloc(sizeof(struct sbe));
> +		assert(sbe);
> +		sbe->chip_id = dt_get_chip_id(xn);
> +		sbe->cur_seq = 1;
> +		list_head_init(&sbe->msg_list);
> +		init_lock(&sbe->lock);
> +
> +		chip = get_chip(sbe->chip_id);
> +		assert(chip);
> +		chip->sbe = sbe;
> +
> +		if (dt_has_node_property(xn, "primary", NULL))
> +			sbe_default_chip_id = sbe->chip_id;
> +	}
> +
> +	if (sbe_default_chip_id == -1) {
> +		/* XXX Commit error log */
> +		prlog(PR_ERR, "Master chip ID not found.\n");
> +		return;
> +	}
> +
> +	/* Initiate SBE timeout poller */
> +	opal_add_poller(sbe_timeout_poll, NULL);
> +}
> diff --git a/include/chip.h b/include/chip.h
> index 61b413c..a501009 100644
> --- a/include/chip.h
> +++ b/include/chip.h
> @@ -108,6 +108,7 @@ struct centaur_chip;
>  struct mfsi;
>  struct xive;
>  struct lpcm;
> +struct sbe;
>  
>  /* Chip type */
>  enum proc_chip_type {
> @@ -205,6 +206,9 @@ struct proc_chip {
>  
>  	/* Used by hw/xive.c */
>  	struct xive		*xive;
> +
> +	/* Used by hw/sbe.c */
> +	struct sbe		*sbe;
>  };
>  
>  extern uint32_t pir_to_chip_id(uint32_t pir);
> diff --git a/include/sbe.h b/include/sbe.h
> new file mode 100644
> index 0000000..8ac2682
> --- /dev/null
> +++ b/include/sbe.h
> @@ -0,0 +1,235 @@
> +/* Copyright 2017 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + *	http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions
> and
> + * limitations under the License.
> + */
> +
> +#ifndef __SBE_H
> +#define __SBE_H
> +
> +#include <bitutils.h>
> +#include <ccan/list/list.h>
> +#include <ccan/short_types/short_types.h>
> +
> +/*
> + * Timeout value for short running commands is 100 msecs and
> + * the timeout value for long running commands is 30 secs.
> + */
> +#define SBE_CMD_TIMEOUT_SHORT_MS		100
> +#define SBE_CMD_TIMEOUT_LONG_MS			(30 * 1000)
> +
> +/* Primary response status code */
> +#define SBE_STATUS_PRI_SUCCESS			0x00
> +#define SBE_STATUS_PRI_INVALID_CMD		0x01
> +#define SBE_STATUS_PRI_INVALID_DATA		0x02
> +#define SBE_STATUS_PRI_SEQ_ERR			0x03
> +#define SBE_STATUS_PRI_INTERNAL_ERR		0x04
> +#define SBE_STATUS_PRI_GENERIC_ERR		0xFE
> +
> +/* Secondary response status code */
> +#define SBE_STATUS_SEC_SUCCESS			0x00
> +#define SBE_STATUS_SEC_CMD_CLASS_UNSUPPORTED	0x01
> +#define SBE_STATUS_SEC_CMD_UNSUPPORTED		0x02
> +#define SBE_STATUS_SEC_INV_ADDR			0x03
> +#define SBE_STATUS_SEC_INV_TARGET_TYPE		0x04
> +#define SBE_STATUS_SEC_INV_TARGET_ID		0x05
> +#define SBE_STATUS_SEC_TARGET_NOT_PRESENT	0x06
> +#define SBE_STATUS_SEC_TARGET_NOT_FUNC		0x07
> +#define SBE_STATUS_SEC_CMD_NOT_ALLOW		0x08
> +#define SBE_STATUS_SEC_FUNC_NOT_SUPPORTED	0x09
> +#define SBE_STATUS_SEC_GENERIC_ERR		0x0A
> +#define SBE_STATUS_SEC_BLACKLIST_REG		0x0B
> +#define SBE_STATUS_SEC_OS_FAILURE		0x0C
> +#define SBE_STATUS_SEC_MBX_REG_FAILURE		0x0D
> +#define SBE_STATUS_SEC_INSUFFICIENT_DATA	0x0E
> +#define SBE_STATUS_SEC_EXCESS_DATA		0x0F
> +#define SBE_STATUS_SEC_BUSY			0x10
> +
> +/* Number of MBOX register on each side */
> +#define NR_HOST_SBE_MBOX_REG		0x04
> +
> +/*
> + * SBE MBOX register address
> + *   Reg 0 - 3 : Host to send command packets to SBE
> + *   Reg 4 - 7 : SBE to send response packets to Host
> + */
> +#define PSU_HOST_SBE_MBOX_REG0		0x000D0050
> +#define PSU_HOST_SBE_MBOX_REG1		0x000D0051
> +#define PSU_HOST_SBE_MBOX_REG2		0x000D0052
> +#define PSU_HOST_SBE_MBOX_REG3		0x000D0053
> +#define PSU_HOST_SBE_MBOX_REG4		0x000D0054
> +#define PSU_HOST_SBE_MBOX_REG5		0x000D0055
> +#define PSU_HOST_SBE_MBOX_REG6		0x000D0056
> +#define PSU_HOST_SBE_MBOX_REG7		0x000D0057
> +#define PSU_SBE_DOORBELL_REG_RW		0x000D0060
> +#define PSU_SBE_DOORBELL_REG_AND	0x000D0061
> +#define PSU_SBE_DOORBELL_REG_OR		0x000D0062
> +#define PSU_HOST_DOORBELL_REG_RW	0x000D0063
> +#define PSU_HOST_DOORBELL_REG_AND	0x000D0064
> +#define PSU_HOST_DOORBELL_REG_OR	0x000D0065
> +
> +/*
> + * Doorbell Register to trigger SBE interrupt. Set by OPAL to inform
> + * the SBE about a waiting message in the Host/SBE Mailbox Registers
> + */
> +#define HOST_SBE_MSG_WAITING		PPC_BIT(0)
> +
> +/*
> + * Doorbell Register for Host Bridge interrupt. Set by the SBE to
> inform
> + * host about a response message in the Host/SBE Mailbox Registers
> + */
> +#define SBE_HOST_RESPONSE_WAITING	PPC_BIT(0)
> +#define SBE_HOST_RESPONSE_CLEAR		~(SBE_HOST_RESPONSE_W
> AITING)
> +#define SBE_HOST_MSG_READ		PPC_BIT(1)
> +#define SBE_HOST_STOP15_EXIT		PPC_BIT(2)
> +#define SBE_HOST_RESET			PPC_BIT(3)
> +#define SBE_HOST_TIMER_EXPIRY		PPC_BIT(14)
> +
> +/* SBE Target Type */
> +#define SBE_TARGET_TYPE_PROC		0x00
> +#define SBE_TARGET_TYPE_EX		0x01
> +#define SBE_TARGET_TYPE_PERV		0x02
> +#define SBE_TARGET_TYPE_MCS		0x03
> +#define SBE_TARGET_TYPE_EQ		0x04
> +#define SBE_TARGET_TYPE_CORE		0x05
> +
> +/* SBE MBOX command class */
> +#define SBE_MCLASS_FIRST		0xD1
> +#define SBE_MCLASS_CORE_STATE		0xD1
> +#define SBE_MCLASS_SCOM			0xD2
> +#define SBE_MCLASS_RING			0xD3
> +#define SBE_MCLASS_TIMER		0xD4
> +#define SBE_MCLASS_MPIPL		0xD5
> +#define SBE_MCLASS_SECURITY		0xD6
> +#define SBE_MCLASS_GENERIC		0xD7
> +#define SBE_MCLASS_LAST			0xD7
> +
> +/*
> + * Commands are provided in xxyy form where:
> + *   - xx : command class
> + *   - yy : command
> + *
> + * Both request and response message uses same Seq ID,
> + * command class and command.
> + */
> +#define SBE_CMD_CTRL_DEADMAN_LOOP	0xD101
> +#define SBE_CMD_MULTI_SCOM		0xD201
> +#define SBE_CMD_PUT_RING_FORM_IMAGE	0xD301
> +#define SBE_CMD_CONTROL_TIMER		0xD401
> +#define SBE_CMD_CONTINUE_MPIPL		0xD501
> +#define SBE_CMD_GET_ARCHITECTED_REG	0xD502
> +#define SBE_CMD_CLR_ARCHITECTED_REG	0xD503
> +#define SBE_CMD_SET_UNSEC_MEM_WINDOW	0xD601
> +#define SBE_CMD_GET_SBE_FFDC		0xD701
> +#define SBE_CMD_GET_CAPABILITY		0xD702
> +#define SBE_CMD_GET_FREQUENCIES		0xD703
> +#define SBE_CMD_SET_FFDC_ADDR		0xD704
> +#define SBE_CMD_QUIESCE_SBE		0xD705
> +#define SBE_CMD_SET_FABRIC_ID_MAP	0xD706
> +#define SBE_CMD_STASH_MPIPL_CONFIG	0xD707
> +
> +/* SBE MBOX control flags */
> +
> +/* Generic flags */
> +#define SBE_CMD_CTRL_RESP_REQ		0x0100
> +#define SBE_CMD_CTRL_ACK_REQ		0x0200
> +
> +/* Deadman loop */
> +#define CTRL_DEADMAN_LOOP_START		0x0001
> +#define CTRL_DEADMAN_LOOP_STOP		0x0002
> +
> +/* Control timer */
> +#define CONTROL_TIMER_START		0x0001
> +#define CONTROL_TIMER_STOP		0x0002
> +
> +/* SBE message state */
> +enum sbe_msg_state {
> +	sbe_msg_unused = 0,
> +	sbe_msg_queued,
> +	sbe_msg_sent,
> +	sbe_msg_wresp,
> +	sbe_msg_response,
> +	sbe_msg_done,
> +	sbe_msg_timeout,
> +	sbe_msg_cancelled,
> +};
Can you add a few comments about what the various states mean and when
a message will be in each?

> +
> +/* SBE message */
> +struct sbe_msg {
> +	/*
> +	 * Reg0 :
> +	 *   word0 :
> +	 *     direct cmd : reserved << 16 | ctrl flag
> +	 *     indrect cmd: mem_addr_size_dword
> +	 *     response   : primary status << 16 | secondary satus
> +	 *
> +	 *   word1 : seq id << 16 | cmd class << 8 | cmd
> +	 *
> +	 * WARNING:
> +	 *   - Don't populate reg0.seq (byte 4,5). This will be
> populated by
> +	 *     sbe_queue_msg().
> +	 *   - Don't insert variables between reg<N> variables.
> +	 */
> +	u64	reg0;
> +	u64	reg1;
> +	u64	reg2;
> +	u64	reg3;
> +
> +	/* cmd timout : mftb() +
> msecs_to_tb(SBE_CMD_TIMEOUT_[SHORT/LONG]_MS) */
> +	u64	timeout;
> +
> +	/* Completion function */
> +	void (*complete)(struct sbe_msg *msg);
> +	void *user_data;
> +
> +	/* Current msg state */
> +	enum sbe_msg_state	state;
> +
> +	/* Set if the message expects a response */
> +	bool			response;
> +
> +	/* Response will be filed by driver when response received
> */
> +	struct sbe_msg		*resp;
> +
> +	/* Internal queuing */
> +	struct list_node	link;
> +} __packed;
> +
> +
> +/* Allocate and populate sbe_msg structure */
> +extern struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag, u64 reg1,
> +				 u64 reg2, u64 reg3)
> __warn_unused_result;
> +
> +/* Allocate sbe_msg */
> +extern struct sbe_msg *sbe_allocmsg(bool alloc_response)
> __warn_unused_result;
> +
> +/* Populate a pre-allocated sbe_msg */
> +extern void sbe_fillmsg(struct sbe_msg *msg, u16 cmd,
> +			u16 ctrl_flag, u64 reg1, u64 reg2, u64
> reg3);
> +
> +/*
> + * Free sbe_msg structure.
> + * This will free an attached response if any
> + */
> +extern void sbe_freemsg(struct sbe_msg *msg);
> +
> +/* Add new message to sbe queue */
> +extern int sbe_queue_msg(uint32_t chip_id, struct sbe_msg *msg,
> +		  void (*comp)(struct sbe_msg *msg))
> __warn_unused_result;
> +
> +/* Initialize the SBE mailbox driver */
> +extern void sbe_init(void);
> +
> +/* SBE interrupt */
> +extern void sbe_interrupt(uint32_t chip_id);
> +#endif	/* __SBE_H */
Vasant Hegde Feb. 22, 2017, 10:21 a.m.
On 02/16/2017 11:56 AM, Oliver O'Halloran wrote:
> On Wed, 2017-02-15 at 20:44 +0530, Vasant Hegde wrote:
>> SBE (Self Boot Engine) on P9 has two different jobs:
>>    - Boot the chip-up till core is functional
>>    - Provide various services like timer, scom, etc., at runtime

.../...

>> +static inline struct sbe_msg *__sbe_allocmsg(void)
>> +{
>> +	return zalloc(sizeof(struct sbe_msg));
>> +}
>> +
>> +struct sbe_msg *sbe_allocmsg(bool alloc_response)
>> +{
>> +	struct sbe_msg *msg;
>> +
>> +	msg = __sbe_allocmsg();
>> +	if (!msg)
>> +		return NULL;
>> +
>> +	if (alloc_response) {
>> +		msg->resp = __sbe_allocmsg();
>> +		if (!msg->resp) {
>> +			free(msg);
>> +			return NULL;
>> +		}
> Given that we're limited to one outstanding response per SBE we could
> have a sbe_msg inside the sbe structure itself and skip this extra
> allocation. Up to you.

I think I should have explained this in commit message.  Actually caller can 
queue any number of messages for given SBE. And driver will serialize sending 
message. Hence I've added two structures.


>
>> +	}
>> +
>> +	return msg;
>> +}
>> +
>> +static inline void __sbe_freemsg(struct sbe_msg *msg)
>> +{
>> +	free(msg);
>> +}
>> +
>> +void sbe_freemsg(struct sbe_msg *msg)
>> +{
>> +	if (msg && msg->resp)
>> +		__sbe_freemsg(msg->resp);
>> +	__sbe_freemsg(msg);
>> +}
>
> Is there any reason we need to wrap zalloc() and free() with
> __sbe_allocmsg() and __sbe_freemsg()?

Probably I can kill __sbe_allocmsg() and free().

>
>> +
>> +void sbe_fillmsg(struct sbe_msg *msg, u16 cmd,
>> +		 u16 ctrl_flag, u64 reg1, u64 reg2, u64 reg3)
>> +{
>> +	bool response = !!(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
>> +
>> +	/* Seqence ID is filled by sbe_queue_msg() */
>> +	msg->reg0 = ((u64)ctrl_flag << 32) | cmd;
>> +	msg->reg1 = reg1;
>> +	msg->reg2 = reg2;
>> +	msg->reg3 = reg3;
>> +
>> +	prlog(PR_TRACE, "MBOX message :\n\t Reg0 : %llx\n\t "
>> +	      "Reg1 : %llx\n\t Reg2 : %llx\n\t Reg3 : %lld\n",
>> +	      msg->reg0, msg->reg1, msg->reg2, msg->reg3);
>> +
>> +	msg->response = response;
>> +}
>> +
>> +/*
>> + * Handles "command with direct data" format only.
>> + *
>> + * Note: All mbox messages of our interest uses direct data format.
>> + *       If we need indirect data format then we have to enhance
>> + *       this function.
>> + */
>> +struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag,
>> +			  u64 reg1, u64 reg2, u64 reg3)
>> +{
>> +	struct sbe_msg *msg;
>> +
>> +	msg = sbe_allocmsg(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
>> +	if (!msg) {
>> +		prlog(PR_ERR, "Failed to allocate memory for SBE
>> message\n");
>> +		return NULL;
>> +	}
>> +
>> +	sbe_fillmsg(msg, cmd, ctrl_flag, reg1, reg2, reg3);
>> +	return msg;
>> +}
>> +
>
> What's the use case for making fillmsg public? Given this is an async
> protocol you will need to allocate the structure anyway. Using static
> storage will require extraneous synchronisation or just be straight up
> broken.

Correct. I can make couple of functions as static including fillmsg()

>
> Having a single entry point (mkmsg) would also allow you to
> transparently implement a freelist (if needed) for messages similar to
> how the opal message API works.

>
>> +static inline bool sbe_busy(struct sbe_msg *msg)
>> +{
>> +	switch (msg->state) {
>> +	case sbe_msg_unused:
>> +	case sbe_msg_queued:
>> +	case sbe_msg_response:
>> +	case sbe_msg_done:
>> +	case sbe_msg_timeout:
>> +	case sbe_msg_cancelled:
>> +		return false;
>> +	default:	/* + sbe_msg_sent, sbe_msg_wresp */
>> +		break;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +/*
>> + * Read SBE doorbell to confirm its ready to accept command. Since
>> the device
>> + * driver single threads the requests, we should never see not being
>> ready to
>> + * send a request.
>> + */
>>
>> +static bool sbe_ready(u32 chip_id)
>> +{
>> +	int rc;
>> +	u64 data;
>> +
>> +	rc = xscom_read(chip_id, PSU_SBE_DOORBELL_REG_RW, &data);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to read Host to SBE doorbell
>> register "
>> +		      "[chip id = %x]\n", chip_id);
>> +		return false;
>> +	}
>> +
>> +	if (data & HOST_SBE_MSG_WAITING) {
>> +		prlog(PR_ERR, "Not ready to accept command [chip id
>> = %x]."
>> +		      " Doorbell reg : %llx\n", chip_id, data);
>> +		/* Dump register content */
>> +		sbe_reg_dump(chip_id);
>> +		return false;
>> +	}
>> +
>> +	return true;
>> +}
>
> Maybe rename these to sbe_msg_finished() and sbe_hw_ready(). From the
> names you'd expect them to be implemented similarly, but the when you
> look at them the relationship really isn't obvious.

Done.

>
>> +
>> +static int sbe_send_msg(u32 chip_id, struct sbe_msg *msg)
>> +{
>> +	int rc, i;
>> +	u64 addr, *data;
>> +
>> +	addr = PSU_HOST_SBE_MBOX_REG0;
>> +	data = &msg->reg0;
>> +
>> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
>> +		rc = xscom_write(chip_id, addr, *data);
>> +		if (rc)
>> +			return rc;
>> +
>> +		addr++;
>> +		data++;
>> +	}
>> +
>> +	rc = xscom_write(chip_id, PSU_SBE_DOORBELL_REG_OR,
>> +			 HOST_SBE_MSG_WAITING);
>> +	if (rc == OPAL_SUCCESS)
>> +		msg->state = sbe_msg_sent;
>> +
>> +	return rc;
>> +}
>> +
>> +static int sbe_recv_msg(u32 chip_id, struct sbe_msg *resp)
>> +{
>> +	int i;
>> +	int rc = OPAL_SUCCESS;
>> +	u64 addr, *data;
>> +
>> +	addr = PSU_HOST_SBE_MBOX_REG4;
>> +	data = &resp->reg0;
>> +
>> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
>> +		rc = xscom_read(chip_id, addr, data);
>> +		if (rc)
>> +			return rc;
>> +
>> +		addr++;
>> +		data++;
>> +	}
>> +
>> +	resp->state = sbe_msg_response;
>> +	return rc;
>> +}
>> +
>
>> +/* This is called with sbe->lock */
>> +static void sbe_complete_msg(struct sbe *sbe, struct sbe_msg *msg)
>> +{
>> +	void (*comp)(struct sbe_msg *msg);
>> +
>> +	prlog(PR_INSANE, "Completing msg [chip id = %x], reg0 :
>> 0x%llx\n",
>> +	      sbe->chip_id, msg->reg0);
>> +
>> +	comp = msg->complete;
>> +	msg->state = sbe_msg_done;
>> +	list_del_from(&sbe->msg_list, &msg->link);
>> +
>> +	if (comp) {
>> +		unlock(&sbe->lock);
>> +		(*comp)(msg);
>> +		lock(&sbe->lock);
>> +	} else {
>> +		sbe_freemsg(msg);
>> +	}
>> +}
>
> The extra function pointer variable is kinda weird. Can you just use
> msg->complete directly?

yeah. I can do that.


>
>> +
>> +static void sbe_complete_send(struct sbe *sbe, struct sbe_msg *msg)
>> +{
>> +	u8 cmdclass;
>> +	u64 timeout;
>> +
>> +	prlog(PR_INSANE, "Completing send [chip id = %x], reg0 :
>> 0x%llx\n",
>> +	      sbe->chip_id, msg->reg0);
>> +
>> +	/* Need response */
>> +	if (msg->response) {
>> +		cmdclass = (u8)(msg->reg0 >> 8) & 0xff;
>> +		timeout = sbe_get_cmdclass_timeout(cmdclass);
>> +		msg->timeout = mftb() + msecs_to_tb(timeout);
>> +		msg->state = sbe_msg_wresp;
>> +	} else {
>> +		sbe_complete_msg(sbe, msg);
>> +	}
>> +}
>
> I don't really like how the logic of sending a message is spread across
>
> sbe_complete_send(), sbe_poke_queue() and sbe_send_msg(). It just seems
> to make the flow control convoluted especially when compared to the
> recieve path. Matter of preference I suppose...


So message may or may not expect response from SBE. If its not expecting 
response, then I will send complete message to caller as soon as I send message 
to SBE. That's why I have extra function in sender path. (sbe_complete_send()). 
  I thought having extra function makes it easy to read.


>
>> +
>> +static void sbe_poke_queue(struct sbe *sbe)
>> +{
>> +	int rc;
>> +	struct sbe_msg *msg;
>> +
>> +	if (list_empty(&sbe->msg_list))
>> +		return;
>> +
>> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>> +	if (sbe_busy(msg))
>> +		return;
>> +
>> +	if (!sbe_ready(sbe->chip_id))
>> +		return;
>> +
>> +	/* Send message */
>> +	rc = sbe_send_msg(sbe->chip_id, msg);
>> +	if (rc) {
>> +		/* XXX commit error log */
>> +		prlog(PR_ERR, "Failed to send message to SBE "
>> +		      "[chip id = %x]\n", sbe->chip_id);
>> +
>> +		if (msg->resp)
>> +			msg->resp->reg0 |=
>> ((u64)SBE_STATUS_PRI_GENERIC_ERR << 48);
> Should we make these unsigned long literals so they don't need to be
> casted? You could also bake the shift into the constant itself.

Yes. They are embedding more info in return code. I was waiting for more clarity 
on spec.
Will fix it in next iteration.

>
>> +
>> +		sbe_complete_msg(sbe, msg);
>> +	} else {
>> +		sbe_complete_send(sbe, msg);
>> +	}
>> +
>> +	/* Recursive call to send next message */
>> +	sbe_poke_queue(sbe);
>
> Maybe document that this is limited to a single recursion due to how
> the send/recv logic works. IIRC Stewart would prefer we didn't use
> recursion at all in skiboot, but it is safe here.

Yep. Will add documentation.

>
>> +}
>> +
>> +/*
>> + * WARNING:
>> + *         Only one command is accepted in the command buffer until
>> the response
>> + *         for the command is en-queued in the response buffer by
>> SBE.
>> + *
>> + *         Head of msg_list contains inflight message. Hence we
>> should always
>> + *         add new message to tail of the list.
>> + */
>> +int sbe_queue_msg(u32 chip_id, struct sbe_msg *msg,
>> +		  void (*comp)(struct sbe_msg *msg))
>> +{
>> +	struct proc_chip *chip;
>> +	struct sbe *sbe;
>> +
>> +	if (!msg)
>> +		return OPAL_PARAMETER;
>> +
>> +	if (sbe_default_chip_id == -1)
>> +		return OPAL_INTERNAL_ERROR;
>> +
>> +	/* Default to master chip ID */
>> +	if (chip_id == -1)
>> +		chip = get_chip(sbe_default_chip_id);
>> +	else
>> +		chip = get_chip(chip_id);
>> +
>> +	if (chip == NULL || chip->sbe == NULL)
>> +		return OPAL_PARAMETER;
>> +
>> +	sbe = chip->sbe;
>> +
>> +	/* Set completion */
>> +	msg->complete = comp;
>> +	msg->state = sbe_msg_queued;
>> +
>> +	/* Clear response state */
>> +	if (msg->resp)
>> +		msg->resp->state = sbe_msg_unused;
>> +
>> +	lock(&sbe->lock);
>> +	/* Update sequence number */
>> +	msg->reg0 = msg->reg0 | (sbe->cur_seq << 16);
>> +	sbe->cur_seq++;
>> +
>> +	/* Reset sequence number */
>> +	if (sbe->cur_seq == 0xffff)
>> +		sbe->cur_seq = 1;
>> +
>> +	/* Add message to queue */
>> +	list_add_tail(&sbe->msg_list, &msg->link);
>> +	sbe_poke_queue(sbe);
>> +	unlock(&sbe->lock);
>> +
>> +	return OPAL_SUCCESS;
>> +}
>> +
>> +static void sbe_handle_response(struct sbe *sbe)
>> +{
>> +	u16 send_seq, resp_seq;
>> +	int rc;
>> +	struct sbe_msg *msg;
>> +
>> +	if (list_empty(&sbe->msg_list))
>> +		return;
>> +
>> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>> +	rc = sbe_recv_msg(sbe->chip_id, msg->resp);
>> +	if (rc != OPAL_SUCCESS) {
>> +		msg->resp->reg0 |= ((u64)SBE_STATUS_PRI_GENERIC_ERR
>> << 48);
>> +		/* XXX commit error log */
>> +		prlog(PR_ERR, "Failed to read response message "
>> +		      "[chip id = %x]\n", sbe->chip_id);
>> +	}
>> +
>> +	/* Validate sequence number */
>> +	send_seq = (msg->reg0 >> 16) && 0xffff;
>> +	resp_seq = (msg->resp->reg0 >> 16) && 0xffff;
>
> That should probably be a bitwise AND.

Let me try.

>
>> +	if (send_seq != resp_seq) {
>> +		/* XXX commit error log */
>> +		prlog(PR_ERR, "Invalid sequence id [chip id =
>> %x]\n",
>> +		      sbe->chip_id);
>> +		return;
> If sbe_msg_recv() fails above the sequence numbers might be wrong and
> it'll return without calling sbe_complete_msg(). Should it be doing
> that?

You are right. I've to re arrange this code.

>
>> +	}
>> +
>> +	sbe_complete_msg(sbe, msg);
>> +}
>> +
>> +static void sbe_timeout_poll(void *user_data __unused)
>> +{
>> +	u64 now;
>> +	struct sbe *sbe;
>> +	struct sbe_msg *msg;
>> +	struct proc_chip *chip;
>> +
>> +	for_each_chip(chip) {
>> +		if (chip->sbe == NULL)
>> +			continue;
>> +
>> +		sbe = chip->sbe;
>> +		lock(&sbe->lock);
>> +		if (list_empty(&sbe->msg_list)) {
>> +			unlock(&sbe->lock);
>> +			continue;
>> +		}
>> +
>> +		msg = list_top(&sbe->msg_list, struct sbe_msg,
>> link);
>> +		now = mftb();
>> +		if (tb_compare(now, msg->timeout) != TB_AAFTERB) {
>> +			unlock(&sbe->lock);
>> +			continue;
>> +		}
>> +
>> +		/* Update response message */
>> +		if (msg->resp) {
>> +			msg->resp->state = sbe_msg_timeout;
>> +			msg->resp->reg0 |=
>> ((u64)SBE_STATUS_PRI_GENERIC_ERR << 48);
>> +
>> +			prlog(PR_ERR, "Message timeout [chip id =
>> %x], "
>> +			      "Reg0 = %llx\n", sbe->chip_id, msg-
>>> reg0);
>> +			sbe_reg_dump(sbe->chip_id);
>> +		}
>> +
>> +		sbe_complete_msg(sbe, msg);
>
> What's the time taken for a SBE reset? The logic in sbe_interrupt()
> reposts messages while we complete them here. IIRC the P9 SBE boot time
> is fairly long so resets might be the main source of timeouts. Should
> we also be reposting the message here?

SBE accepts one message at a time and we wait until we get response to that 
message before
sending next one. Now timeout may happen for two reasons:
   - SBE didn't respond without specified timeout period (that's what this 
function handles)
   - SBE reset happens (after reset we get interrupt and I'm trying to repost 
last message).

Right now we don't know whether SBE is alive or not. So it may happen that I 
will continue to post
next message while SBE is dead/reset happening. This is something to fix.



>
>> +		sbe_poke_queue(sbe);
>> +		unlock(&sbe->lock);
>> +	}
>> +}
>> +
>> +static void sbe_repost_msg(struct sbe *sbe)
>> +{
>> +	struct sbe_msg *msg;
>> +
>> +	if (list_empty(&sbe->msg_list))
>> +		return;
>> +
>> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>> +	msg->state = sbe_msg_queued;
>> +	sbe_poke_queue(sbe);
>> +}
> This function can be folded into the SBE reset case in sbe_interrupt().
> below. When you take out the empty list handling (also done in
> sbe_interrupt()) it's only two lines of code.

Somehow I've habit of writing too many funcitons. Yes. I can be folded into 
interrupt() function.

-Vasant
Oliver O'Halloran Feb. 22, 2017, 12:06 p.m.
On Wed, Feb 22, 2017 at 9:21 PM, Vasant Hegde
<hegdevasant@linux.vnet.ibm.com> wrote:
> On 02/16/2017 11:56 AM, Oliver O'Halloran wrote:
>>
>> On Wed, 2017-02-15 at 20:44 +0530, Vasant Hegde wrote:
>>>
>>> SBE (Self Boot Engine) on P9 has two different jobs:
>>>    - Boot the chip-up till core is functional
>>>    - Provide various services like timer, scom, etc., at runtime
>
>
> .../...
>
>>> +static inline struct sbe_msg *__sbe_allocmsg(void)
>>> +{
>>> +       return zalloc(sizeof(struct sbe_msg));
>>> +}
>>> +
>>> +struct sbe_msg *sbe_allocmsg(bool alloc_response)
>>> +{
>>> +       struct sbe_msg *msg;
>>> +
>>> +       msg = __sbe_allocmsg();
>>> +       if (!msg)
>>> +               return NULL;
>>> +
>>> +       if (alloc_response) {
>>> +               msg->resp = __sbe_allocmsg();
>>> +               if (!msg->resp) {
>>> +                       free(msg);
>>> +                       return NULL;
>>> +               }
>>
>> Given that we're limited to one outstanding response per SBE we could
>> have a sbe_msg inside the sbe structure itself and skip this extra
>> allocation. Up to you.
>
>
> I think I should have explained this in commit message.  Actually caller can
> queue any number of messages for given SBE. And driver will serialize
> sending message. Hence I've added two structures.

I know you can have any number of messages queued up, but like you
said sending them is serialised. As far as I can tell the receive
logic looks something like this:

msg = msg_at_top_of_queue();
remove_from_queue(msg);
if (msg->complete)
     msg->complete(msg);
sbe_free(msg);
sbe_queue_poke();

So we completely handle the message at the top of the queue before
processing the next one. The space that was allocated for the response
to the first message is free()ed immediately after calling the
completion function. What I was getting at is that because responses
are also serialised there is no need to allocate a separate response
structure for each message. Instead we could have a single response
structure (for each SBE) that is re-used when processing each message
in the queue. I think, I might have missed something ;)

Oliver
Vasant Hegde Feb. 28, 2017, 8:23 a.m.
On 02/22/2017 05:36 PM, Oliver O'Halloran wrote:
> On Wed, Feb 22, 2017 at 9:21 PM, Vasant Hegde
> <hegdevasant@linux.vnet.ibm.com> wrote:
>> On 02/16/2017 11:56 AM, Oliver O'Halloran wrote:
>>>
>>> On Wed, 2017-02-15 at 20:44 +0530, Vasant Hegde wrote:
>>>>
>>>> SBE (Self Boot Engine) on P9 has two different jobs:
>>>>     - Boot the chip-up till core is functional
>>>>     - Provide various services like timer, scom, etc., at runtime
>>
>>
>> .../...
>>
>>>> +static inline struct sbe_msg *__sbe_allocmsg(void)
>>>> +{
>>>> +       return zalloc(sizeof(struct sbe_msg));
>>>> +}
>>>> +
>>>> +struct sbe_msg *sbe_allocmsg(bool alloc_response)
>>>> +{
>>>> +       struct sbe_msg *msg;
>>>> +
>>>> +       msg = __sbe_allocmsg();
>>>> +       if (!msg)
>>>> +               return NULL;
>>>> +
>>>> +       if (alloc_response) {
>>>> +               msg->resp = __sbe_allocmsg();
>>>> +               if (!msg->resp) {
>>>> +                       free(msg);
>>>> +                       return NULL;
>>>> +               }
>>>
>>> Given that we're limited to one outstanding response per SBE we could
>>> have a sbe_msg inside the sbe structure itself and skip this extra
>>> allocation. Up to you.
>>
>>
>> I think I should have explained this in commit message.  Actually caller can
>> queue any number of messages for given SBE. And driver will serialize
>> sending message. Hence I've added two structures.
>
> I know you can have any number of messages queued up, but like you
> said sending them is serialised. As far as I can tell the receive
> logic looks something like this:
>
> msg = msg_at_top_of_queue();
> remove_from_queue(msg);
> if (msg->complete)
>       msg->complete(msg);
> sbe_free(msg);

I want caller to decide whether to free message or not. Like in case of timer, I 
want o allocate msg once and reuse it instead of allocate/free/allocate.


> sbe_queue_poke();
>
> So we completely handle the message at the top of the queue before
> processing the next one. The space that was allocated for the response
> to the first message is free()ed immediately after calling the
> completion function. What I was getting at is that because responses
> are also serialised there is no need to allocate a separate response
> structure for each message. Instead we could have a single response
> structure (for each SBE) that is re-used when processing each message
> in the queue. I think, I might have missed something ;)


Yeah. I can allocate response message in sbe structure itself and then pass the 
response message to caller. something like
comp(msg, resp);

-Vasant

Patch hide | download patch | download mbox

diff --git a/core/init.c b/core/init.c
index c753b61..c7ca130 100644
--- a/core/init.c
+++ b/core/init.c
@@ -47,6 +47,7 @@ 
 #include <nvram.h>
 #include <libstb/stb.h>
 #include <libstb/container.h>
+#include <sbe.h>
 
 enum proc_gen proc_gen;
 
@@ -796,6 +797,9 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	xscom_init();
 	mfsi_init();
 
+	/* SBE init */
+	sbe_init();
+
 	/*
 	 * Put various bits & pieces in device-tree that might not
 	 * already be there such as the /chosen node if not there yet,
diff --git a/hw/Makefile.inc b/hw/Makefile.inc
index f2dc328..f5b0fdc 100644
--- a/hw/Makefile.inc
+++ b/hw/Makefile.inc
@@ -6,7 +6,7 @@  HW_OBJS += nx.o nx-rng.o nx-crypto.o nx-842.o
 HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o
 HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o
 HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o
-HW_OBJS += fake-nvram.o
+HW_OBJS += fake-nvram.o sbe.o
 HW=hw/built-in.o
 
 # FIXME hack this for now
diff --git a/hw/psi.c b/hw/psi.c
index 5d97fbb..a061179 100644
--- a/hw/psi.c
+++ b/hw/psi.c
@@ -33,6 +33,7 @@ 
 #include <platform.h>
 #include <errorlog.h>
 #include <xive.h>
+#include <sbe.h>
 
 static LIST_HEAD(psis);
 static u64 psi_link_timer;
@@ -603,7 +604,7 @@  static void psihb_p9_interrupt(struct irq_source *is, uint32_t isn)
 		printf("PSI: DIO irq received\n");
 		break;
 	case P9_PSI_IRQ_PSU:
-		printf("PSI: PSU irq received\n");
+		sbe_interrupt(psi->chip_id);
 		break;
 	}
 }
diff --git a/hw/sbe.c b/hw/sbe.c
new file mode 100644
index 0000000..112f840
--- /dev/null
+++ b/hw/sbe.c
@@ -0,0 +1,556 @@ 
+/* Copyright 2017 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define pr_fmt(fmt) "SBE: " fmt
+
+#include <chip.h>
+#include <errorlog.h>
+#include <lock.h>
+#include <opal.h>
+#include <sbe.h>
+#include <skiboot.h>
+#include <timebase.h>
+#include <trace.h>
+#include <xscom.h>
+
+struct sbe {
+	/* Chip ID to send message */
+	u32	chip_id;
+
+	/* List to hold SBE queue messages */
+	struct list_head	msg_list;
+
+	struct lock lock;
+
+	/* SBE MBOX message sequence number */
+	u16	cur_seq;
+};
+
+/* Default SBE chip ID */
+static u32 sbe_default_chip_id = -1;
+
+
+static u64 sbe_rreg(u32 chip_id, u64 reg)
+{
+	u64 data = 0;
+
+	xscom_read(chip_id, reg, &data);
+	return be64_to_cpu(data);
+}
+
+static void sbe_reg_dump(u32 chip_id)
+{
+#define SBE_DUMP_REG_ONE(chip_id, x) \
+	prlog(PR_DEBUG, "  %20s: %llx\n", #x, sbe_rreg(chip_id, x));
+
+	prlog(PR_DEBUG, "MBOX register dump for chip : %x\n", chip_id);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG0);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG1);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG2);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG3);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG4);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG5);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG6);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG7);
+}
+
+static u64 sbe_get_cmdclass_timeout(u8 cmdclass)
+{
+	switch (cmdclass) {
+	case SBE_MCLASS_CORE_STATE:
+	case SBE_MCLASS_TIMER:
+	case SBE_MCLASS_MPIPL:
+	case SBE_MCLASS_SECURITY:
+	case SBE_MCLASS_GENERIC:
+		return SBE_CMD_TIMEOUT_SHORT_MS;
+	case SBE_MCLASS_SCOM:
+	case SBE_MCLASS_RING:
+		return SBE_CMD_TIMEOUT_LONG_MS;
+	default: /* Shouldn't happen. assert? */
+		prlog(PR_ERR, "Invalid command class\n");
+		return 0;
+	}
+}
+
+static inline struct sbe_msg *__sbe_allocmsg(void)
+{
+	return zalloc(sizeof(struct sbe_msg));
+}
+
+struct sbe_msg *sbe_allocmsg(bool alloc_response)
+{
+	struct sbe_msg *msg;
+
+	msg = __sbe_allocmsg();
+	if (!msg)
+		return NULL;
+
+	if (alloc_response) {
+		msg->resp = __sbe_allocmsg();
+		if (!msg->resp) {
+			free(msg);
+			return NULL;
+		}
+	}
+
+	return msg;
+}
+
+static inline void __sbe_freemsg(struct sbe_msg *msg)
+{
+	free(msg);
+}
+
+void sbe_freemsg(struct sbe_msg *msg)
+{
+	if (msg && msg->resp)
+		__sbe_freemsg(msg->resp);
+	__sbe_freemsg(msg);
+}
+
+void sbe_fillmsg(struct sbe_msg *msg, u16 cmd,
+		 u16 ctrl_flag, u64 reg1, u64 reg2, u64 reg3)
+{
+	bool response = !!(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
+
+	/* Seqence ID is filled by sbe_queue_msg() */
+	msg->reg0 = ((u64)ctrl_flag << 32) | cmd;
+	msg->reg1 = reg1;
+	msg->reg2 = reg2;
+	msg->reg3 = reg3;
+
+	prlog(PR_TRACE, "MBOX message :\n\t Reg0 : %llx\n\t "
+	      "Reg1 : %llx\n\t Reg2 : %llx\n\t Reg3 : %lld\n",
+	      msg->reg0, msg->reg1, msg->reg2, msg->reg3);
+
+	msg->response = response;
+}
+
+/*
+ * Handles "command with direct data" format only.
+ *
+ * Note: All mbox messages of our interest uses direct data format.
+ *       If we need indirect data format then we have to enhance
+ *       this function.
+ */
+struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag,
+			  u64 reg1, u64 reg2, u64 reg3)
+{
+	struct sbe_msg *msg;
+
+	msg = sbe_allocmsg(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
+	if (!msg) {
+		prlog(PR_ERR, "Failed to allocate memory for SBE message\n");
+		return NULL;
+	}
+
+	sbe_fillmsg(msg, cmd, ctrl_flag, reg1, reg2, reg3);
+	return msg;
+}
+
+static inline bool sbe_busy(struct sbe_msg *msg)
+{
+	switch (msg->state) {
+	case sbe_msg_unused:
+	case sbe_msg_queued:
+	case sbe_msg_response:
+	case sbe_msg_done:
+	case sbe_msg_timeout:
+	case sbe_msg_cancelled:
+		return false;
+	default:	/* + sbe_msg_sent, sbe_msg_wresp */
+		break;
+	}
+
+	return true;
+}
+
+/*
+ * Read SBE doorbell to confirm its ready to accept command. Since the device
+ * driver single threads the requests, we should never see not being ready to
+ * send a request.
+ */
+static bool sbe_ready(u32 chip_id)
+{
+	int rc;
+	u64 data;
+
+	rc = xscom_read(chip_id, PSU_SBE_DOORBELL_REG_RW, &data);
+	if (rc) {
+		prlog(PR_ERR, "Failed to read Host to SBE doorbell register "
+		      "[chip id = %x]\n", chip_id);
+		return false;
+	}
+
+	if (data & HOST_SBE_MSG_WAITING) {
+		prlog(PR_ERR, "Not ready to accept command [chip id = %x]."
+		      " Doorbell reg : %llx\n", chip_id, data);
+		/* Dump register content */
+		sbe_reg_dump(chip_id);
+		return false;
+	}
+
+	return true;
+}
+
+static int sbe_send_msg(u32 chip_id, struct sbe_msg *msg)
+{
+	int rc, i;
+	u64 addr, *data;
+
+	addr = PSU_HOST_SBE_MBOX_REG0;
+	data = &msg->reg0;
+
+	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
+		rc = xscom_write(chip_id, addr, *data);
+		if (rc)
+			return rc;
+
+		addr++;
+		data++;
+	}
+
+	rc = xscom_write(chip_id, PSU_SBE_DOORBELL_REG_OR,
+			 HOST_SBE_MSG_WAITING);
+	if (rc == OPAL_SUCCESS)
+		msg->state = sbe_msg_sent;
+
+	return rc;
+}
+
+static int sbe_recv_msg(u32 chip_id, struct sbe_msg *resp)
+{
+	int i;
+	int rc = OPAL_SUCCESS;
+	u64 addr, *data;
+
+	addr = PSU_HOST_SBE_MBOX_REG4;
+	data = &resp->reg0;
+
+	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
+		rc = xscom_read(chip_id, addr, data);
+		if (rc)
+			return rc;
+
+		addr++;
+		data++;
+	}
+
+	resp->state = sbe_msg_response;
+	return rc;
+}
+
+/* This is called with sbe->lock */
+static void sbe_complete_msg(struct sbe *sbe, struct sbe_msg *msg)
+{
+	void (*comp)(struct sbe_msg *msg);
+
+	prlog(PR_INSANE, "Completing msg [chip id = %x], reg0 : 0x%llx\n",
+	      sbe->chip_id, msg->reg0);
+
+	comp = msg->complete;
+	msg->state = sbe_msg_done;
+	list_del_from(&sbe->msg_list, &msg->link);
+
+	if (comp) {
+		unlock(&sbe->lock);
+		(*comp)(msg);
+		lock(&sbe->lock);
+	} else {
+		sbe_freemsg(msg);
+	}
+}
+
+static void sbe_complete_send(struct sbe *sbe, struct sbe_msg *msg)
+{
+	u8 cmdclass;
+	u64 timeout;
+
+	prlog(PR_INSANE, "Completing send [chip id = %x], reg0 : 0x%llx\n",
+	      sbe->chip_id, msg->reg0);
+
+	/* Need response */
+	if (msg->response) {
+		cmdclass = (u8)(msg->reg0 >> 8) & 0xff;
+		timeout = sbe_get_cmdclass_timeout(cmdclass);
+		msg->timeout = mftb() + msecs_to_tb(timeout);
+		msg->state = sbe_msg_wresp;
+	} else {
+		sbe_complete_msg(sbe, msg);
+	}
+}
+
+static void sbe_poke_queue(struct sbe *sbe)
+{
+	int rc;
+	struct sbe_msg *msg;
+
+	if (list_empty(&sbe->msg_list))
+		return;
+
+	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
+	if (sbe_busy(msg))
+		return;
+
+	if (!sbe_ready(sbe->chip_id))
+		return;
+
+	/* Send message */
+	rc = sbe_send_msg(sbe->chip_id, msg);
+	if (rc) {
+		/* XXX commit error log */
+		prlog(PR_ERR, "Failed to send message to SBE "
+		      "[chip id = %x]\n", sbe->chip_id);
+
+		if (msg->resp)
+			msg->resp->reg0 |= ((u64)SBE_STATUS_PRI_GENERIC_ERR << 48);
+
+		sbe_complete_msg(sbe, msg);
+	} else {
+		sbe_complete_send(sbe, msg);
+	}
+
+	/* Recursive call to send next message */
+	sbe_poke_queue(sbe);
+}
+
+/*
+ * WARNING:
+ *         Only one command is accepted in the command buffer until the response
+ *         for the command is en-queued in the response buffer by SBE.
+ *
+ *         Head of msg_list contains inflight message. Hence we should always
+ *         add new message to tail of the list.
+ */
+int sbe_queue_msg(u32 chip_id, struct sbe_msg *msg,
+		  void (*comp)(struct sbe_msg *msg))
+{
+	struct proc_chip *chip;
+	struct sbe *sbe;
+
+	if (!msg)
+		return OPAL_PARAMETER;
+
+	if (sbe_default_chip_id == -1)
+		return OPAL_INTERNAL_ERROR;
+
+	/* Default to master chip ID */
+	if (chip_id == -1)
+		chip = get_chip(sbe_default_chip_id);
+	else
+		chip = get_chip(chip_id);
+
+	if (chip == NULL || chip->sbe == NULL)
+		return OPAL_PARAMETER;
+
+	sbe = chip->sbe;
+
+	/* Set completion */
+	msg->complete = comp;
+	msg->state = sbe_msg_queued;
+
+	/* Clear response state */
+	if (msg->resp)
+		msg->resp->state = sbe_msg_unused;
+
+	lock(&sbe->lock);
+	/* Update sequence number */
+	msg->reg0 = msg->reg0 | (sbe->cur_seq << 16);
+	sbe->cur_seq++;
+
+	/* Reset sequence number */
+	if (sbe->cur_seq == 0xffff)
+		sbe->cur_seq = 1;
+
+	/* Add message to queue */
+	list_add_tail(&sbe->msg_list, &msg->link);
+	sbe_poke_queue(sbe);
+	unlock(&sbe->lock);
+
+	return OPAL_SUCCESS;
+}
+
+static void sbe_handle_response(struct sbe *sbe)
+{
+	u16 send_seq, resp_seq;
+	int rc;
+	struct sbe_msg *msg;
+
+	if (list_empty(&sbe->msg_list))
+		return;
+
+	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
+	rc = sbe_recv_msg(sbe->chip_id, msg->resp);
+	if (rc != OPAL_SUCCESS) {
+		msg->resp->reg0 |= ((u64)SBE_STATUS_PRI_GENERIC_ERR << 48);
+		/* XXX commit error log */
+		prlog(PR_ERR, "Failed to read response message "
+		      "[chip id = %x]\n", sbe->chip_id);
+	}
+
+	/* Validate sequence number */
+	send_seq = (msg->reg0 >> 16) && 0xffff;
+	resp_seq = (msg->resp->reg0 >> 16) && 0xffff;
+	if (send_seq != resp_seq) {
+		/* XXX commit error log */
+		prlog(PR_ERR, "Invalid sequence id [chip id = %x]\n",
+		      sbe->chip_id);
+		return;
+	}
+
+	sbe_complete_msg(sbe, msg);
+}
+
+static void sbe_timeout_poll(void *user_data __unused)
+{
+	u64 now;
+	struct sbe *sbe;
+	struct sbe_msg *msg;
+	struct proc_chip *chip;
+
+	for_each_chip(chip) {
+		if (chip->sbe == NULL)
+			continue;
+
+		sbe = chip->sbe;
+		lock(&sbe->lock);
+		if (list_empty(&sbe->msg_list)) {
+			unlock(&sbe->lock);
+			continue;
+		}
+
+		msg = list_top(&sbe->msg_list, struct sbe_msg, link);
+		now = mftb();
+		if (tb_compare(now, msg->timeout) != TB_AAFTERB) {
+			unlock(&sbe->lock);
+			continue;
+		}
+
+		/* Update response message */
+		if (msg->resp) {
+			msg->resp->state = sbe_msg_timeout;
+			msg->resp->reg0 |= ((u64)SBE_STATUS_PRI_GENERIC_ERR << 48);
+
+			prlog(PR_ERR, "Message timeout [chip id = %x], "
+			      "Reg0 = %llx\n", sbe->chip_id, msg->reg0);
+			sbe_reg_dump(sbe->chip_id);
+		}
+
+		sbe_complete_msg(sbe, msg);
+		sbe_poke_queue(sbe);
+		unlock(&sbe->lock);
+	}
+}
+
+static void sbe_repost_msg(struct sbe *sbe)
+{
+	struct sbe_msg *msg;
+
+	if (list_empty(&sbe->msg_list))
+		return;
+
+	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
+	msg->state = sbe_msg_queued;
+	sbe_poke_queue(sbe);
+}
+
+void sbe_interrupt(uint32_t chip_id)
+{
+	int rc;
+	u64 data;
+	struct proc_chip *chip;
+	struct sbe *sbe;
+
+	chip = get_chip(chip_id);
+	if (chip == NULL || chip->sbe == NULL)
+		return;
+
+	sbe = chip->sbe;
+
+	lock(&sbe->lock);
+	/* Empty queue */
+	if (list_empty(&sbe->msg_list))
+		goto clr_interrupt;
+
+	/* Read doorbell register */
+	rc = xscom_read(sbe->chip_id, PSU_HOST_DOORBELL_REG_RW, &data);
+	if (rc) {
+		prlog(PR_ERR, "Failed to read SBE to Host doorbell register "
+		      "[chip id = %x]\n", chip_id);
+		sbe_reg_dump(chip_id);
+		goto clr_interrupt;
+	}
+
+	/* SBE came back from reset */
+	if (data & SBE_HOST_RESET) {
+		prlog(PR_NOTICE, "Came back from reset [chip id = %x]\n",
+		      chip_id);
+		/* Repost message */
+		sbe_repost_msg(sbe);
+		goto clr_interrupt;
+	}
+
+	/* Handle SBE response */
+	if (data & SBE_HOST_RESPONSE_WAITING) {
+		sbe_handle_response(sbe);
+		sbe_poke_queue(sbe);
+	}
+
+clr_interrupt:
+	unlock(&sbe->lock);
+
+	rc = xscom_write(chip_id, PSU_HOST_DOORBELL_REG_AND,
+			 SBE_HOST_RESPONSE_CLEAR);
+	if (rc) {
+		prlog(PR_ERR, "Failed to clear SBE to host doorbell "
+		      "register [chip id = %x]\n", chip_id);
+	}
+}
+
+void sbe_init(void)
+{
+	struct dt_node *xn;
+	struct proc_chip *chip;
+	struct sbe *sbe;
+
+	if (proc_gen < proc_gen_p9)
+		return;
+
+	dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
+		sbe = zalloc(sizeof(struct sbe));
+		assert(sbe);
+		sbe->chip_id = dt_get_chip_id(xn);
+		sbe->cur_seq = 1;
+		list_head_init(&sbe->msg_list);
+		init_lock(&sbe->lock);
+
+		chip = get_chip(sbe->chip_id);
+		assert(chip);
+		chip->sbe = sbe;
+
+		if (dt_has_node_property(xn, "primary", NULL))
+			sbe_default_chip_id = sbe->chip_id;
+	}
+
+	if (sbe_default_chip_id == -1) {
+		/* XXX Commit error log */
+		prlog(PR_ERR, "Master chip ID not found.\n");
+		return;
+	}
+
+	/* Initiate SBE timeout poller */
+	opal_add_poller(sbe_timeout_poll, NULL);
+}
diff --git a/include/chip.h b/include/chip.h
index 61b413c..a501009 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -108,6 +108,7 @@  struct centaur_chip;
 struct mfsi;
 struct xive;
 struct lpcm;
+struct sbe;
 
 /* Chip type */
 enum proc_chip_type {
@@ -205,6 +206,9 @@  struct proc_chip {
 
 	/* Used by hw/xive.c */
 	struct xive		*xive;
+
+	/* Used by hw/sbe.c */
+	struct sbe		*sbe;
 };
 
 extern uint32_t pir_to_chip_id(uint32_t pir);
diff --git a/include/sbe.h b/include/sbe.h
new file mode 100644
index 0000000..8ac2682
--- /dev/null
+++ b/include/sbe.h
@@ -0,0 +1,235 @@ 
+/* Copyright 2017 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SBE_H
+#define __SBE_H
+
+#include <bitutils.h>
+#include <ccan/list/list.h>
+#include <ccan/short_types/short_types.h>
+
+/*
+ * Timeout value for short running commands is 100 msecs and
+ * the timeout value for long running commands is 30 secs.
+ */
+#define SBE_CMD_TIMEOUT_SHORT_MS		100
+#define SBE_CMD_TIMEOUT_LONG_MS			(30 * 1000)
+
+/* Primary response status code */
+#define SBE_STATUS_PRI_SUCCESS			0x00
+#define SBE_STATUS_PRI_INVALID_CMD		0x01
+#define SBE_STATUS_PRI_INVALID_DATA		0x02
+#define SBE_STATUS_PRI_SEQ_ERR			0x03
+#define SBE_STATUS_PRI_INTERNAL_ERR		0x04
+#define SBE_STATUS_PRI_GENERIC_ERR		0xFE
+
+/* Secondary response status code */
+#define SBE_STATUS_SEC_SUCCESS			0x00
+#define SBE_STATUS_SEC_CMD_CLASS_UNSUPPORTED	0x01
+#define SBE_STATUS_SEC_CMD_UNSUPPORTED		0x02
+#define SBE_STATUS_SEC_INV_ADDR			0x03
+#define SBE_STATUS_SEC_INV_TARGET_TYPE		0x04
+#define SBE_STATUS_SEC_INV_TARGET_ID		0x05
+#define SBE_STATUS_SEC_TARGET_NOT_PRESENT	0x06
+#define SBE_STATUS_SEC_TARGET_NOT_FUNC		0x07
+#define SBE_STATUS_SEC_CMD_NOT_ALLOW		0x08
+#define SBE_STATUS_SEC_FUNC_NOT_SUPPORTED	0x09
+#define SBE_STATUS_SEC_GENERIC_ERR		0x0A
+#define SBE_STATUS_SEC_BLACKLIST_REG		0x0B
+#define SBE_STATUS_SEC_OS_FAILURE		0x0C
+#define SBE_STATUS_SEC_MBX_REG_FAILURE		0x0D
+#define SBE_STATUS_SEC_INSUFFICIENT_DATA	0x0E
+#define SBE_STATUS_SEC_EXCESS_DATA		0x0F
+#define SBE_STATUS_SEC_BUSY			0x10
+
+/* Number of MBOX register on each side */
+#define NR_HOST_SBE_MBOX_REG		0x04
+
+/*
+ * SBE MBOX register address
+ *   Reg 0 - 3 : Host to send command packets to SBE
+ *   Reg 4 - 7 : SBE to send response packets to Host
+ */
+#define PSU_HOST_SBE_MBOX_REG0		0x000D0050
+#define PSU_HOST_SBE_MBOX_REG1		0x000D0051
+#define PSU_HOST_SBE_MBOX_REG2		0x000D0052
+#define PSU_HOST_SBE_MBOX_REG3		0x000D0053
+#define PSU_HOST_SBE_MBOX_REG4		0x000D0054
+#define PSU_HOST_SBE_MBOX_REG5		0x000D0055
+#define PSU_HOST_SBE_MBOX_REG6		0x000D0056
+#define PSU_HOST_SBE_MBOX_REG7		0x000D0057
+#define PSU_SBE_DOORBELL_REG_RW		0x000D0060
+#define PSU_SBE_DOORBELL_REG_AND	0x000D0061
+#define PSU_SBE_DOORBELL_REG_OR		0x000D0062
+#define PSU_HOST_DOORBELL_REG_RW	0x000D0063
+#define PSU_HOST_DOORBELL_REG_AND	0x000D0064
+#define PSU_HOST_DOORBELL_REG_OR	0x000D0065
+
+/*
+ * Doorbell Register to trigger SBE interrupt. Set by OPAL to inform
+ * the SBE about a waiting message in the Host/SBE Mailbox Registers
+ */
+#define HOST_SBE_MSG_WAITING		PPC_BIT(0)
+
+/*
+ * Doorbell Register for Host Bridge interrupt. Set by the SBE to inform
+ * host about a response message in the Host/SBE Mailbox Registers
+ */
+#define SBE_HOST_RESPONSE_WAITING	PPC_BIT(0)
+#define SBE_HOST_RESPONSE_CLEAR		~(SBE_HOST_RESPONSE_WAITING)
+#define SBE_HOST_MSG_READ		PPC_BIT(1)
+#define SBE_HOST_STOP15_EXIT		PPC_BIT(2)
+#define SBE_HOST_RESET			PPC_BIT(3)
+#define SBE_HOST_TIMER_EXPIRY		PPC_BIT(14)
+
+/* SBE Target Type */
+#define SBE_TARGET_TYPE_PROC		0x00
+#define SBE_TARGET_TYPE_EX		0x01
+#define SBE_TARGET_TYPE_PERV		0x02
+#define SBE_TARGET_TYPE_MCS		0x03
+#define SBE_TARGET_TYPE_EQ		0x04
+#define SBE_TARGET_TYPE_CORE		0x05
+
+/* SBE MBOX command class */
+#define SBE_MCLASS_FIRST		0xD1
+#define SBE_MCLASS_CORE_STATE		0xD1
+#define SBE_MCLASS_SCOM			0xD2
+#define SBE_MCLASS_RING			0xD3
+#define SBE_MCLASS_TIMER		0xD4
+#define SBE_MCLASS_MPIPL		0xD5
+#define SBE_MCLASS_SECURITY		0xD6
+#define SBE_MCLASS_GENERIC		0xD7
+#define SBE_MCLASS_LAST			0xD7
+
+/*
+ * Commands are provided in xxyy form where:
+ *   - xx : command class
+ *   - yy : command
+ *
+ * Both request and response message uses same Seq ID,
+ * command class and command.
+ */
+#define SBE_CMD_CTRL_DEADMAN_LOOP	0xD101
+#define SBE_CMD_MULTI_SCOM		0xD201
+#define SBE_CMD_PUT_RING_FORM_IMAGE	0xD301
+#define SBE_CMD_CONTROL_TIMER		0xD401
+#define SBE_CMD_CONTINUE_MPIPL		0xD501
+#define SBE_CMD_GET_ARCHITECTED_REG	0xD502
+#define SBE_CMD_CLR_ARCHITECTED_REG	0xD503
+#define SBE_CMD_SET_UNSEC_MEM_WINDOW	0xD601
+#define SBE_CMD_GET_SBE_FFDC		0xD701
+#define SBE_CMD_GET_CAPABILITY		0xD702
+#define SBE_CMD_GET_FREQUENCIES		0xD703
+#define SBE_CMD_SET_FFDC_ADDR		0xD704
+#define SBE_CMD_QUIESCE_SBE		0xD705
+#define SBE_CMD_SET_FABRIC_ID_MAP	0xD706
+#define SBE_CMD_STASH_MPIPL_CONFIG	0xD707
+
+/* SBE MBOX control flags */
+
+/* Generic flags */
+#define SBE_CMD_CTRL_RESP_REQ		0x0100
+#define SBE_CMD_CTRL_ACK_REQ		0x0200
+
+/* Deadman loop */
+#define CTRL_DEADMAN_LOOP_START		0x0001
+#define CTRL_DEADMAN_LOOP_STOP		0x0002
+
+/* Control timer */
+#define CONTROL_TIMER_START		0x0001
+#define CONTROL_TIMER_STOP		0x0002
+
+/* SBE message state */
+enum sbe_msg_state {
+	sbe_msg_unused = 0,
+	sbe_msg_queued,
+	sbe_msg_sent,
+	sbe_msg_wresp,
+	sbe_msg_response,
+	sbe_msg_done,
+	sbe_msg_timeout,
+	sbe_msg_cancelled,
+};
+
+/* SBE message */
+struct sbe_msg {
+	/*
+	 * Reg0 :
+	 *   word0 :
+	 *     direct cmd : reserved << 16 | ctrl flag
+	 *     indrect cmd: mem_addr_size_dword
+	 *     response   : primary status << 16 | secondary satus
+	 *
+	 *   word1 : seq id << 16 | cmd class << 8 | cmd
+	 *
+	 * WARNING:
+	 *   - Don't populate reg0.seq (byte 4,5). This will be populated by
+	 *     sbe_queue_msg().
+	 *   - Don't insert variables between reg<N> variables.
+	 */
+	u64	reg0;
+	u64	reg1;
+	u64	reg2;
+	u64	reg3;
+
+	/* cmd timout : mftb() + msecs_to_tb(SBE_CMD_TIMEOUT_[SHORT/LONG]_MS) */
+	u64	timeout;
+
+	/* Completion function */
+	void (*complete)(struct sbe_msg *msg);
+	void *user_data;
+
+	/* Current msg state */
+	enum sbe_msg_state	state;
+
+	/* Set if the message expects a response */
+	bool			response;
+
+	/* Response will be filed by driver when response received */
+	struct sbe_msg		*resp;
+
+	/* Internal queuing */
+	struct list_node	link;
+} __packed;
+
+
+/* Allocate and populate sbe_msg structure */
+extern struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag, u64 reg1,
+				 u64 reg2, u64 reg3) __warn_unused_result;
+
+/* Allocate sbe_msg */
+extern struct sbe_msg *sbe_allocmsg(bool alloc_response) __warn_unused_result;
+
+/* Populate a pre-allocated sbe_msg */
+extern void sbe_fillmsg(struct sbe_msg *msg, u16 cmd,
+			u16 ctrl_flag, u64 reg1, u64 reg2, u64 reg3);
+
+/*
+ * Free sbe_msg structure.
+ * This will free an attached response if any
+ */
+extern void sbe_freemsg(struct sbe_msg *msg);
+
+/* Add new message to sbe queue */
+extern int sbe_queue_msg(uint32_t chip_id, struct sbe_msg *msg,
+		  void (*comp)(struct sbe_msg *msg)) __warn_unused_result;
+
+/* Initialize the SBE mailbox driver */
+extern void sbe_init(void);
+
+/* SBE interrupt */
+extern void sbe_interrupt(uint32_t chip_id);
+#endif	/* __SBE_H */