[v5,4/5] Add SBE driver support

Message ID 20170723113050.8815-5-hegdevasant@linux.vnet.ibm.com
State New
Headers show

Commit Message

Vasant Hegde July 23, 2017, 11:30 a.m.
SBE (Self Boot Engine) on P9 has two different jobs:
  - Boot the chip up to the point the core is functional
  - Provide various services like timer, scom, etc., at runtime

OPAL can communicate to SBE via a set of data and control registers provided
by the PSU block in P9 chip.
 - Four 8 byte registers for Host to send command packets to SBE
 - Four 8 byte registers for SBE to send response packets to Host
 - Two doorbell registers (1 on each side) to alert either party
   when data is placed in above mentioned data register

Protocol constraints:
  Only one command is accepted in the command buffer until the response for the
  command is enqueued in the response buffer by SBE.

Usage:
  We will use SBE for various purposes like timer, sreset, MPIPL, etc.

This patch implements the SBE MBOX spec for OPAL to communicate with
SBE.

Design consideration:
  - Each chip has SBE. We need to track SBE messages per chip. Hence added
    per chip sbe structure and list of messages to that chip.
  - SBE accepts only one command at a time. Hence serialized MBOX commands.
  - OPAL gets interrupted once SBE sets doorbell register
  - OPAL has to clear doorbell register after reading response.
  - Every command class has timeout option. Timed out messages are discarded.
  - SBE MBOX commands can be classified into four types :
    - Those that must be sent to the master only (ex: sending MDST/MDDT info)
    - Those that must be sent to slaves only (ex: continue MPIPL)
    - Those that must be sent to all chips (ex: close insecure window)
    - Those that can be sent to any chip (ex: timer)

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>

Testing:
  I have tested driver with timer chip op and its working fine.
---
 core/init.c      |   7 +
 hw/sbe-p9.c      | 592 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 include/chip.h   |   4 +
 include/sbe-p9.h | 196 ++++++++++++++++++
 4 files changed, 790 insertions(+), 9 deletions(-)

Comments

Benjamin Herrenschmidt July 23, 2017, 10:33 p.m. | #1
On Sun, 2017-07-23 at 17:00 +0530, Vasant Hegde wrote:
> 
> +	/*
> +	 * SBE uses TB value for scheduling timer. Hence init after
> +	 * chiptod init
> +	 */
> +	sbe_init();
> +

	p9_sbe_init();

>  	/* Initialize i2c */
>  	p8_i2c_init();
>  
> diff --git a/hw/sbe-p9.c b/hw/sbe-p9.c
> index c9c3feb..3af1094 100644
> --- a/hw/sbe-p9.c
> +++ b/hw/sbe-p9.c
> @@ -23,39 +23,613 @@
>  #include <sbe-p9.h>
>  #include <skiboot.h>
>  #include <timebase.h>
> -#include <timer.h>
>  #include <trace.h>
>  #include <xscom.h>

Add some comment documenting the protocol & doorbell register use.

> -void sbe_interrupt(uint32_t chip_id)
> +struct sbe {

p9_sbe... (and everything else here)

> +	/* Chip ID to send message */
> +	u32	chip_id;
> +
> +	/* List to hold SBE queue messages */
> +	struct list_head	msg_list;
> +
> +	struct lock lock;
> +
> +	/* SBE MBOX message sequence number */
> +	u16	cur_seq;
> +
> +	/*
> +	 * Sending messages to SBE is serialized. We can use
> +	 * single buffer per SBE to hold response message.
> +	 */
> +	struct sbe_msg *resp;
> +};
> +
> +/* Default SBE chip ID */
> +static int sbe_default_chip_id = -1;
> +
> +#define SBE_STATUS_PRI_SHIFT	0x30
> +#define SBE_STATUS_SEC_SHIFT	0x20
> +#define SBE_FFDC_PRESENT	PPC_BIT(1)
> +
> +static void sbe_timeout_poll(void *user_data __unused);
> +
> +static inline bool sbe_ffdc_present(struct sbe_msg *resp)
> +{
> +	return (resp->reg[0] & SBE_FFDC_PRESENT);
> +}
> +
> +/*
> + * bit 0    : PCBPIB status present in response
> + * bit 1    : FFDC package(s) present in response
> + * bit 2-3  : Reserved
> + * bit 4-15 : Primary status code
> + */
> +static inline u16 sbe_get_primary_rc(struct sbe_msg *resp)
> +{
> +	return ((resp->reg[0] >> SBE_STATUS_PRI_SHIFT) & 0xfff);
> +}
> +
> +static inline void sbe_set_primary_rc(struct sbe_msg *resp, u64 rc)
> +{
> +	resp->reg[0] |= (rc << SBE_STATUS_PRI_SHIFT);
> +}
> +
> +static u64 sbe_rreg(u32 chip_id, u64 reg)
> +{
> +	u64 data = 0;
> +
> +	xscom_read(chip_id, reg, &data);

Error handling ?

> +	return be64_to_cpu(data);
> +}
> +
> +static void sbe_reg_dump(u32 chip_id)
> +{
> +#define SBE_DUMP_REG_ONE(chip_id, x) \
> +	prlog(PR_DEBUG, "  %20s: %016llx\n", #x, sbe_rreg(chip_id, x));
> +
> +	prlog(PR_DEBUG, "MBOX register dump for chip : %x\n", chip_id);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_SBE_DOORBELL_REG_RW);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG0);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG1);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG2);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG3);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_DOORBELL_REG_RW);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG4);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG5);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG6);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG7);
> +}
> +
> +static u64 sbe_get_cmdclass_timeout(u16 cmdclass)
> +{
> +	u8 class = (u8)(cmdclass >> 8) & 0xff;
> +
> +	switch (class) {
> +	case SBE_MCLASS_CORE_STATE:
> +	/* fall through */
> +	case SBE_MCLASS_TIMER:
> +	case SBE_MCLASS_MPIPL:
> +	case SBE_MCLASS_SECURITY:
> +	case SBE_MCLASS_GENERIC:
> +		if (cmdclass == SBE_CMD_QUIESCE_SBE) /* Special case */
> +			return SBE_CMD_TIMEOUT_LONG_MS;
> +		return SBE_CMD_TIMEOUT_SHORT_MS;
> +	case SBE_MCLASS_SCOM:
> +	/* fall through */
> +	case SBE_MCLASS_RING:
> +		return SBE_CMD_TIMEOUT_LONG_MS;
> +	default:
> +		prlog(PR_ERR, "Invalid command class\n");
> +		return 0;
> +	}
> +}
> +
> +void sbe_freemsg(struct sbe_msg *msg)
> +{
> +	free(msg);
> +}
> +
> +static void sbe_fillmsg(struct sbe_msg *msg, u16 cmd,
> +			u16 ctrl_flag, u64 reg1, u64 reg2, u64 reg3)
> +{
> +	bool response = !!(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
> +
> +	/* Seqence ID is filled by sbe_queue_msg() */
> +	msg->reg[0] = ((u64)ctrl_flag << 32) | cmd;
> +	msg->reg[1] = reg1;
> +	msg->reg[2] = reg2;
> +	msg->reg[3] = reg3;
> +
> +	prlog(PR_TRACE, "MBOX message :\n\t Reg0 : %llx\n\t "
> +	      "Reg1 : %llx\n\t Reg2 : %llx\n\t Reg3 : %lld\n",
> +	      msg->reg[0], msg->reg[1], msg->reg[2], msg->reg[3]);

That trace is more useful at the point where the message is queued.

> +	msg->state = sbe_msg_unused;
> +	msg->response = response;
> +}
> +
> +/*
> + * Handles "command with direct data" format only.
> + *
> + * Note: All mbox messages of our interest uses direct data format. if we need
> + *       need indirect data format then we may have to enhance this function.

How does indirect work, can you tell me more ?

> +struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag,
> +			  u64 reg1, u64 reg2, u64 reg3)
> +{
> +	struct sbe_msg *msg;
> +
> +	msg = zalloc(sizeof(struct sbe_msg));
> +	if (!msg) {
> +		prlog(PR_ERR, "Failed to allocate memory for SBE message\n");
> +		return NULL;
> +	}
> +
> +	sbe_fillmsg(msg, cmd, ctrl_flag, reg1, reg2, reg3);
> +	return msg;
> +}
> +
> +static inline bool sbe_busy(struct sbe_msg *msg)
> +{
> +	switch (msg->state) {
> +	case sbe_msg_unused:
> +	case sbe_msg_queued:
> +	case sbe_msg_done:
> +	case sbe_msg_timeout:
> +	case sbe_msg_cancelled:

Your enum has 8 states, it's easier to test the 3 that
return true than the 5 that return false no ? Also this
isn't clear what that means, can you elaborate ?

> +		return false;
> +	default:	/* + sbe_msg_response, sbe_msg_sent, sbe_msg_wresp */
> +		break;
> +	}
> +
> +	return true;
> +}
> +
> +/* Read SBE doorbell to confirm its ready to accept command. */
> +static bool sbe_hw_ready(u32 chip_id)
>  {
>  	int rc;
>  	u64 data;
> +
> +	rc = xscom_read(chip_id, PSU_SBE_DOORBELL_REG_RW, &data);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to read Host to SBE doorbell register "
> +		      "[chip id = %x]\n", chip_id);
> +		return false;
> +	}
> +
> +	/* Waiting for SBE to process previous message */
> +	if (data & HOST_SBE_MSG_WAITING)
> +		return false;

So when you try to send 2 msg back to back, the first one gets sent,
which sets the above right ?

Then the SBE clears it ? How long does it take ? Do we get an interrupt
to know it's been cleared ? Otherwise how do we know we can send the
next message if it's not a message that takes a response ?

> +	return true;
> +}
> +
> +static int sbe_msg_send(u32 chip_id, struct sbe_msg *msg)
> +{
> +	int rc, i;
> +	u64 addr, *data;
> +
> +	addr = PSU_HOST_SBE_MBOX_REG0;
> +	data = &msg->reg[0];
> +
> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
> +		rc = xscom_write(chip_id, addr, *data);
> +		if (rc)
> +			return rc;
> +
> +		addr++;
> +		data++;
> +	}
> +
> +	rc = xscom_write(chip_id, PSU_SBE_DOORBELL_REG_OR,
> +			 HOST_SBE_MSG_WAITING);
> +	if (rc == OPAL_SUCCESS)
> +		msg->state = sbe_msg_sent;

I would do the state change in the caller

> +
> +	return rc;
> +}
> +
> +static int sbe_msg_receive(u32 chip_id, struct sbe_msg *resp)
> +{
> +	int i;
> +	int rc = OPAL_SUCCESS;
> +	u64 addr, *data;
> +
> +	addr = PSU_HOST_SBE_MBOX_REG4;
> +	data = &resp->reg[0];
> +
> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
> +		rc = xscom_read(chip_id, addr, data);
> +		if (rc)
> +			return rc;
> +
> +		addr++;
> +		data++;
> +	}
> +
> +	return rc;
> +}
> +
> +/* This is called with sbe->lock */
> +static void sbe_msg_complete(struct sbe *sbe, struct sbe_msg *msg)
> +{
> +	prlog(PR_INSANE, "Completing msg [chip id = %x], reg0 : 0x%llx\n",
> +	      sbe->chip_id, msg->reg[0]);
> +
> +	msg->state = sbe_msg_done;
> +	if (msg->response)
> +		msg->resp = sbe->resp;
> +
> +	list_del_from(&sbe->msg_list, &msg->link);
> +
> +	if (msg->complete) {
> +		unlock(&sbe->lock);
> +		msg->complete(msg);
> +		lock(&sbe->lock);
> +	}
> +}
> +
> +/* This is called with sbe->lock */
> +static void sbe_msg_send_complete(struct sbe *sbe, struct sbe_msg *msg)
> +{

Tis should be inside __sbe_poke_queue(), that arbitrary split into 2
functions isn't useful

> +	u64 timeout;
> +
> +	prlog(PR_INSANE, "Completing send [chip id = %x], reg0 : 0x%llx\n",
> +	      sbe->chip_id, msg->reg[0]);
> +
> +	/* Need response */
> +	if (msg->response) {
> +		timeout = sbe_get_cmdclass_timeout(msg->reg[0] & 0xffff);
> +		msg->timeout = mftb() + msecs_to_tb(timeout);
> +		msg->state = sbe_msg_wresp;
> +	} else {
> +		sbe_msg_complete(sbe, msg);

Don't you get some sort of indication from the SBE that the message has
been accepted ? Otherwise how do you know that it's ready to send a new
one and you should process you queue further ?

Also add a clear comment that sbe_msg_complete can drop the lock and
carry that comment accross the call chain.

> +	}
> +}
> +
> +static void __sbe_poke_queue(struct sbe *sbe, struct sbe_msg *msg)
> +{

Call this something like sbe_process_queue

> +	int rc;
> +
> +	if (!sbe_hw_ready(sbe->chip_id))
> +		return;
> +
> +	/* Send message */
> +	rc = sbe_msg_send(sbe->chip_id, msg);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to send message to SBE "
> +		      "[chip id = %x]\n", sbe->chip_id);
> +		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
> +		sbe_msg_complete(sbe, msg);
> +	} else {
> +		sbe_msg_send_complete(sbe, msg);

I'm not sure why that is a separate function. I would "return" at the
end of the error case and just do the rest direclty here.

> +	}
> +}
> +
> +static void sbe_poke_queue(struct sbe *sbe)
> +{
> +	struct sbe_msg *msg;
> +
> +	while (!list_empty(&sbe->msg_list)) {
> +		msg = list_top(&sbe->msg_list, struct sbe_msg, link);
> +		if (sbe_busy(msg))
> +			return;

I don't like that you are using the state of the top message in the
queue to know if the SBE is busy. I'd rather you kept a separate state
of the mbox itself.

> +		__sbe_poke_queue(sbe, msg);
> +	}
> +}
> +
> +/*
> + * WARNING:
> + *         Only one command is accepted in the command buffer until response
> + *         to the command is enqueued in the response buffer by SBE.
> + *
> + *         Head of msg_list contains in-flight message. Hence we should always
> + *         add new message to tail of the list.
> + */
> +int sbe_queue_msg(u32 chip_id, struct sbe_msg *msg,
> +		  void (*comp)(struct sbe_msg *msg))
> +{
>  	struct proc_chip *chip;
> +	struct sbe *sbe;
> +
> +	if (!msg)
> +		return OPAL_PARAMETER;
> +
> +	if (sbe_default_chip_id == -1)
> +		return OPAL_HARDWARE;
> +
> +	/* Default to SBE on master chip */
> +	if (chip_id == -1)
> +		chip = get_chip(sbe_default_chip_id);
> +	else
> +		chip = get_chip(chip_id);
> +
> +	if (chip == NULL || chip->sbe == NULL)
> +		return OPAL_INTERNAL_ERROR;
> +
> +	sbe = chip->sbe;
> +
> +	/* Set completion */
> +	msg->complete = comp;
> +	msg->state = sbe_msg_queued;
> +
> +	lock(&sbe->lock);
> +	/* Update sequence number */
> +	msg->reg[0] = msg->reg[0] | ((u64)sbe->cur_seq << 16);
> +	sbe->cur_seq++;
> +
> +	/* Reset sequence number */
> +	if (sbe->cur_seq == 0xffff)
> +		sbe->cur_seq = 1;
> +
> +	/* Add message to queue */
> +	list_add_tail(&sbe->msg_list, &msg->link);
> +	sbe_poke_queue(sbe);
> +	unlock(&sbe->lock);
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +int sbe_sync_msg(u32 chip_id, struct sbe_msg *msg, bool autofree)
> +{
> +	int rc = OPAL_SUCCESS;
> +
> +	rc = sbe_queue_msg(chip_id, msg, NULL);
> +	if (rc)
> +		goto free_msg;
> +
> +	while (msg->state != sbe_msg_done &&
> +	       msg->state != sbe_msg_cancelled) {
> +		cpu_relax();
> +		sbe_timeout_poll(NULL);
> +	}
> 
What about timeout and other non-sense states ?

> +	if (msg->state == sbe_msg_done)
> +		rc = 0;
> +	else
> +		rc = -1;
> +
> +	if (msg->response && msg->resp)
> +		rc = sbe_get_primary_rc(msg->resp);
> +
> +free_msg:
> +	if (autofree)
> +		sbe_freemsg(msg);
> +
> +	return rc;
> +}
> +
> +int sbe_cancelmsg(u32 chip_id, struct sbe_msg *msg)
> +{
> +	struct proc_chip *chip;
> +	struct sbe *sbe;
>  
>  	chip = get_chip(chip_id);
> -	if (chip == NULL)
> +	if (chip == NULL || chip->sbe == NULL)
> +		return OPAL_PARAMETER;
> +
> +	sbe = chip->sbe;
> +
> +	lock(&sbe->lock);
> +	if (msg->state != sbe_msg_queued) {
> +		unlock(&sbe->lock);
> +		return OPAL_BUSY;
> +	}
> +
> +	list_del_from(&sbe->msg_list, &msg->link);
> +	msg->state = sbe_msg_cancelled;
> +	unlock(&sbe->lock);

What if that message has a pending response ? You must not lose that
state since the mbox will still send you a response... another reason
to have a mbox state rather than message state.

> +	return OPAL_SUCCESS;
> +}
> +
> +static void sbe_handle_response(struct sbe *sbe)
> +{
> +	u16 send_seq, resp_seq;
> +	int rc;
> +	struct sbe_msg *msg;
> +
> +	if (list_empty(&sbe->msg_list))
> +		return;
> +
> +	/* Clear response registers */
> +	memset(sbe->resp, 0, sizeof(struct sbe_msg));
> +
> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);

So if the message was cancelled you now give its response to whatever
the next one was, doesn't look quite right to me.

> +	rc = sbe_msg_receive(sbe->chip_id, sbe->resp);
> +	if (rc != OPAL_SUCCESS) {
> +		prlog(PR_ERR, "Failed to read response message "
> +		      "[chip id = %x]\n", sbe->chip_id);
> +		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
> +		return;
> +	}
> +
> +	/* Update message state */
> +	msg->state = sbe_msg_response;
> +
> +	/* Validate sequence number */
> +	send_seq = (msg->reg[0] >> 16) & 0xffff;
> +	resp_seq = (sbe->resp->reg[0] >> 16) & 0xffff;
> +	if (send_seq != resp_seq) {
> +		/*
> +		 * XXX Reset SBE and repost message.
> +		 *     Lets send sequence error to caller until SBE reset works.
> +		 */
> +		prlog(PR_ERR, "Invalid sequence id [chip id = %x]\n",
> +		      sbe->chip_id);
> +		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_SEQ_ERR);
>  		return;
> +	}
> +}
> +
> +void sbe_interrupt(uint32_t chip_id)
> +{
> +	bool msg_complete = false;
> +	int rc;
> +	u64 data = 0;
> +	struct proc_chip *chip;
> +	struct sbe_msg *msg;
> +	struct sbe *sbe;
> +
> +	chip = get_chip(chip_id);
> +	if (chip == NULL || chip->sbe == NULL)
> +		return;
> +	sbe = chip->sbe;
>  
>  	/* Read doorbell register */
>  	rc = xscom_read(chip_id, PSU_HOST_DOORBELL_REG_RW, &data);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to read SBE to Host doorbell register "
>  		      "[chip id = %x]\n", chip_id);
> -		goto clr_interrupt;
> +		sbe_reg_dump(chip_id);
> +		return;
>  	}
>  
> -	/* SBE passtrhough command, call prd handler */
> -	if (data & SBE_HOST_PASSTHROUGH) {
> -		prd_sbe_passthrough(chip_id);
> +	/* Called from poller path (sbe_timeout_poll), nothing to process */
> +	if (!data)
> +		return;
> +
> +	/* Read SBE response before clearing doorbell register */
> +	if (data & SBE_HOST_RESPONSE_WAITING) {
> +		lock(&sbe->lock);
> +		sbe_handle_response(sbe);
> +		msg_complete = true;
> +		unlock(&sbe->lock);
>  	}
>  
> -clr_interrupt:
> -	/* Clears all the bits */
> +	/* Clear doorbell register so that we can send next message */
>  	rc = xscom_write(chip_id, PSU_HOST_DOORBELL_REG_AND,
>  			 SBE_HOST_RESPONSE_CLEAR);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to clear SBE to Host doorbell "
>  		      "register [chip id = %x]\n", chip_id);
> +		return;
> +	}
> +
> +	/* SBE came back from reset */
> +	if (data & SBE_HOST_RESET) {
> +		prlog(PR_NOTICE, "Came back from reset [chip id = %x]\n",
> +		      chip_id);
> +
> +		/* Repost message */
> +		lock(&sbe->lock);
> +		if (!list_empty(&sbe->msg_list)) {
> +			msg = list_top(&sbe->msg_list, struct sbe_msg, link);
> +			msg->state = sbe_msg_queued;
> +			sbe_poke_queue(sbe);
> +		}
> +		unlock(&sbe->lock);
> +		return;
> +	}
> +
> +	/* SBE passthrough command, call prd handler */
> +	if (data & SBE_HOST_PASSTHROUGH) {
> +		prd_sbe_passthrough(chip_id);
> +	}
> +
> +	/* Handle SBE response */
> +	if (msg_complete) {
> +		lock(&sbe->lock);
> +		if (!list_empty(&sbe->msg_list)) {
> +			msg = list_top(&sbe->msg_list, struct sbe_msg, link);
> +			sbe_msg_complete(sbe, msg);
> +		}
> +		sbe_poke_queue(sbe);
> +		unlock(&sbe->lock);
> +	}
> +}

Lots of lock/unlock happening here. It might be worth to have
sbe_msg_complete be called sbe_msg_complete_unlock and have it return
with the lock dropped, and same with sbe_handle_response(). Another
option is to organize things a bit differently here. You have
sbe_handle_response() not complete directly, but return the completed
message. Then you can do all the other stuff you have to do, and
finally call complete() with the lock dropped at the end of the
function after you're re-poked the queue.

Also your current code is racy vs. cancel. You really need to look into
that a bit more.

> +static void sbe_timeout_poll(void *user_data __unused)
> +{
> +	u64 now = mftb();
> +	struct sbe *sbe;
> +	struct sbe_msg *msg;
> +	struct proc_chip *chip;
> +
> +	for_each_chip(chip) {
> +		if (chip->sbe == NULL)
> +			continue;
> +
> +		sbe = chip->sbe;
> +		if (list_empty(&sbe->msg_list))
> +			continue;
> +
> +		/*
> +		 * In some cases there will be a delay in calling OPAL interrupt
> +		 * handler routine (opal_handle_interrupt). In such cases its
> +		 * possible that SBE has responded, but OPAL didn't act on that.
> +		 * Hence check for SBE response.
> +		 */
> +		sbe_interrupt(sbe->chip_id);
> +
> +		lock(&sbe->lock);
> +		if (list_empty(&sbe->msg_list)) {
> +			unlock(&sbe->lock);
> +			continue;
> +		}
> +
> +		msg = list_top(&sbe->msg_list, struct sbe_msg, link);
> +		if (!sbe_busy(msg)) {	/* Message not yet sent to SBE */
> +			sbe_poke_queue(sbe);
> +			unlock(&sbe->lock);
> +			continue;
> +		}
> +
> +		if (tb_compare(now, msg->timeout) != TB_AAFTERB) {
> +			unlock(&sbe->lock);
> +			continue;
> +		}
> +
> +		/* Message timeout */
> +		prlog(PR_ERR, "Message timeout [chip id = %x], "
> +		      "cmd = %llx, subcmd = %llx\n", sbe->chip_id,
> +		      (msg->reg[0] >> 8) & 0xff, msg->reg[0] & 0xff);
> +		sbe_reg_dump(sbe->chip_id);
> +		sbe->resp->state = sbe_msg_timeout;
> +		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
> +
> +		sbe_msg_complete(sbe, msg);
> +		sbe_poke_queue(sbe);
> +		unlock(&sbe->lock);
>  	}
>  }
> +
> +void sbe_init(void)
> +{
> +	struct dt_node *xn;
> +	struct proc_chip *chip;
> +	struct sbe *sbe;
> +
> +	if (proc_gen < proc_gen_p9)
> +		return;
> +
> +	dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
> +		sbe = zalloc(sizeof(struct sbe));
> +		assert(sbe);
> +		sbe->resp = zalloc(sizeof(struct sbe_msg));
> +		assert(sbe->resp);
> +		sbe->chip_id = dt_get_chip_id(xn);
> +		sbe->cur_seq = 1;
> +		list_head_init(&sbe->msg_list);
> +		init_lock(&sbe->lock);
> +
> +		chip = get_chip(sbe->chip_id);
> +		assert(chip);
> +		chip->sbe = sbe;
> +
> +		if (dt_has_node_property(xn, "primary", NULL)) {
> +			sbe_default_chip_id = sbe->chip_id;
> +			prlog(PR_DEBUG, "Master chip id : %x\n", sbe->chip_id);
> +		}
> +	}
> +
> +	if (sbe_default_chip_id == -1) {
> +		prlog(PR_ERR, "Master chip ID not found.\n");
> +		return;
> +	}
> +
> +	/* Initiate SBE timeout poller */
> +	opal_add_poller(sbe_timeout_poll, NULL);
> +}
> diff --git a/include/chip.h b/include/chip.h
> index e25f54d..01a3a61 100644
> --- a/include/chip.h
> +++ b/include/chip.h
> @@ -108,6 +108,7 @@ struct centaur_chip;
>  struct mfsi;
>  struct xive;
>  struct lpcm;
> +struct sbe;
>  
>  /* Chip type */
>  enum proc_chip_type {
> @@ -208,6 +209,9 @@ struct proc_chip {
>  
>  	/* Used by hw/xive.c */
>  	struct xive		*xive;
> +
> +	/* Used by hw/sbe-p9.c */
> +	struct sbe		*sbe;
>  };
>  
>  extern uint32_t pir_to_chip_id(uint32_t pir);
> diff --git a/include/sbe-p9.h b/include/sbe-p9.h
> index cca2100..c5616ba 100644
> --- a/include/sbe-p9.h
> +++ b/include/sbe-p9.h
> @@ -21,13 +21,209 @@
>  #include <ccan/list/list.h>
>  #include <ccan/short_types/short_types.h>
>  
> +/*
> + * Timeout value for short running commands is 100 msecs and
> + * the timeout value for long running commands is 30 secs.
> + */
> +#define SBE_CMD_TIMEOUT_SHORT_MS		100
> +#define SBE_CMD_TIMEOUT_LONG_MS			(30 * 1000)
> +
> +/* Primary response status code */
> +#define SBE_STATUS_PRI_SUCCESS			0x00
> +#define SBE_STATUS_PRI_INVALID_CMD		0x01
> +#define SBE_STATUS_PRI_INVALID_DATA		0x02
> +#define SBE_STATUS_PRI_SEQ_ERR			0x03
> +#define SBE_STATUS_PRI_INTERNAL_ERR		0x04
> +#define SBE_STATUS_PRI_GENERIC_ERR		0xFE
> +
> +/* Secondary response status code */
> +#define SBE_STATUS_SEC_SUCCESS			0x00
> +#define SBE_STATUS_SEC_CMD_CLASS_UNSUPPORTED	0x01
> +#define SBE_STATUS_SEC_CMD_UNSUPPORTED		0x02
> +#define SBE_STATUS_SEC_INV_ADDR			0x03
> +#define SBE_STATUS_SEC_INV_TARGET_TYPE		0x04
> +#define SBE_STATUS_SEC_INV_TARGET_ID		0x05
> +#define SBE_STATUS_SEC_TARGET_NOT_PRESENT	0x06
> +#define SBE_STATUS_SEC_TARGET_NOT_FUNC		0x07
> +#define SBE_STATUS_SEC_CMD_NOT_ALLOW		0x08
> +#define SBE_STATUS_SEC_FUNC_NOT_SUPPORTED	0x09
> +#define SBE_STATUS_SEC_GENERIC_ERR		0x0A
> +#define SBE_STATUS_SEC_BLACKLIST_REG		0x0B
> +#define SBE_STATUS_SEC_OS_FAILURE		0x0C
> +#define SBE_STATUS_SEC_MBX_REG_FAILURE		0x0D
> +#define SBE_STATUS_SEC_INSUFFICIENT_DATA	0x0E
> +#define SBE_STATUS_SEC_EXCESS_DATA		0x0F
> +#define SBE_STATUS_SEC_BUSY			0x10
> +#define SBE_STATUS_SEC_UNSEC_REGION_NOT_FOUND	0x11
> +#define SBE_STATUS_SEC_UNSEC_REGION_EXCEEDED	0x12
> +
> +/* Number of MBOX register on each side */
> +#define NR_HOST_SBE_MBOX_REG		0x04
> +
> +/*
> + * SBE MBOX register address
> + *   Reg 0 - 3 : Host to send command packets to SBE
> + *   Reg 4 - 7 : SBE to send response packets to Host
> + */
> +#define PSU_HOST_SBE_MBOX_REG0		0x000D0050
> +#define PSU_HOST_SBE_MBOX_REG1		0x000D0051
> +#define PSU_HOST_SBE_MBOX_REG2		0x000D0052
> +#define PSU_HOST_SBE_MBOX_REG3		0x000D0053
> +#define PSU_HOST_SBE_MBOX_REG4		0x000D0054
> +#define PSU_HOST_SBE_MBOX_REG5		0x000D0055
> +#define PSU_HOST_SBE_MBOX_REG6		0x000D0056
> +#define PSU_HOST_SBE_MBOX_REG7		0x000D0057
> +#define PSU_SBE_DOORBELL_REG_RW		0x000D0060
> +#define PSU_SBE_DOORBELL_REG_AND	0x000D0061
> +#define PSU_SBE_DOORBELL_REG_OR		0x000D0062
>  #define PSU_HOST_DOORBELL_REG_RW	0x000D0063
>  #define PSU_HOST_DOORBELL_REG_AND	0x000D0064
>  #define PSU_HOST_DOORBELL_REG_OR	0x000D0065
>  
> +/*
> + * Doorbell register to trigger SBE interrupt. Set by OPAL to inform
> + * the SBE about a waiting message in the Host/SBE mailbox registers
> + */
> +#define HOST_SBE_MSG_WAITING		PPC_BIT(0)
> +
> +/*
> + * Doorbell register for host bridge interrupt. Set by the SBE to inform
> + * host about a response message in the Host/SBE mailbox registers
> + */
> +#define SBE_HOST_RESPONSE_WAITING	PPC_BIT(0)
> +#define SBE_HOST_MSG_READ		PPC_BIT(1)
> +#define SBE_HOST_STOP15_EXIT		PPC_BIT(2)
> +#define SBE_HOST_RESET			PPC_BIT(3)
>  #define SBE_HOST_PASSTHROUGH		PPC_BIT(4)
> +#define SBE_HOST_TIMER_EXPIRY		PPC_BIT(14)
>  #define SBE_HOST_RESPONSE_CLEAR		0x00
>  
> +/* SBE Target Type */
> +#define SBE_TARGET_TYPE_PROC		0x00
> +#define SBE_TARGET_TYPE_EX		0x01
> +#define SBE_TARGET_TYPE_PERV		0x02
> +#define SBE_TARGET_TYPE_MCS		0x03
> +#define SBE_TARGET_TYPE_EQ		0x04
> +#define SBE_TARGET_TYPE_CORE		0x05
> +
> +/* SBE MBOX command class */
> +#define SBE_MCLASS_FIRST		0xD1
> +#define SBE_MCLASS_CORE_STATE		0xD1
> +#define SBE_MCLASS_SCOM			0xD2
> +#define SBE_MCLASS_RING			0xD3
> +#define SBE_MCLASS_TIMER		0xD4
> +#define SBE_MCLASS_MPIPL		0xD5
> +#define SBE_MCLASS_SECURITY		0xD6
> +#define SBE_MCLASS_GENERIC		0xD7
> +#define SBE_MCLASS_LAST			0xD7
> +
> +/*
> + * Commands are provided in xxyy form where:
> + *   - xx : command class
> + *   - yy : command
> + *
> + * Both request and response message uses same seq ID,
> + * command class and command.
> + */
> +#define SBE_CMD_CTRL_DEADMAN_LOOP	0xD101
> +#define SBE_CMD_MULTI_SCOM		0xD201
> +#define SBE_CMD_PUT_RING_FORM_IMAGE	0xD301
> +#define SBE_CMD_CONTROL_TIMER		0xD401
> +#define SBE_CMD_GET_ARCHITECTED_REG	0xD501
> +#define SBE_CMD_CLR_ARCHITECTED_REG	0xD502
> +#define SBE_CMD_SET_UNSEC_MEM_WINDOW	0xD601
> +#define SBE_CMD_GET_SBE_FFDC		0xD701
> +#define SBE_CMD_GET_CAPABILITY		0xD702
> +#define SBE_CMD_GET_FREQUENCIES		0xD703
> +#define SBE_CMD_SET_FFDC_ADDR		0xD704
> +#define SBE_CMD_QUIESCE_SBE		0xD705
> +#define SBE_CMD_SET_FABRIC_ID_MAP	0xD706
> +#define SBE_CMD_STASH_MPIPL_CONFIG	0xD707
> +
> +/* SBE MBOX control flags */
> +
> +/* Generic flags */
> +#define SBE_CMD_CTRL_RESP_REQ		0x0100
> +#define SBE_CMD_CTRL_ACK_REQ		0x0200
> +
> +/* Deadman loop */
> +#define CTRL_DEADMAN_LOOP_START		0x0001
> +#define CTRL_DEADMAN_LOOP_STOP		0x0002
> +
> +/* Control timer */
> +#define CONTROL_TIMER_START		0x0001
> +#define CONTROL_TIMER_STOP		0x0002
> +
> +/* SBE message state */
> +enum sbe_msg_state {
> +	sbe_msg_unused = 0,	/* Free */
> +	sbe_msg_queued,		/* Queued to SBE list */
> +	sbe_msg_sent,		/* Sent to SBE */
> +	sbe_msg_wresp,		/* Waiting for response */
> +	sbe_msg_response,	/* Got response */
> +	sbe_msg_done,		/* Complete */
> +	sbe_msg_timeout,	/* Message timeout */
> +	sbe_msg_cancelled,	/* Message removed from queue */
> +};
> +
> +/* SBE message */
> +struct sbe_msg {
> +	/*
> +	 * Reg[0] :
> +	 *   word0 :
> +	 *     direct cmd  : reserved << 16 | ctrl flag
> +	 *     indirect cmd: mem_addr_size_dword
> +	 *     response    : primary status << 16 | secondary status
> +	 *
> +	 *   word1 : seq id << 16 | cmd class << 8 | cmd
> +	 *
> +	 * WARNING:
> +	 *   - Don't populate reg[0].seq (byte 4,5). This will be populated by
> +	 *     sbe_queue_msg().
> +	 */
> +	u64	reg[4];
> +
> +	/* cmd timout : mftb() + msecs_to_tb(SBE_CMD_TIMEOUT_[SHORT/LONG]_MS) */
> +	u64	timeout;
> +
> +	/* Completion function */
> +	void (*complete)(struct sbe_msg *msg);
> +	void *user_data;
> +
> +	/* Current msg state */
> +	enum sbe_msg_state	state;
> +
> +	/* Set if the message expects a response */
> +	bool			response;
> +
> +	/* Points to sbe->resp, which contains response message */
> +	struct sbe_msg		*resp;
> +
> +	/* Internal queuing */
> +	struct list_node	link;
> +} __packed;
> +
> +
> +/* Allocate and populate sbe_msg structure */
> +extern struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag, u64 reg1,
> +				 u64 reg2, u64 reg3) __warn_unused_result;
> +
> +/* Free sbe_msg structure */
> +extern void sbe_freemsg(struct sbe_msg *msg);
> +
> +/* Add new message to sbe queue */
> +extern int sbe_queue_msg(uint32_t chip_id, struct sbe_msg *msg,
> +		  void (*comp)(struct sbe_msg *msg)) __warn_unused_result;
> +
> +/* Synchronously send message to SBE */
> +extern int sbe_sync_msg(u32 chip_id, struct sbe_msg *msg, bool autofree);
> +
> +/* Remove message from SBE queue, it will not remove inflight message */
> +extern int sbe_cancelmsg(u32 chip_id, struct sbe_msg *msg);
> +
> +/* Initialize the SBE mailbox driver */
> +extern void sbe_init(void);
> +
>  /* SBE interrupt */
>  extern void sbe_interrupt(uint32_t chip_id);
>
Vasant Hegde July 24, 2017, 2:40 p.m. | #2
On 07/24/2017 04:03 AM, Benjamin Herrenschmidt wrote:
> On Sun, 2017-07-23 at 17:00 +0530, Vasant Hegde wrote:
>>
>> +	/*
>> +	 * SBE uses TB value for scheduling timer. Hence init after
>> +	 * chiptod init
>> +	 */
>> +	sbe_init();
>> +
>
> 	p9_sbe_init();


Sure. Will rename all functions.

will change P8 related function names as well (like p8_sbe_init_timer).

>
>>  	/* Initialize i2c */
>>  	p8_i2c_init();
>>
>> diff --git a/hw/sbe-p9.c b/hw/sbe-p9.c
>> index c9c3feb..3af1094 100644
>> --- a/hw/sbe-p9.c
>> +++ b/hw/sbe-p9.c
>> @@ -23,39 +23,613 @@
>>  #include <sbe-p9.h>
>>  #include <skiboot.h>
>>  #include <timebase.h>
>> -#include <timer.h>
>>  #include <trace.h>
>>  #include <xscom.h>
>
> Add some comment documenting the protocol & doorbell register use.

Sure.

>
>> -void sbe_interrupt(uint32_t chip_id)
>> +struct sbe {
>
> p9_sbe... (and everything else here)

Ok.

.../...

>> +
>> +static u64 sbe_rreg(u32 chip_id, u64 reg)
>> +{
>> +	u64 data = 0;
>> +
>> +	xscom_read(chip_id, reg, &data);
>
> Error handling ?

Missed to handle error here. Will add check .

.../...

>> +
>> +static void sbe_fillmsg(struct sbe_msg *msg, u16 cmd,
>> +			u16 ctrl_flag, u64 reg1, u64 reg2, u64 reg3)
>> +{
>> +	bool response = !!(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
>> +
>> +	/* Seqence ID is filled by sbe_queue_msg() */
>> +	msg->reg[0] = ((u64)ctrl_flag << 32) | cmd;
>> +	msg->reg[1] = reg1;
>> +	msg->reg[2] = reg2;
>> +	msg->reg[3] = reg3;
>> +
>> +	prlog(PR_TRACE, "MBOX message :\n\t Reg0 : %llx\n\t "
>> +	      "Reg1 : %llx\n\t Reg2 : %llx\n\t Reg3 : %lld\n",
>> +	      msg->reg[0], msg->reg[1], msg->reg[2], msg->reg[3]);
>
> That trace is more useful at the point where the message is queued.

Yes. Will fix .

>
>> +	msg->state = sbe_msg_unused;
>> +	msg->response = response;
>> +}
>> +
>> +/*
>> + * Handles "command with direct data" format only.
>> + *
>> + * Note: All mbox messages of our interest uses direct data format. if we need
>> + *       need indirect data format then we may have to enhance this function.
>
> How does indirect work, can you tell me more ?

In case of direct message data is passed as part of mbox message itself (via 
registers).
But in case of indirect data is passed via memory and we pass memory address in 
mbox command.

 From implementation point of view their is a difference in message format.

register 0 -> word 0 :
	direct cmd  : reserved << 16 | ctrl flag
	indirect cmd: mem_addr_size_dword

Presently this function handles only direct command. Whenever we need support 
for indirect data format, we can tweak this function or add wrapper around this 
function.


>
>> +struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag,
>> +			  u64 reg1, u64 reg2, u64 reg3)
>> +{
>> +	struct sbe_msg *msg;
>> +
>> +	msg = zalloc(sizeof(struct sbe_msg));
>> +	if (!msg) {
>> +		prlog(PR_ERR, "Failed to allocate memory for SBE message\n");
>> +		return NULL;
>> +	}
>> +
>> +	sbe_fillmsg(msg, cmd, ctrl_flag, reg1, reg2, reg3);
>> +	return msg;
>> +}
>> +
>> +static inline bool sbe_busy(struct sbe_msg *msg)
>> +{
>> +	switch (msg->state) {
>> +	case sbe_msg_unused:
>> +	case sbe_msg_queued:
>> +	case sbe_msg_done:
>> +	case sbe_msg_timeout:
>> +	case sbe_msg_cancelled:
>
> Your enum has 8 states, it's easier to test the 3 that
> return true than the 5 that return false no ?

Yes. You are right. Also may be I can get rid of sbe_msg_timeout .

 > Also this
> isn't clear what that means, can you elaborate ?


	

I've added comment in header file.

159         sbe_msg_unused = 0,     /* Free */
160         sbe_msg_queued,         /* Queued to SBE list */
161         sbe_msg_sent,           /* Sent to SBE */
162         sbe_msg_wresp,          /* Waiting for response */
163         sbe_msg_response,       /* Got response */
164         sbe_msg_done,           /* Complete */
165         sbe_msg_timeout,        /* Message timeout */
166         sbe_msg_cancelled,      /* Message removed from queue */


>
>> +		return false;
>> +	default:	/* + sbe_msg_response, sbe_msg_sent, sbe_msg_wresp */
>> +		break;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +/* Read SBE doorbell to confirm its ready to accept command. */
>> +static bool sbe_hw_ready(u32 chip_id)
>>  {
>>  	int rc;
>>  	u64 data;
>> +
>> +	rc = xscom_read(chip_id, PSU_SBE_DOORBELL_REG_RW, &data);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to read Host to SBE doorbell register "
>> +		      "[chip id = %x]\n", chip_id);
>> +		return false;
>> +	}
>> +
>> +	/* Waiting for SBE to process previous message */
>> +	if (data & HOST_SBE_MSG_WAITING)
>> +		return false;
>
> So when you try to send 2 msg back to back, the first one gets sent,
> which sets the above right ?

We populate data registers and then enable above bit in doorbell register.
This will trigger interrupt to SBE.

Yes. First message sets this bit.


>
> Then the SBE clears it ? How long does it take ? Do we get an interrupt
> to know it's been cleared ? Otherwise how do we know we can send the
> next message if it's not a message that takes a response ?

Yes. SBE clears it. I didn't profile how long it takes. Presently I'm assuming 
once SBE clears
this bit we are good to proceed.

And yes, for messages that doesn't need response I'm not waiting. That needs to 
be fixed.

Actually we can request for acknowledgment from SBE (like part of control flag 
in message header).
SBE will read message and sends ACK interrupt to host.

We can enforce this before sending message to SBE. But we have to handle extra 
interrupt.
Is it fine to handle extra interrupt (specially for timer messages)?


>
>> +	return true;
>> +}
>> +
>> +static int sbe_msg_send(u32 chip_id, struct sbe_msg *msg)
>> +{
>> +	int rc, i;
>> +	u64 addr, *data;
>> +
>> +	addr = PSU_HOST_SBE_MBOX_REG0;
>> +	data = &msg->reg[0];
>> +
>> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
>> +		rc = xscom_write(chip_id, addr, *data);
>> +		if (rc)
>> +			return rc;
>> +
>> +		addr++;
>> +		data++;
>> +	}
>> +
>> +	rc = xscom_write(chip_id, PSU_SBE_DOORBELL_REG_OR,
>> +			 HOST_SBE_MSG_WAITING);
>> +	if (rc == OPAL_SUCCESS)
>> +		msg->state = sbe_msg_sent;
>
> I would do the state change in the caller

Makes sense. Will fix in next iteration.

>
>> +
>> +	return rc;
>> +}
>> +
>> +static int sbe_msg_receive(u32 chip_id, struct sbe_msg *resp)
>> +{
>> +	int i;
>> +	int rc = OPAL_SUCCESS;
>> +	u64 addr, *data;
>> +
>> +	addr = PSU_HOST_SBE_MBOX_REG4;
>> +	data = &resp->reg[0];
>> +
>> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
>> +		rc = xscom_read(chip_id, addr, data);
>> +		if (rc)
>> +			return rc;
>> +
>> +		addr++;
>> +		data++;
>> +	}
>> +
>> +	return rc;
>> +}
>> +
>> +/* This is called with sbe->lock */
>> +static void sbe_msg_complete(struct sbe *sbe, struct sbe_msg *msg)
>> +{
>> +	prlog(PR_INSANE, "Completing msg [chip id = %x], reg0 : 0x%llx\n",
>> +	      sbe->chip_id, msg->reg[0]);
>> +
>> +	msg->state = sbe_msg_done;
>> +	if (msg->response)
>> +		msg->resp = sbe->resp;
>> +
>> +	list_del_from(&sbe->msg_list, &msg->link);
>> +
>> +	if (msg->complete) {
>> +		unlock(&sbe->lock);
>> +		msg->complete(msg);
>> +		lock(&sbe->lock);
>> +	}
>> +}
>> +
>> +/* This is called with sbe->lock */
>> +static void sbe_msg_send_complete(struct sbe *sbe, struct sbe_msg *msg)
>> +{
>
> Tis should be inside __sbe_poke_queue(), that arbitrary split into 2
> functions isn't useful

Makes sense. Will fix in next iteration.

>
>> +	u64 timeout;
>> +
>> +	prlog(PR_INSANE, "Completing send [chip id = %x], reg0 : 0x%llx\n",
>> +	      sbe->chip_id, msg->reg[0]);
>> +
>> +	/* Need response */
>> +	if (msg->response) {
>> +		timeout = sbe_get_cmdclass_timeout(msg->reg[0] & 0xffff);
>> +		msg->timeout = mftb() + msecs_to_tb(timeout);
>> +		msg->state = sbe_msg_wresp;
>> +	} else {
>> +		sbe_msg_complete(sbe, msg);
>
> Don't you get some sort of indication from the SBE that the message has
> been accepted ? Otherwise how do you know that it's ready to send a new
> one and you should process you queue further ?

As described above, we can request for ack from SBE. That way we are sure
that SBE has read the message.

>
> Also add a clear comment that sbe_msg_complete can drop the lock and
> carry that comment accross the call chain.

sure.


>
>> +	}
>> +}
>> +
>> +static void __sbe_poke_queue(struct sbe *sbe, struct sbe_msg *msg)
>> +{
>
> Call this something like sbe_process_queue

Sure.

>
>> +	int rc;
>> +
>> +	if (!sbe_hw_ready(sbe->chip_id))
>> +		return;
>> +
>> +	/* Send message */
>> +	rc = sbe_msg_send(sbe->chip_id, msg);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to send message to SBE "
>> +		      "[chip id = %x]\n", sbe->chip_id);
>> +		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
>> +		sbe_msg_complete(sbe, msg);
>> +	} else {
>> +		sbe_msg_send_complete(sbe, msg);
>
> I'm not sure why that is a separate function. I would "return" at the
> end of the error case and just do the rest direclty here.

Yes, separate function not required.

>
>> +	}
>> +}
>> +
>> +static void sbe_poke_queue(struct sbe *sbe)
>> +{
>> +	struct sbe_msg *msg;
>> +
>> +	while (!list_empty(&sbe->msg_list)) {
>> +		msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>> +		if (sbe_busy(msg))
>> +			return;
>
> I don't like that you are using the state of the top message in the
> queue to know if the SBE is busy. I'd rather you kept a separate state
> of the mbox itself.

Yeah. Separate state of MBOX is required when we handle SBE R/R etc. I thought 
for now
message state is enough. Let me try to introduce/use per SBE state.


>
>> +		__sbe_poke_queue(sbe, msg);
>> +	}
>> +}
>> +
>> +/*
>> + * WARNING:
>> + *         Only one command is accepted in the command buffer until response
>> + *         to the command is enqueued in the response buffer by SBE.
>> + *
>> + *         Head of msg_list contains in-flight message. Hence we should always
>> + *         add new message to tail of the list.
>> + */
>> +int sbe_queue_msg(u32 chip_id, struct sbe_msg *msg,
>> +		  void (*comp)(struct sbe_msg *msg))
>> +{
>>  	struct proc_chip *chip;
>> +	struct sbe *sbe;
>> +
>> +	if (!msg)
>> +		return OPAL_PARAMETER;
>> +
>> +	if (sbe_default_chip_id == -1)
>> +		return OPAL_HARDWARE;
>> +
>> +	/* Default to SBE on master chip */
>> +	if (chip_id == -1)
>> +		chip = get_chip(sbe_default_chip_id);
>> +	else
>> +		chip = get_chip(chip_id);
>> +
>> +	if (chip == NULL || chip->sbe == NULL)
>> +		return OPAL_INTERNAL_ERROR;
>> +
>> +	sbe = chip->sbe;
>> +
>> +	/* Set completion */
>> +	msg->complete = comp;
>> +	msg->state = sbe_msg_queued;
>> +
>> +	lock(&sbe->lock);
>> +	/* Update sequence number */
>> +	msg->reg[0] = msg->reg[0] | ((u64)sbe->cur_seq << 16);
>> +	sbe->cur_seq++;
>> +
>> +	/* Reset sequence number */
>> +	if (sbe->cur_seq == 0xffff)
>> +		sbe->cur_seq = 1;
>> +
>> +	/* Add message to queue */
>> +	list_add_tail(&sbe->msg_list, &msg->link);
>> +	sbe_poke_queue(sbe);
>> +	unlock(&sbe->lock);
>> +
>> +	return OPAL_SUCCESS;
>> +}
>> +
>> +int sbe_sync_msg(u32 chip_id, struct sbe_msg *msg, bool autofree)
>> +{
>> +	int rc = OPAL_SUCCESS;
>> +
>> +	rc = sbe_queue_msg(chip_id, msg, NULL);
>> +	if (rc)
>> +		goto free_msg;
>> +
>> +	while (msg->state != sbe_msg_done &&
>> +	       msg->state != sbe_msg_cancelled) {
>> +		cpu_relax();
>> +		sbe_timeout_poll(NULL);
>> +	}
>>
> What about timeout and other non-sense states ?

Yes. I missed timeout state. Anyway I'm going to remove that state.

>
>> +	if (msg->state == sbe_msg_done)
>> +		rc = 0;
>> +	else
>> +		rc = -1;
>> +
>> +	if (msg->response && msg->resp)
>> +		rc = sbe_get_primary_rc(msg->resp);
>> +
>> +free_msg:
>> +	if (autofree)
>> +		sbe_freemsg(msg);
>> +
>> +	return rc;
>> +}
>> +
>> +int sbe_cancelmsg(u32 chip_id, struct sbe_msg *msg)
>> +{
>> +	struct proc_chip *chip;
>> +	struct sbe *sbe;
>>
>>  	chip = get_chip(chip_id);
>> -	if (chip == NULL)
>> +	if (chip == NULL || chip->sbe == NULL)
>> +		return OPAL_PARAMETER;
>> +
>> +	sbe = chip->sbe;
>> +
>> +	lock(&sbe->lock);
>> +	if (msg->state != sbe_msg_queued) {
>> +		unlock(&sbe->lock);
>> +		return OPAL_BUSY;
>> +	}
>> +
>> +	list_del_from(&sbe->msg_list, &msg->link);
>> +	msg->state = sbe_msg_cancelled;
>> +	unlock(&sbe->lock);
>
> What if that message has a pending response ? You must not lose that
> state since the mbox will still send you a response... another reason
> to have a mbox state rather than message state.

I'm removing messages that are in queued state (not yet sent to SBE). So I think 
above code is fine.

>
>> +	return OPAL_SUCCESS;
>> +}
>> +
>> +static void sbe_handle_response(struct sbe *sbe)
>> +{
>> +	u16 send_seq, resp_seq;
>> +	int rc;
>> +	struct sbe_msg *msg;
>> +
>> +	if (list_empty(&sbe->msg_list))
>> +		return;
>> +
>> +	/* Clear response registers */
>> +	memset(sbe->resp, 0, sizeof(struct sbe_msg));
>> +
>> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>
> So if the message was cancelled you now give its response to whatever
> the next one was, doesn't look quite right to me.

I'm not canceling already queued message. So its fine.

>
>> +	rc = sbe_msg_receive(sbe->chip_id, sbe->resp);
>> +	if (rc != OPAL_SUCCESS) {
>> +		prlog(PR_ERR, "Failed to read response message "
>> +		      "[chip id = %x]\n", sbe->chip_id);
>> +		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
>> +		return;
>> +	}
>> +
>> +	/* Update message state */
>> +	msg->state = sbe_msg_response;
>> +
>> +	/* Validate sequence number */
>> +	send_seq = (msg->reg[0] >> 16) & 0xffff;
>> +	resp_seq = (sbe->resp->reg[0] >> 16) & 0xffff;
>> +	if (send_seq != resp_seq) {
>> +		/*
>> +		 * XXX Reset SBE and repost message.
>> +		 *     Lets send sequence error to caller until SBE reset works.
>> +		 */
>> +		prlog(PR_ERR, "Invalid sequence id [chip id = %x]\n",
>> +		      sbe->chip_id);
>> +		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_SEQ_ERR);
>>  		return;
>> +	}
>> +}
>> +
>> +void sbe_interrupt(uint32_t chip_id)
>> +{
>> +	bool msg_complete = false;
>> +	int rc;
>> +	u64 data = 0;
>> +	struct proc_chip *chip;
>> +	struct sbe_msg *msg;
>> +	struct sbe *sbe;
>> +
>> +	chip = get_chip(chip_id);
>> +	if (chip == NULL || chip->sbe == NULL)
>> +		return;
>> +	sbe = chip->sbe;
>>
>>  	/* Read doorbell register */
>>  	rc = xscom_read(chip_id, PSU_HOST_DOORBELL_REG_RW, &data);
>>  	if (rc) {
>>  		prlog(PR_ERR, "Failed to read SBE to Host doorbell register "
>>  		      "[chip id = %x]\n", chip_id);
>> -		goto clr_interrupt;
>> +		sbe_reg_dump(chip_id);
>> +		return;
>>  	}
>>
>> -	/* SBE passtrhough command, call prd handler */
>> -	if (data & SBE_HOST_PASSTHROUGH) {
>> -		prd_sbe_passthrough(chip_id);
>> +	/* Called from poller path (sbe_timeout_poll), nothing to process */
>> +	if (!data)
>> +		return;
>> +
>> +	/* Read SBE response before clearing doorbell register */
>> +	if (data & SBE_HOST_RESPONSE_WAITING) {
>> +		lock(&sbe->lock);
>> +		sbe_handle_response(sbe);
>> +		msg_complete = true;
>> +		unlock(&sbe->lock);
>>  	}
>>
>> -clr_interrupt:
>> -	/* Clears all the bits */
>> +	/* Clear doorbell register so that we can send next message */
>>  	rc = xscom_write(chip_id, PSU_HOST_DOORBELL_REG_AND,
>>  			 SBE_HOST_RESPONSE_CLEAR);
>>  	if (rc) {
>>  		prlog(PR_ERR, "Failed to clear SBE to Host doorbell "
>>  		      "register [chip id = %x]\n", chip_id);
>> +		return;
>> +	}
>> +
>> +	/* SBE came back from reset */
>> +	if (data & SBE_HOST_RESET) {
>> +		prlog(PR_NOTICE, "Came back from reset [chip id = %x]\n",
>> +		      chip_id);
>> +
>> +		/* Repost message */
>> +		lock(&sbe->lock);
>> +		if (!list_empty(&sbe->msg_list)) {
>> +			msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>> +			msg->state = sbe_msg_queued;
>> +			sbe_poke_queue(sbe);
>> +		}
>> +		unlock(&sbe->lock);
>> +		return;
>> +	}
>> +
>> +	/* SBE passthrough command, call prd handler */
>> +	if (data & SBE_HOST_PASSTHROUGH) {
>> +		prd_sbe_passthrough(chip_id);
>> +	}
>> +
>> +	/* Handle SBE response */
>> +	if (msg_complete) {
>> +		lock(&sbe->lock);
>> +		if (!list_empty(&sbe->msg_list)) {
>> +			msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>> +			sbe_msg_complete(sbe, msg);
>> +		}
>> +		sbe_poke_queue(sbe);
>> +		unlock(&sbe->lock);
>> +	}
>> +}
>
> Lots of lock/unlock happening here. It might be worth to have
> sbe_msg_complete be called sbe_msg_complete_unlock and have it return
> with the lock dropped, and same with sbe_handle_response(). Another
> option is to organize things a bit differently here. You have
> sbe_handle_response() not complete directly, but return the completed
> message. Then you can do all the other stuff you have to do, and
> finally call complete() with the lock dropped at the end of the
> function after you're re-poked the queue.

Yes, too many lock/unlock is happening in this function. Let me try to reognaize 
this function.


>
> Also your current code is racy vs. cancel. You really need to look into
> that a bit more.
>

As mentioned above, I'm cancelling only queued messages (messages that are not 
yet sent to SBE).
I don't see any race here. Did I miss anything here?

Thanks
-Vasant

Patch

diff --git a/core/init.c b/core/init.c
index 02bd30c..ec3c704 100644
--- a/core/init.c
+++ b/core/init.c
@@ -49,6 +49,7 @@ 
 #include <libstb/container.h>
 #include <phys-map.h>
 #include <imc.h>
+#include <sbe-p9.h>
 
 enum proc_gen proc_gen;
 unsigned int pcie_max_link_speed;
@@ -954,6 +955,12 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	 */
 	chiptod_init();
 
+	/*
+	 * SBE uses TB value for scheduling timer. Hence init after
+	 * chiptod init
+	 */
+	sbe_init();
+
 	/* Initialize i2c */
 	p8_i2c_init();
 
diff --git a/hw/sbe-p9.c b/hw/sbe-p9.c
index c9c3feb..3af1094 100644
--- a/hw/sbe-p9.c
+++ b/hw/sbe-p9.c
@@ -23,39 +23,613 @@ 
 #include <sbe-p9.h>
 #include <skiboot.h>
 #include <timebase.h>
-#include <timer.h>
 #include <trace.h>
 #include <xscom.h>
 
-void sbe_interrupt(uint32_t chip_id)
+struct sbe {
+	/* Chip ID to send message */
+	u32	chip_id;
+
+	/* List to hold SBE queue messages */
+	struct list_head	msg_list;
+
+	struct lock lock;
+
+	/* SBE MBOX message sequence number */
+	u16	cur_seq;
+
+	/*
+	 * Sending messages to SBE is serialized. We can use
+	 * single buffer per SBE to hold response message.
+	 */
+	struct sbe_msg *resp;
+};
+
+/* Default SBE chip ID */
+static int sbe_default_chip_id = -1;
+
+#define SBE_STATUS_PRI_SHIFT	0x30
+#define SBE_STATUS_SEC_SHIFT	0x20
+#define SBE_FFDC_PRESENT	PPC_BIT(1)
+
+static void sbe_timeout_poll(void *user_data __unused);
+
+static inline bool sbe_ffdc_present(struct sbe_msg *resp)
+{
+	return (resp->reg[0] & SBE_FFDC_PRESENT);
+}
+
+/*
+ * bit 0    : PCBPIB status present in response
+ * bit 1    : FFDC package(s) present in response
+ * bit 2-3  : Reserved
+ * bit 4-15 : Primary status code
+ */
+static inline u16 sbe_get_primary_rc(struct sbe_msg *resp)
+{
+	return ((resp->reg[0] >> SBE_STATUS_PRI_SHIFT) & 0xfff);
+}
+
+static inline void sbe_set_primary_rc(struct sbe_msg *resp, u64 rc)
+{
+	resp->reg[0] |= (rc << SBE_STATUS_PRI_SHIFT);
+}
+
+static u64 sbe_rreg(u32 chip_id, u64 reg)
+{
+	u64 data = 0;
+
+	xscom_read(chip_id, reg, &data);
+	return be64_to_cpu(data);
+}
+
+static void sbe_reg_dump(u32 chip_id)
+{
+#define SBE_DUMP_REG_ONE(chip_id, x) \
+	prlog(PR_DEBUG, "  %20s: %016llx\n", #x, sbe_rreg(chip_id, x));
+
+	prlog(PR_DEBUG, "MBOX register dump for chip : %x\n", chip_id);
+	SBE_DUMP_REG_ONE(chip_id, PSU_SBE_DOORBELL_REG_RW);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG0);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG1);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG2);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG3);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_DOORBELL_REG_RW);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG4);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG5);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG6);
+	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG7);
+}
+
+static u64 sbe_get_cmdclass_timeout(u16 cmdclass)
+{
+	u8 class = (u8)(cmdclass >> 8) & 0xff;
+
+	switch (class) {
+	case SBE_MCLASS_CORE_STATE:
+	/* fall through */
+	case SBE_MCLASS_TIMER:
+	case SBE_MCLASS_MPIPL:
+	case SBE_MCLASS_SECURITY:
+	case SBE_MCLASS_GENERIC:
+		if (cmdclass == SBE_CMD_QUIESCE_SBE) /* Special case */
+			return SBE_CMD_TIMEOUT_LONG_MS;
+		return SBE_CMD_TIMEOUT_SHORT_MS;
+	case SBE_MCLASS_SCOM:
+	/* fall through */
+	case SBE_MCLASS_RING:
+		return SBE_CMD_TIMEOUT_LONG_MS;
+	default:
+		prlog(PR_ERR, "Invalid command class\n");
+		return 0;
+	}
+}
+
+void sbe_freemsg(struct sbe_msg *msg)
+{
+	free(msg);
+}
+
+static void sbe_fillmsg(struct sbe_msg *msg, u16 cmd,
+			u16 ctrl_flag, u64 reg1, u64 reg2, u64 reg3)
+{
+	bool response = !!(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
+
+	/* Seqence ID is filled by sbe_queue_msg() */
+	msg->reg[0] = ((u64)ctrl_flag << 32) | cmd;
+	msg->reg[1] = reg1;
+	msg->reg[2] = reg2;
+	msg->reg[3] = reg3;
+
+	prlog(PR_TRACE, "MBOX message :\n\t Reg0 : %llx\n\t "
+	      "Reg1 : %llx\n\t Reg2 : %llx\n\t Reg3 : %lld\n",
+	      msg->reg[0], msg->reg[1], msg->reg[2], msg->reg[3]);
+
+	msg->state = sbe_msg_unused;
+	msg->response = response;
+}
+
+/*
+ * Handles "command with direct data" format only.
+ *
+ * Note: All mbox messages of our interest uses direct data format. if we need
+ *       need indirect data format then we may have to enhance this function.
+ */
+struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag,
+			  u64 reg1, u64 reg2, u64 reg3)
+{
+	struct sbe_msg *msg;
+
+	msg = zalloc(sizeof(struct sbe_msg));
+	if (!msg) {
+		prlog(PR_ERR, "Failed to allocate memory for SBE message\n");
+		return NULL;
+	}
+
+	sbe_fillmsg(msg, cmd, ctrl_flag, reg1, reg2, reg3);
+	return msg;
+}
+
+static inline bool sbe_busy(struct sbe_msg *msg)
+{
+	switch (msg->state) {
+	case sbe_msg_unused:
+	case sbe_msg_queued:
+	case sbe_msg_done:
+	case sbe_msg_timeout:
+	case sbe_msg_cancelled:
+		return false;
+	default:	/* + sbe_msg_response, sbe_msg_sent, sbe_msg_wresp */
+		break;
+	}
+
+	return true;
+}
+
+/* Read SBE doorbell to confirm its ready to accept command. */
+static bool sbe_hw_ready(u32 chip_id)
 {
 	int rc;
 	u64 data;
+
+	rc = xscom_read(chip_id, PSU_SBE_DOORBELL_REG_RW, &data);
+	if (rc) {
+		prlog(PR_ERR, "Failed to read Host to SBE doorbell register "
+		      "[chip id = %x]\n", chip_id);
+		return false;
+	}
+
+	/* Waiting for SBE to process previous message */
+	if (data & HOST_SBE_MSG_WAITING)
+		return false;
+
+	return true;
+}
+
+static int sbe_msg_send(u32 chip_id, struct sbe_msg *msg)
+{
+	int rc, i;
+	u64 addr, *data;
+
+	addr = PSU_HOST_SBE_MBOX_REG0;
+	data = &msg->reg[0];
+
+	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
+		rc = xscom_write(chip_id, addr, *data);
+		if (rc)
+			return rc;
+
+		addr++;
+		data++;
+	}
+
+	rc = xscom_write(chip_id, PSU_SBE_DOORBELL_REG_OR,
+			 HOST_SBE_MSG_WAITING);
+	if (rc == OPAL_SUCCESS)
+		msg->state = sbe_msg_sent;
+
+	return rc;
+}
+
+static int sbe_msg_receive(u32 chip_id, struct sbe_msg *resp)
+{
+	int i;
+	int rc = OPAL_SUCCESS;
+	u64 addr, *data;
+
+	addr = PSU_HOST_SBE_MBOX_REG4;
+	data = &resp->reg[0];
+
+	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
+		rc = xscom_read(chip_id, addr, data);
+		if (rc)
+			return rc;
+
+		addr++;
+		data++;
+	}
+
+	return rc;
+}
+
+/* This is called with sbe->lock */
+static void sbe_msg_complete(struct sbe *sbe, struct sbe_msg *msg)
+{
+	prlog(PR_INSANE, "Completing msg [chip id = %x], reg0 : 0x%llx\n",
+	      sbe->chip_id, msg->reg[0]);
+
+	msg->state = sbe_msg_done;
+	if (msg->response)
+		msg->resp = sbe->resp;
+
+	list_del_from(&sbe->msg_list, &msg->link);
+
+	if (msg->complete) {
+		unlock(&sbe->lock);
+		msg->complete(msg);
+		lock(&sbe->lock);
+	}
+}
+
+/* This is called with sbe->lock */
+static void sbe_msg_send_complete(struct sbe *sbe, struct sbe_msg *msg)
+{
+	u64 timeout;
+
+	prlog(PR_INSANE, "Completing send [chip id = %x], reg0 : 0x%llx\n",
+	      sbe->chip_id, msg->reg[0]);
+
+	/* Need response */
+	if (msg->response) {
+		timeout = sbe_get_cmdclass_timeout(msg->reg[0] & 0xffff);
+		msg->timeout = mftb() + msecs_to_tb(timeout);
+		msg->state = sbe_msg_wresp;
+	} else {
+		sbe_msg_complete(sbe, msg);
+	}
+}
+
+static void __sbe_poke_queue(struct sbe *sbe, struct sbe_msg *msg)
+{
+	int rc;
+
+	if (!sbe_hw_ready(sbe->chip_id))
+		return;
+
+	/* Send message */
+	rc = sbe_msg_send(sbe->chip_id, msg);
+	if (rc) {
+		prlog(PR_ERR, "Failed to send message to SBE "
+		      "[chip id = %x]\n", sbe->chip_id);
+		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
+		sbe_msg_complete(sbe, msg);
+	} else {
+		sbe_msg_send_complete(sbe, msg);
+	}
+}
+
+static void sbe_poke_queue(struct sbe *sbe)
+{
+	struct sbe_msg *msg;
+
+	while (!list_empty(&sbe->msg_list)) {
+		msg = list_top(&sbe->msg_list, struct sbe_msg, link);
+		if (sbe_busy(msg))
+			return;
+		__sbe_poke_queue(sbe, msg);
+	}
+}
+
+/*
+ * WARNING:
+ *         Only one command is accepted in the command buffer until response
+ *         to the command is enqueued in the response buffer by SBE.
+ *
+ *         Head of msg_list contains in-flight message. Hence we should always
+ *         add new message to tail of the list.
+ */
+int sbe_queue_msg(u32 chip_id, struct sbe_msg *msg,
+		  void (*comp)(struct sbe_msg *msg))
+{
 	struct proc_chip *chip;
+	struct sbe *sbe;
+
+	if (!msg)
+		return OPAL_PARAMETER;
+
+	if (sbe_default_chip_id == -1)
+		return OPAL_HARDWARE;
+
+	/* Default to SBE on master chip */
+	if (chip_id == -1)
+		chip = get_chip(sbe_default_chip_id);
+	else
+		chip = get_chip(chip_id);
+
+	if (chip == NULL || chip->sbe == NULL)
+		return OPAL_INTERNAL_ERROR;
+
+	sbe = chip->sbe;
+
+	/* Set completion */
+	msg->complete = comp;
+	msg->state = sbe_msg_queued;
+
+	lock(&sbe->lock);
+	/* Update sequence number */
+	msg->reg[0] = msg->reg[0] | ((u64)sbe->cur_seq << 16);
+	sbe->cur_seq++;
+
+	/* Reset sequence number */
+	if (sbe->cur_seq == 0xffff)
+		sbe->cur_seq = 1;
+
+	/* Add message to queue */
+	list_add_tail(&sbe->msg_list, &msg->link);
+	sbe_poke_queue(sbe);
+	unlock(&sbe->lock);
+
+	return OPAL_SUCCESS;
+}
+
+int sbe_sync_msg(u32 chip_id, struct sbe_msg *msg, bool autofree)
+{
+	int rc = OPAL_SUCCESS;
+
+	rc = sbe_queue_msg(chip_id, msg, NULL);
+	if (rc)
+		goto free_msg;
+
+	while (msg->state != sbe_msg_done &&
+	       msg->state != sbe_msg_cancelled) {
+		cpu_relax();
+		sbe_timeout_poll(NULL);
+	}
+
+	if (msg->state == sbe_msg_done)
+		rc = 0;
+	else
+		rc = -1;
+
+	if (msg->response && msg->resp)
+		rc = sbe_get_primary_rc(msg->resp);
+
+free_msg:
+	if (autofree)
+		sbe_freemsg(msg);
+
+	return rc;
+}
+
+int sbe_cancelmsg(u32 chip_id, struct sbe_msg *msg)
+{
+	struct proc_chip *chip;
+	struct sbe *sbe;
 
 	chip = get_chip(chip_id);
-	if (chip == NULL)
+	if (chip == NULL || chip->sbe == NULL)
+		return OPAL_PARAMETER;
+
+	sbe = chip->sbe;
+
+	lock(&sbe->lock);
+	if (msg->state != sbe_msg_queued) {
+		unlock(&sbe->lock);
+		return OPAL_BUSY;
+	}
+
+	list_del_from(&sbe->msg_list, &msg->link);
+	msg->state = sbe_msg_cancelled;
+	unlock(&sbe->lock);
+
+	return OPAL_SUCCESS;
+}
+
+static void sbe_handle_response(struct sbe *sbe)
+{
+	u16 send_seq, resp_seq;
+	int rc;
+	struct sbe_msg *msg;
+
+	if (list_empty(&sbe->msg_list))
+		return;
+
+	/* Clear response registers */
+	memset(sbe->resp, 0, sizeof(struct sbe_msg));
+
+	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
+	rc = sbe_msg_receive(sbe->chip_id, sbe->resp);
+	if (rc != OPAL_SUCCESS) {
+		prlog(PR_ERR, "Failed to read response message "
+		      "[chip id = %x]\n", sbe->chip_id);
+		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
+		return;
+	}
+
+	/* Update message state */
+	msg->state = sbe_msg_response;
+
+	/* Validate sequence number */
+	send_seq = (msg->reg[0] >> 16) & 0xffff;
+	resp_seq = (sbe->resp->reg[0] >> 16) & 0xffff;
+	if (send_seq != resp_seq) {
+		/*
+		 * XXX Reset SBE and repost message.
+		 *     Lets send sequence error to caller until SBE reset works.
+		 */
+		prlog(PR_ERR, "Invalid sequence id [chip id = %x]\n",
+		      sbe->chip_id);
+		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_SEQ_ERR);
 		return;
+	}
+}
+
+void sbe_interrupt(uint32_t chip_id)
+{
+	bool msg_complete = false;
+	int rc;
+	u64 data = 0;
+	struct proc_chip *chip;
+	struct sbe_msg *msg;
+	struct sbe *sbe;
+
+	chip = get_chip(chip_id);
+	if (chip == NULL || chip->sbe == NULL)
+		return;
+	sbe = chip->sbe;
 
 	/* Read doorbell register */
 	rc = xscom_read(chip_id, PSU_HOST_DOORBELL_REG_RW, &data);
 	if (rc) {
 		prlog(PR_ERR, "Failed to read SBE to Host doorbell register "
 		      "[chip id = %x]\n", chip_id);
-		goto clr_interrupt;
+		sbe_reg_dump(chip_id);
+		return;
 	}
 
-	/* SBE passtrhough command, call prd handler */
-	if (data & SBE_HOST_PASSTHROUGH) {
-		prd_sbe_passthrough(chip_id);
+	/* Called from poller path (sbe_timeout_poll), nothing to process */
+	if (!data)
+		return;
+
+	/* Read SBE response before clearing doorbell register */
+	if (data & SBE_HOST_RESPONSE_WAITING) {
+		lock(&sbe->lock);
+		sbe_handle_response(sbe);
+		msg_complete = true;
+		unlock(&sbe->lock);
 	}
 
-clr_interrupt:
-	/* Clears all the bits */
+	/* Clear doorbell register so that we can send next message */
 	rc = xscom_write(chip_id, PSU_HOST_DOORBELL_REG_AND,
 			 SBE_HOST_RESPONSE_CLEAR);
 	if (rc) {
 		prlog(PR_ERR, "Failed to clear SBE to Host doorbell "
 		      "register [chip id = %x]\n", chip_id);
+		return;
+	}
+
+	/* SBE came back from reset */
+	if (data & SBE_HOST_RESET) {
+		prlog(PR_NOTICE, "Came back from reset [chip id = %x]\n",
+		      chip_id);
+
+		/* Repost message */
+		lock(&sbe->lock);
+		if (!list_empty(&sbe->msg_list)) {
+			msg = list_top(&sbe->msg_list, struct sbe_msg, link);
+			msg->state = sbe_msg_queued;
+			sbe_poke_queue(sbe);
+		}
+		unlock(&sbe->lock);
+		return;
+	}
+
+	/* SBE passthrough command, call prd handler */
+	if (data & SBE_HOST_PASSTHROUGH) {
+		prd_sbe_passthrough(chip_id);
+	}
+
+	/* Handle SBE response */
+	if (msg_complete) {
+		lock(&sbe->lock);
+		if (!list_empty(&sbe->msg_list)) {
+			msg = list_top(&sbe->msg_list, struct sbe_msg, link);
+			sbe_msg_complete(sbe, msg);
+		}
+		sbe_poke_queue(sbe);
+		unlock(&sbe->lock);
+	}
+}
+
+static void sbe_timeout_poll(void *user_data __unused)
+{
+	u64 now = mftb();
+	struct sbe *sbe;
+	struct sbe_msg *msg;
+	struct proc_chip *chip;
+
+	for_each_chip(chip) {
+		if (chip->sbe == NULL)
+			continue;
+
+		sbe = chip->sbe;
+		if (list_empty(&sbe->msg_list))
+			continue;
+
+		/*
+		 * In some cases there will be a delay in calling OPAL interrupt
+		 * handler routine (opal_handle_interrupt). In such cases its
+		 * possible that SBE has responded, but OPAL didn't act on that.
+		 * Hence check for SBE response.
+		 */
+		sbe_interrupt(sbe->chip_id);
+
+		lock(&sbe->lock);
+		if (list_empty(&sbe->msg_list)) {
+			unlock(&sbe->lock);
+			continue;
+		}
+
+		msg = list_top(&sbe->msg_list, struct sbe_msg, link);
+		if (!sbe_busy(msg)) {	/* Message not yet sent to SBE */
+			sbe_poke_queue(sbe);
+			unlock(&sbe->lock);
+			continue;
+		}
+
+		if (tb_compare(now, msg->timeout) != TB_AAFTERB) {
+			unlock(&sbe->lock);
+			continue;
+		}
+
+		/* Message timeout */
+		prlog(PR_ERR, "Message timeout [chip id = %x], "
+		      "cmd = %llx, subcmd = %llx\n", sbe->chip_id,
+		      (msg->reg[0] >> 8) & 0xff, msg->reg[0] & 0xff);
+		sbe_reg_dump(sbe->chip_id);
+		sbe->resp->state = sbe_msg_timeout;
+		sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
+
+		sbe_msg_complete(sbe, msg);
+		sbe_poke_queue(sbe);
+		unlock(&sbe->lock);
 	}
 }
+
+void sbe_init(void)
+{
+	struct dt_node *xn;
+	struct proc_chip *chip;
+	struct sbe *sbe;
+
+	if (proc_gen < proc_gen_p9)
+		return;
+
+	dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
+		sbe = zalloc(sizeof(struct sbe));
+		assert(sbe);
+		sbe->resp = zalloc(sizeof(struct sbe_msg));
+		assert(sbe->resp);
+		sbe->chip_id = dt_get_chip_id(xn);
+		sbe->cur_seq = 1;
+		list_head_init(&sbe->msg_list);
+		init_lock(&sbe->lock);
+
+		chip = get_chip(sbe->chip_id);
+		assert(chip);
+		chip->sbe = sbe;
+
+		if (dt_has_node_property(xn, "primary", NULL)) {
+			sbe_default_chip_id = sbe->chip_id;
+			prlog(PR_DEBUG, "Master chip id : %x\n", sbe->chip_id);
+		}
+	}
+
+	if (sbe_default_chip_id == -1) {
+		prlog(PR_ERR, "Master chip ID not found.\n");
+		return;
+	}
+
+	/* Initiate SBE timeout poller */
+	opal_add_poller(sbe_timeout_poll, NULL);
+}
diff --git a/include/chip.h b/include/chip.h
index e25f54d..01a3a61 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -108,6 +108,7 @@  struct centaur_chip;
 struct mfsi;
 struct xive;
 struct lpcm;
+struct sbe;
 
 /* Chip type */
 enum proc_chip_type {
@@ -208,6 +209,9 @@  struct proc_chip {
 
 	/* Used by hw/xive.c */
 	struct xive		*xive;
+
+	/* Used by hw/sbe-p9.c */
+	struct sbe		*sbe;
 };
 
 extern uint32_t pir_to_chip_id(uint32_t pir);
diff --git a/include/sbe-p9.h b/include/sbe-p9.h
index cca2100..c5616ba 100644
--- a/include/sbe-p9.h
+++ b/include/sbe-p9.h
@@ -21,13 +21,209 @@ 
 #include <ccan/list/list.h>
 #include <ccan/short_types/short_types.h>
 
+/*
+ * Timeout value for short running commands is 100 msecs and
+ * the timeout value for long running commands is 30 secs.
+ */
+#define SBE_CMD_TIMEOUT_SHORT_MS		100
+#define SBE_CMD_TIMEOUT_LONG_MS			(30 * 1000)
+
+/* Primary response status code */
+#define SBE_STATUS_PRI_SUCCESS			0x00
+#define SBE_STATUS_PRI_INVALID_CMD		0x01
+#define SBE_STATUS_PRI_INVALID_DATA		0x02
+#define SBE_STATUS_PRI_SEQ_ERR			0x03
+#define SBE_STATUS_PRI_INTERNAL_ERR		0x04
+#define SBE_STATUS_PRI_GENERIC_ERR		0xFE
+
+/* Secondary response status code */
+#define SBE_STATUS_SEC_SUCCESS			0x00
+#define SBE_STATUS_SEC_CMD_CLASS_UNSUPPORTED	0x01
+#define SBE_STATUS_SEC_CMD_UNSUPPORTED		0x02
+#define SBE_STATUS_SEC_INV_ADDR			0x03
+#define SBE_STATUS_SEC_INV_TARGET_TYPE		0x04
+#define SBE_STATUS_SEC_INV_TARGET_ID		0x05
+#define SBE_STATUS_SEC_TARGET_NOT_PRESENT	0x06
+#define SBE_STATUS_SEC_TARGET_NOT_FUNC		0x07
+#define SBE_STATUS_SEC_CMD_NOT_ALLOW		0x08
+#define SBE_STATUS_SEC_FUNC_NOT_SUPPORTED	0x09
+#define SBE_STATUS_SEC_GENERIC_ERR		0x0A
+#define SBE_STATUS_SEC_BLACKLIST_REG		0x0B
+#define SBE_STATUS_SEC_OS_FAILURE		0x0C
+#define SBE_STATUS_SEC_MBX_REG_FAILURE		0x0D
+#define SBE_STATUS_SEC_INSUFFICIENT_DATA	0x0E
+#define SBE_STATUS_SEC_EXCESS_DATA		0x0F
+#define SBE_STATUS_SEC_BUSY			0x10
+#define SBE_STATUS_SEC_UNSEC_REGION_NOT_FOUND	0x11
+#define SBE_STATUS_SEC_UNSEC_REGION_EXCEEDED	0x12
+
+/* Number of MBOX register on each side */
+#define NR_HOST_SBE_MBOX_REG		0x04
+
+/*
+ * SBE MBOX register address
+ *   Reg 0 - 3 : Host to send command packets to SBE
+ *   Reg 4 - 7 : SBE to send response packets to Host
+ */
+#define PSU_HOST_SBE_MBOX_REG0		0x000D0050
+#define PSU_HOST_SBE_MBOX_REG1		0x000D0051
+#define PSU_HOST_SBE_MBOX_REG2		0x000D0052
+#define PSU_HOST_SBE_MBOX_REG3		0x000D0053
+#define PSU_HOST_SBE_MBOX_REG4		0x000D0054
+#define PSU_HOST_SBE_MBOX_REG5		0x000D0055
+#define PSU_HOST_SBE_MBOX_REG6		0x000D0056
+#define PSU_HOST_SBE_MBOX_REG7		0x000D0057
+#define PSU_SBE_DOORBELL_REG_RW		0x000D0060
+#define PSU_SBE_DOORBELL_REG_AND	0x000D0061
+#define PSU_SBE_DOORBELL_REG_OR		0x000D0062
 #define PSU_HOST_DOORBELL_REG_RW	0x000D0063
 #define PSU_HOST_DOORBELL_REG_AND	0x000D0064
 #define PSU_HOST_DOORBELL_REG_OR	0x000D0065
 
+/*
+ * Doorbell register to trigger SBE interrupt. Set by OPAL to inform
+ * the SBE about a waiting message in the Host/SBE mailbox registers
+ */
+#define HOST_SBE_MSG_WAITING		PPC_BIT(0)
+
+/*
+ * Doorbell register for host bridge interrupt. Set by the SBE to inform
+ * host about a response message in the Host/SBE mailbox registers
+ */
+#define SBE_HOST_RESPONSE_WAITING	PPC_BIT(0)
+#define SBE_HOST_MSG_READ		PPC_BIT(1)
+#define SBE_HOST_STOP15_EXIT		PPC_BIT(2)
+#define SBE_HOST_RESET			PPC_BIT(3)
 #define SBE_HOST_PASSTHROUGH		PPC_BIT(4)
+#define SBE_HOST_TIMER_EXPIRY		PPC_BIT(14)
 #define SBE_HOST_RESPONSE_CLEAR		0x00
 
+/* SBE Target Type */
+#define SBE_TARGET_TYPE_PROC		0x00
+#define SBE_TARGET_TYPE_EX		0x01
+#define SBE_TARGET_TYPE_PERV		0x02
+#define SBE_TARGET_TYPE_MCS		0x03
+#define SBE_TARGET_TYPE_EQ		0x04
+#define SBE_TARGET_TYPE_CORE		0x05
+
+/* SBE MBOX command class */
+#define SBE_MCLASS_FIRST		0xD1
+#define SBE_MCLASS_CORE_STATE		0xD1
+#define SBE_MCLASS_SCOM			0xD2
+#define SBE_MCLASS_RING			0xD3
+#define SBE_MCLASS_TIMER		0xD4
+#define SBE_MCLASS_MPIPL		0xD5
+#define SBE_MCLASS_SECURITY		0xD6
+#define SBE_MCLASS_GENERIC		0xD7
+#define SBE_MCLASS_LAST			0xD7
+
+/*
+ * Commands are provided in xxyy form where:
+ *   - xx : command class
+ *   - yy : command
+ *
+ * Both request and response message uses same seq ID,
+ * command class and command.
+ */
+#define SBE_CMD_CTRL_DEADMAN_LOOP	0xD101
+#define SBE_CMD_MULTI_SCOM		0xD201
+#define SBE_CMD_PUT_RING_FORM_IMAGE	0xD301
+#define SBE_CMD_CONTROL_TIMER		0xD401
+#define SBE_CMD_GET_ARCHITECTED_REG	0xD501
+#define SBE_CMD_CLR_ARCHITECTED_REG	0xD502
+#define SBE_CMD_SET_UNSEC_MEM_WINDOW	0xD601
+#define SBE_CMD_GET_SBE_FFDC		0xD701
+#define SBE_CMD_GET_CAPABILITY		0xD702
+#define SBE_CMD_GET_FREQUENCIES		0xD703
+#define SBE_CMD_SET_FFDC_ADDR		0xD704
+#define SBE_CMD_QUIESCE_SBE		0xD705
+#define SBE_CMD_SET_FABRIC_ID_MAP	0xD706
+#define SBE_CMD_STASH_MPIPL_CONFIG	0xD707
+
+/* SBE MBOX control flags */
+
+/* Generic flags */
+#define SBE_CMD_CTRL_RESP_REQ		0x0100
+#define SBE_CMD_CTRL_ACK_REQ		0x0200
+
+/* Deadman loop */
+#define CTRL_DEADMAN_LOOP_START		0x0001
+#define CTRL_DEADMAN_LOOP_STOP		0x0002
+
+/* Control timer */
+#define CONTROL_TIMER_START		0x0001
+#define CONTROL_TIMER_STOP		0x0002
+
+/* SBE message state */
+enum sbe_msg_state {
+	sbe_msg_unused = 0,	/* Free */
+	sbe_msg_queued,		/* Queued to SBE list */
+	sbe_msg_sent,		/* Sent to SBE */
+	sbe_msg_wresp,		/* Waiting for response */
+	sbe_msg_response,	/* Got response */
+	sbe_msg_done,		/* Complete */
+	sbe_msg_timeout,	/* Message timeout */
+	sbe_msg_cancelled,	/* Message removed from queue */
+};
+
+/* SBE message */
+struct sbe_msg {
+	/*
+	 * Reg[0] :
+	 *   word0 :
+	 *     direct cmd  : reserved << 16 | ctrl flag
+	 *     indirect cmd: mem_addr_size_dword
+	 *     response    : primary status << 16 | secondary status
+	 *
+	 *   word1 : seq id << 16 | cmd class << 8 | cmd
+	 *
+	 * WARNING:
+	 *   - Don't populate reg[0].seq (byte 4,5). This will be populated by
+	 *     sbe_queue_msg().
+	 */
+	u64	reg[4];
+
+	/* cmd timout : mftb() + msecs_to_tb(SBE_CMD_TIMEOUT_[SHORT/LONG]_MS) */
+	u64	timeout;
+
+	/* Completion function */
+	void (*complete)(struct sbe_msg *msg);
+	void *user_data;
+
+	/* Current msg state */
+	enum sbe_msg_state	state;
+
+	/* Set if the message expects a response */
+	bool			response;
+
+	/* Points to sbe->resp, which contains response message */
+	struct sbe_msg		*resp;
+
+	/* Internal queuing */
+	struct list_node	link;
+} __packed;
+
+
+/* Allocate and populate sbe_msg structure */
+extern struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag, u64 reg1,
+				 u64 reg2, u64 reg3) __warn_unused_result;
+
+/* Free sbe_msg structure */
+extern void sbe_freemsg(struct sbe_msg *msg);
+
+/* Add new message to sbe queue */
+extern int sbe_queue_msg(uint32_t chip_id, struct sbe_msg *msg,
+		  void (*comp)(struct sbe_msg *msg)) __warn_unused_result;
+
+/* Synchronously send message to SBE */
+extern int sbe_sync_msg(u32 chip_id, struct sbe_msg *msg, bool autofree);
+
+/* Remove message from SBE queue, it will not remove inflight message */
+extern int sbe_cancelmsg(u32 chip_id, struct sbe_msg *msg);
+
+/* Initialize the SBE mailbox driver */
+extern void sbe_init(void);
+
 /* SBE interrupt */
 extern void sbe_interrupt(uint32_t chip_id);