[v2,2/2] prd: PRD framework

Message ID: 20150211164010.1801.31691.stgit@localhost.localdomain
State: Changes Requested

Commit Message

Neelesh Gupta Feb. 11, 2015, 4:40 p.m. UTC
Enable run-time diagnostic support in skiboot and provide the
framework to pass data between OPAL and Linux.

Signed-off-by: Neelesh Gupta <neelegup@linux.vnet.ibm.com>
---

v1 -> v2
========
- Reworked the IPOLL register mask/unmask logic in various prd functions.
- Added the missing node movement from 'in_use' list to 'free' list
  in prd_msg_finish().

Comments

Stewart Smith Feb. 12, 2015, 4:06 a.m. UTC | #1
Neelesh Gupta <neelegup@linux.vnet.ibm.com> writes:
> diff --git a/hw/prd.c b/hw/prd.c
> new file mode 100644
> index 0000000..b574558
> --- /dev/null
> +++ b/hw/prd.c
> @@ -0,0 +1,293 @@
> +struct prd_node {
> +	struct list_node	link;
> +	uint32_t		proc;
> +	struct opal_prd_msg	prd_msg;
> +};
> +
> +static LIST_HEAD(prd_free_list);
> +static LIST_HEAD(prd_in_use_list);
> +static uint32_t token;
> +static int (*prd_functions[OPAL_PRD_MSG_TYPE_MAX])(void);
> +static struct lock prd_lock = LOCK_UNLOCKED;
> +
> +/* Entry from the HW below */
> +void prd_interrupt(uint32_t proc, enum opal_prd_msg_type type)
> +{
> +	struct prd_node *node;
> +
> +	lock(&prd_lock);
> +	node = list_pop(&prd_free_list, struct prd_node, link);
> +	if (!node) { /* Free list exhausted */
> +		node = zalloc(sizeof(*node));
> +		if (!node) {
> +			prlog(PR_ERR, "Failed to allocate prd node\n");
> +			unlock(&prd_lock);
> +			return;
> +		}
> +	}
> +
> +	node->proc = proc;
> +	node->prd_msg.type = type;
> +	node->prd_msg.token = ++token;
> +
> +	list_add_tail(&prd_in_use_list, &node->link);
> +	unlock(&prd_lock);
> +
> +	if (prd_functions[type])
> +		prd_functions[type]();
> +}
> +
> +static int prd_msg_attn_ack(void)
> +{
> +	struct prd_node *node_attn, *node_ack;
> +	int rc;
> +
> +	lock(&prd_lock);
> +	list_for_each(&prd_in_use_list, node_ack, link)
> +		if (node_ack->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN_ACK)
> +			break;
> +
> +	if (!node_ack) {
> +		unlock(&prd_lock);
> +		return OPAL_RESOURCE;
> +	}
> +
> +	list_for_each(&prd_in_use_list, node_attn, link)
> +		/* prd node of ATTN type that matches the token of ATTN_ACK */
> +		if (node_attn->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN &&
> +		    node_attn->prd_msg.token == node_ack->prd_msg.token)
> +			break;
> +
> +	if (!node_attn) {
> +		unlock(&prd_lock);
> +		return OPAL_RESOURCE;
> +	}
> +
> +	/* ATTN acknowledged by the host, unmask the IPOLL */
> +	rc = xscom_write(node_attn->proc, PRD_IPOLL_MASK_REG,
> +			 ~(node_ack->prd_msg.attn_ack.ipoll_ack) &
> +			 PRD_IPOLL_MASK);
> +
> +	/* Done. Now move both the ATTN & ATTN_ACK nodes to the free list */
> +	list_del(&node_attn->link);
> +	list_add_tail(&prd_free_list, &node_attn->link);
> +
> +	list_del(&node_ack->link);
> +	list_add_tail(&prd_free_list, &node_ack->link);
> +	unlock(&prd_lock);
> +
> +	return rc;
> +}
> +
> +static int prd_msg_attn(void)
> +{
> +	uint64_t status, mask;
> +	struct prd_node *node;
> +	uint64_t *prd_msg;
> +	int rc;
> +
> +	lock(&prd_lock);
> +	list_for_each(&prd_in_use_list, node, link)
> +		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN)
> +			break;
> +	unlock(&prd_lock);
> +
> +	if (!node)
> +		return OPAL_RESOURCE;
> +
> +	rc = xscom_read(node->proc, PRD_ERROR_STATUS, &status);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to read the ipoll status\n");
> +		goto exit;
> +	}
> +
> +	/* Mask IPOLL for all the bits set in the 'status' */
> +	mask = status & PRD_IPOLL_MASK;
> +	rc = xscom_write(node->proc, PRD_IPOLL_MASK_REG, mask);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to mask the IPOLL\n");
> +		goto exit;
> +	}
> +
> +	/* Fill up the attention fields */
> +	node->prd_msg.attn.proc = node->proc;		/* params[1] */
> +	node->prd_msg.attn.ipoll_status = status;	/* params[2] */
> +	node->prd_msg.attn.ipoll_mask = mask;		/* params[3] */
> +
> +	prd_msg = (uint64_t *)&node->prd_msg;
> +
> +	rc = opal_queue_msg(OPAL_PRD_MSG, NULL, NULL, prd_msg[0], prd_msg[1],
> +			    prd_msg[2], prd_msg[3]);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to queue up the ATTN\n");
> +		goto exit;
> +	}
> +
> +	return 0;
> +
> +	/* In the error case, delete the node from 'in_use' list and add it
> +	 * to the 'free' list as the ACK is never going to come from the host
> +	 */
> +exit:
> +	lock(&prd_lock);
> +	list_del(&node->link);
> +	list_add_tail(&prd_free_list, &node->link);
> +	unlock(&prd_lock);
> +
> +	return rc;
> +}
> +
> +static int prd_msg_finish(void)
> +{
> +	struct proc_chip *chip;
> +	struct prd_node *node;
> +	int rc;
> +
> +	/* Mask the interrupts on all the cores */
> +	for_each_chip(chip) {
> +		rc = xscom_write(chip->id, PRD_IPOLL_MASK_REG, PRD_IPOLL_MASK);
> +		if (rc)
> +			prlog(PR_ERR, "Failed to mask the IPOLL on %d chip\n",
> +			      chip->id);
> +	}
> +
> +	lock(&prd_lock);
> +	list_for_each(&prd_in_use_list, node, link)
> +		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_FINI)
> +			break;
> +
> +	if (!node) { /* should not happen though */
> +		unlock(&prd_lock);
> +		return OPAL_RESOURCE;
> +	}
> +
> +	list_del(&node->link);
> +	list_add_tail(&prd_free_list, &node->link);
> +	unlock(&prd_lock);
> +
> +	return 0;
> +}
> +
> +static int prd_msg_init(void)
> +{
> +	struct proc_chip *chip;
> +	struct prd_node *node;
> +	/* XXX We will use it for enabling the functionalities
> +	 * uint32_t version;
> +	 */
> +	uint64_t ipoll;
> +	int rc;
> +
> +	lock(&prd_lock);
> +	list_for_each(&prd_in_use_list, node, link)
> +		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_INIT)
> +			break;
> +	unlock(&prd_lock);

Why are we doing this lookup instead of having prd_msg_XXX() take a
prd_msg as a parameter?

It seems as though we're maintaining the prd_in_use list for no real
reason? We only add one to it per interrupt and then immediately
process it; the same goes for an OPAL call.
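
(For illustration, a minimal sketch of the suggested shape, assuming the
prd_functions[] table is changed to take the node as an argument; this is
not part of the patch:)

    static int (*prd_functions[OPAL_PRD_MSG_TYPE_MAX])(struct prd_node *);

    static int prd_msg_attn(struct prd_node *node)
    {
        uint64_t status, mask;
        int rc;

        /* No list walk needed: the caller hands us the node directly */
        rc = xscom_read(node->proc, PRD_ERROR_STATUS, &status);
        if (rc)
            return rc;

        /* Mask IPOLL for all the bits set in 'status', as before */
        mask = status & PRD_IPOLL_MASK;
        return xscom_write(node->proc, PRD_IPOLL_MASK_REG, mask);
    }

    /* Dispatch then becomes: prd_functions[type](node); */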

> +
> +	if (!node)
> +		return OPAL_RESOURCE;
> +
> +	ipoll = node->prd_msg.init.ipoll;
> +	/* Unmask these ATTNs which are supported */
> +	for_each_chip(chip) {
> +		rc = xscom_write(chip->id, PRD_IPOLL_MASK_REG,
> +				 ~ipoll & PRD_IPOLL_MASK);
> +		if (rc) {
> +			prlog(PR_ERR, "Failed to unmask the IPOLL on %d chip\n",
> +			      chip->id);
> +			rc = OPAL_HARDWARE;
> +			goto exit;
> +		}
> +	}
> +
> +	memset(prd_functions, 0, sizeof(prd_functions));
> +
> +	/*
> +	 * XXX
> +	 * version = node->prd_msg.init.version;
> +	 *
> +	 * Use the version to initialise the prd_functions[]()
> +	 * supported by the application, otherwise NULL.
> +	 * Currently, supporting 'ATTN' & 'ATTN_ACK' in default
> +	 */
> +	prd_functions[OPAL_PRD_MSG_TYPE_ATTN] = prd_msg_attn;
> +	prd_functions[OPAL_PRD_MSG_TYPE_ATTN_ACK] = prd_msg_attn_ack;
> +
> +exit:
> +	lock(&prd_lock);
> +	list_del(&node->link);
> +	list_add_tail(&prd_free_list, &node->link);
> +	unlock(&prd_lock);
> +
> +	return rc;
> +}
> +
> +/* Entry from the host above */
> +static int64_t opal_prd_msg(uint64_t *buffer)

Why not struct opal_prd_msg *buffer as arg?

> +{
> +	struct opal_prd_msg prd_msg;
> +	struct prd_node *node;
> +
> +	memcpy(&prd_msg, buffer, sizeof(prd_msg));
> +
> +	if (!prd_functions[prd_msg.type])
> +		return OPAL_UNSUPPORTED;

Please range/validity check anything that's coming in through an OPAL
call as much as possible.
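
(A minimal sketch of such checks, folding in the typed argument suggested
above; OPAL_PARAMETER is assumed as the usual return code for bad input:)

    static int64_t opal_prd_msg(struct opal_prd_msg *buffer)
    {
        struct opal_prd_msg prd_msg;

        if (!buffer)
            return OPAL_PARAMETER;

        memcpy(&prd_msg, buffer, sizeof(prd_msg));

        /* Range-check the type before using it as an array index */
        if ((uint32_t)prd_msg.type >= OPAL_PRD_MSG_TYPE_MAX)
            return OPAL_PARAMETER;

        if (!prd_functions[prd_msg.type])
            return OPAL_UNSUPPORTED;

        /* ... queue the node and dispatch as before ... */
        return OPAL_SUCCESS;
    }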

> +	lock(&prd_lock);
> +	node = list_pop(&prd_free_list, struct prd_node, link);
> +	if (!node) { /* Free list exhausted */
> +		node = zalloc(sizeof(*node));
> +		if (!node) {
> +			prlog(PR_ERR, "Failed to allocate prd node\n");
> +			unlock(&prd_lock);
> +			return OPAL_NO_MEM;
> +		}
> +	}

Why not just dynamically allocate everything? Why have a free list at
all if we're just going to allocate until ENOMEM?

Also, I see extra allocations, but nowhere are these extra ones freed?

> +	memcpy(&node->prd_msg, buffer, sizeof(node->prd_msg));
> +	list_add_tail(&prd_in_use_list, &node->link);
> +	unlock(&prd_lock);
> +
> +	return prd_functions[node->prd_msg.type]();
> +}
> +opal_call(OPAL_PRD_MSG, opal_prd_msg, 1);
> +
> +void prd_init(void)
> +{
> +	struct prd_node *node;
> +	int i;
> +
> +	node = zalloc(sizeof(*node) * OPAL_PRD_MSG_TYPE_MAX);
> +	if (!node)
> +		return;
> +
> +	for (i = 0; i < OPAL_PRD_MSG_TYPE_MAX; i++)
> +		list_add_tail(&prd_free_list, &node[i].link);
> +
> +	/* Basic init and finish functions */
> +	prd_functions[OPAL_PRD_MSG_TYPE_INIT] = prd_msg_init;
> +	prd_functions[OPAL_PRD_MSG_TYPE_FINI] = prd_msg_finish;
> +}

What's the logic behind preallocating OPAL_PRD_MSG_TYPE_MAX amount? Why
not just 1 or some other number?

> --- a/include/platform.h
> +++ b/include/platform.h
> @@ -100,6 +100,8 @@ struct platform {
>  	 */
>  	void		(*external_irq)(unsigned int chip_id);
>  
> +	void		(*local_irq)(unsigned int chip_id);
> +
>  	/*
>  	 * nvram ops.
>  	 *

Why is this a platform op? It seems identical for the time being... or
is it just missing in rhesus?
Neelesh Gupta Feb. 12, 2015, 5:35 a.m. UTC | #2
On 02/12/2015 09:36 AM, Stewart Smith wrote:
> Neelesh Gupta <neelegup@linux.vnet.ibm.com> writes:
>> diff --git a/hw/prd.c b/hw/prd.c
>> new file mode 100644
>> index 0000000..b574558
>> --- /dev/null
>> +++ b/hw/prd.c
>> @@ -0,0 +1,293 @@
>> +struct prd_node {
>> +	struct list_node	link;
>> +	uint32_t		proc;
>> +	struct opal_prd_msg	prd_msg;
>> +};
>> +
>> +static LIST_HEAD(prd_free_list);
>> +static LIST_HEAD(prd_in_use_list);
>> +static uint32_t token;
>> +static int (*prd_functions[OPAL_PRD_MSG_TYPE_MAX])(void);
>> +static struct lock prd_lock = LOCK_UNLOCKED;
>> +
>> +/* Entry from the HW below */
>> +void prd_interrupt(uint32_t proc, enum opal_prd_msg_type type)
>> +{
>> +	struct prd_node *node;
>> +
>> +	lock(&prd_lock);
>> +	node = list_pop(&prd_free_list, struct prd_node, link);
>> +	if (!node) { /* Free list exhausted */
>> +		node = zalloc(sizeof(*node));
>> +		if (!node) {
>> +			prlog(PR_ERR, "Failed to allocate prd node\n");
>> +			unlock(&prd_lock);
>> +			return;
>> +		}
>> +	}
>> +
>> +	node->proc = proc;
>> +	node->prd_msg.type = type;
>> +	node->prd_msg.token = ++token;
>> +
>> +	list_add_tail(&prd_in_use_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	if (prd_functions[type])
>> +		prd_functions[type]();
>> +}
>> +
>> +static int prd_msg_attn_ack(void)
>> +{
>> +	struct prd_node *node_attn, *node_ack;
>> +	int rc;
>> +
>> +	lock(&prd_lock);
>> +	list_for_each(&prd_in_use_list, node_ack, link)
>> +		if (node_ack->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN_ACK)
>> +			break;
>> +
>> +	if (!node_ack) {
>> +		unlock(&prd_lock);
>> +		return OPAL_RESOURCE;
>> +	}
>> +
>> +	list_for_each(&prd_in_use_list, node_attn, link)
>> +		/* prd node of ATTN type that matches the token of ATTN_ACK */
>> +		if (node_attn->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN &&
>> +		    node_attn->prd_msg.token == node_ack->prd_msg.token)
>> +			break;
>> +
>> +	if (!node_attn) {
>> +		unlock(&prd_lock);
>> +		return OPAL_RESOURCE;
>> +	}
>> +
>> +	/* ATTN acknowledged by the host, unmask the IPOLL */
>> +	rc = xscom_write(node_attn->proc, PRD_IPOLL_MASK_REG,
>> +			 ~(node_ack->prd_msg.attn_ack.ipoll_ack) &
>> +			 PRD_IPOLL_MASK);
>> +
>> +	/* Done. Now move both the ATTN & ATTN_ACK nodes to the free list */
>> +	list_del(&node_attn->link);
>> +	list_add_tail(&prd_free_list, &node_attn->link);
>> +
>> +	list_del(&node_ack->link);
>> +	list_add_tail(&prd_free_list, &node_ack->link);
>> +	unlock(&prd_lock);
>> +
>> +	return rc;
>> +}
>> +
>> +static int prd_msg_attn(void)
>> +{
>> +	uint64_t status, mask;
>> +	struct prd_node *node;
>> +	uint64_t *prd_msg;
>> +	int rc;
>> +
>> +	lock(&prd_lock);
>> +	list_for_each(&prd_in_use_list, node, link)
>> +		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN)
>> +			break;
>> +	unlock(&prd_lock);
>> +
>> +	if (!node)
>> +		return OPAL_RESOURCE;
>> +
>> +	rc = xscom_read(node->proc, PRD_ERROR_STATUS, &status);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to read the ipoll status\n");
>> +		goto exit;
>> +	}
>> +
>> +	/* Mask IPOLL for all the bits set in the 'status' */
>> +	mask = status & PRD_IPOLL_MASK;
>> +	rc = xscom_write(node->proc, PRD_IPOLL_MASK_REG, mask);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to mask the IPOLL\n");
>> +		goto exit;
>> +	}
>> +
>> +	/* Fill up the attention fields */
>> +	node->prd_msg.attn.proc = node->proc;		/* params[1] */
>> +	node->prd_msg.attn.ipoll_status = status;	/* params[2] */
>> +	node->prd_msg.attn.ipoll_mask = mask;		/* params[3] */
>> +
>> +	prd_msg = (uint64_t *)&node->prd_msg;
>> +
>> +	rc = opal_queue_msg(OPAL_PRD_MSG, NULL, NULL, prd_msg[0], prd_msg[1],
>> +			    prd_msg[2], prd_msg[3]);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to queue up the ATTN\n");
>> +		goto exit;
>> +	}
>> +
>> +	return 0;
>> +
>> +	/* In the error case, delete the node from 'in_use' list and add it
>> +	 * to the 'free' list as the ACK is never going to come from the host
>> +	 */
>> +exit:
>> +	lock(&prd_lock);
>> +	list_del(&node->link);
>> +	list_add_tail(&prd_free_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	return rc;
>> +}
>> +
>> +static int prd_msg_finish(void)
>> +{
>> +	struct proc_chip *chip;
>> +	struct prd_node *node;
>> +	int rc;
>> +
>> +	/* Mask the interrupts on all the cores */
>> +	for_each_chip(chip) {
>> +		rc = xscom_write(chip->id, PRD_IPOLL_MASK_REG, PRD_IPOLL_MASK);
>> +		if (rc)
>> +			prlog(PR_ERR, "Failed to mask the IPOLL on %d chip\n",
>> +			      chip->id);
>> +	}
>> +
>> +	lock(&prd_lock);
>> +	list_for_each(&prd_in_use_list, node, link)
>> +		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_FINI)
>> +			break;
>> +
>> +	if (!node) { /* should not happen though */
>> +		unlock(&prd_lock);
>> +		return OPAL_RESOURCE;
>> +	}
>> +
>> +	list_del(&node->link);
>> +	list_add_tail(&prd_free_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	return 0;
>> +}
>> +
>> +static int prd_msg_init(void)
>> +{
>> +	struct proc_chip *chip;
>> +	struct prd_node *node;
>> +	/* XXX We will use it for enabling the functionalities
>> +	 * uint32_t version;
>> +	 */
>> +	uint64_t ipoll;
>> +	int rc;
>> +
>> +	lock(&prd_lock);
>> +	list_for_each(&prd_in_use_list, node, link)
>> +		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_INIT)
>> +			break;
>> +	unlock(&prd_lock);
> Why are we doing this lookup instead of having prd_msg_XXX() take a
> prd_msg as a parameter?
>
> It seems as though we're maintaining the prd_in_use list for no real
> reason? We only add one to it per interrupt and then immediately
> process it; the same goes for an OPAL call.

The node gets processed when Linux makes an OPAL call; typically that
will be immediate, but it depends on when Linux consumes it. Moreover, I
added the list thinking that there could be simultaneous ATTNs from
multiple procs, or interrupts of different 'prd_msg_type'.

>
>> +
>> +	if (!node)
>> +		return OPAL_RESOURCE;
>> +
>> +	ipoll = node->prd_msg.init.ipoll;
>> +	/* Unmask these ATTNs which are supported */
>> +	for_each_chip(chip) {
>> +		rc = xscom_write(chip->id, PRD_IPOLL_MASK_REG,
>> +				 ~ipoll & PRD_IPOLL_MASK);
>> +		if (rc) {
>> +			prlog(PR_ERR, "Failed to unmask the IPOLL on %d chip\n",
>> +			      chip->id);
>> +			rc = OPAL_HARDWARE;
>> +			goto exit;
>> +		}
>> +	}
>> +
>> +	memset(prd_functions, 0, sizeof(prd_functions));
>> +
>> +	/*
>> +	 * XXX
>> +	 * version = node->prd_msg.init.version;
>> +	 *
>> +	 * Use the version to initialise the prd_functions[]()
>> +	 * supported by the application, otherwise NULL.
>> +	 * Currently, supporting 'ATTN' & 'ATTN_ACK' in default
>> +	 */
>> +	prd_functions[OPAL_PRD_MSG_TYPE_ATTN] = prd_msg_attn;
>> +	prd_functions[OPAL_PRD_MSG_TYPE_ATTN_ACK] = prd_msg_attn_ack;
>> +
>> +exit:
>> +	lock(&prd_lock);
>> +	list_del(&node->link);
>> +	list_add_tail(&prd_free_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	return rc;
>> +}
>> +
>> +/* Entry from the host above */
>> +static int64_t opal_prd_msg(uint64_t *buffer)
> Why not struct opal_prd_msg *buffer as arg?

Yes, will change.

>
>> +{
>> +	struct opal_prd_msg prd_msg;
>> +	struct prd_node *node;
>> +
>> +	memcpy(&prd_msg, buffer, sizeof(prd_msg));
>> +
>> +	if (!prd_functions[prd_msg.type])
>> +		return OPAL_UNSUPPORTED;
> Please range/validity check anything that's coming in through an OPAL
> call as much as possible.

Yes, will add these checks.

>
>> +	lock(&prd_lock);
>> +	node = list_pop(&prd_free_list, struct prd_node, link);
>> +	if (!node) { /* Free list exhausted */
>> +		node = zalloc(sizeof(*node));
>> +		if (!node) {
>> +			prlog(PR_ERR, "Failed to allocate prd node\n");
>> +			unlock(&prd_lock);
>> +			return OPAL_NO_MEM;
>> +		}
>> +	}
> Why not just dynamically allocate everything? Why have a free list at
> all if we're just going to allocate until ENOMEM?

We allocate a sufficient number during init() and add them to the free
list, to cater to multiple messages without exhausting the free list and
to reduce fragmentation. If the list is exhausted anyway, that is not an
error; we dynamically allocate a new node and from then on maintain it
in the list.

>
> Also, I see extra allocations, but nowhere are these extra ones freed?

These are not freed; they are added to and maintained as part of the
'free' list.

>
>> +	memcpy(&node->prd_msg, buffer, sizeof(node->prd_msg));
>> +	list_add_tail(&prd_in_use_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	return prd_functions[node->prd_msg.type]();
>> +}
>> +opal_call(OPAL_PRD_MSG, opal_prd_msg, 1);
>> +
>> +void prd_init(void)
>> +{
>> +	struct prd_node *node;
>> +	int i;
>> +
>> +	node = zalloc(sizeof(*node) * OPAL_PRD_MSG_TYPE_MAX);
>> +	if (!node)
>> +		return;
>> +
>> +	for (i = 0; i < OPAL_PRD_MSG_TYPE_MAX; i++)
>> +		list_add_tail(&prd_free_list, &node[i].link);
>> +
>> +	/* Basic init and finish functions */
>> +	prd_functions[OPAL_PRD_MSG_TYPE_INIT] = prd_msg_init;
>> +	prd_functions[OPAL_PRD_MSG_TYPE_FINI] = prd_msg_finish;
>> +}
> What's the logic behind preallocating OPAL_PRD_MSG_TYPE_MAX amount? Why
> not just 1 or some other number?

The idea is that this many types of prd messages can be handled
simultaneously without having to go for dynamic allocation when they
arrive: just pop a node from the 'free' list, move it to the 'in_use'
list, and move it back to the 'free' list when done.
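
(To make that lifecycle concrete, a sketch with hypothetical helper names
prd_node_get()/prd_node_put(); the patch open-codes this pattern in each
handler:)

    /* Take a node from the pool, falling back to allocation */
    static struct prd_node *prd_node_get(void)
    {
        struct prd_node *node;

        lock(&prd_lock);
        node = list_pop(&prd_free_list, struct prd_node, link);
        if (!node)
            node = zalloc(sizeof(*node));
        if (node)
            list_add_tail(&prd_in_use_list, &node->link);
        unlock(&prd_lock);

        return node;
    }

    /* Return a node to the pool once its message has been handled */
    static void prd_node_put(struct prd_node *node)
    {
        lock(&prd_lock);
        list_del(&node->link);
        list_add_tail(&prd_free_list, &node->link);
        unlock(&prd_lock);
    }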

>
>> --- a/include/platform.h
>> +++ b/include/platform.h
>> @@ -100,6 +100,8 @@ struct platform {
>>   	 */
>>   	void		(*external_irq)(unsigned int chip_id);
>>   
>> +	void		(*local_irq)(unsigned int chip_id);
>> +
>>   	/*
>>   	 * nvram ops.
>>   	 *
> Why is this a platform op? It seems identical for the time being... or
> is it just missing in rhesus?

Aren't the prd interrupts platform dependent? They reach OPAL through
the same channel but could be of a different nature on different
platforms?

Neelesh.
Benjamin Herrenschmidt Feb. 12, 2015, 6:03 a.m. UTC | #3
On Thu, 2015-02-12 at 15:06 +1100, Stewart Smith wrote:
> Why is this a platform op? It seems identical for the time being... or
> is it just missing in rhesus?

The external irq is a platform op because it depends on the device
connected to the P8 pin which is platform specific (BMC, Rhesus
EC, ...). However, the local error interrupt is ... local :) So it
doesn't need to be a platform op at all.

The only question here is whether we enable it, as in, whether we enable
the ipoll mask, and we only do that as a result of hbrtd kicking in,
which should only happen on platforms without an FSP. So we might have
some conditionals in that area, but I agree, the interrupt itself
doesn't need to be abstracted.
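
(In code terms, the hunk in hw/psi.c could then call the PRD layer
directly instead of indirecting through the platform, e.g.:)

    if (val & PSIHB_IRQ_STAT_LOCAL_ERR)
        prd_interrupt(psi->chip_id, OPAL_PRD_MSG_TYPE_ATTN);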

Cheers,
Ben.
Benjamin Herrenschmidt Feb. 12, 2015, 6:08 a.m. UTC | #4
On Thu, 2015-02-12 at 11:05 +0530, Neelesh Gupta wrote:
> The node gets processed when Linux makes an OPAL call; typically that
> will be immediate, but it depends on when Linux consumes it. Moreover, I
> added the list thinking that there could be simultaneous ATTNs from
> multiple procs, or interrupts of different 'prd_msg_type'.

Ah that's a good point, we might have attentions (and OCC resets) coming
simultaneously on multiple chips since the plan was to only mask the IPOLL
mask locally...

Jeremy, that changes things a bit for us... We can keep the code simpler
still I think, but we probably do need some kind of queue. The code is
simpler because userspace will only process one at a time and we
can bake that into the design on our side.

Unless we want to plan for a hypothetical day where HBRTD can process
multiple at a time...

Cheers,
Ben.
Jeremy Kerr Feb. 12, 2015, 6:15 a.m. UTC | #5
Hi Ben,

> Jeremy, that changes things a bit for us... We can keep the code simpler
> still I think, but we probably do need some kind of queue. The code is
> simpler because userspace will only process one at a time and we
> can bake that into the design on our side.

We do queue in the Linux layer - we'll keep a list of "unread" messages
(via the chardev interface). Is there any issue with doing it in the
kernel rather than firmware?

Cheers,


Jeremy
Benjamin Herrenschmidt Feb. 12, 2015, 6:29 a.m. UTC | #6
On Thu, 2015-02-12 at 14:15 +0800, Jeremy Kerr wrote:
> Hi Ben,
> 
> > Jeremy, that changes things a bit for us... We can keep the code simpler
> > still I think, but we probably do need some kind of queue. The code is
> > simpler because userspace will only process one at a time and we
> > can bake that into the design on our side.
> 
> We do queue in the Linux layer - we'll keep a list of "unread" messages
> (via the chardev interface). Is there any issue with doing it in the
> kernel rather than firmware?

Well, we sort of have to in fw, no?

I.e., we get the attn irq and we *have* to mask it, or it's going to come
back again and again and hang that cpu. So we also need to process it and
create a message. Now, sending a message to Linux is an asynchronous
process: we set an OPAL event and eventually Linux will read the
message. So we might already have one pending and need to queue up
another one. I don't think we can get away without this.

Ben.

> Cheers,
> 
> 
> Jeremy
>
Stewart Smith Feb. 12, 2015, 7:27 a.m. UTC | #7
Neelesh Gupta <neelegup@linux.vnet.ibm.com> writes:
>>> +	lock(&prd_lock);
>>> +	node = list_pop(&prd_free_list, struct prd_node, link);
>>> +	if (!node) { /* Free list exhausted */
>>> +		node = zalloc(sizeof(*node));
>>> +		if (!node) {
>>> +			prlog(PR_ERR, "Failed to allocate prd node\n");
>>> +			unlock(&prd_lock);
>>> +			return OPAL_NO_MEM;
>>> +		}
>>> +	}
>> Why not just dynamically allocate everything? Why have a free list at
>> all if we're just going to allocate until ENOMEM?
>
> We allocate a sufficient number during init() and add them to the free
> list, to cater to multiple messages without exhausting the free list and
> to reduce fragmentation. If the list is exhausted anyway, that is not an
> error; we dynamically allocate a new node and from then on maintain it
> in the list.
>
>>
>> Also, I see extra allocations, but nowhere are these extra ones freed?
>
> These are not freed; they are added to and maintained as part of the
> 'free' list.

This is a problem - a flood of these will hit ENOMEM and that won't be
good for keeping the system running at all (many OPAL calls would fail
without being able to allocate memory).
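
(One possible mitigation, sketched here rather than taken from the patch:
cap the pool in opal_prd_msg() and push back on the caller instead of
falling back to zalloc():)

    lock(&prd_lock);
    node = list_pop(&prd_free_list, struct prd_node, link);
    unlock(&prd_lock);
    if (!node) {
        /* Pool exhausted: let the host retry rather than allocate */
        return OPAL_BUSY;
    }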


>>> --- a/include/platform.h
>>> +++ b/include/platform.h
>>> @@ -100,6 +100,8 @@ struct platform {
>>>   	 */
>>>   	void		(*external_irq)(unsigned int chip_id);
>>>   
>>> +	void		(*local_irq)(unsigned int chip_id);
>>> +
>>>   	/*
>>>   	 * nvram ops.
>>>   	 *
>> Why is this a platform op? It seems identical for the time being... or
>> is it just missing in rhesus?
>
> Aren't the prd interrupts platform dependent? They reach OPAL through
> the same channel but could be of a different nature on different
> platforms?

Let's change it to a platform op when/if there's something that needs to
be platform specific.
Benjamin Herrenschmidt Feb. 12, 2015, 9:11 p.m. UTC | #8
On Thu, 2015-02-12 at 18:27 +1100, Stewart Smith wrote:

> This is a problem - a flood of these will hit ENOMEM and that won't be
> good for keeping the system running at all (many OPAL calls would fail
> without being able to allocate memory).

We can't flood them with more than one per chip of each kind (OCC and
ATTN) I think, can we? I mean, for the OCC one, maybe we could pace it
ourselves by ignoring subsequent interrupts from the OCC until the
previous one is complete.
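
(A sketch of that pacing, assuming a per-chip in-flight flag; the
'prd_active' field on struct proc_chip is hypothetical:)

    void prd_interrupt(uint32_t proc, enum opal_prd_msg_type type)
    {
        struct proc_chip *chip = get_chip(proc);

        if (!chip)
            return;

        lock(&prd_lock);
        if (chip->prd_active) {
            /* Previous message still outstanding: ignore this one */
            unlock(&prd_lock);
            return;
        }
        chip->prd_active = true;
        unlock(&prd_lock);

        /* ... queue the message as before; clear prd_active on ACK ... */
    }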

Ben.

Patch

 hw/Makefile.inc              |    2 
 hw/prd.c                     |  293 ++++++++++++++++++++++++++++++++++++++++++
 hw/psi.c                     |    4 -
 include/platform.h           |    2 
 include/skiboot.h            |    5 +
 platforms/astbmc/astbmc.h    |    1 
 platforms/astbmc/common.c    |    8 +
 platforms/astbmc/firestone.c |    1 
 platforms/astbmc/habanero.c  |    1 
 platforms/astbmc/palmetto.c  |    1 
 platforms/ibm-fsp/common.c   |    8 +
 platforms/ibm-fsp/firenze.c  |    1 
 platforms/ibm-fsp/ibm-fsp.h  |    1 
 13 files changed, 325 insertions(+), 3 deletions(-)
 create mode 100644 hw/prd.c

diff --git a/hw/Makefile.inc b/hw/Makefile.inc
index 83125be..bd9186b 100644
--- a/hw/Makefile.inc
+++ b/hw/Makefile.inc
@@ -4,7 +4,7 @@  SUBDIRS += hw
 HW_OBJS  = xscom.o chiptod.o gx.o cec.o lpc.o lpc-uart.o psi.o
 HW_OBJS += homer.o slw.o occ.o nx.o fsi-master.o centaur.o
 HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o p5ioc2.o p5ioc2-phb.o
-HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o
+HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o
 HW=hw/built-in.o
 
 include $(SRC)/hw/fsp/Makefile.inc
diff --git a/hw/prd.c b/hw/prd.c
new file mode 100644
index 0000000..b574558
--- /dev/null
+++ b/hw/prd.c
@@ -0,0 +1,293 @@ 
+/* Copyright 2014-2015 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <opal.h>
+#include <lock.h>
+#include <xscom.h>
+#include <chip.h>
+#include <opal-msg.h>
+
+struct prd_node {
+	struct list_node	link;
+	uint32_t		proc;
+	struct opal_prd_msg	prd_msg;
+};
+
+static LIST_HEAD(prd_free_list);
+static LIST_HEAD(prd_in_use_list);
+static uint32_t token;
+static int (*prd_functions[OPAL_PRD_MSG_TYPE_MAX])(void);
+static struct lock prd_lock = LOCK_UNLOCKED;
+
+/* Entry from the HW below */
+void prd_interrupt(uint32_t proc, enum opal_prd_msg_type type)
+{
+	struct prd_node *node;
+
+	lock(&prd_lock);
+	node = list_pop(&prd_free_list, struct prd_node, link);
+	if (!node) { /* Free list exhausted */
+		node = zalloc(sizeof(*node));
+		if (!node) {
+			prlog(PR_ERR, "Failed to allocate prd node\n");
+			unlock(&prd_lock);
+			return;
+		}
+	}
+
+	node->proc = proc;
+	node->prd_msg.type = type;
+	node->prd_msg.token = ++token;
+
+	list_add_tail(&prd_in_use_list, &node->link);
+	unlock(&prd_lock);
+
+	if (prd_functions[type])
+		prd_functions[type]();
+}
+
+static int prd_msg_attn_ack(void)
+{
+	struct prd_node *node_attn, *node_ack;
+	int rc;
+
+	lock(&prd_lock);
+	list_for_each(&prd_in_use_list, node_ack, link)
+		if (node_ack->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN_ACK)
+			break;
+
+	if (!node_ack) {
+		unlock(&prd_lock);
+		return OPAL_RESOURCE;
+	}
+
+	list_for_each(&prd_in_use_list, node_attn, link)
+		/* prd node of ATTN type that matches the token of ATTN_ACK */
+		if (node_attn->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN &&
+		    node_attn->prd_msg.token == node_ack->prd_msg.token)
+			break;
+
+	if (!node_attn) {
+		unlock(&prd_lock);
+		return OPAL_RESOURCE;
+	}
+
+	/* ATTN acknowledged by the host, unmask the IPOLL */
+	rc = xscom_write(node_attn->proc, PRD_IPOLL_MASK_REG,
+			 ~(node_ack->prd_msg.attn_ack.ipoll_ack) &
+			 PRD_IPOLL_MASK);
+
+	/* Done. Now move both the ATTN & ATTN_ACK nodes to the free list */
+	list_del(&node_attn->link);
+	list_add_tail(&prd_free_list, &node_attn->link);
+
+	list_del(&node_ack->link);
+	list_add_tail(&prd_free_list, &node_ack->link);
+	unlock(&prd_lock);
+
+	return rc;
+}
+
+static int prd_msg_attn(void)
+{
+	uint64_t status, mask;
+	struct prd_node *node;
+	uint64_t *prd_msg;
+	int rc;
+
+	lock(&prd_lock);
+	list_for_each(&prd_in_use_list, node, link)
+		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN)
+			break;
+	unlock(&prd_lock);
+
+	if (!node)
+		return OPAL_RESOURCE;
+
+	rc = xscom_read(node->proc, PRD_ERROR_STATUS, &status);
+	if (rc) {
+		prlog(PR_ERR, "Failed to read the ipoll status\n");
+		goto exit;
+	}
+
+	/* Mask IPOLL for all the bits set in the 'status' */
+	mask = status & PRD_IPOLL_MASK;
+	rc = xscom_write(node->proc, PRD_IPOLL_MASK_REG, mask);
+	if (rc) {
+		prlog(PR_ERR, "Failed to mask the IPOLL\n");
+		goto exit;
+	}
+
+	/* Fill up the attention fields */
+	node->prd_msg.attn.proc = node->proc;		/* params[1] */
+	node->prd_msg.attn.ipoll_status = status;	/* params[2] */
+	node->prd_msg.attn.ipoll_mask = mask;		/* params[3] */
+
+	prd_msg = (uint64_t *)&node->prd_msg;
+
+	rc = opal_queue_msg(OPAL_PRD_MSG, NULL, NULL, prd_msg[0], prd_msg[1],
+			    prd_msg[2], prd_msg[3]);
+	if (rc) {
+		prlog(PR_ERR, "Failed to queue up the ATTN\n");
+		goto exit;
+	}
+
+	return 0;
+
+	/* In the error case, delete the node from 'in_use' list and add it
+	 * to the 'free' list as the ACK is never going to come from the host
+	 */
+exit:
+	lock(&prd_lock);
+	list_del(&node->link);
+	list_add_tail(&prd_free_list, &node->link);
+	unlock(&prd_lock);
+
+	return rc;
+}
+
+static int prd_msg_finish(void)
+{
+	struct proc_chip *chip;
+	struct prd_node *node;
+	int rc;
+
+	/* Mask the interrupts on all the cores */
+	for_each_chip(chip) {
+		rc = xscom_write(chip->id, PRD_IPOLL_MASK_REG, PRD_IPOLL_MASK);
+		if (rc)
+			prlog(PR_ERR, "Failed to mask the IPOLL on %d chip\n",
+			      chip->id);
+	}
+
+	lock(&prd_lock);
+	list_for_each(&prd_in_use_list, node, link)
+		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_FINI)
+			break;
+
+	if (!node) { /* should not happen though */
+		unlock(&prd_lock);
+		return OPAL_RESOURCE;
+	}
+
+	list_del(&node->link);
+	list_add_tail(&prd_free_list, &node->link);
+	unlock(&prd_lock);
+
+	return 0;
+}
+
+static int prd_msg_init(void)
+{
+	struct proc_chip *chip;
+	struct prd_node *node;
+	/* XXX We will use it for enabling the functionalities
+	 * uint32_t version;
+	 */
+	uint64_t ipoll;
+	int rc;
+
+	lock(&prd_lock);
+	list_for_each(&prd_in_use_list, node, link)
+		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_INIT)
+			break;
+	unlock(&prd_lock);
+
+	if (!node)
+		return OPAL_RESOURCE;
+
+	ipoll = node->prd_msg.init.ipoll;
+	/* Unmask these ATTNs which are supported */
+	for_each_chip(chip) {
+		rc = xscom_write(chip->id, PRD_IPOLL_MASK_REG,
+				 ~ipoll & PRD_IPOLL_MASK);
+		if (rc) {
+			prlog(PR_ERR, "Failed to unmask the IPOLL on %d chip\n",
+			      chip->id);
+			rc = OPAL_HARDWARE;
+			goto exit;
+		}
+	}
+
+	memset(prd_functions, 0, sizeof(prd_functions));
+
+	/*
+	 * XXX
+	 * version = node->prd_msg.init.version;
+	 *
+	 * Use the version to initialise the prd_functions[]()
+	 * supported by the application, otherwise NULL.
+	 * Currently, supporting 'ATTN' & 'ATTN_ACK' in default
+	 */
+	prd_functions[OPAL_PRD_MSG_TYPE_ATTN] = prd_msg_attn;
+	prd_functions[OPAL_PRD_MSG_TYPE_ATTN_ACK] = prd_msg_attn_ack;
+
+exit:
+	lock(&prd_lock);
+	list_del(&node->link);
+	list_add_tail(&prd_free_list, &node->link);
+	unlock(&prd_lock);
+
+	return rc;
+}
+
+/* Entry from the host above */
+static int64_t opal_prd_msg(uint64_t *buffer)
+{
+	struct opal_prd_msg prd_msg;
+	struct prd_node *node;
+
+	memcpy(&prd_msg, buffer, sizeof(prd_msg));
+
+	if (!prd_functions[prd_msg.type])
+		return OPAL_UNSUPPORTED;
+
+	lock(&prd_lock);
+	node = list_pop(&prd_free_list, struct prd_node, link);
+	if (!node) { /* Free list exhausted */
+		node = zalloc(sizeof(*node));
+		if (!node) {
+			prlog(PR_ERR, "Failed to allocate prd node\n");
+			unlock(&prd_lock);
+			return OPAL_NO_MEM;
+		}
+	}
+
+	memcpy(&node->prd_msg, buffer, sizeof(node->prd_msg));
+	list_add_tail(&prd_in_use_list, &node->link);
+	unlock(&prd_lock);
+
+	return prd_functions[node->prd_msg.type]();
+}
+opal_call(OPAL_PRD_MSG, opal_prd_msg, 1);
+
+void prd_init(void)
+{
+	struct prd_node *node;
+	int i;
+
+	node = zalloc(sizeof(*node) * OPAL_PRD_MSG_TYPE_MAX);
+	if (!node)
+		return;
+
+	for (i = 0; i < OPAL_PRD_MSG_TYPE_MAX; i++)
+		list_add_tail(&prd_free_list, &node[i].link);
+
+	/* Basic init and finish functions */
+	prd_functions[OPAL_PRD_MSG_TYPE_INIT] = prd_msg_init;
+	prd_functions[OPAL_PRD_MSG_TYPE_FINI] = prd_msg_finish;
+}
diff --git a/hw/psi.c b/hw/psi.c
index 70403fd..72e3e43 100644
--- a/hw/psi.c
+++ b/hw/psi.c
@@ -290,8 +290,8 @@  static void handle_extra_interrupt(struct psi *psi)
 		 */
 		p8_i2c_interrupt(psi->chip_id);
 	}
-	if (val & PSIHB_IRQ_STAT_LOCAL_ERR)
-		printf("PSI: ATTN irq received\n");
+	if (val & PSIHB_IRQ_STAT_LOCAL_ERR && platform.local_irq)
+		platform.local_irq(psi->chip_id);
 	if (val & PSIHB_IRQ_STAT_HOST_ERR) {
 		if (platform.external_irq)
 			platform.external_irq(psi->chip_id);
diff --git a/include/platform.h b/include/platform.h
index b1aef49..783d6e2 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -100,6 +100,8 @@  struct platform {
 	 */
 	void		(*external_irq)(unsigned int chip_id);
 
+	void		(*local_irq)(unsigned int chip_id);
+
 	/*
 	 * nvram ops.
 	 *
diff --git a/include/skiboot.h b/include/skiboot.h
index 1b55638..384b3cb 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -27,6 +27,7 @@ 
 #include <errno.h>
 #include <bitutils.h>
 #include <types.h>
+#include <opal.h>
 
 #include <ccan/container_of/container_of.h>
 #include <ccan/list/list.h>
@@ -213,6 +214,10 @@  extern void uart_setup_opal_console(void);
 extern void occ_interrupt(uint32_t chip_id);
 extern void occ_send_dummy_interrupt(void);
 
+/* PRD */
+extern void prd_init(void);
+extern void prd_interrupt(uint32_t proc, enum opal_prd_msg_type type);
+
 /* Flatten device-tree */
 extern void *create_dtb(const struct dt_node *root);
 
diff --git a/platforms/astbmc/astbmc.h b/platforms/astbmc/astbmc.h
index cee475a..ebf05cf 100644
--- a/platforms/astbmc/astbmc.h
+++ b/platforms/astbmc/astbmc.h
@@ -23,6 +23,7 @@  extern int64_t astbmc_ipmi_reboot(void);
 extern int64_t astbmc_ipmi_power_down(uint64_t request);
 extern void astbmc_init(void);
 extern void astbmc_ext_irq(unsigned int chip_id);
+extern void astbmc_local_irq(unsigned int chip_id);
 extern int pnor_init(void);
 
 #endif /* __ASTBMC_H */
diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
index f9c988d..34e2735 100644
--- a/platforms/astbmc/common.c
+++ b/platforms/astbmc/common.c
@@ -44,11 +44,19 @@  void astbmc_ext_irq(unsigned int chip_id __unused)
 	bt_irq();
 }
 
+void astbmc_local_irq(unsigned int chip_id)
+{
+	prd_interrupt(chip_id, OPAL_PRD_MSG_TYPE_ATTN);
+}
+
 void astbmc_init(void)
 {
 	/* Initialize PNOR/NVRAM */
 	pnor_init();
 
+	/* Initialize PRD */
+	prd_init();
+
 	/* Register the BT interface with the IPMI layer */
 	bt_init();
 	ipmi_rtc_init();
diff --git a/platforms/astbmc/firestone.c b/platforms/astbmc/firestone.c
index 4a51e3f..3c2b778 100644
--- a/platforms/astbmc/firestone.c
+++ b/platforms/astbmc/firestone.c
@@ -39,6 +39,7 @@  DECLARE_PLATFORM(firestone) = {
 	.probe			= firestone_probe,
 	.init			= astbmc_init,
 	.external_irq		= astbmc_ext_irq,
+	.local_irq		= astbmc_local_irq,
 	.cec_power_down         = astbmc_ipmi_power_down,
 	.cec_reboot             = astbmc_ipmi_reboot,
 };
diff --git a/platforms/astbmc/habanero.c b/platforms/astbmc/habanero.c
index d442d1f..05346d5 100644
--- a/platforms/astbmc/habanero.c
+++ b/platforms/astbmc/habanero.c
@@ -47,6 +47,7 @@  DECLARE_PLATFORM(habanero) = {
 	.probe			= habanero_probe,
 	.init			= astbmc_init,
 	.external_irq		= astbmc_ext_irq,
+	.local_irq		= astbmc_local_irq,
 	.cec_power_down         = astbmc_ipmi_power_down,
 	.cec_reboot             = astbmc_ipmi_reboot,
 };
diff --git a/platforms/astbmc/palmetto.c b/platforms/astbmc/palmetto.c
index a0030e8..9c4850f 100644
--- a/platforms/astbmc/palmetto.c
+++ b/platforms/astbmc/palmetto.c
@@ -48,6 +48,7 @@  DECLARE_PLATFORM(palmetto) = {
 	.probe			= palmetto_probe,
 	.init			= astbmc_init,
 	.external_irq		= astbmc_ext_irq,
+	.local_irq		= astbmc_local_irq,
 	.cec_power_down         = astbmc_ipmi_power_down,
 	.cec_reboot             = astbmc_ipmi_reboot,
 	.elog_commit		= ipmi_elog_commit,
diff --git a/platforms/ibm-fsp/common.c b/platforms/ibm-fsp/common.c
index 5eb2a14..c6fcbc2 100644
--- a/platforms/ibm-fsp/common.c
+++ b/platforms/ibm-fsp/common.c
@@ -23,6 +23,11 @@ 
 
 #include "ibm-fsp.h"
 
+void ibm_fsp_local_irq(unsigned int chip_id)
+{
+	prd_interrupt(chip_id, OPAL_PRD_MSG_TYPE_ATTN);
+}
+
 static void map_debug_areas(void)
 {
 	uint64_t t, i;
@@ -90,6 +95,9 @@  void ibm_fsp_init(void)
 	/* Get ready to receive OCC related messages */
 	occ_fsp_init();
 
+	/* Initialize PRD access */
+	prd_init();
+
 	/* Get ready to receive Memory [Un]corretable Error messages. */
 	fsp_memory_err_init();
 
diff --git a/platforms/ibm-fsp/firenze.c b/platforms/ibm-fsp/firenze.c
index 258a6b3..89b13b2 100644
--- a/platforms/ibm-fsp/firenze.c
+++ b/platforms/ibm-fsp/firenze.c
@@ -393,6 +393,7 @@  DECLARE_PLATFORM(firenze) = {
 	.pci_setup_phb		= firenze_setup_phb,
 	.pci_get_slot_info	= firenze_get_slot_info,
 	.pci_probe_complete	= firenze_send_pci_inventory,
+	.local_irq		= ibm_fsp_local_irq,
 	.nvram_info		= fsp_nvram_info,
 	.nvram_start_read	= fsp_nvram_start_read,
 	.nvram_write		= fsp_nvram_write,
diff --git a/platforms/ibm-fsp/ibm-fsp.h b/platforms/ibm-fsp/ibm-fsp.h
index 160038a..bdd9eaf 100644
--- a/platforms/ibm-fsp/ibm-fsp.h
+++ b/platforms/ibm-fsp/ibm-fsp.h
@@ -22,6 +22,7 @@  extern void ibm_fsp_init(void);
 
 extern int64_t ibm_fsp_cec_power_down(uint64_t request);
 extern int64_t ibm_fsp_cec_reboot(void);
+extern void ibm_fsp_local_irq(uint32_t chip_id);
 
 struct errorlog;
 extern int elog_fsp_commit(struct errorlog *buf);