diff mbox series

[SRU,F:linux-bluefield,v1,1/1] UBUNTU: SAUCE: ipmb_host.c: Fix slow transactions

Message ID 1617399650-28927-2-git-send-email-asmaa@nvidia.com
State New
Headers show
Series UBUNTU: SAUCE: ipmb_host.c: Fix slow transactions | expand

Commit Message

Asmaa Mnebhi April 2, 2021, 9:40 p.m. UTC
BugLink: https://bugs.launchpad.net/bugs/1922393

The previous ipmb_host patch broke customers' IPMB tests. They
requested to make the ipmb_host response time as long as before.
In the case where the I2C bus is made very busy, the ipmb_host driver
just drops slow/delayed responses. This fix elongates the timeout of the response. So restore the previous stable ipmb_host patch.
This patch also fixes a crash which occurs after powercycling certain BlueField-2 systems.
The crash is due to the handshake which takes too long to wait for a response at boot time.

Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
Reviewed-by: David Thompson <davthompson@nvidia.com>
Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
---
 drivers/char/ipmi/ipmb_host.c | 131 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 111 insertions(+), 20 deletions(-)

Comments

Stefan Bader April 8, 2021, 6:18 a.m. UTC | #1
On 02.04.21 23:40, Asmaa Mnebhi wrote:
> BugLink: https://bugs.launchpad.net/bugs/1922393
> 
> The previous ipmb_host patch broke customers' IPMB tests. They
> requested to make the ipmb_host response time as long as before.
> In the case where the I2C bus is made very busy, the ipmb_host driver
> just drops slow/delayed responses. This fix elongates the timeout of the response. So restore the previous stable ipmb_host patch.
> This patch also fixes a crash which occurs after powercycling certain BlueField-2 systems.
> The crash is due to the handshake which takes too long to wait for a response at boot time.
> 
> Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
> Reviewed-by: David Thompson <davthompson@nvidia.com>
> Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>   drivers/char/ipmi/ipmb_host.c | 131 +++++++++++++++++++++++++++++++++++-------
>   1 file changed, 111 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/char/ipmi/ipmb_host.c b/drivers/char/ipmi/ipmb_host.c
> index a408c1a..5cc9d92 100644
> --- a/drivers/char/ipmi/ipmb_host.c
> +++ b/drivers/char/ipmi/ipmb_host.c
> @@ -3,7 +3,7 @@
>   /*
>    * Copyright 2020, NVIDIA Corporation. All rights reserved.
>    *
> - * This was inspired by Brendan Higgins' bt-i2c driver.
> + * This was inspired by Brendan Higgins' bt-i2c driver.
>    *
>    */
>   
> @@ -24,6 +24,8 @@
>   
>   #define	IPMB_TIMEOUT			(msecs_to_jiffies(20000))
>   
> +static bool handshake_rsp = false;
> +
>   /*
>    * The least we expect in an IPMB message is:
>    * netfn_rs_lun, checksum1, rq_sa, rq_seq_rq_lun,
> @@ -44,7 +46,7 @@
>   #define	IPMB_MAX_SMI_SIZE		125
>   #define	IPMB_SMI_MSG_HEADER_SIZE	2
>   
> -#define	IPMB_SEQ_MAX			1024
> +#define	IPMB_SEQ_MAX			64
>   
>   #define	MAX_BUF_SIZE			122
>   
> @@ -132,6 +134,8 @@ struct ipmb_master {
>   	struct list_head		rsp_queue;
>   	atomic_t			rsp_queue_len;
>   	wait_queue_head_t		wait_queue;
> +
> +	bool				slave_registered;
>   };
>   
>   /* +1 is for the checksum integrated in payload */
> @@ -206,25 +210,20 @@ static int ipmb_handle_response(struct ipmb_master *master)
>    * i2c_master_send
>    */
>   static int ipmb_send_request(struct ipmb_master *master,
> -				struct request *request)
> +				struct request *request, u8 i2c_msg_len)
>   {
>   	struct i2c_client *client = master->client;
>   	unsigned long timeout, read_time;
>   	u8 *buf = (u8 *) request;
>   	int ret;
> -	int msg_len;
>   	union i2c_smbus_data data;
>   
>   	/*
> -	 * subtract netfn_rs_lun payload since it is passed as arg
> +	 * skip netfn_rs_lun payload since it is passed as arg
>   	 * 5 to i2c_smbus_xfer.
>   	 */
> -	msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
> -	if (msg_len > I2C_SMBUS_BLOCK_MAX)
> -		msg_len = I2C_SMBUS_BLOCK_MAX;
> -
> -	data.block[0] = msg_len;
> -	memcpy(&data.block[1], buf + 1, msg_len);
> +	data.block[0] = i2c_msg_len;
> +	memcpy(&data.block[1], buf + 1, i2c_msg_len);
>   
>   	timeout = jiffies + msecs_to_jiffies(WRITE_TIMEOUT);
>   	do {
> @@ -387,11 +386,12 @@ static int ipmb_receive_rsp(struct ipmb_master *master,
>   {
>   	struct ipmb_rsp_elem	*queue_elem;
>   	int			ret = 1;
> +	unsigned long		flags;
>   
> -	spin_lock_irq(&master->lock);
> +	spin_lock_irqsave(&master->lock, flags);
>   
>   	if (list_empty(&master->rsp_queue)) {
> -		spin_unlock_irq(&master->lock);
> +		spin_unlock_irqrestore(&master->lock, flags);
>   
>   		ret = wait_event_interruptible_timeout(master->wait_queue,
>   			!list_empty(&master->rsp_queue), IPMB_TIMEOUT);
> @@ -399,16 +399,17 @@ static int ipmb_receive_rsp(struct ipmb_master *master,
>   		if (ret <= 0)
>   			return ret;
>   
> -		spin_lock_irq(&master->lock);
> +		spin_lock_irqsave(&master->lock, flags);
>   	}
>   
>   	queue_elem = list_first_entry(&master->rsp_queue,
>   			struct ipmb_rsp_elem, list);
> +
>   	memcpy(ipmb_rsp, &queue_elem->rsp, sizeof(struct response));
>   	list_del(&queue_elem->list);
>   	kfree(queue_elem);
>   	atomic_dec(&master->rsp_queue_len);
> -	spin_unlock_irq(&master->lock);
> +	spin_unlock_irqrestore(&master->lock, flags);
>   
>   	return ret;
>   }
> @@ -438,6 +439,8 @@ static void ipmb_send_workfn(struct work_struct *work)
>   	int				rsp_msg_len;
>   	u8 				*buf_rsp;
>   	u8 				verify_checksum;
> +	u8				seq;
> +	u8				i2c_msg_len;
>   
>   	u8 *buf = (u8 *) &ipmb_req_msg;
>   
> @@ -460,11 +463,13 @@ static void ipmb_send_workfn(struct work_struct *work)
>   		return;
>   	}
>   
> -	if (!ipmb_assign_seq(master, req_msg, &ipmb_req_msg.rq_seq_rq_lun)) {
> +	if (!ipmb_assign_seq(master, req_msg, &seq)) {
>   		ipmb_error_reply(master, req_msg, IPMI_NODE_BUSY_ERR);
>   		return;
>   	}
>   
> +	ipmb_req_msg.rq_seq_rq_lun = seq << 2;
> +
>   	msg_len = ipmi_smi_to_ipmb_len(smi_msg_size);
>   
>   	/* Responder  */
> @@ -481,7 +486,15 @@ static void ipmb_send_workfn(struct work_struct *work)
>   	ipmb_req_msg.payload[ipmb_payload_len((size_t)msg_len)] =
>   		ipmb_checksum(buf + 2, msg_len - 2, 0);
>   
> -	if (ipmb_send_request(master, &ipmb_req_msg) < 0) {
> +	/*
> +	 * subtract netfn_rs_lun payload since it is passed as arg
> +	 * 5 to i2c_smbus_xfer.
> +	 */
> +	i2c_msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
> +	if (i2c_msg_len > I2C_SMBUS_BLOCK_MAX)
> +		i2c_msg_len = I2C_SMBUS_BLOCK_MAX;
> +
> +	if (ipmb_send_request(master, &ipmb_req_msg, i2c_msg_len) < 0) {
>   		ipmb_free_seq(master, (GET_SEQ(ipmb_req_msg.rq_seq_rq_lun)));
>   		ipmb_error_reply(master, req_msg, IPMI_BUS_ERR);
>   		spin_lock_irqsave(&master->lock, flags);
> @@ -621,6 +634,11 @@ static int ipmb_slave_cb(struct i2c_client *client,
>   	struct ipmb_master *master = i2c_get_clientdata(client);
>   	u8 *buf;
>   
> +	if (!handshake_rsp) {
> +		handshake_rsp = true;
> +		return 0;
> +	}
> +
>   	spin_lock(&master->lock);
>   
>   	switch (event) {
> @@ -665,6 +683,56 @@ static unsigned short slave_add = 0x0;
>   module_param(slave_add, ushort, 0);
>   MODULE_PARM_DESC(slave_add, "The i2c slave address of the responding device");
>   
> +#define GET_DEVICE_ID_MSG_LEN 7
> +
> +static bool ipmb_detect(struct ipmb_master *master)
> +{
> +	struct ipmb_rsp_elem *q_elem, *tmp_q_elem;
> +	struct request request;
> +	u8 *buf = (u8 *) &request;
> +	struct device dev;
> +	int retry = 2000;
> +	u8 i2c_msg_len;
> +	int ret;
> +
> +	/* Subtract rs sa and netfn */
> +	i2c_msg_len = GET_DEVICE_ID_MSG_LEN - 2;
> +
> +	dev = master->client->dev;
> +
> +	request.netfn_rs_lun = IPMI_NETFN_APP_REQUEST << 2;
> +	request.checksum1 = ipmb_checksum1((u8)(master->rs_sa << 1),
> +						request.netfn_rs_lun);
> +	request.rq_sa = (u8)(master->client->addr << 1);
> +	request.rq_seq_rq_lun = 0;
> +	request.cmd = IPMI_GET_DEVICE_ID_CMD;
> +	request.payload[0] = ipmb_checksum(buf + 2, 3, 0);
> +
> +	ret = ipmb_send_request(master, &request, i2c_msg_len);
> +	if (ret < 0) {
> +		dev_err(&dev, "ERROR: ipmb_send_request failed during ipmb detection\n");
> +		return false;
> +	}
> +
> +	while(!handshake_rsp && (retry > 0)) {
> +		mdelay(10);
> +		retry--;
> +	}
> +
> +	if (!retry) {
> +		dev_err(&dev, "ERROR: Response timed out during ipmb detection\n");
> +		return false;
> +	}
> +
> +	list_for_each_entry_safe(q_elem, tmp_q_elem, &master->rsp_queue, list){
> +		list_del(&q_elem->list);
> +		kfree(q_elem);
> +		atomic_dec(&master->rsp_queue_len);
> +	}
> +
> +	return true;
> +}
> +
>   static int ipmb_probe(struct i2c_client *client,
>   			const struct i2c_device_id *id)
>   {
> @@ -702,12 +770,30 @@ static int ipmb_probe(struct i2c_client *client,
>   	if (ret)
>   		return ret;
>   
> +	master->slave_registered = true;
> +
> +	/*
> +	 * Send a simple message "get device ID" to detect whether the BMC is responsive or not.
> +	 * This is necessary before calling ipmi_register_smi which executes a handshake with the
> +	 * slave device and can hold the lock for a very long if the BMC is not up. This long wait
> +	 * at boot time causes the system to crash.
> +	 */
> +	if (!ipmb_detect(master)) {
> +		dev_err(&client->dev, "Unable to get response from slave device at this time\n");
> +		i2c_slave_unregister(client);
> +		master->slave_registered = false;
> +		return -ENXIO;
> +	}
> +
>   	ret = ipmi_register_smi(&ipmb_smi_handlers, master,
>   				&client->dev,
>   				(unsigned char)master->rs_sa);
>   
> -	if (ret)
> +	if (ret) {
> +		dev_err(&client->dev, "ipmi_register_smi failed with ret = %d\n", ret);
>   		i2c_slave_unregister(client);
> +		master->slave_registered = false;
> +	}
>   
>   	return ret;
>   }
> @@ -717,8 +803,13 @@ static int ipmb_remove(struct i2c_client *client)
>   	struct ipmb_master *master;
>   
>   	master = i2c_get_clientdata(client);
> -	ipmi_unregister_smi(master->intf);
> -	i2c_slave_unregister(client);
> +	if (!master)
> +		return 0;
> +
> +	if (master->slave_registered) {
> +		ipmi_unregister_smi(master->intf);
> +		i2c_slave_unregister(client);
> +	}
>   
>   	return 0;
>   }
>
Kleber Sacilotto de Souza April 8, 2021, 9:17 a.m. UTC | #2
On 02.04.21 23:40, Asmaa Mnebhi wrote:
> BugLink: https://bugs.launchpad.net/bugs/1922393
> 
> The previous ipmb_host patch broke customers' IPMB tests. They
> requested to make the ipmb_host response time as long as before.
> In the case where the I2C bus is made very busy, the ipmb_host driver
> just drops slow/delayed responses. This fix elongates the timeout of the response. So restore the previous stable ipmb_host patch.
> This patch also fixes a crash which occurs after powercycling certain BlueField-2 systems.
> The crash is due to the handshake which takes too long to wait for a response at boot time.
> 
> Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
> Reviewed-by: David Thompson <davthompson@nvidia.com>
> Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>

Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>

Thanks

> ---
>   drivers/char/ipmi/ipmb_host.c | 131 +++++++++++++++++++++++++++++++++++-------
>   1 file changed, 111 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/char/ipmi/ipmb_host.c b/drivers/char/ipmi/ipmb_host.c
> index a408c1a..5cc9d92 100644
> --- a/drivers/char/ipmi/ipmb_host.c
> +++ b/drivers/char/ipmi/ipmb_host.c
> @@ -3,7 +3,7 @@
>   /*
>    * Copyright 2020, NVIDIA Corporation. All rights reserved.
>    *
> - * This was inspired by Brendan Higgins' bt-i2c driver.
> + * This was inspired by Brendan Higgins' bt-i2c driver.
>    *
>    */
>   
> @@ -24,6 +24,8 @@
>   
>   #define	IPMB_TIMEOUT			(msecs_to_jiffies(20000))
>   
> +static bool handshake_rsp = false;
> +
>   /*
>    * The least we expect in an IPMB message is:
>    * netfn_rs_lun, checksum1, rq_sa, rq_seq_rq_lun,
> @@ -44,7 +46,7 @@
>   #define	IPMB_MAX_SMI_SIZE		125
>   #define	IPMB_SMI_MSG_HEADER_SIZE	2
>   
> -#define	IPMB_SEQ_MAX			1024
> +#define	IPMB_SEQ_MAX			64
>   
>   #define	MAX_BUF_SIZE			122
>   
> @@ -132,6 +134,8 @@ struct ipmb_master {
>   	struct list_head		rsp_queue;
>   	atomic_t			rsp_queue_len;
>   	wait_queue_head_t		wait_queue;
> +
> +	bool				slave_registered;
>   };
>   
>   /* +1 is for the checksum integrated in payload */
> @@ -206,25 +210,20 @@ static int ipmb_handle_response(struct ipmb_master *master)
>    * i2c_master_send
>    */
>   static int ipmb_send_request(struct ipmb_master *master,
> -				struct request *request)
> +				struct request *request, u8 i2c_msg_len)
>   {
>   	struct i2c_client *client = master->client;
>   	unsigned long timeout, read_time;
>   	u8 *buf = (u8 *) request;
>   	int ret;
> -	int msg_len;
>   	union i2c_smbus_data data;
>   
>   	/*
> -	 * subtract netfn_rs_lun payload since it is passed as arg
> +	 * skip netfn_rs_lun payload since it is passed as arg
>   	 * 5 to i2c_smbus_xfer.
>   	 */
> -	msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
> -	if (msg_len > I2C_SMBUS_BLOCK_MAX)
> -		msg_len = I2C_SMBUS_BLOCK_MAX;
> -
> -	data.block[0] = msg_len;
> -	memcpy(&data.block[1], buf + 1, msg_len);
> +	data.block[0] = i2c_msg_len;
> +	memcpy(&data.block[1], buf + 1, i2c_msg_len);
>   
>   	timeout = jiffies + msecs_to_jiffies(WRITE_TIMEOUT);
>   	do {
> @@ -387,11 +386,12 @@ static int ipmb_receive_rsp(struct ipmb_master *master,
>   {
>   	struct ipmb_rsp_elem	*queue_elem;
>   	int			ret = 1;
> +	unsigned long		flags;
>   
> -	spin_lock_irq(&master->lock);
> +	spin_lock_irqsave(&master->lock, flags);
>   
>   	if (list_empty(&master->rsp_queue)) {
> -		spin_unlock_irq(&master->lock);
> +		spin_unlock_irqrestore(&master->lock, flags);
>   
>   		ret = wait_event_interruptible_timeout(master->wait_queue,
>   			!list_empty(&master->rsp_queue), IPMB_TIMEOUT);
> @@ -399,16 +399,17 @@ static int ipmb_receive_rsp(struct ipmb_master *master,
>   		if (ret <= 0)
>   			return ret;
>   
> -		spin_lock_irq(&master->lock);
> +		spin_lock_irqsave(&master->lock, flags);
>   	}
>   
>   	queue_elem = list_first_entry(&master->rsp_queue,
>   			struct ipmb_rsp_elem, list);
> +
>   	memcpy(ipmb_rsp, &queue_elem->rsp, sizeof(struct response));
>   	list_del(&queue_elem->list);
>   	kfree(queue_elem);
>   	atomic_dec(&master->rsp_queue_len);
> -	spin_unlock_irq(&master->lock);
> +	spin_unlock_irqrestore(&master->lock, flags);
>   
>   	return ret;
>   }
> @@ -438,6 +439,8 @@ static void ipmb_send_workfn(struct work_struct *work)
>   	int				rsp_msg_len;
>   	u8 				*buf_rsp;
>   	u8 				verify_checksum;
> +	u8				seq;
> +	u8				i2c_msg_len;
>   
>   	u8 *buf = (u8 *) &ipmb_req_msg;
>   
> @@ -460,11 +463,13 @@ static void ipmb_send_workfn(struct work_struct *work)
>   		return;
>   	}
>   
> -	if (!ipmb_assign_seq(master, req_msg, &ipmb_req_msg.rq_seq_rq_lun)) {
> +	if (!ipmb_assign_seq(master, req_msg, &seq)) {
>   		ipmb_error_reply(master, req_msg, IPMI_NODE_BUSY_ERR);
>   		return;
>   	}
>   
> +	ipmb_req_msg.rq_seq_rq_lun = seq << 2;
> +
>   	msg_len = ipmi_smi_to_ipmb_len(smi_msg_size);
>   
>   	/* Responder  */
> @@ -481,7 +486,15 @@ static void ipmb_send_workfn(struct work_struct *work)
>   	ipmb_req_msg.payload[ipmb_payload_len((size_t)msg_len)] =
>   		ipmb_checksum(buf + 2, msg_len - 2, 0);
>   
> -	if (ipmb_send_request(master, &ipmb_req_msg) < 0) {
> +	/*
> +	 * subtract netfn_rs_lun payload since it is passed as arg
> +	 * 5 to i2c_smbus_xfer.
> +	 */
> +	i2c_msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
> +	if (i2c_msg_len > I2C_SMBUS_BLOCK_MAX)
> +		i2c_msg_len = I2C_SMBUS_BLOCK_MAX;
> +
> +	if (ipmb_send_request(master, &ipmb_req_msg, i2c_msg_len) < 0) {
>   		ipmb_free_seq(master, (GET_SEQ(ipmb_req_msg.rq_seq_rq_lun)));
>   		ipmb_error_reply(master, req_msg, IPMI_BUS_ERR);
>   		spin_lock_irqsave(&master->lock, flags);
> @@ -621,6 +634,11 @@ static int ipmb_slave_cb(struct i2c_client *client,
>   	struct ipmb_master *master = i2c_get_clientdata(client);
>   	u8 *buf;
>   
> +	if (!handshake_rsp) {
> +		handshake_rsp = true;
> +		return 0;
> +	}
> +
>   	spin_lock(&master->lock);
>   
>   	switch (event) {
> @@ -665,6 +683,56 @@ static unsigned short slave_add = 0x0;
>   module_param(slave_add, ushort, 0);
>   MODULE_PARM_DESC(slave_add, "The i2c slave address of the responding device");
>   
> +#define GET_DEVICE_ID_MSG_LEN 7
> +
> +static bool ipmb_detect(struct ipmb_master *master)
> +{
> +	struct ipmb_rsp_elem *q_elem, *tmp_q_elem;
> +	struct request request;
> +	u8 *buf = (u8 *) &request;
> +	struct device dev;
> +	int retry = 2000;
> +	u8 i2c_msg_len;
> +	int ret;
> +
> +	/* Subtract rs sa and netfn */
> +	i2c_msg_len = GET_DEVICE_ID_MSG_LEN - 2;
> +
> +	dev = master->client->dev;
> +
> +	request.netfn_rs_lun = IPMI_NETFN_APP_REQUEST << 2;
> +	request.checksum1 = ipmb_checksum1((u8)(master->rs_sa << 1),
> +						request.netfn_rs_lun);
> +	request.rq_sa = (u8)(master->client->addr << 1);
> +	request.rq_seq_rq_lun = 0;
> +	request.cmd = IPMI_GET_DEVICE_ID_CMD;
> +	request.payload[0] = ipmb_checksum(buf + 2, 3, 0);
> +
> +	ret = ipmb_send_request(master, &request, i2c_msg_len);
> +	if (ret < 0) {
> +		dev_err(&dev, "ERROR: ipmb_send_request failed during ipmb detection\n");
> +		return false;
> +	}
> +
> +	while(!handshake_rsp && (retry > 0)) {
> +		mdelay(10);
> +		retry--;
> +	}
> +
> +	if (!retry) {
> +		dev_err(&dev, "ERROR: Response timed out during ipmb detection\n");
> +		return false;
> +	}
> +
> +	list_for_each_entry_safe(q_elem, tmp_q_elem, &master->rsp_queue, list){
> +		list_del(&q_elem->list);
> +		kfree(q_elem);
> +		atomic_dec(&master->rsp_queue_len);
> +	}
> +
> +	return true;
> +}
> +
>   static int ipmb_probe(struct i2c_client *client,
>   			const struct i2c_device_id *id)
>   {
> @@ -702,12 +770,30 @@ static int ipmb_probe(struct i2c_client *client,
>   	if (ret)
>   		return ret;
>   
> +	master->slave_registered = true;
> +
> +	/*
> +	 * Send a simple message "get device ID" to detect whether the BMC is responsive or not.
> +	 * This is necessary before calling ipmi_register_smi which executes a handshake with the
> +	 * slave device and can hold the lock for a very long if the BMC is not up. This long wait
> +	 * at boot time causes the system to crash.
> +	 */
> +	if (!ipmb_detect(master)) {
> +		dev_err(&client->dev, "Unable to get response from slave device at this time\n");
> +		i2c_slave_unregister(client);
> +		master->slave_registered = false;
> +		return -ENXIO;
> +	}
> +
>   	ret = ipmi_register_smi(&ipmb_smi_handlers, master,
>   				&client->dev,
>   				(unsigned char)master->rs_sa);
>   
> -	if (ret)
> +	if (ret) {
> +		dev_err(&client->dev, "ipmi_register_smi failed with ret = %d\n", ret);
>   		i2c_slave_unregister(client);
> +		master->slave_registered = false;
> +	}
>   
>   	return ret;
>   }
> @@ -717,8 +803,13 @@ static int ipmb_remove(struct i2c_client *client)
>   	struct ipmb_master *master;
>   
>   	master = i2c_get_clientdata(client);
> -	ipmi_unregister_smi(master->intf);
> -	i2c_slave_unregister(client);
> +	if (!master)
> +		return 0;
> +
> +	if (master->slave_registered) {
> +		ipmi_unregister_smi(master->intf);
> +		i2c_slave_unregister(client);
> +	}
>   
>   	return 0;
>   }
>
diff mbox series

Patch

diff --git a/drivers/char/ipmi/ipmb_host.c b/drivers/char/ipmi/ipmb_host.c
index a408c1a..5cc9d92 100644
--- a/drivers/char/ipmi/ipmb_host.c
+++ b/drivers/char/ipmi/ipmb_host.c
@@ -3,7 +3,7 @@ 
 /*
  * Copyright 2020, NVIDIA Corporation. All rights reserved.
  *
- * This was inspired by Brendan Higgins' bt-i2c driver. 
+ * This was inspired by Brendan Higgins' bt-i2c driver.
  *
  */
 
@@ -24,6 +24,8 @@ 
 
 #define	IPMB_TIMEOUT			(msecs_to_jiffies(20000))
 
+static bool handshake_rsp = false;
+
 /*
  * The least we expect in an IPMB message is:
  * netfn_rs_lun, checksum1, rq_sa, rq_seq_rq_lun,
@@ -44,7 +46,7 @@ 
 #define	IPMB_MAX_SMI_SIZE		125
 #define	IPMB_SMI_MSG_HEADER_SIZE	2
 
-#define	IPMB_SEQ_MAX			1024
+#define	IPMB_SEQ_MAX			64
 
 #define	MAX_BUF_SIZE			122
 
@@ -132,6 +134,8 @@  struct ipmb_master {
 	struct list_head		rsp_queue;
 	atomic_t			rsp_queue_len;
 	wait_queue_head_t		wait_queue;
+
+	bool				slave_registered;
 };
 
 /* +1 is for the checksum integrated in payload */
@@ -206,25 +210,20 @@  static int ipmb_handle_response(struct ipmb_master *master)
  * i2c_master_send
  */
 static int ipmb_send_request(struct ipmb_master *master,
-				struct request *request)
+				struct request *request, u8 i2c_msg_len)
 {
 	struct i2c_client *client = master->client;
 	unsigned long timeout, read_time;
 	u8 *buf = (u8 *) request;
 	int ret;
-	int msg_len;
 	union i2c_smbus_data data;
 
 	/*
-	 * subtract netfn_rs_lun payload since it is passed as arg
+	 * skip netfn_rs_lun payload since it is passed as arg
 	 * 5 to i2c_smbus_xfer.
 	 */
-	msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
-	if (msg_len > I2C_SMBUS_BLOCK_MAX)
-		msg_len = I2C_SMBUS_BLOCK_MAX;
-
-	data.block[0] = msg_len;
-	memcpy(&data.block[1], buf + 1, msg_len);
+	data.block[0] = i2c_msg_len;
+	memcpy(&data.block[1], buf + 1, i2c_msg_len);
 
 	timeout = jiffies + msecs_to_jiffies(WRITE_TIMEOUT);
 	do {
@@ -387,11 +386,12 @@  static int ipmb_receive_rsp(struct ipmb_master *master,
 {
 	struct ipmb_rsp_elem	*queue_elem;
 	int			ret = 1;
+	unsigned long		flags;
 
-	spin_lock_irq(&master->lock);
+	spin_lock_irqsave(&master->lock, flags);
 
 	if (list_empty(&master->rsp_queue)) {
-		spin_unlock_irq(&master->lock);
+		spin_unlock_irqrestore(&master->lock, flags);
 
 		ret = wait_event_interruptible_timeout(master->wait_queue,
 			!list_empty(&master->rsp_queue), IPMB_TIMEOUT);
@@ -399,16 +399,17 @@  static int ipmb_receive_rsp(struct ipmb_master *master,
 		if (ret <= 0)
 			return ret;
 
-		spin_lock_irq(&master->lock);
+		spin_lock_irqsave(&master->lock, flags);
 	}
 
 	queue_elem = list_first_entry(&master->rsp_queue,
 			struct ipmb_rsp_elem, list);
+
 	memcpy(ipmb_rsp, &queue_elem->rsp, sizeof(struct response));
 	list_del(&queue_elem->list);
 	kfree(queue_elem);
 	atomic_dec(&master->rsp_queue_len);
-	spin_unlock_irq(&master->lock);
+	spin_unlock_irqrestore(&master->lock, flags);
 
 	return ret;
 }
@@ -438,6 +439,8 @@  static void ipmb_send_workfn(struct work_struct *work)
 	int				rsp_msg_len;
 	u8 				*buf_rsp;
 	u8 				verify_checksum;
+	u8				seq;
+	u8				i2c_msg_len;
 
 	u8 *buf = (u8 *) &ipmb_req_msg;
 
@@ -460,11 +463,13 @@  static void ipmb_send_workfn(struct work_struct *work)
 		return;
 	}
 
-	if (!ipmb_assign_seq(master, req_msg, &ipmb_req_msg.rq_seq_rq_lun)) {
+	if (!ipmb_assign_seq(master, req_msg, &seq)) {
 		ipmb_error_reply(master, req_msg, IPMI_NODE_BUSY_ERR);
 		return;
 	}
 
+	ipmb_req_msg.rq_seq_rq_lun = seq << 2;
+
 	msg_len = ipmi_smi_to_ipmb_len(smi_msg_size);
 
 	/* Responder  */
@@ -481,7 +486,15 @@  static void ipmb_send_workfn(struct work_struct *work)
 	ipmb_req_msg.payload[ipmb_payload_len((size_t)msg_len)] =
 		ipmb_checksum(buf + 2, msg_len - 2, 0);
 
-	if (ipmb_send_request(master, &ipmb_req_msg) < 0) {
+	/*
+	 * subtract netfn_rs_lun payload since it is passed as arg
+	 * 5 to i2c_smbus_xfer.
+	 */
+	i2c_msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
+	if (i2c_msg_len > I2C_SMBUS_BLOCK_MAX)
+		i2c_msg_len = I2C_SMBUS_BLOCK_MAX;
+
+	if (ipmb_send_request(master, &ipmb_req_msg, i2c_msg_len) < 0) {
 		ipmb_free_seq(master, (GET_SEQ(ipmb_req_msg.rq_seq_rq_lun)));
 		ipmb_error_reply(master, req_msg, IPMI_BUS_ERR);
 		spin_lock_irqsave(&master->lock, flags);
@@ -621,6 +634,11 @@  static int ipmb_slave_cb(struct i2c_client *client,
 	struct ipmb_master *master = i2c_get_clientdata(client);
 	u8 *buf;
 
+	if (!handshake_rsp) {
+		handshake_rsp = true;
+		return 0;
+	}
+
 	spin_lock(&master->lock);
 
 	switch (event) {
@@ -665,6 +683,56 @@  static unsigned short slave_add = 0x0;
 module_param(slave_add, ushort, 0);
 MODULE_PARM_DESC(slave_add, "The i2c slave address of the responding device");
 
+#define GET_DEVICE_ID_MSG_LEN 7
+
+static bool ipmb_detect(struct ipmb_master *master)
+{
+	struct ipmb_rsp_elem *q_elem, *tmp_q_elem;
+	struct request request;
+	u8 *buf = (u8 *) &request;
+	struct device dev;
+	int retry = 2000;
+	u8 i2c_msg_len;
+	int ret;
+
+	/* Subtract rs sa and netfn */
+	i2c_msg_len = GET_DEVICE_ID_MSG_LEN - 2;
+
+	dev = master->client->dev;
+
+	request.netfn_rs_lun = IPMI_NETFN_APP_REQUEST << 2;
+	request.checksum1 = ipmb_checksum1((u8)(master->rs_sa << 1),
+						request.netfn_rs_lun);
+	request.rq_sa = (u8)(master->client->addr << 1);
+	request.rq_seq_rq_lun = 0;
+	request.cmd = IPMI_GET_DEVICE_ID_CMD;
+	request.payload[0] = ipmb_checksum(buf + 2, 3, 0);
+
+	ret = ipmb_send_request(master, &request, i2c_msg_len);
+	if (ret < 0) {
+		dev_err(&dev, "ERROR: ipmb_send_request failed during ipmb detection\n");
+		return false;
+	}
+
+	while(!handshake_rsp && (retry > 0)) {
+		mdelay(10);
+		retry--;
+	}
+
+	if (!retry) {
+		dev_err(&dev, "ERROR: Response timed out during ipmb detection\n");
+		return false;
+	}
+
+	list_for_each_entry_safe(q_elem, tmp_q_elem, &master->rsp_queue, list){
+		list_del(&q_elem->list);
+		kfree(q_elem);
+		atomic_dec(&master->rsp_queue_len);
+	}
+
+	return true;
+}
+
 static int ipmb_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
@@ -702,12 +770,30 @@  static int ipmb_probe(struct i2c_client *client,
 	if (ret)
 		return ret;
 
+	master->slave_registered = true;
+
+	/*
+	 * Send a simple message "get device ID" to detect whether the BMC is responsive or not.
+	 * This is necessary before calling ipmi_register_smi which executes a handshake with the
+	 * slave device and can hold the lock for a very long if the BMC is not up. This long wait
+	 * at boot time causes the system to crash.
+	 */
+	if (!ipmb_detect(master)) {
+		dev_err(&client->dev, "Unable to get response from slave device at this time\n");
+		i2c_slave_unregister(client);
+		master->slave_registered = false;
+		return -ENXIO;
+	}
+
 	ret = ipmi_register_smi(&ipmb_smi_handlers, master,
 				&client->dev,
 				(unsigned char)master->rs_sa);
 
-	if (ret)
+	if (ret) {
+		dev_err(&client->dev, "ipmi_register_smi failed with ret = %d\n", ret);
 		i2c_slave_unregister(client);
+		master->slave_registered = false;
+	}
 
 	return ret;
 }
@@ -717,8 +803,13 @@  static int ipmb_remove(struct i2c_client *client)
 	struct ipmb_master *master;
 
 	master = i2c_get_clientdata(client);
-	ipmi_unregister_smi(master->intf);
-	i2c_slave_unregister(client);
+	if (!master)
+		return 0;
+
+	if (master->slave_registered) {
+		ipmi_unregister_smi(master->intf);
+		i2c_slave_unregister(client);
+	}
 
 	return 0;
 }