From patchwork Fri Apr 2 21:40:50 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Asmaa Mnebhi X-Patchwork-Id: 1461897 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=lists.ubuntu.com (client-ip=91.189.94.19; helo=huckleberry.canonical.com; envelope-from=kernel-team-bounces@lists.ubuntu.com; receiver=) Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4FBtlj24Bqz9sW4; Sat, 3 Apr 2021 08:41:03 +1100 (AEDT) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1lSRXI-0001Y9-Ri; Fri, 02 Apr 2021 21:41:00 +0000 Received: from mail-il-dmz.mellanox.com ([193.47.165.129] helo=mellanox.co.il) by huckleberry.canonical.com with esmtp (Exim 4.86_2) (envelope-from ) id 1lSRXH-0001Xp-0t for kernel-team@lists.ubuntu.com; Fri, 02 Apr 2021 21:40:59 +0000 Received: from Internal Mail-Server by MTLPINE1 (envelope-from asmaa@mellanox.com) with SMTP; 3 Apr 2021 00:40:53 +0300 Received: from farm-0002.mtbu.labs.mlnx (farm-0002.mtbu.labs.mlnx [10.15.2.32]) by mtbu-labmailer.labs.mlnx (8.14.4/8.14.4) with ESMTP id 132LeqR2023811; Fri, 2 Apr 2021 17:40:52 -0400 Received: (from asmaa@localhost) by farm-0002.mtbu.labs.mlnx (8.14.7/8.13.8/Submit) id 132LeqAO028970; Fri, 2 Apr 2021 17:40:52 -0400 From: Asmaa Mnebhi To: kernel-team@lists.ubuntu.com Subject: [SRU][F:linux-bluefield][PATCH v1 1/1] UBUNTU: SAUCE: ipmb_host.c: Fix slow transactions Date: Fri, 2 Apr 2021 17:40:50 -0400 Message-Id: <1617399650-28927-2-git-send-email-asmaa@nvidia.com> X-Mailer: git-send-email 2.1.2 In-Reply-To: <1617399650-28927-1-git-send-email-asmaa@nvidia.com> References: <1617399650-28927-1-git-send-email-asmaa@nvidia.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.20 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: asmaa@nvidia.com MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" BugLink: https://bugs.launchpad.net/bugs/1922393 The previous ipmb_host patch broke customers' IPMB tests. They requested to make the ipmb_host response time as long as before. In the case where the I2C bus is made very busy, the ipmb_host driver just drops slow/delayed responses. This fix elongates the timeout of the response. So restore the previous stable ipmb_host patch. This patch also fixes a crash which occurs after powercycling certain BlueField-2 systems. The crash is due to the handshake which takes too long to wait for a response at boot time. Signed-off-by: Asmaa Mnebhi Reviewed-by: David Thompson Signed-off-by: Asmaa Mnebhi Acked-by: Stefan Bader Acked-by: Kleber Sacilotto de Souza --- drivers/char/ipmi/ipmb_host.c | 131 +++++++++++++++++++++++++++++++++++------- 1 file changed, 111 insertions(+), 20 deletions(-) diff --git a/drivers/char/ipmi/ipmb_host.c b/drivers/char/ipmi/ipmb_host.c index a408c1a..5cc9d92 100644 --- a/drivers/char/ipmi/ipmb_host.c +++ b/drivers/char/ipmi/ipmb_host.c @@ -3,7 +3,7 @@ /* * Copyright 2020, NVIDIA Corporation. All rights reserved. * - * This was inspired by Brendan Higgins' bt-i2c driver. + * This was inspired by Brendan Higgins' bt-i2c driver. * */ @@ -24,6 +24,8 @@ #define IPMB_TIMEOUT (msecs_to_jiffies(20000)) +static bool handshake_rsp = false; + /* * The least we expect in an IPMB message is: * netfn_rs_lun, checksum1, rq_sa, rq_seq_rq_lun, @@ -44,7 +46,7 @@ #define IPMB_MAX_SMI_SIZE 125 #define IPMB_SMI_MSG_HEADER_SIZE 2 -#define IPMB_SEQ_MAX 1024 +#define IPMB_SEQ_MAX 64 #define MAX_BUF_SIZE 122 @@ -132,6 +134,8 @@ struct ipmb_master { struct list_head rsp_queue; atomic_t rsp_queue_len; wait_queue_head_t wait_queue; + + bool slave_registered; }; /* +1 is for the checksum integrated in payload */ @@ -206,25 +210,20 @@ static int ipmb_handle_response(struct ipmb_master *master) * i2c_master_send */ static int ipmb_send_request(struct ipmb_master *master, - struct request *request) + struct request *request, u8 i2c_msg_len) { struct i2c_client *client = master->client; unsigned long timeout, read_time; u8 *buf = (u8 *) request; int ret; - int msg_len; union i2c_smbus_data data; /* - * subtract netfn_rs_lun payload since it is passed as arg + * skip netfn_rs_lun payload since it is passed as arg * 5 to i2c_smbus_xfer. */ - msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1; - if (msg_len > I2C_SMBUS_BLOCK_MAX) - msg_len = I2C_SMBUS_BLOCK_MAX; - - data.block[0] = msg_len; - memcpy(&data.block[1], buf + 1, msg_len); + data.block[0] = i2c_msg_len; + memcpy(&data.block[1], buf + 1, i2c_msg_len); timeout = jiffies + msecs_to_jiffies(WRITE_TIMEOUT); do { @@ -387,11 +386,12 @@ static int ipmb_receive_rsp(struct ipmb_master *master, { struct ipmb_rsp_elem *queue_elem; int ret = 1; + unsigned long flags; - spin_lock_irq(&master->lock); + spin_lock_irqsave(&master->lock, flags); if (list_empty(&master->rsp_queue)) { - spin_unlock_irq(&master->lock); + spin_unlock_irqrestore(&master->lock, flags); ret = wait_event_interruptible_timeout(master->wait_queue, !list_empty(&master->rsp_queue), IPMB_TIMEOUT); @@ -399,16 +399,17 @@ static int ipmb_receive_rsp(struct ipmb_master *master, if (ret <= 0) return ret; - spin_lock_irq(&master->lock); + spin_lock_irqsave(&master->lock, flags); } queue_elem = list_first_entry(&master->rsp_queue, struct ipmb_rsp_elem, list); + memcpy(ipmb_rsp, &queue_elem->rsp, sizeof(struct response)); list_del(&queue_elem->list); kfree(queue_elem); atomic_dec(&master->rsp_queue_len); - spin_unlock_irq(&master->lock); + spin_unlock_irqrestore(&master->lock, flags); return ret; } @@ -438,6 +439,8 @@ static void ipmb_send_workfn(struct work_struct *work) int rsp_msg_len; u8 *buf_rsp; u8 verify_checksum; + u8 seq; + u8 i2c_msg_len; u8 *buf = (u8 *) &ipmb_req_msg; @@ -460,11 +463,13 @@ static void ipmb_send_workfn(struct work_struct *work) return; } - if (!ipmb_assign_seq(master, req_msg, &ipmb_req_msg.rq_seq_rq_lun)) { + if (!ipmb_assign_seq(master, req_msg, &seq)) { ipmb_error_reply(master, req_msg, IPMI_NODE_BUSY_ERR); return; } + ipmb_req_msg.rq_seq_rq_lun = seq << 2; + msg_len = ipmi_smi_to_ipmb_len(smi_msg_size); /* Responder */ @@ -481,7 +486,15 @@ static void ipmb_send_workfn(struct work_struct *work) ipmb_req_msg.payload[ipmb_payload_len((size_t)msg_len)] = ipmb_checksum(buf + 2, msg_len - 2, 0); - if (ipmb_send_request(master, &ipmb_req_msg) < 0) { + /* + * subtract netfn_rs_lun payload since it is passed as arg + * 5 to i2c_smbus_xfer. + */ + i2c_msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1; + if (i2c_msg_len > I2C_SMBUS_BLOCK_MAX) + i2c_msg_len = I2C_SMBUS_BLOCK_MAX; + + if (ipmb_send_request(master, &ipmb_req_msg, i2c_msg_len) < 0) { ipmb_free_seq(master, (GET_SEQ(ipmb_req_msg.rq_seq_rq_lun))); ipmb_error_reply(master, req_msg, IPMI_BUS_ERR); spin_lock_irqsave(&master->lock, flags); @@ -621,6 +634,11 @@ static int ipmb_slave_cb(struct i2c_client *client, struct ipmb_master *master = i2c_get_clientdata(client); u8 *buf; + if (!handshake_rsp) { + handshake_rsp = true; + return 0; + } + spin_lock(&master->lock); switch (event) { @@ -665,6 +683,56 @@ static unsigned short slave_add = 0x0; module_param(slave_add, ushort, 0); MODULE_PARM_DESC(slave_add, "The i2c slave address of the responding device"); +#define GET_DEVICE_ID_MSG_LEN 7 + +static bool ipmb_detect(struct ipmb_master *master) +{ + struct ipmb_rsp_elem *q_elem, *tmp_q_elem; + struct request request; + u8 *buf = (u8 *) &request; + struct device dev; + int retry = 2000; + u8 i2c_msg_len; + int ret; + + /* Subtract rs sa and netfn */ + i2c_msg_len = GET_DEVICE_ID_MSG_LEN - 2; + + dev = master->client->dev; + + request.netfn_rs_lun = IPMI_NETFN_APP_REQUEST << 2; + request.checksum1 = ipmb_checksum1((u8)(master->rs_sa << 1), + request.netfn_rs_lun); + request.rq_sa = (u8)(master->client->addr << 1); + request.rq_seq_rq_lun = 0; + request.cmd = IPMI_GET_DEVICE_ID_CMD; + request.payload[0] = ipmb_checksum(buf + 2, 3, 0); + + ret = ipmb_send_request(master, &request, i2c_msg_len); + if (ret < 0) { + dev_err(&dev, "ERROR: ipmb_send_request failed during ipmb detection\n"); + return false; + } + + while(!handshake_rsp && (retry > 0)) { + mdelay(10); + retry--; + } + + if (!retry) { + dev_err(&dev, "ERROR: Response timed out during ipmb detection\n"); + return false; + } + + list_for_each_entry_safe(q_elem, tmp_q_elem, &master->rsp_queue, list){ + list_del(&q_elem->list); + kfree(q_elem); + atomic_dec(&master->rsp_queue_len); + } + + return true; +} + static int ipmb_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -702,12 +770,30 @@ static int ipmb_probe(struct i2c_client *client, if (ret) return ret; + master->slave_registered = true; + + /* + * Send a simple message "get device ID" to detect whether the BMC is responsive or not. + * This is necessary before calling ipmi_register_smi which executes a handshake with the + * slave device and can hold the lock for a very long if the BMC is not up. This long wait + * at boot time causes the system to crash. + */ + if (!ipmb_detect(master)) { + dev_err(&client->dev, "Unable to get response from slave device at this time\n"); + i2c_slave_unregister(client); + master->slave_registered = false; + return -ENXIO; + } + ret = ipmi_register_smi(&ipmb_smi_handlers, master, &client->dev, (unsigned char)master->rs_sa); - if (ret) + if (ret) { + dev_err(&client->dev, "ipmi_register_smi failed with ret = %d\n", ret); i2c_slave_unregister(client); + master->slave_registered = false; + } return ret; } @@ -717,8 +803,13 @@ static int ipmb_remove(struct i2c_client *client) struct ipmb_master *master; master = i2c_get_clientdata(client); - ipmi_unregister_smi(master->intf); - i2c_slave_unregister(client); + if (!master) + return 0; + + if (master->slave_registered) { + ipmi_unregister_smi(master->intf); + i2c_slave_unregister(client); + } return 0; }