Message ID | 1483595590-12932-1-git-send-email-hariprasad@chelsio.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
Hi Hariprasad, [auto build test ERROR on net-next/master] url: https://github.com/0day-ci/linux/commits/Hariprasad-Shenai/cxgb4-Synchronize-access-to-mailbox/20170105-193032 config: i386-randconfig-x005-201701 (attached as .config) compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901 reproduce: # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c: In function 'init_one': >> drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:4712:25: error: 'struct adapter' has no member named 'mbox_list'; did you mean 'mbox_lock'? INIT_LIST_HEAD(&adapter->mbox_list.list); ^~ vim +4712 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 4706 4707 spin_lock_init(&adapter->stats_lock); 4708 spin_lock_init(&adapter->tid_release_lock); 4709 spin_lock_init(&adapter->win0_lock); 4710 spin_lock_init(&adapter->mbox_lock); 4711 > 4712 INIT_LIST_HEAD(&adapter->mbox_list.list); 4713 4714 INIT_WORK(&adapter->tid_release_task, process_tid_release_list); 4715 INIT_WORK(&adapter->db_full_task, process_db_full); --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi Hariprasad, [auto build test ERROR on net-next/master] url: https://github.com/0day-ci/linux/commits/Hariprasad-Shenai/cxgb4-Synchronize-access-to-mailbox/20170105-193032 config: xtensa-allmodconfig (attached as .config) compiler: xtensa-linux-gcc (GCC) 4.9.0 reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=xtensa All errors (new ones prefixed by >>): drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c: In function 'init_one': >> drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:4712:25: error: 'struct adapter' has no member named 'mbox_list' INIT_LIST_HEAD(&adapter->mbox_list.list); ^ vim +4712 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 4706 4707 spin_lock_init(&adapter->stats_lock); 4708 spin_lock_init(&adapter->tid_release_lock); 4709 spin_lock_init(&adapter->win0_lock); 4710 spin_lock_init(&adapter->mbox_lock); 4711 > 4712 INIT_LIST_HEAD(&adapter->mbox_list.list); 4713 4714 INIT_WORK(&adapter->tid_release_task, process_tid_release_list); 4715 INIT_WORK(&adapter->db_full_task, process_db_full); --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
From: Hariprasad Shenai <hariprasad@chelsio.com> Date: Thu, 5 Jan 2017 11:23:10 +0530 > @@ -844,6 +848,10 @@ struct adapter { > struct work_struct db_drop_task; > bool tid_release_task_busy; > > + /* lock for mailbox cmd list */ > + spinlock_t mbox_lock; > + struct mbox_list mlist; > + ... > @@ -4707,6 +4707,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) > spin_lock_init(&adapter->stats_lock); > spin_lock_init(&adapter->tid_release_lock); > spin_lock_init(&adapter->win0_lock); > + spin_lock_init(&adapter->mbox_lock); > + > + INIT_LIST_HEAD(&adapter->mbox_list.list); It is absolutely impossible that you even compiled this code.
> On 05-Jan-2017, at 9:35 PM, David Miller <davem@davemloft.net> wrote: > > From: Hariprasad Shenai <hariprasad@chelsio.com> > Date: Thu, 5 Jan 2017 11:23:10 +0530 > >> @@ -844,6 +848,10 @@ struct adapter { >> struct work_struct db_drop_task; >> bool tid_release_task_busy; >> >> + /* lock for mailbox cmd list */ >> + spinlock_t mbox_lock; >> + struct mbox_list mlist; >> + > ... >> @@ -4707,6 +4707,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) >> spin_lock_init(&adapter->stats_lock); >> spin_lock_init(&adapter->tid_release_lock); >> spin_lock_init(&adapter->win0_lock); >> + spin_lock_init(&adapter->mbox_lock); >> + >> + INIT_LIST_HEAD(&adapter->mbox_list.list); > > It is absolutely impossible that you even compiled this code. My bad. Looks like I sent wrong patch. I will send V2 for the same.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 0bce1bf9ca0f..78a852c72f5d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -782,6 +782,10 @@ struct vf_info { bool pf_set_mac; }; +struct mbox_list { + struct list_head list; +}; + struct adapter { void __iomem *regs; void __iomem *bar2; @@ -844,6 +848,10 @@ struct adapter { struct work_struct db_drop_task; bool tid_release_task_busy; + /* lock for mailbox cmd list */ + spinlock_t mbox_lock; + struct mbox_list mlist; + /* support for mailbox command/reply logging */ #define T4_OS_LOG_MBOX_CMDS 256 struct mbox_cmd_log *mbox_log; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 6f951877430b..34ceb3518dd4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4707,6 +4707,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->stats_lock); spin_lock_init(&adapter->tid_release_lock); spin_lock_init(&adapter->win0_lock); + spin_lock_init(&adapter->mbox_lock); + + INIT_LIST_HEAD(&adapter->mbox_list.list); INIT_WORK(&adapter->tid_release_task, process_tid_release_list); INIT_WORK(&adapter->db_full_task, process_db_full); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e8139514d32c..7ac6ea531b0f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -284,6 +284,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, 1, 1, 3, 5, 10, 10, 20, 50, 100, 200 }; + struct mbox_list entry; u16 access = 0; u16 execute = 0; u32 v; @@ -311,11 +312,61 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, timeout = -timeout; } + /* Queue ourselves onto the mailbox access list. When our entry is at + * the front of the list, we have rights to access the mailbox. So we + * wait [for a while] till we're at the front [or bail out with an + * EBUSY] ... + */ + spin_lock(&adap->mbox_lock); + list_add_tail(&entry.list, &adap->mlist.list); + spin_unlock(&adap->mbox_lock); + + delay_idx = 0; + ms = delay[0]; + + for (i = 0; ; i += ms) { + /* If we've waited too long, return a busy indication. This + * really ought to be based on our initial position in the + * mailbox access list but this is a start. We very rearely + * contend on access to the mailbox ... + */ + if (i > FW_CMD_MAX_TIMEOUT) { + spin_lock(&adap->mbox_lock); + list_del(&entry.list); + spin_unlock(&adap->mbox_lock); + ret = -EBUSY; + t4_record_mbox(adap, cmd, size, access, ret); + return ret; + } + + /* If we're at the head, break out and start the mailbox + * protocol. + */ + if (list_first_entry(&adap->mlist.list, struct mbox_list, + list) == &entry) + break; + + /* Delay for a bit before checking again ... */ + if (sleep_ok) { + ms = delay[delay_idx]; /* last element may repeat */ + if (delay_idx < ARRAY_SIZE(delay) - 1) + delay_idx++; + msleep(ms); + } else { + mdelay(ms); + } + } + + /* Loop trying to get ownership of the mailbox. Return an error + * if we can't gain ownership. + */ v = MBOWNER_G(t4_read_reg(adap, ctl_reg)); for (i = 0; v == MBOX_OWNER_NONE && i < 3; i++) v = MBOWNER_G(t4_read_reg(adap, ctl_reg)); - if (v != MBOX_OWNER_DRV) { + spin_lock(&adap->mbox_lock); + list_del(&entry.list); + spin_unlock(&adap->mbox_lock); ret = (v == MBOX_OWNER_FW) ? -EBUSY : -ETIMEDOUT; t4_record_mbox(adap, cmd, MBOX_LEN, access, ret); return ret; @@ -366,6 +417,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, execute = i + ms; t4_record_mbox(adap, cmd_rpl, MBOX_LEN, access, execute); + spin_lock(&adap->mbox_lock); + list_del(&entry.list); + spin_unlock(&adap->mbox_lock); return -FW_CMD_RETVAL_G((int)res); } } @@ -375,6 +429,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, dev_err(adap->pdev_dev, "command %#x in mailbox %d timed out\n", *(const u8 *)cmd, mbox); t4_report_fw_error(adap); + spin_lock(&adap->mbox_lock); + list_del(&entry.list); + spin_unlock(&adap->mbox_lock); return ret; }
The issue comes when there are multiple threads attempting to use the mailbox facility at the same time. When DCB operations and interface up/down is run in a loop for every 0.1 sec, we observed mailbox collisions. And out of the two commands one would fail with the present code, since we don't queue the second command. To overcome the above issue, added a queue to access the mailbox. Whenever a mailbox command is issued add it to the queue. If its at the head issue the mailbox command, else wait for the existing command to complete. Usually command takes less than a milli-second to complete. Also timeout from the loop, if the command under execution takes long time to run. In reality, the number of mailbox access collisions is going to be very rare since no one runs such abusive script. Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com> --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 8 ++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 ++ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 59 ++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 1 deletion(-)