From patchwork Wed Jul 11 10:44:25 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bert Kenward X-Patchwork-Id: 942423 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=solarflare.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 41QbLC6Z8nz9s01 for ; Wed, 11 Jul 2018 20:44:51 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726843AbeGKKsb (ORCPT ); Wed, 11 Jul 2018 06:48:31 -0400 Received: from dispatch1-us1.ppe-hosted.com ([148.163.129.52]:49650 "EHLO dispatch1-us1.ppe-hosted.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726557AbeGKKsb (ORCPT ); Wed, 11 Jul 2018 06:48:31 -0400 X-Virus-Scanned: Proofpoint Essentials engine Received: from webmail.solarflare.com (uk.solarflare.com [193.34.186.16]) (using TLSv1 with cipher ECDHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1-us4.ppe-hosted.com (Proofpoint Essentials ESMTP Server) with ESMTPS id 95B5ABC006D; Wed, 11 Jul 2018 10:44:48 +0000 (UTC) Received: from hgk-desktop.uk.solarflarecom.com (10.17.20.82) by ukex01.SolarFlarecom.com (10.17.10.4) with Microsoft SMTP Server (TLS) id 15.0.1044.25; Wed, 11 Jul 2018 11:44:36 +0100 From: Bert Kenward Subject: [PATCH net 1/2] sfc: avoid hang from nested use of the filter_sem To: Dave Miller CC: , References: Message-ID: <12aa44a4-8d1d-15b4-476a-e3cbabd1369b@solarflare.com> Date: Wed, 11 Jul 2018 11:44:25 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.5.2 MIME-Version: 1.0 In-Reply-To: Content-Language: en-US X-Originating-IP: [10.17.20.82] X-ClientProxiedBy: ocex03.SolarFlarecom.com (10.20.40.36) To ukex01.SolarFlarecom.com (10.17.10.4) X-TM-AS-Product-Ver: SMEX-11.0.0.1191-8.100.1062-23960.003 X-TM-AS-Result: No--3.834300-0.000000-31 X-TM-AS-User-Approved-Sender: Yes X-TM-AS-User-Blocked-Sender: No X-MDID: 1531305889-1LrXNunAkAJw Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org In some situations we may end up calling down_read while already holding the semaphore for write, thus hanging. This has been seen when setting the MAC address for the interface. The hung task log in this situation includes this stack: down_read efx_ef10_filter_insert efx_ef10_filter_insert_addr_list efx_ef10_filter_vlan_sync_rx_mode efx_ef10_filter_add_vlan efx_ef10_filter_table_probe efx_ef10_set_mac_address efx_set_mac_address dev_set_mac_address In addition, lockdep rightly points out that nested calling of down_read is incorrect. Fixes: c2bebe37c6b6 ("sfc: give ef10 its own rwsem in the filter table instead of filter_lock") Tested-by: Jarod Wilson Signed-off-by: Bert Kenward --- drivers/net/ethernet/sfc/ef10.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 23f0785c0573..7eeac3d6cfe8 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4288,9 +4288,9 @@ static int efx_ef10_filter_pri(struct efx_ef10_filter_table *table, return -EPROTONOSUPPORT; } -static s32 efx_ef10_filter_insert(struct efx_nic *efx, - struct efx_filter_spec *spec, - bool replace_equal) +static s32 efx_ef10_filter_insert_locked(struct efx_nic *efx, + struct efx_filter_spec *spec, + bool replace_equal) { DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); struct efx_ef10_nic_data *nic_data = efx->nic_data; @@ -4307,7 +4307,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx, bool is_mc_recip; s32 rc; - down_read(&efx->filter_sem); + WARN_ON(!rwsem_is_locked(&efx->filter_sem)); table = efx->filter_state; down_write(&table->lock); @@ -4498,10 +4498,22 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx, if (rss_locked) mutex_unlock(&efx->rss_lock); up_write(&table->lock); - up_read(&efx->filter_sem); return rc; } +static s32 efx_ef10_filter_insert(struct efx_nic *efx, + struct efx_filter_spec *spec, + bool replace_equal) +{ + s32 ret; + + down_read(&efx->filter_sem); + ret = efx_ef10_filter_insert_locked(efx, spec, replace_equal); + up_read(&efx->filter_sem); + + return ret; +} + static void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx) { /* no need to do anything here on EF10 */ @@ -5285,7 +5297,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx, EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID); efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr); - rc = efx_ef10_filter_insert(efx, &spec, true); + rc = efx_ef10_filter_insert_locked(efx, &spec, true); if (rc < 0) { if (rollback) { netif_info(efx, drv, efx->net_dev, @@ -5314,7 +5326,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx, efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); eth_broadcast_addr(baddr); efx_filter_set_eth_local(&spec, vlan->vid, baddr); - rc = efx_ef10_filter_insert(efx, &spec, true); + rc = efx_ef10_filter_insert_locked(efx, &spec, true); if (rc < 0) { netif_warn(efx, drv, efx->net_dev, "Broadcast filter insert failed rc=%d\n", rc); @@ -5370,7 +5382,7 @@ static int efx_ef10_filter_insert_def(struct efx_nic *efx, if (vlan->vid != EFX_FILTER_VID_UNSPEC) efx_filter_set_eth_local(&spec, vlan->vid, NULL); - rc = efx_ef10_filter_insert(efx, &spec, true); + rc = efx_ef10_filter_insert_locked(efx, &spec, true); if (rc < 0) { const char *um = multicast ? "Multicast" : "Unicast"; const char *encap_name = ""; @@ -5430,7 +5442,7 @@ static int efx_ef10_filter_insert_def(struct efx_nic *efx, filter_flags, 0); eth_broadcast_addr(baddr); efx_filter_set_eth_local(&spec, vlan->vid, baddr); - rc = efx_ef10_filter_insert(efx, &spec, true); + rc = efx_ef10_filter_insert_locked(efx, &spec, true); if (rc < 0) { netif_warn(efx, drv, efx->net_dev, "Broadcast filter insert failed rc=%d\n", From patchwork Wed Jul 11 10:45:10 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bert Kenward X-Patchwork-Id: 942424 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=solarflare.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 41QbM06wQBz9s01 for ; Wed, 11 Jul 2018 20:45:32 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727019AbeGKKtN (ORCPT ); Wed, 11 Jul 2018 06:49:13 -0400 Received: from dispatch1-us1.ppe-hosted.com ([148.163.129.52]:51616 "EHLO dispatch1-us1.ppe-hosted.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726557AbeGKKtN (ORCPT ); Wed, 11 Jul 2018 06:49:13 -0400 X-Virus-Scanned: Proofpoint Essentials engine Received: from webmail.solarflare.com (uk.solarflare.com [193.34.186.16]) (using TLSv1 with cipher ECDHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1-us4.ppe-hosted.com (Proofpoint Essentials ESMTP Server) with ESMTPS id 145A3BC0068; Wed, 11 Jul 2018 10:45:30 +0000 (UTC) Received: from hgk-desktop.uk.solarflarecom.com (10.17.20.82) by ukex01.SolarFlarecom.com (10.17.10.4) with Microsoft SMTP Server (TLS) id 15.0.1044.25; Wed, 11 Jul 2018 11:45:18 +0100 From: Bert Kenward Subject: [PATCH net 2/2] sfc: hold filter_sem consistently during reset To: Dave Miller CC: , References: Message-ID: <1d8105c2-769e-3a36-55ed-852cc9ecf2d4@solarflare.com> Date: Wed, 11 Jul 2018 11:45:10 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.5.2 MIME-Version: 1.0 In-Reply-To: Content-Language: en-US X-Originating-IP: [10.17.20.82] X-ClientProxiedBy: ocex03.SolarFlarecom.com (10.20.40.36) To ukex01.SolarFlarecom.com (10.17.10.4) X-TM-AS-Product-Ver: SMEX-11.0.0.1191-8.100.1062-23960.003 X-TM-AS-Result: No-4.613100-0.000000-31 X-TM-AS-User-Approved-Sender: Yes X-TM-AS-User-Blocked-Sender: No X-MDID: 1531305930-WKDbpyR0KssJ Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org We should take and release the filter_sem consistently during the reset process, in the same manner as the mac_lock and reset_lock. For lockdep consistency we also take the filter_sem for write around other calls to efx->type->init(). Fixes: c2bebe37c6b6 ("sfc: give ef10 its own rwsem in the filter table instead of filter_lock") Signed-off-by: Bert Kenward --- drivers/net/ethernet/sfc/efx.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 570ec72266f3..ce3a177081a8 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1871,12 +1871,6 @@ static void efx_remove_filters(struct efx_nic *efx) up_write(&efx->filter_sem); } -static void efx_restore_filters(struct efx_nic *efx) -{ - down_read(&efx->filter_sem); - efx->type->filter_table_restore(efx); - up_read(&efx->filter_sem); -} /************************************************************************** * @@ -2688,6 +2682,7 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) efx_disable_interrupts(efx); mutex_lock(&efx->mac_lock); + down_write(&efx->filter_sem); mutex_lock(&efx->rss_lock); if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && method != RESET_TYPE_DATAPATH) @@ -2745,9 +2740,8 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) if (efx->type->rx_restore_rss_contexts) efx->type->rx_restore_rss_contexts(efx); mutex_unlock(&efx->rss_lock); - down_read(&efx->filter_sem); - efx_restore_filters(efx); - up_read(&efx->filter_sem); + efx->type->filter_table_restore(efx); + up_write(&efx->filter_sem); if (efx->type->sriov_reset) efx->type->sriov_reset(efx); @@ -2764,6 +2758,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) efx->port_initialized = false; mutex_unlock(&efx->rss_lock); + up_write(&efx->filter_sem); mutex_unlock(&efx->mac_lock); return rc; @@ -3473,7 +3468,9 @@ static int efx_pci_probe_main(struct efx_nic *efx) efx_init_napi(efx); + down_write(&efx->filter_sem); rc = efx->type->init(efx); + up_write(&efx->filter_sem); if (rc) { netif_err(efx, probe, efx->net_dev, "failed to initialise NIC\n"); @@ -3765,7 +3762,9 @@ static int efx_pm_resume(struct device *dev) rc = efx->type->reset(efx, RESET_TYPE_ALL); if (rc) return rc; + down_write(&efx->filter_sem); rc = efx->type->init(efx); + up_write(&efx->filter_sem); if (rc) return rc; rc = efx_pm_thaw(dev);