From patchwork Mon Feb 13 23:10:05 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kamal Mostafa X-Patchwork-Id: 727620 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) by ozlabs.org (Postfix) with ESMTP id 3vMh9N1MKqz9s7F; Tue, 14 Feb 2017 10:10:32 +1100 (AEDT) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.76) (envelope-from ) id 1cdPlM-0000QY-LR; Mon, 13 Feb 2017 23:10:28 +0000 Received: from youngberry.canonical.com ([91.189.89.112]) by huckleberry.canonical.com with esmtps (TLS1.0:RSA_AES_256_CBC_SHA1:32) (Exim 4.76) (envelope-from ) id 1cdPlD-0000Dn-Oi for kernel-team@lists.ubuntu.com; Mon, 13 Feb 2017 23:10:19 +0000 Received: from 1.general.kamal.us.vpn ([10.172.68.52] helo=fourier) by youngberry.canonical.com with esmtpsa (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.76) (envelope-from ) id 1cdPlD-00049H-BG for kernel-team@lists.ubuntu.com; Mon, 13 Feb 2017 23:10:19 +0000 Received: from kamal by fourier with local (Exim 4.86_2) (envelope-from ) id 1cdPlB-0004nd-C1 for kernel-team@lists.ubuntu.com; Mon, 13 Feb 2017 15:10:17 -0800 From: Kamal Mostafa To: kernel-team@lists.ubuntu.com Subject: [aws][PATCH 11/16] net/ena: fix potential access to freed memory during device reset Date: Mon, 13 Feb 2017 15:10:05 -0800 Message-Id: <1487027410-18266-12-git-send-email-kamal@canonical.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1487027410-18266-1-git-send-email-kamal@canonical.com> References: <1487027410-18266-1-git-send-email-kamal@canonical.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.14 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: kernel-team-bounces@lists.ubuntu.com From: Netanel Belgazal BugLink: http://bugs.launchpad.net/bugs/1664312 If the ena driver detects that the device is not behave as expected, it tries to reset the device. The reset flow calls ena_down, which will frees all the resources the driver allocates and then it will reset the device. This flow can cause memory corruption if the device is still writes to the driver's memory space. To overcome this potential race, move the reset before the device resources are freed. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller (cherry-picked from linux/net-next) Signed-off-by: Kamal Mostafa --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 56 +++++++++++++++++++++------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 2ef5f09..6b5014f 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -80,14 +80,18 @@ static void ena_tx_timeout(struct net_device *dev) { struct ena_adapter *adapter = netdev_priv(dev); + /* Change the state of the device to trigger reset + * Check that we are not in the middle or a trigger already + */ + + if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) + return; + u64_stats_update_begin(&adapter->syncp); adapter->dev_stats.tx_timeout++; u64_stats_update_end(&adapter->syncp); netif_err(adapter, tx_err, dev, "Transmit time out\n"); - - /* Change the state of the device to trigger reset */ - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); } static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) @@ -1116,7 +1120,8 @@ static int ena_io_poll(struct napi_struct *napi, int budget) tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; - if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) { + if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || + test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) { napi_complete_done(napi, 0); return 0; } @@ -1705,12 +1710,22 @@ static void ena_down(struct ena_adapter *adapter) adapter->dev_stats.interface_down++; u64_stats_update_end(&adapter->syncp); - /* After this point the napi handler won't enable the tx queue */ - ena_napi_disable_all(adapter); netif_carrier_off(adapter->netdev); netif_tx_disable(adapter->netdev); + /* After this point the napi handler won't enable the tx queue */ + ena_napi_disable_all(adapter); + /* After destroy the queue there won't be any new interrupts */ + + if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) { + int rc; + + rc = ena_com_dev_reset(adapter->ena_dev); + if (rc) + dev_err(&adapter->pdev->dev, "Device reset failed\n"); + } + ena_destroy_all_io_queues(adapter); ena_disable_io_intr_sync(adapter); @@ -2072,6 +2087,14 @@ static void ena_netpoll(struct net_device *netdev) struct ena_adapter *adapter = netdev_priv(netdev); int i; + /* Dont schedule NAPI if the driver is in the middle of reset + * or netdev is down. + */ + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) || + test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) + return; + for (i = 0; i < adapter->num_queues; i++) napi_schedule(&adapter->ena_napi[i].napi); } @@ -2458,6 +2481,14 @@ static void ena_fw_reset_device(struct work_struct *work) bool dev_up, wd_state; int rc; + if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + dev_err(&pdev->dev, + "device reset schedule while reset bit is off\n"); + return; + } + + netif_carrier_off(netdev); + del_timer_sync(&adapter->timer_service); rtnl_lock(); @@ -2471,12 +2502,6 @@ static void ena_fw_reset_device(struct work_struct *work) */ ena_close(netdev); - rc = ena_com_dev_reset(ena_dev); - if (rc) { - dev_err(&pdev->dev, "Device reset failed\n"); - goto err; - } - ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); @@ -2489,6 +2514,8 @@ static void ena_fw_reset_device(struct work_struct *work) ena_com_mmio_reg_read_request_destroy(ena_dev); + clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + /* Finish with the destroy part. Start the init part */ rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); @@ -2554,6 +2581,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) return; + if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) + return; + budget = ENA_MONITORED_TX_QUEUES; for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { @@ -2653,7 +2683,7 @@ static void ena_timer_service(unsigned long data) if (host_info) ena_update_host_info(host_info, adapter->netdev); - if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { netif_err(adapter, drv, adapter->netdev, "Trigger reset is on\n"); ena_dump_stats_to_dmesg(adapter);