get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/819327/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 819327,
    "url": "http://patchwork.ozlabs.org/api/patches/819327/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/linux-pci/patch/20170927214220.41216-4-gvaradar@cisco.com/",
    "project": {
        "id": 28,
        "url": "http://patchwork.ozlabs.org/api/projects/28/?format=api",
        "name": "Linux PCI development",
        "link_name": "linux-pci",
        "list_id": "linux-pci.vger.kernel.org",
        "list_email": "linux-pci@vger.kernel.org",
        "web_url": null,
        "scm_url": null,
        "webscm_url": null,
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<20170927214220.41216-4-gvaradar@cisco.com>",
    "list_archive_url": null,
    "date": "2017-09-27T21:42:19",
    "name": "[3/4] pci aer: fix deadlock in do_recovery",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": false,
    "hash": "995e2bf983f2cf9451cabd395fd1b04cedaed886",
    "submitter": {
        "id": 46073,
        "url": "http://patchwork.ozlabs.org/api/people/46073/?format=api",
        "name": "Govindarajulu Varadarajan",
        "email": "gvaradar@cisco.com"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/linux-pci/patch/20170927214220.41216-4-gvaradar@cisco.com/mbox/",
    "series": [
        {
            "id": 5455,
            "url": "http://patchwork.ozlabs.org/api/series/5455/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/linux-pci/list/?series=5455",
            "date": "2017-09-27T21:42:16",
            "name": "pci aer: fix deadlock in do_recovery",
            "version": 1,
            "mbox": "http://patchwork.ozlabs.org/series/5455/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/819327/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/819327/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<linux-pci-owner@vger.kernel.org>",
        "X-Original-To": "incoming@patchwork.ozlabs.org",
        "Delivered-To": "patchwork-incoming@bilbo.ozlabs.org",
        "Authentication-Results": [
            "ozlabs.org;\n\tspf=none (mailfrom) smtp.mailfrom=vger.kernel.org\n\t(client-ip=209.132.180.67; helo=vger.kernel.org;\n\tenvelope-from=linux-pci-owner@vger.kernel.org;\n\treceiver=<UNKNOWN>)",
            "ozlabs.org; dkim=pass (1024-bit key;\n\tunprotected) header.d=cisco.com header.i=@cisco.com\n\theader.b=\"NlbXpEJv\"; dkim-atps=neutral"
        ],
        "Received": [
            "from vger.kernel.org (vger.kernel.org [209.132.180.67])\n\tby ozlabs.org (Postfix) with ESMTP id 3y2Wll0QDMz9t67\n\tfor <incoming@patchwork.ozlabs.org>;\n\tThu, 28 Sep 2017 07:53:07 +1000 (AEST)",
            "(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S1752247AbdI0Vwi (ORCPT <rfc822;incoming@patchwork.ozlabs.org>);\n\tWed, 27 Sep 2017 17:52:38 -0400",
            "from rcdn-iport-1.cisco.com ([173.37.86.72]:54728 \"EHLO\n\trcdn-iport-1.cisco.com\" rhost-flags-OK-OK-OK-OK) by vger.kernel.org\n\twith ESMTP id S1752227AbdI0Vwg (ORCPT\n\t<rfc822; linux-pci@vger.kernel.org>); Wed, 27 Sep 2017 17:52:36 -0400",
            "from alln-core-6.cisco.com ([173.36.13.139])\n\tby rcdn-iport-1.cisco.com with ESMTP/TLS/DHE-RSA-AES256-SHA;\n\t27 Sep 2017 21:43:08 +0000",
            "from a6.cisco.com (arch-kvm-vm.cisco.com [10.193.184.6])\n\t(authenticated bits=0)\n\tby alln-core-6.cisco.com (8.14.5/8.14.5) with ESMTP id v8RLgpwh021066\n\t(version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO);\n\tWed, 27 Sep 2017 21:43:07 GMT"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/simple;\n\td=cisco.com; i=@cisco.com; l=12578; q=dns/txt;\n\ts=iport; t=1506549156; x=1507758756;\n\th=from:to:cc:subject:date:message-id:in-reply-to: references;\n\tbh=MwECNnOgpTnuOMgWt21nqXZQSzfPhwnC+VEF6mFj8qo=;\n\tb=NlbXpEJv6FqscJpk6rTJn15PPDpXs9lQSMZOeAiMrY+EdOVYMnM3DP8F\n\tZY1nptZc5/azUfc2HwgA3JQ49bXfor0n+95GEPzZxGqRJPr/h9CPq+ZeP\n\tYnM+1fng8bfP031InlkPgWErqaPlicQ+yvlZLb+Y5p4sfjy9Kqz6B0cNN 0=;",
        "X-IronPort-AV": "E=Sophos;i=\"5.42,446,1500940800\"; d=\"scan'208\";a=\"304432027\"",
        "From": "Govindarajulu Varadarajan <gvaradar@cisco.com>",
        "To": "benve@cisco.com, bhelgaas@google.com, linux-pci@vger.kernel.org,\n\tlinux-kernel@vger.kernel.org, jlbec@evilplan.org, hch@lst.de,\n\tmingo@redhat.com, peterz@infradead.org",
        "Cc": "Govindarajulu Varadarajan <gvaradar@cisco.com>",
        "Subject": "[PATCH 3/4] pci aer: fix deadlock in do_recovery",
        "Date": "Wed, 27 Sep 2017 14:42:19 -0700",
        "Message-Id": "<20170927214220.41216-4-gvaradar@cisco.com>",
        "X-Mailer": "git-send-email 2.14.1",
        "In-Reply-To": "<20170927214220.41216-1-gvaradar@cisco.com>",
        "References": "<20170927214220.41216-1-gvaradar@cisco.com>",
        "X-Authenticated-User": "gvaradar@cisco.com",
        "Sender": "linux-pci-owner@vger.kernel.org",
        "Precedence": "bulk",
        "List-ID": "<linux-pci.vger.kernel.org>",
        "X-Mailing-List": "linux-pci@vger.kernel.org"
    },
    "content": "CPU0\t\t\t\t\tCPU1\n---------------------------------------------------------------------\n__driver_attach()\ndevice_lock(&dev->mutex) <--- device mutex lock here\ndriver_probe_device()\npci_enable_sriov()\npci_iov_add_virtfn()\npci_device_add()\n\t\t\t\t\taer_isr()\t\t<--- pci aer error\n\t\t\t\t\tdo_recovery()\n\t\t\t\t\tbroadcast_error_message()\n\t\t\t\t\tpci_walk_bus()\n\t\t\t\t\tdown_read(&pci_bus_sem) <--- rd sem\ndown_write(&pci_bus_sem) <-- stuck on wr sem\n\t\t\t\t\treport_error_detected()\n\t\t\t\t\tdevice_lock(&dev->mutex)<--- DEAD LOCK\n\nThis can also happen when aer error occurs while pci_dev->sriov_config() is\ncalled.\n\nOnly fix I could think of is to lock &pci_bus_sem and try locking all\ndevice->mutex under that pci_bus. If it fails, unlock all device->mutex\nand &pci_bus_sem and try again.\n\n[   70.984091] pcieport 0000:00:02.0: AER: Uncorrected (Non-Fatal) error received: id=0010\n[   70.984112] pcieport 0000:00:02.0: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=0010(Requester ID)\n[   70.984116] pcieport 0000:00:02.0:   device [8086:3c04] error status/mask=00004000/00100000\n[   70.984120] pcieport 0000:00:02.0:    [14] Completion Timeout     (First)\n...\n[  107.484190] INFO: task kworker/0:1:76 blocked for more than 30 seconds.\n[  107.563619]       Not tainted 4.13.0+ #28\n[  107.611618] \"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\" disables this message.\n[  107.705368] kworker/0:1     D    0    76      2 0x80000000\n[  107.771050] Workqueue: events aer_isr\n[  107.814895] Call Trace:\n[  107.844181]  __schedule+0x312/0xa40\n[  107.885928]  schedule+0x3d/0x90\n[  107.923506]  schedule_preempt_disabled+0x15/0x20\n[  107.978773]  __mutex_lock+0x304/0xa30\n[  108.022594]  ? dev_printk_emit+0x3b/0x50\n[  108.069534]  ? report_error_detected+0xa6/0x210\n[  108.123770]  mutex_lock_nested+0x1b/0x20\n[  108.170713]  ? mutex_lock_nested+0x1b/0x20\n[  108.219730]  report_error_detected+0xa6/0x210\n[  108.271881]  ? aer_recover_queue+0xe0/0xe0\n[  108.320904]  pci_walk_bus+0x46/0x90\n[  108.362645]  ? aer_recover_queue+0xe0/0xe0\n[  108.411658]  broadcast_error_message+0xc3/0xf0\n[  108.464835]  do_recovery+0x34/0x220\n[  108.506569]  ? get_device_error_info+0x92/0x130\n[  108.560785]  aer_isr+0x28f/0x3b0\n[  108.599410]  process_one_work+0x277/0x6c0\n[  108.647399]  worker_thread+0x4d/0x3b0\n[  108.691218]  kthread+0x171/0x190\n[  108.729830]  ? process_one_work+0x6c0/0x6c0\n[  108.779888]  ? kthread_create_on_node+0x40/0x40\n[  108.834110]  ret_from_fork+0x2a/0x40\n[  108.876916] INFO: task kworker/0:2:205 blocked for more than 30 seconds.\n[  108.957129]       Not tainted 4.13.0+ #28\n[  109.005114] \"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\" disables this message.\n[  109.098873] kworker/0:2     D    0   205      2 0x80000000\n[  109.164544] Workqueue: events work_for_cpu_fn\n[  109.216681] Call Trace:\n[  109.245943]  __schedule+0x312/0xa40\n[  109.287687]  ? rwsem_down_write_failed+0x308/0x4f0\n[  109.345021]  schedule+0x3d/0x90\n[  109.382603]  rwsem_down_write_failed+0x30d/0x4f0\n[  109.437869]  ? __lock_acquire+0x75c/0x1410\n[  109.486910]  call_rwsem_down_write_failed+0x17/0x30\n[  109.545287]  ? call_rwsem_down_write_failed+0x17/0x30\n[  109.605752]  down_write+0x88/0xb0\n[  109.645410]  pci_device_add+0x158/0x240\n[  109.691313]  pci_iov_add_virtfn+0x24f/0x340\n[  109.741375]  pci_enable_sriov+0x32b/0x420\n[  109.789466]  ? pci_read+0x2c/0x30\n[  109.829142]  enic_probe+0x5d4/0xff0 [enic]\n[  109.878184]  ? trace_hardirqs_on+0xd/0x10\n[  109.926180]  local_pci_probe+0x42/0xa0\n[  109.971037]  work_for_cpu_fn+0x14/0x20\n[  110.015898]  process_one_work+0x277/0x6c0\n[  110.063884]  worker_thread+0x1d6/0x3b0\n[  110.108750]  kthread+0x171/0x190\n[  110.147363]  ? process_one_work+0x6c0/0x6c0\n[  110.197426]  ? kthread_create_on_node+0x40/0x40\n[  110.251642]  ret_from_fork+0x2a/0x40\n[  110.294448] INFO: task systemd-udevd:492 blocked for more than 30 seconds.\n[  110.376742]       Not tainted 4.13.0+ #28\n[  110.424715] \"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\" disables this message.\n[  110.518457] systemd-udevd   D    0   492    444 0x80000180\n[  110.584116] Call Trace:\n[  110.613382]  __schedule+0x312/0xa40\n[  110.655127]  ? wait_for_completion+0x14a/0x1d0\n[  110.708302]  schedule+0x3d/0x90\n[  110.745875]  schedule_timeout+0x26e/0x5b0\n[  110.793887]  ? wait_for_completion+0x14a/0x1d0\n[  110.847068]  wait_for_completion+0x169/0x1d0\n[  110.898165]  ? wait_for_completion+0x169/0x1d0\n[  110.951354]  ? wake_up_q+0x80/0x80\n[  110.992060]  flush_work+0x237/0x300\n[  111.033795]  ? flush_workqueue_prep_pwqs+0x1b0/0x1b0\n[  111.093224]  ? wait_for_completion+0x5a/0x1d0\n[  111.145363]  ? flush_work+0x237/0x300\n[  111.189189]  work_on_cpu+0x94/0xb0\n[  111.229894]  ? work_is_static_object+0x20/0x20\n[  111.283070]  ? pci_device_shutdown+0x60/0x60\n[  111.334173]  pci_device_probe+0x17a/0x190\n[  111.382163]  driver_probe_device+0x2ff/0x450\n[  111.433260]  __driver_attach+0x103/0x140\n[  111.480195]  ? driver_probe_device+0x450/0x450\n[  111.533381]  bus_for_each_dev+0x74/0xb0\n[  111.579276]  driver_attach+0x1e/0x20\n[  111.622056]  bus_add_driver+0x1ca/0x270\n[  111.667955]  ? 0xffffffffc039c000\n[  111.707616]  driver_register+0x60/0xe0\n[  111.752472]  ? 0xffffffffc039c000\n[  111.792126]  __pci_register_driver+0x6b/0x70\n[  111.843275]  enic_init_module+0x38/0x1000 [enic]\n[  111.898533]  do_one_initcall+0x50/0x192\n[  111.944428]  ? trace_hardirqs_on+0xd/0x10\n[  111.992408]  do_init_module+0x5f/0x1f2\n[  112.037274]  load_module+0x1740/0x1f70\n[  112.082148]  SYSC_finit_module+0xd7/0xf0\n[  112.129083]  ? SYSC_finit_module+0xd7/0xf0\n[  112.178106]  SyS_finit_module+0xe/0x10\n[  112.222972]  do_syscall_64+0x69/0x180\n[  112.266793]  entry_SYSCALL64_slow_path+0x25/0x25\n[  112.322047] RIP: 0033:0x7f3da098b559\n[  112.364826] RSP: 002b:00007ffeb3306a38 EFLAGS: 00000246 ORIG_RAX: 0000000000000139\n[  112.455447] RAX: ffffffffffffffda RBX: 0000557fe41ed3d0 RCX: 00007f3da098b559\n[  112.540860] RDX: 0000000000000000 RSI: 00007f3da14c79c5 RDI: 0000000000000006\n[  112.626281] RBP: 00007f3da14c79c5 R08: 0000000000000000 R09: 00007ffeb3306b50\n[  112.711698] R10: 0000000000000006 R11: 0000000000000246 R12: 0000000000000000\n[  112.797114] R13: 0000557fe420e210 R14: 0000000000020000 R15: 0000557fe2c1ef4a\n[  112.882568]\n               Showing all locks held in the system:\n[  112.956545] 5 locks held by kworker/0:1/76:\n[  113.006616]  #0:  (\"events\"){+.+.}, at: [<ffffffffb00b10ed>] process_one_work+0x1ed/0x6c0\n[  113.104535]  #1:  ((&rpc->dpc_handler)){+.+.}, at: [<ffffffffb00b10ed>] process_one_work+0x1ed/0x6c0\n[  113.213894]  #2:  (&rpc->rpc_mutex){+.+.}, at: [<ffffffffb0505ca2>] aer_isr+0x32/0x3b0\n[  113.308711]  #3:  (pci_bus_sem){++++}, at: [<ffffffffb04ea18a>] pci_walk_bus+0x2a/0x90\n[  113.403501]  #4:  (&dev->mutex){....}, at: [<ffffffffb0505706>] report_error_detected+0xa6/0x210\n[  113.508715] 3 locks held by kworker/0:2/205:\n[  113.559808]  #0:  (\"events\"){+.+.}, at: [<ffffffffb00b10ed>] process_one_work+0x1ed/0x6c0\n[  113.657718]  #1:  ((&wfc.work)){+.+.}, at: [<ffffffffb00b10ed>] process_one_work+0x1ed/0x6c0\n[  113.758745]  #2:  (pci_bus_sem){++++}, at: [<ffffffffb04ec978>] pci_device_add+0x158/0x240\n[  113.857710] 1 lock held by khungtaskd/239:\n[  113.906729]  #0:  (tasklist_lock){.+.+}, at: [<ffffffffb00f07dd>] debug_show_all_locks+0x3d/0x1a0\n[  114.012972] 2 locks held by systemd-udevd/492:\n[  114.066148]  #0:  (&dev->mutex){....}, at: [<ffffffffb06254d5>] __driver_attach+0x55/0x140\n[  114.165107]  #1:  (&dev->mutex){....}, at: [<ffffffffb06254f2>] __driver_attach+0x72/0x140\n\n[  114.281879] =============================================\n\nSigned-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>\n---\n drivers/pci/pcie/aer/aerdrv_core.c | 50 ++++++++++++++++++++++++++++++--------\n 1 file changed, 40 insertions(+), 10 deletions(-)",
    "diff": "diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c\nindex 890efcc574cb..a3869a3b6e82 100644\n--- a/drivers/pci/pcie/aer/aerdrv_core.c\n+++ b/drivers/pci/pcie/aer/aerdrv_core.c\n@@ -26,6 +26,7 @@\n #include <linux/slab.h>\n #include <linux/kfifo.h>\n #include \"aerdrv.h\"\n+#include \"../../pci.h\"\n \n #define\tPCI_EXP_AER_FLAGS\t(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \\\n \t\t\t\t PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)\n@@ -241,7 +242,6 @@ static int report_error_detected(struct pci_dev *dev, void *data)\n \tstruct aer_broadcast_data *result_data;\n \tresult_data = (struct aer_broadcast_data *) data;\n \n-\tdevice_lock(&dev->dev);\n \tdev->error_state = result_data->state;\n \n \tif (!dev->driver ||\n@@ -281,7 +281,6 @@ static int report_error_detected(struct pci_dev *dev, void *data)\n \t}\n \n \tresult_data->result = merge_result(result_data->result, vote);\n-\tdevice_unlock(&dev->dev);\n \treturn 0;\n }\n \n@@ -292,7 +291,6 @@ static int report_mmio_enabled(struct pci_dev *dev, void *data)\n \tstruct aer_broadcast_data *result_data;\n \tresult_data = (struct aer_broadcast_data *) data;\n \n-\tdevice_lock(&dev->dev);\n \tif (!dev->driver ||\n \t\t!dev->driver->err_handler ||\n \t\t!dev->driver->err_handler->mmio_enabled)\n@@ -302,7 +300,6 @@ static int report_mmio_enabled(struct pci_dev *dev, void *data)\n \tvote = err_handler->mmio_enabled(dev);\n \tresult_data->result = merge_result(result_data->result, vote);\n out:\n-\tdevice_unlock(&dev->dev);\n \treturn 0;\n }\n \n@@ -313,7 +310,6 @@ static int report_slot_reset(struct pci_dev *dev, void *data)\n \tstruct aer_broadcast_data *result_data;\n \tresult_data = (struct aer_broadcast_data *) data;\n \n-\tdevice_lock(&dev->dev);\n \tif (!dev->driver ||\n \t\t!dev->driver->err_handler ||\n \t\t!dev->driver->err_handler->slot_reset)\n@@ -323,7 +319,6 @@ static int report_slot_reset(struct pci_dev *dev, void *data)\n \tvote = err_handler->slot_reset(dev);\n \tresult_data->result = merge_result(result_data->result, vote);\n out:\n-\tdevice_unlock(&dev->dev);\n \treturn 0;\n }\n \n@@ -331,7 +326,6 @@ static int report_resume(struct pci_dev *dev, void *data)\n {\n \tconst struct pci_error_handlers *err_handler;\n \n-\tdevice_lock(&dev->dev);\n \tdev->error_state = pci_channel_io_normal;\n \n \tif (!dev->driver ||\n@@ -342,10 +336,46 @@ static int report_resume(struct pci_dev *dev, void *data)\n \terr_handler = dev->driver->err_handler;\n \terr_handler->resume(dev);\n out:\n-\tdevice_unlock(&dev->dev);\n \treturn 0;\n }\n \n+static void aer_pci_walk_bus(struct pci_bus *bus,\n+\t\t\t     int (*cb)(struct pci_dev *, void *),\n+\t\t\t     struct aer_broadcast_data *res)\n+{\n+\tbool locked;\n+\tuint8_t i;\n+\n+\tfor (i = 1; i; i++) {\n+\t\t/* PCI driver could hold device->mutex lock and call driver\n+\t\t * cb functions which may try to aquire pci_bus_sem.\n+\t\t * Trying to aquiring device->mutex lock holding pci_bus_sem\n+\t\t * could lead to deadlock.\n+\t\t *\n+\t\t * Holding pci_bus_sem lets try to aquire device->mutex lock.\n+\t\t * If trylock(device->mutex) fails, unlock pci_bus_sem and\n+\t\t * try again.\n+\t\t */\n+\t\tdown_read(&pci_bus_sem);\n+\t\tlocked = __pci_bus_trylock(bus, pci_device_trylock,\n+\t\t\t\t\t   pci_device_unlock);\n+\t\tif (locked)\n+\t\t\tgoto out;\n+\t\tup_read(&pci_bus_sem);\n+\t\tdev_info(&bus->self->dev, \"Could not aquire device lock on all subordinates, trying again.\");\n+\t\tmsleep(25);\n+\t};\n+\n+\tres->result = PCI_ERS_RESULT_NONE;\n+\tdev_err(&bus->self->dev, \"Could not aquire lock. No aer recovery done.\");\n+\treturn;\n+out:\n+\t/* all devices under this subordinate is locked */\n+\t__pci_walk_bus(bus, cb, res);\n+\t__pci_bus_unlock(bus, pci_device_unlock);\n+\tup_read(&pci_bus_sem);\n+}\n+\n /**\n  * broadcast_error_message - handle message broadcast to downstream drivers\n  * @dev: pointer to from where in a hierarchy message is broadcasted down\n@@ -380,7 +410,7 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,\n \t\t */\n \t\tif (cb == report_error_detected)\n \t\t\tdev->error_state = state;\n-\t\tpci_walk_bus(dev->subordinate, cb, &result_data);\n+\t\taer_pci_walk_bus(dev->subordinate, cb, &result_data);\n \t\tif (cb == report_resume) {\n \t\t\tpci_cleanup_aer_uncorrect_error_status(dev);\n \t\t\tdev->error_state = pci_channel_io_normal;\n@@ -390,7 +420,7 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,\n \t\t * If the error is reported by an end point, we think this\n \t\t * error is related to the upstream link of the end point.\n \t\t */\n-\t\tpci_walk_bus(dev->bus, cb, &result_data);\n+\t\taer_pci_walk_bus(dev->bus, cb, &result_data);\n \t}\n \n \treturn result_data.result;\n",
    "prefixes": [
        "3/4"
    ]
}