Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2217335/?format=api
{ "id": 2217335, "url": "http://patchwork.ozlabs.org/api/patches/2217335/?format=api", "web_url": "http://patchwork.ozlabs.org/project/ubuntu-kernel/patch/20260327214242.4051172-5-john.cabaj@canonical.com/", "project": { "id": 15, "url": "http://patchwork.ozlabs.org/api/projects/15/?format=api", "name": "Ubuntu Kernel", "link_name": "ubuntu-kernel", "list_id": "kernel-team.lists.ubuntu.com", "list_email": "kernel-team@lists.ubuntu.com", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260327214242.4051172-5-john.cabaj@canonical.com>", "list_archive_url": null, "date": "2026-03-27T21:42:42", "name": "[SRU,Q:linux-azure,3/3] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "64bc2e19dd208c3db432d1d35809988e9b0f1ba0", "submitter": { "id": 85032, "url": "http://patchwork.ozlabs.org/api/people/85032/?format=api", "name": "John Cabaj", "email": "john.cabaj@canonical.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/ubuntu-kernel/patch/20260327214242.4051172-5-john.cabaj@canonical.com/mbox/", "series": [ { "id": 497836, "url": "http://patchwork.ozlabs.org/api/series/497836/?format=api", "web_url": "http://patchwork.ozlabs.org/project/ubuntu-kernel/list/?series=497836", "date": "2026-03-27T21:42:38", "name": "net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/497836/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2217335/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2217335/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<kernel-team-bounces@lists.ubuntu.com>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (4096-bit key;\n unprotected) header.d=canonical.com header.i=@canonical.com\n header.a=rsa-sha256 header.s=20251003 header.b=KoV7r2SZ;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=lists.ubuntu.com\n (client-ip=185.125.189.65; helo=lists.ubuntu.com;\n envelope-from=kernel-team-bounces@lists.ubuntu.com;\n receiver=patchwork.ozlabs.org)" ], "Received": [ "from lists.ubuntu.com (lists.ubuntu.com [185.125.189.65])\n\t(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fjDdr4VgQz1yGQ\n\tfor <incoming@patchwork.ozlabs.org>; Sat, 28 Mar 2026 08:42:56 +1100 (AEDT)", "from localhost ([127.0.0.1] helo=lists.ubuntu.com)\n\tby lists.ubuntu.com with esmtp (Exim 4.86_2)\n\t(envelope-from <kernel-team-bounces@lists.ubuntu.com>)\n\tid 1w6Ex8-0006zs-8Q; Fri, 27 Mar 2026 21:42:50 +0000", "from smtp-relay-internal-0.internal ([10.131.114.225]\n helo=smtp-relay-internal-0.canonical.com)\n by lists.ubuntu.com with esmtps (TLS1.2:ECDHE_RSA_AES_128_GCM_SHA256:128)\n (Exim 4.86_2) (envelope-from <john.cabaj@canonical.com>)\n id 1w6Ex6-0006xm-Ir\n for kernel-team@lists.ubuntu.com; Fri, 27 Mar 2026 21:42:48 +0000", "from mail-qv1-f70.google.com (mail-qv1-f70.google.com\n [209.85.219.70])\n (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest\n SHA256)\n (No client certificate requested)\n by smtp-relay-internal-0.canonical.com (Postfix) with ESMTPS id 771CF3F5B0\n for <kernel-team@lists.ubuntu.com>; Fri, 27 Mar 2026 21:42:48 +0000 (UTC)", "by mail-qv1-f70.google.com with SMTP id\n 6a1803df08f44-89a0a2afc55so82000846d6.0\n for <kernel-team@lists.ubuntu.com>; Fri, 27 Mar 2026 14:42:48 -0700 (PDT)", "from smtp.gmail.com\n (h208-73-92-250.mdtnwi.broadband.dynamic.tds.net. [208.73.92.250])\n by smtp.gmail.com with ESMTPSA id\n 6a1803df08f44-89ecf95f4f9sm3448206d6.37.2026.03.27.14.42.46\n (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n Fri, 27 Mar 2026 14:42:46 -0700 (PDT)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=canonical.com;\n s=20251003; t=1774647768;\n bh=TSmMvDgxUNgmM0zKVUVaUz6VaaEWiiYI55gN8fKpWfI=;\n h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:\n MIME-Version;\n b=KoV7r2SZP8lTRIHQ+z3Z1gPDgsW+opcecYHNTrRykBp4chhHVSGzkd8kYL1+a927i\n ViBhH7WnLiLBMVTpI9T5W+6BW4zvGDTx367zD29IdK7lX/IDa4Q/qgG8z3e+UK84/a\n qvDIYYKVC0eYW6ubqpx3PjBrh4y6LvQ5RW+sHBpH+l5hh6vvoRyXRqxQrFpK9Yl4hB\n UXY3QRi3itEBm5+UK9YROJHH9V4uKPoxptQRvHx873r6WV4dLqeq6v6cqOIg95ZI0f\n q+j6vr38IeTvEZ2UaLqtPphQa1LUPRl6vLcsjTuAT6uE1iUbXrCIc0zXNKmyMEUz4b\n RIfvJFNSm1AgWed1n7i/lAfqQnEnYook9UVB4PmMQou9M8wIw8lAr2M8Qizu3ETixv\n IJYXt5bKYvwYeMTOSnxrzz1rOE902WrwNuTJm9lEp0t2qbZuVYOZwsFpis295S5Zkd\n MsRFh6ynHRXEBjA8iCDXiAPfGomuVcUqYW0q6R1/CGC5Twlc3VBlA87vjgd/yxWbqj\n FNBJQk76F7SdtLnYl+7vfqI605jFW/DJYF2hdtlKMum17SgMGjukcEH+dzJGHaGvd0\n Bq9aa5hobrWswcDlah2ikwRwjGCORP/vpFHC3YVhklOaGlC9Hq+rBvpvGy4PSh2i+K\n +lpdPZJcRo0z5V5p7rH7LqNk=", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20251104; t=1774647767; x=1775252567;\n h=content-transfer-encoding:mime-version:references:in-reply-to\n :message-id:date:subject:cc:to:from:x-gm-gg:x-gm-message-state:from\n :to:cc:subject:date:message-id:reply-to;\n bh=TSmMvDgxUNgmM0zKVUVaUz6VaaEWiiYI55gN8fKpWfI=;\n b=NnXW1yRWqd7voCdUmFMGWjMg9RHLCMQZtFU5/kX8WxmKXr7sJ+FjiqDxU6m1UDjVyi\n 5ObUxwIqVYdo0JYtUjyjLK6iuE/diEa+aoAs18LhB1wYFuPl0LWHgAmpiNUA9S1gE+lz\n m/OrsO8YHVty9VzQ6ko3MWly6/FprxCh4dh7ichOKqeTPTKgwTOd6K4SuO/A9w9qzfhz\n f4qkMHfOiVmtLS4fYs/hF0BgGHE4G614r1S5ZQZc6pQVVxMy08EjK+Vt6Zp9G46OYkkW\n 6pOWvm3FV421N7q1h/0U0AqX/OqKLKwwvbJvlszfAVTQEwE+uQRMUtJv21co6guGvctm\n d8pQ==", "X-Gm-Message-State": "AOJu0YzWKA1nVS7qHisk2uyWSO5RcE32oXtwbQpSJdborYZeivtzPmXJ\n FvC3S6Roq7gLk/4cy+kxcz57260xZruoxongGn9m5qBhdu3/N8mH1rsDXdSEvA+/uNPE4SdDa/a\n g6uCd/g/Gg44BNnikSxFPqlQCCU0mpGtN0jk0qhkw/CaN6JxJayGuUUxQM9ny3CGS1MFq0hxoaJ\n nTLRMqtk1HtCYQ4g==", "X-Gm-Gg": "ATEYQzzE9UVKGPTsPI0ZU6yQP2qoMMs7Oegu+xh4uv2/GGgf5RlsnJqwC10AJwKBQ6k\n eT+ZNuGDGbKe03CLolDz2yPvNMXP1AIiZGUmPl3edE3Kq6hNhXGPinFjM8WlrxfoUEH+/jSRmYF\n vPL3h3zqKGl+LIpFYkSx39SFPlShcATTt3sOw+0CWGl9e1dvvll77WSoX895RD2YZX0b0c2Atba\n bNAqzciY5ej+nZ5xpquiWOlBrXxIj1LuQC9pKAO7qrJrrLKNEFg8rCF/UucGka91J4mC4d0nJtj\n oAlKZSi+H9tgesbTPvAe9I4aKAa1NbXlfDebBBzhz8FR2aDW7jugkdFvMhYRT0Y+yLwyVnZy97I\n Vwbo+sw8cRsKc6ZhXvaRQK86cCXILx4zgBo4rTYA9ioulJDecS6hckc55GQlcqqrrnTI36z/Lpw\n XVEawOjcv+iA==", "X-Received": [ "by 2002:a05:6214:1bca:b0:89c:5385:3478 with SMTP id\n 6a1803df08f44-89ce8d583ffmr59079136d6.5.1774647767398;\n Fri, 27 Mar 2026 14:42:47 -0700 (PDT)", "by 2002:a05:6214:1bca:b0:89c:5385:3478 with SMTP id\n 6a1803df08f44-89ce8d583ffmr59078936d6.5.1774647767040;\n Fri, 27 Mar 2026 14:42:47 -0700 (PDT)" ], "From": "John Cabaj <john.cabaj@canonical.com>", "To": "kernel-team@lists.ubuntu.com", "Subject": "[SRU][Q:linux-azure][PATCH 3/3] net: mana: Trigger VF reset/recovery\n on health check failure due to HWC timeout", "Date": "Fri, 27 Mar 2026 16:42:42 -0500", "Message-ID": "<20260327214242.4051172-5-john.cabaj@canonical.com>", "X-Mailer": "git-send-email 2.43.0", "In-Reply-To": "<20260327214242.4051172-1-john.cabaj@canonical.com>", "References": "<20260327214242.4051172-1-john.cabaj@canonical.com>", "MIME-Version": "1.0", "X-BeenThere": "kernel-team@lists.ubuntu.com", "X-Mailman-Version": "2.1.20", "Precedence": "list", "List-Id": "Kernel team discussions <kernel-team.lists.ubuntu.com>", "List-Unsubscribe": "<https://lists.ubuntu.com/mailman/options/kernel-team>,\n <mailto:kernel-team-request@lists.ubuntu.com?subject=unsubscribe>", "List-Archive": "<https://lists.ubuntu.com/archives/kernel-team>", "List-Post": "<mailto:kernel-team@lists.ubuntu.com>", "List-Help": "<mailto:kernel-team-request@lists.ubuntu.com?subject=help>", "List-Subscribe": "<https://lists.ubuntu.com/mailman/listinfo/kernel-team>,\n <mailto:kernel-team-request@lists.ubuntu.com?subject=subscribe>", "Content-Type": "text/plain; charset=\"utf-8\"", "Content-Transfer-Encoding": "base64", "Errors-To": "kernel-team-bounces@lists.ubuntu.com", "Sender": "\"kernel-team\" <kernel-team-bounces@lists.ubuntu.com>" }, "content": "From: Dipayaan Roy <dipayanroy@linux.microsoft.com>\n\nBugLink: https://bugs.launchpad.net/bugs/2146601\n\nThe GF stats periodic query is used as mechanism to monitor HWC health\ncheck. If this HWC command times out, it is a strong indication that\nthe device/SoC is in a faulty state and requires recovery.\n\nToday, when a timeout is detected, the driver marks\nhwc_timeout_occurred, clears cached stats, and stops rescheduling the\nperiodic work. However, the device itself is left in the same failing\nstate.\n\nExtend the timeout handling path to trigger the existing MANA VF\nrecovery service by queueing a GDMA_EQE_HWC_RESET_REQUEST work item.\nThis is expected to initiate the appropriate recovery flow by suspende\nresume first and if it fails then trigger a bus rescan.\n\nThis change is intentionally limited to HWC command timeouts and does\nnot trigger recovery for errors reported by the SoC as a normal command\nresponse.\n\nSigned-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>\nReviewed-by: Haiyang Zhang <haiyangz@microsoft.com>\nReviewed-by: Simon Horman <horms@kernel.org>\nLink: https://patch.msgid.link/aaFShvKnwR5FY8dH@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net\nSigned-off-by: Paolo Abeni <pabeni@redhat.com>\n(backported from commit 2b12ffb669553972f5cd017c69a2b81593b09106 linux-next)\n[john-cabaj: missing commit 69050f8d6d07: \"treewide: Replace kmalloc\nwith kmalloc_obj for non-scalar types\" from upstream, which is too\nmuch of a lift to backport as it's treewide, so adjusting context here]\nSigned-off-by: John Cabaj <john.cabaj@canonical.com>\n---\n .../net/ethernet/microsoft/mana/gdma_main.c | 65 ++++++++++---------\n drivers/net/ethernet/microsoft/mana/mana_en.c | 9 ++-\n include/net/mana/gdma.h | 16 ++++-\n 3 files changed, 55 insertions(+), 35 deletions(-)", "diff": "diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c\nindex 829a7d98fa41..112226dff50b 100644\n--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c\n+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c\n@@ -490,15 +490,9 @@ static void mana_serv_reset(struct pci_dev *pdev)\n \t\tdev_info(&pdev->dev, \"MANA reset cycle completed\\n\");\n \n out:\n-\tgc->in_service = false;\n+\tclear_bit(GC_IN_SERVICE, &gc->flags);\n }\n \n-struct mana_serv_work {\n-\tstruct work_struct serv_work;\n-\tstruct pci_dev *pdev;\n-\tenum gdma_eqe_type type;\n-};\n-\n static void mana_do_service(enum gdma_eqe_type type, struct pci_dev *pdev)\n {\n \tswitch (type) {\n@@ -558,12 +552,42 @@ static void mana_serv_func(struct work_struct *w)\n \tmodule_put(THIS_MODULE);\n }\n \n+int mana_schedule_serv_work(struct gdma_context *gc, enum gdma_eqe_type type)\n+{\n+\tstruct mana_serv_work *mns_wk;\n+\n+\tif (test_and_set_bit(GC_IN_SERVICE, &gc->flags)) {\n+\t\tdev_info(gc->dev, \"Already in service\\n\");\n+\t\treturn -EBUSY;\n+\t}\n+\n+\tif (!try_module_get(THIS_MODULE)) {\n+\t\tdev_info(gc->dev, \"Module is unloading\\n\");\n+\t\tclear_bit(GC_IN_SERVICE, &gc->flags);\n+\t\treturn -ENODEV;\n+\t}\n+\n+\tmns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);\n+\tif (!mns_wk) {\n+\t\tmodule_put(THIS_MODULE);\n+\t\tclear_bit(GC_IN_SERVICE, &gc->flags);\n+\t\treturn -ENOMEM;\n+\t}\n+\n+\tdev_info(gc->dev, \"Start MANA service type:%d\\n\", type);\n+\tmns_wk->pdev = to_pci_dev(gc->dev);\n+\tmns_wk->type = type;\n+\tpci_dev_get(mns_wk->pdev);\n+\tINIT_WORK(&mns_wk->serv_work, mana_serv_func);\n+\tschedule_work(&mns_wk->serv_work);\n+\treturn 0;\n+}\n+\n static void mana_gd_process_eqe(struct gdma_queue *eq)\n {\n \tu32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);\n \tstruct gdma_context *gc = eq->gdma_dev->gdma_context;\n \tstruct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;\n-\tstruct mana_serv_work *mns_wk;\n \tunion gdma_eqe_info eqe_info;\n \tenum gdma_eqe_type type;\n \tstruct gdma_event event;\n@@ -623,30 +647,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)\n \t\t\t\t \"Service is to be processed in probe\\n\");\n \t\t\tbreak;\n \t\t}\n-\n-\t\tif (gc->in_service) {\n-\t\t\tdev_info(gc->dev, \"Already in service\\n\");\n-\t\t\tbreak;\n-\t\t}\n-\n-\t\tif (!try_module_get(THIS_MODULE)) {\n-\t\t\tdev_info(gc->dev, \"Module is unloading\\n\");\n-\t\t\tbreak;\n-\t\t}\n-\n-\t\tmns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);\n-\t\tif (!mns_wk) {\n-\t\t\tmodule_put(THIS_MODULE);\n-\t\t\tbreak;\n-\t\t}\n-\n-\t\tdev_info(gc->dev, \"Start MANA service type:%d\\n\", type);\n-\t\tgc->in_service = true;\n-\t\tmns_wk->pdev = to_pci_dev(gc->dev);\n-\t\tmns_wk->type = type;\n-\t\tpci_dev_get(mns_wk->pdev);\n-\t\tINIT_WORK(&mns_wk->serv_work, mana_serv_func);\n-\t\tschedule_work(&mns_wk->serv_work);\n+\t\tmana_schedule_serv_work(gc, type);\n \t\tbreak;\n \n \tdefault:\ndiff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c\nindex 568613c1dbb4..96b501f95d01 100644\n--- a/drivers/net/ethernet/microsoft/mana/mana_en.c\n+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c\n@@ -847,7 +847,7 @@ static void mana_tx_timeout(struct net_device *netdev, unsigned int txqueue)\n \tstruct gdma_context *gc = ac->gdma_dev->gdma_context;\n \n \t/* Already in service, hence tx queue reset is not required.*/\n-\tif (gc->in_service)\n+\tif (test_bit(GC_IN_SERVICE, &gc->flags))\n \t\treturn;\n \n \t/* Note: If there are pending queue reset work for this port(apc),\n@@ -3508,6 +3508,7 @@ static void mana_gf_stats_work_handler(struct work_struct *work)\n {\n \tstruct mana_context *ac =\n \t\tcontainer_of(to_delayed_work(work), struct mana_context, gf_stats_work);\n+\tstruct gdma_context *gc = ac->gdma_dev->gdma_context;\n \tint err;\n \n \terr = mana_query_gf_stats(ac);\n@@ -3515,6 +3516,12 @@ static void mana_gf_stats_work_handler(struct work_struct *work)\n \t\t/* HWC timeout detected - reset stats and stop rescheduling */\n \t\tac->hwc_timeout_occurred = true;\n \t\tmemset(&ac->hc_stats, 0, sizeof(ac->hc_stats));\n+\t\tdev_warn(gc->dev,\n+\t\t\t \"Gf stats wk handler: gf stats query timed out.\\n\");\n+\t\t/* As HWC timed out, indicating a faulty HW state and needs a\n+\t\t * reset.\n+\t\t */\n+\t\tmana_schedule_serv_work(gc, GDMA_EQE_HWC_RESET_REQUEST);\n \t\treturn;\n \t}\n \tschedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD);\ndiff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h\nindex 04a13a6960f6..08e8e972c920 100644\n--- a/include/net/mana/gdma.h\n+++ b/include/net/mana/gdma.h\n@@ -213,6 +213,12 @@ enum gdma_page_type {\n \n #define GDMA_INVALID_DMA_REGION 0\n \n+struct mana_serv_work {\n+\tstruct work_struct serv_work;\n+\tstruct pci_dev *pdev;\n+\tenum gdma_eqe_type type;\n+};\n+\n struct gdma_mem_info {\n \tstruct device *dev;\n \n@@ -384,6 +390,7 @@ struct gdma_irq_context {\n \n enum gdma_context_flags {\n \tGC_PROBE_SUCCEEDED\t= 0,\n+\tGC_IN_SERVICE\t\t= 1,\n };\n \n struct gdma_context {\n@@ -409,7 +416,6 @@ struct gdma_context {\n \tu32\t\t\ttest_event_eq_id;\n \n \tbool\t\t\tis_pf;\n-\tbool\t\t\tin_service;\n \n \tphys_addr_t\t\tbar0_pa;\n \tvoid __iomem\t\t*bar0_va;\n@@ -471,6 +477,8 @@ int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);\n \n void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit);\n \n+int mana_schedule_serv_work(struct gdma_context *gc, enum gdma_eqe_type type);\n+\n struct gdma_wqe {\n \tu32 reserved\t:24;\n \tu32 last_vbytes\t:8;\n@@ -608,6 +616,9 @@ enum {\n /* Driver can handle hardware recovery events during probe */\n #define GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY BIT(22)\n \n+/* Driver supports self recovery on Hardware Channel timeouts */\n+#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY BIT(25)\n+\n #define GDMA_DRV_CAP_FLAGS1 \\\n \t(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \\\n \t GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \\\n@@ -620,7 +631,8 @@ enum {\n \t GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \\\n \t GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \\\n \t GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \\\n-\t GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY)\n+\t GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY | \\\n+\t GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY)\n \n #define GDMA_DRV_CAP_FLAGS2 0\n \n", "prefixes": [ "SRU", "Q:linux-azure", "3/3" ] }