Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.0/patches/2197802/?format=api
{ "id": 2197802, "url": "http://patchwork.ozlabs.org/api/1.0/patches/2197802/?format=api", "project": { "id": 21, "url": "http://patchwork.ozlabs.org/api/1.0/projects/21/?format=api", "name": "Linux Tegra Development", "link_name": "linux-tegra", "list_id": "linux-tegra.vger.kernel.org", "list_email": "linux-tegra@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null }, "msgid": "<20260218145809.1622856-5-bwicaksono@nvidia.com>", "date": "2026-02-18T14:58:05", "name": "[v2,4/8] perf/arm_cspmu: nvidia: Add Tegra410 PCIE PMU", "commit_ref": null, "pull_url": null, "state": "handled-elsewhere", "archived": false, "hash": "f987f2a71ff2b296e60968ac37b5f7390fd418bd", "submitter": { "id": 83903, "url": "http://patchwork.ozlabs.org/api/1.0/people/83903/?format=api", "name": "Besar Wicaksono", "email": "bwicaksono@nvidia.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/linux-tegra/patch/20260218145809.1622856-5-bwicaksono@nvidia.com/mbox/", "series": [ { "id": 492565, "url": "http://patchwork.ozlabs.org/api/1.0/series/492565/?format=api", "date": "2026-02-18T14:58:01", "name": "perf: add NVIDIA Tegra410 Uncore PMU support", "version": 2, "mbox": "http://patchwork.ozlabs.org/series/492565/mbox/" } ], "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2197802/checks/", "tags": {}, "headers": { "Return-Path": "\n <linux-tegra+bounces-12056-incoming=patchwork.ozlabs.org@vger.kernel.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "linux-tegra@vger.kernel.org" ], "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=Nvidia.com header.i=@Nvidia.com header.a=rsa-sha256\n header.s=selector2 header.b=jFte4kt8;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org\n (client-ip=172.232.135.74; helo=sto.lore.kernel.org;\n 
envelope-from=linux-tegra+bounces-12056-incoming=patchwork.ozlabs.org@vger.kernel.org;\n receiver=patchwork.ozlabs.org)", "smtp.subspace.kernel.org;\n\tdkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com\n header.b=\"jFte4kt8\"", "smtp.subspace.kernel.org;\n arc=fail smtp.client-ip=52.101.43.49", "smtp.subspace.kernel.org;\n dmarc=pass (p=reject dis=none) header.from=nvidia.com", "smtp.subspace.kernel.org;\n spf=fail smtp.mailfrom=nvidia.com" ], "Received": [ "from sto.lore.kernel.org (sto.lore.kernel.org [172.232.135.74])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fGKRB1wSjz1xpl\n\tfor <incoming@patchwork.ozlabs.org>; Thu, 19 Feb 2026 01:59:18 +1100 (AEDT)", "from smtp.subspace.kernel.org (conduit.subspace.kernel.org\n [100.90.174.1])\n\tby sto.lore.kernel.org (Postfix) with ESMTP id 3C8B530058D2\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 18 Feb 2026 14:59:15 +0000 (UTC)", "from localhost.localdomain (localhost.localdomain [127.0.0.1])\n\tby smtp.subspace.kernel.org (Postfix) with ESMTP id 0FD3733EB07;\n\tWed, 18 Feb 2026 14:59:14 +0000 (UTC)", "from SJ2PR03CU001.outbound.protection.outlook.com\n (mail-westusazon11012049.outbound.protection.outlook.com [52.101.43.49])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby smtp.subspace.kernel.org (Postfix) with ESMTPS id ED71233EAEB;\n\tWed, 18 Feb 2026 14:59:11 +0000 (UTC)", "from BL1PR13CA0243.namprd13.prod.outlook.com (2603:10b6:208:2ba::8)\n by LV8PR12MB9450.namprd12.prod.outlook.com (2603:10b6:408:202::13) with\n Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.9632.14; Wed, 18 Feb\n 2026 14:59:01 +0000", "from BL02EPF0001A103.namprd05.prod.outlook.com\n (2603:10b6:208:2ba:cafe::81) by 
BL1PR13CA0243.outlook.office365.com\n (2603:10b6:208:2ba::8) with Microsoft SMTP Server (version=TLS1_3,\n cipher=TLS_AES_256_GCM_SHA384) id 15.20.9632.13 via Frontend Transport; Wed,\n 18 Feb 2026 14:58:35 +0000", "from mail.nvidia.com (216.228.117.160) by\n BL02EPF0001A103.mail.protection.outlook.com (10.167.241.133) with Microsoft\n SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id\n 15.20.9632.12 via Frontend Transport; Wed, 18 Feb 2026 14:59:01 +0000", "from rnnvmail205.nvidia.com (10.129.68.10) by mail.nvidia.com\n (10.129.200.66) with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.2562.20; Wed, 18 Feb\n 2026 06:58:41 -0800", "from rnnvmail201.nvidia.com (10.129.68.8) by rnnvmail205.nvidia.com\n (10.129.68.10) with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.2562.20; Wed, 18 Feb\n 2026 06:58:40 -0800", "from build-bwicaksono-noble-20251018.internal (10.127.8.11) by\n mail.nvidia.com (10.129.68.8) with Microsoft SMTP Server id 15.2.2562.20 via\n Frontend Transport; Wed, 18 Feb 2026 06:58:39 -0800" ], "ARC-Seal": [ "i=2; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;\n\tt=1771426753; cv=fail;\n b=WooN/PPfxAUlO4lY+tS2Az13PSrOW2gIJIls5WMoH5E2x39LuSesnxDmsynpc6utExXahu9JURxMoBAPGp4HhF7dJCcw1ZiNLuYQPZYpNYydMSBT14z48lF05mPlKBD+rz3S16e4RtPm/pyvk7FQatGoM0SSFtBL3b/ym4aEhOI=", "i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none;\n b=XvhACcxM47pDJ7N84Sd9fa8DqeGC3GGQoWvafyUh6JLBLX1nNG+kxobCOEBTa+OlgA2d5ztuFCXDb9L2UeXGD5q4rUvw4bzESz3DsEfQoz8OxRFLZ8RBPHucPHSSOHmXL0+wviYngTRfKXxVW03keZcZNSo1NfUN/ogQFH6wPdsI+yS34T7W9eLF+nYGGVDo17dM9MD1XZDqPIve1O6IJqDNJ2erAmTwOXaXusEKCWumfyxyeLzpUwoaEufkvn+hBDH0nqDOcJqLxxC/9NaM/HEM9ECLrsDFC4xdiUW0U4DZyZo629zMlS6XvRliXC+tBB81OsAOK9J72GdyusLYSw==" ], "ARC-Message-Signature": [ "i=2; a=rsa-sha256; d=subspace.kernel.org;\n\ts=arc-20240116; t=1771426753; 
c=relaxed/simple;\n\tbh=L9/TFM/NNGr+gNU5FJY/DLaHWpkU5DmACp7ggtCmXps=;\n\th=From:To:CC:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version:Content-Type;\n b=fm60K+ejKjJOgwtk42hQmIeM7F7gaIGXw8X/xJjbIpZhSOHx1DljEiWJeFsuPhV1iX4RMmISSbBNiKoCOSkmmTyNRCVocbW8MBkwsGuMl2aefy9YOgyjy7ozS5lHL4mUd9Cse1MWYrr+3dn0zwnjQWvFIHwehcjgQO0b0POipvI=", "i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com;\n s=arcselector10001;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1;\n bh=yyc+Ze8Em9/EV+uTCWp5JpUFh1eO9N+cJx+Q1ISRmbI=;\n b=idwxf9TVx8/GpYcYt7KAEOgBHdnbLvqB2QnOyUD+JemQyccqpL/geiS4235XbUuuC2eefp3FQI2Yjuhyrq/Fu562BbNvFzrJbxthdep1+F8ANDWQjevZOc91gly+mWfsmCsErzwedyT50t/ID+5ceQXFVdOX0us73h2QRUwsclJg+KpMGT1mi1QFBxtZ0pcHZpIimnhk9hC40DKv6+ku99/Ip+kadZuwNVC4O4S/uNlZQf0KwLi+ikeJCjnfzEj5vmtbG0G/T1OCpQk1MxYUhBywF5cvnM6y6oGyQk7hmscSbhDuGbCaGz/gAf1MtwmwCrjJLCMFGvKX3V+hAF2F+g==" ], "ARC-Authentication-Results": [ "i=2; smtp.subspace.kernel.org;\n dmarc=pass (p=reject dis=none) header.from=nvidia.com;\n spf=fail smtp.mailfrom=nvidia.com;\n dkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com\n header.b=jFte4kt8; arc=fail smtp.client-ip=52.101.43.49", "i=1; mx.microsoft.com 1; spf=pass (sender ip is\n 216.228.117.160) smtp.rcpttodomain=kernel.org smtp.mailfrom=nvidia.com;\n dmarc=pass (p=reject sp=reject pct=100) action=none header.from=nvidia.com;\n dkim=none (message not signed); arc=none (0)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=Nvidia.com;\n s=selector2;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck;\n bh=yyc+Ze8Em9/EV+uTCWp5JpUFh1eO9N+cJx+Q1ISRmbI=;\n 
b=jFte4kt8ra3umOE+XwknyDEZ+GJCGETm40jpcIFYGXUoWQNmM4yjudcY3+P9+dRnGtdD3AN6B3Dnv7O3BCu8N82hjMJcwAuGvfFEGzGHdrurU7hZpTZ36If2dR4s/gAMne1NyC8qvNeXZj94di3CkAthxbhVpoOEn235wELycFKI/uVboo6nmO3oADAR6fmIr6fckFPrbdXI75VvTBjZ8F54EImQl0DUqgxZb/XL9yaPaViGWnwQhVV7H0yF9/070pEUp2ZKkIV/RohAGG4US/tztOlS/VUsKkTgvOknzLeeHmj6z0G8c70lzkNlOJSORlNAxfkR77yslf4vLnIHtA==", "X-MS-Exchange-Authentication-Results": "spf=pass (sender IP is 216.228.117.160)\n smtp.mailfrom=nvidia.com; dkim=none (message not signed)\n header.d=none;dmarc=pass action=none header.from=nvidia.com;", "Received-SPF": "Pass (protection.outlook.com: domain of nvidia.com designates\n 216.228.117.160 as permitted sender) receiver=protection.outlook.com;\n client-ip=216.228.117.160; helo=mail.nvidia.com; pr=C", "From": "Besar Wicaksono <bwicaksono@nvidia.com>", "To": "<will@kernel.org>, <suzuki.poulose@arm.com>, <robin.murphy@arm.com>,\n\t<ilkka@os.amperecomputing.com>", "CC": "<linux-arm-kernel@lists.infradead.org>, <linux-kernel@vger.kernel.org>,\n\t<linux-tegra@vger.kernel.org>, <mark.rutland@arm.com>, <treding@nvidia.com>,\n\t<jonathanh@nvidia.com>, <vsethi@nvidia.com>, <rwiley@nvidia.com>,\n\t<sdonthineni@nvidia.com>, <skelley@nvidia.com>, <ywan@nvidia.com>,\n\t<mochs@nvidia.com>, <nirmoyd@nvidia.com>, Besar Wicaksono\n\t<bwicaksono@nvidia.com>", "Subject": "[PATCH v2 4/8] perf/arm_cspmu: nvidia: Add Tegra410 PCIE PMU", "Date": "Wed, 18 Feb 2026 14:58:05 +0000", "Message-ID": "<20260218145809.1622856-5-bwicaksono@nvidia.com>", "X-Mailer": "git-send-email 2.43.0", "In-Reply-To": "<20260218145809.1622856-1-bwicaksono@nvidia.com>", "References": "<20260218145809.1622856-1-bwicaksono@nvidia.com>", "Precedence": "bulk", "X-Mailing-List": "linux-tegra@vger.kernel.org", "List-Id": "<linux-tegra.vger.kernel.org>", "List-Subscribe": "<mailto:linux-tegra+subscribe@vger.kernel.org>", "List-Unsubscribe": "<mailto:linux-tegra+unsubscribe@vger.kernel.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Content-Type": 
"text/plain", "X-NV-OnPremToCloud": "ExternallySecured", "X-EOPAttributedMessage": "0", "X-MS-PublicTrafficType": "Email", "X-MS-TrafficTypeDiagnostic": "BL02EPF0001A103:EE_|LV8PR12MB9450:EE_", "X-MS-Office365-Filtering-Correlation-Id": "db43016e-c223-44d3-685d-08de6efe4054", "X-MS-Exchange-SenderADCheck": "1", "X-MS-Exchange-AntiSpam-Relay": "0", "X-Microsoft-Antispam": "\n\tBCL:0;ARA:13230040|82310400026|376014|36860700013|1800799024;", "X-Microsoft-Antispam-Message-Info": "\n 61SkEApm76ixx/hax9JlPVs/MXVU0cx62in00lhlRA5VjRm9HR6lEh4uQgHV8UJcD1gIyxt8sBXQifgc3amr6Pp2y1JwZUxD59/sw06AIKJRBwI1NBOLg9eDqO92Jbs6hU+zz4mbPRAVhbYPuOqTd06J/y2nfLBawSiCVRe11F0sHdiJYNDrFOAYucOtOkyPh+5W0zkimbTqX4DNb3hAEZ7u8PUDfhMn0MYqysfKk279I/5rQTecwi8cjGQs+9OdzESnazOIEjU6KLSty1kZRWvDaAjVh3cGpVIcrEDmzHJ4Nx8czEmMGnYafGroW7zD+w3iqksAZ8AhmT7wFAa+kneb62Vd51gJJa5sYSAIT5pPMAg/sj7DVZG0/rLvFFw+a+rJ3NKuAucRzUoBo3rgpJCSQxCZBUBP+KC/O5+sM4p+lcb6HeefTucbiG5wdMhnewPGQIITjYLaKvggeS7m1fit3jGKW6OeMTm2Q1sGQ1rKk/HdokrE1Nz1Ippm2D+vcVSke+OwRrGLdZISu5BjTBk31y3A4TqepYyr5lj0eKnRn5llDd5xO+9B2pZq4oY7ylpXT7RfRfuoKQ4M9xgZweO2YNo7UAjOOWTKYTesbw7HdhhetosYEtHSzaFaJencfobgsfmUpVwN9v7fCEkvl4nIFfwCDpSJePnTe8qHa9E+c0M4VmjS4ZIgy6I++1QXfTapMht1b5tbc63M5bO86+mlzFoTZ9wIKvcZH5XCyqcitJFK6YUJNYaqViXPA9st3LthSPMDYZT9GfCjFhkBe9rDRmty/yL0Xld/ZZSz5ytEXibbAdhgdBPLMWyvdWF/PYNdpDkLBdCBY8ABbr/uBE5jT6GsQO7rYtPZ0WeZ5dedbPfprRXnBRloJZhYM2hJeJd83n0KJh4KFlrUxz9T/0b8wcIuuub97zlsJbBnoTLsAOzFdsV7Xb+rphWAHhriMXZ3zEXQUzqeEfDt1ANQlT7krbD4Y9HKqFt0qm3lZyDQEviukI27APCOGnPRkzzDU8Q/p+vcT06KMeSA//MwbYI4It4mjpwskfOb/kyk77UfU6cFL4jaVYKqwOvNn84QQSKL4szu33QwULAs09IIUChhjhW1U7DKiN1BZaWUY08BausgMMJ8I365e2+WOu6PVeDWu957rCkc2eJr/wj9JENIprj9eBOt96bKveRMj22o9U9kUOmkPVkGdUILP49WcKDJUL3KGh5sIOoCLSRWLlqU1f7pLHb2TGFwxbA49mmaQzU7ZEVQiRzI2We74ERd72osis75IanrD2AmCXNrSzGxRu57gAdVreps1VvphjlV3teIPI3e+0HLvEBlyK9R1q7l2YeNaN8cL1vkaBd4/Nj/A+NwS1blqLKp16HEUFMAIS44pirf37XFv1nESO3JM3LCHivgi7xptennhJlgN1ki5DXnfjR5KfSKJqMAKIo8wqoQpxqh99L7LQvD9w2tL6kDpIbyvMibBSu948w4yDqM0kcgF9vccV19ecEQc6aV
MhlIjX5/3ZgYscdlcFpzEeGEo2r0LSJiXayiPE71M9BBaMMkp/LQq1wGCQD8w1X2WBWCqXxA8mnTtAbrFBgD5cP692Sn1EROx4ZY7nxUaAaT4VHtZQV5soMAbqKFKOKztwO8NDhXPG2NtbR3fUb555p/S9RH7H4Ayj5fFuewBQ==", "X-Forefront-Antispam-Report": "\n\tCIP:216.228.117.160;CTRY:US;LANG:en;SCL:1;SRV:;IPV:NLI;SFV:NSPM;H:mail.nvidia.com;PTR:dc6edge1.nvidia.com;CAT:NONE;SFS:(13230040)(82310400026)(376014)(36860700013)(1800799024);DIR:OUT;SFP:1101;", "X-MS-Exchange-AntiSpam-MessageData-ChunkCount": "1", "X-MS-Exchange-AntiSpam-MessageData-0": "\n\tZKiixPkX92y5Lx5jrOMtWLpRgoU4w+cA8eIjxVtFSm8szOVFuShBajMQ+ldtK0YGS5aM00yLHZcgy4UNKHeE5nVA7D14xrpZkQ2v4WCNrZ1yZau0dQ2fIi3i3EUtMU2puiMudttHmt+VJ3BpuVXc+LVL7FcS/4lRp75ob5qLMd4Zzlg1swTyxVD+5U4BuMwxSdeyigN/pdN6U50VWyrW+75i7CHCr311Gw2pG/Vxe1fX5qxfFvMp6icBF0UlDor0GnPW/qIAcdMnIBzW+4Yj6SVjq+HZG+f5XFmlDV6WKxd9eolW1TR3KAExEcZQJBvMhi6Koqdu47t9w8nQmIchi5Xk/7GRmRbxkSJgpZrEfKcc6TZnysMlX/kCZma+vDtvdtcFTGac4uPemXXIkccBlpLreGJWh2n/zClC3sdckvZuCYoIJ6f/K0e0QqTxM4Mw", "X-OriginatorOrg": "Nvidia.com", "X-MS-Exchange-CrossTenant-OriginalArrivalTime": "18 Feb 2026 14:59:01.1397\n (UTC)", "X-MS-Exchange-CrossTenant-Network-Message-Id": "\n db43016e-c223-44d3-685d-08de6efe4054", "X-MS-Exchange-CrossTenant-Id": "43083d15-7273-40c1-b7db-39efd9ccc17a", "X-MS-Exchange-CrossTenant-OriginalAttributedTenantConnectingIp": "\n TenantId=43083d15-7273-40c1-b7db-39efd9ccc17a;Ip=[216.228.117.160];Helo=[mail.nvidia.com]", "X-MS-Exchange-CrossTenant-AuthSource": "\n\tBL02EPF0001A103.namprd05.prod.outlook.com", "X-MS-Exchange-CrossTenant-AuthAs": "Anonymous", "X-MS-Exchange-CrossTenant-FromEntityHeader": "HybridOnPrem", "X-MS-Exchange-Transport-CrossTenantHeadersStamped": "LV8PR12MB9450" }, "content": "Adds PCIE PMU support in Tegra410 SOC. This PMU is instanced\nin each root complex in the SOC and can capture traffic from\nPCIE device to various memory types. 
This PMU can filter traffic\nbased on the originating root port or BDF and the target memory\ntypes (CPU DRAM, GPU Memory, CXL Memory, or remote Memory).\n\nReviewed-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>\nSigned-off-by: Besar Wicaksono <bwicaksono@nvidia.com>\n---\n .../admin-guide/perf/nvidia-tegra410-pmu.rst | 162 ++++++++++++++\n drivers/perf/arm_cspmu/nvidia_cspmu.c | 211 +++++++++++++++++-\n 2 files changed, 368 insertions(+), 5 deletions(-)", "diff": "diff --git a/Documentation/admin-guide/perf/nvidia-tegra410-pmu.rst b/Documentation/admin-guide/perf/nvidia-tegra410-pmu.rst\nindex 7b7ba5700ca1..8528685ddb61 100644\n--- a/Documentation/admin-guide/perf/nvidia-tegra410-pmu.rst\n+++ b/Documentation/admin-guide/perf/nvidia-tegra410-pmu.rst\n@@ -6,6 +6,7 @@ The NVIDIA Tegra410 SoC includes various system PMUs to measure key performance\n metrics like memory bandwidth, latency, and utilization:\n \n * Unified Coherence Fabric (UCF)\n+* PCIE\n \n PMU Driver\n ----------\n@@ -104,3 +105,164 @@ Example usage:\n destination filter = remote memory::\n \n perf stat -a -e nvidia_ucf_pmu_1/event=0x0,src_loc_noncpu=0x1,dst_rem=0x1/\n+\n+PCIE PMU\n+--------\n+\n+This PMU monitors all read/write traffic from the root port(s) or a particular\n+BDF in a PCIE root complex (RC) to local or remote memory. There is one PMU per\n+PCIE RC in the SoC. Each RC can have up to 16 lanes that can be bifurcated into\n+up to 8 root ports. The traffic from each root port can be filtered using RP or\n+BDF filter. For example, specifying \"src_rp_mask=0xFF\" means the PMU counter will\n+capture traffic from all RPs. 
Please see below for more details.\n+\n+The events and configuration options of this PMU device are described in sysfs,\n+see /sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>_rc_<pcie-rc-id>.\n+\n+The events in this PMU can be used to measure bandwidth, utilization, and\n+latency:\n+\n+ * rd_req: count the number of read requests by PCIE device.\n+ * wr_req: count the number of write requests by PCIE device.\n+ * rd_bytes: count the number of bytes transferred by rd_req.\n+ * wr_bytes: count the number of bytes transferred by wr_req.\n+ * rd_cum_outs: count outstanding rd_req each cycle.\n+ * cycles: counts the PCIE cycles.\n+\n+The average bandwidth is calculated as::\n+\n+ AVG_RD_BANDWIDTH_IN_GBPS = RD_BYTES / ELAPSED_TIME_IN_NS\n+ AVG_WR_BANDWIDTH_IN_GBPS = WR_BYTES / ELAPSED_TIME_IN_NS\n+\n+The average request rate is calculated as::\n+\n+ AVG_RD_REQUEST_RATE = RD_REQ / CYCLES\n+ AVG_WR_REQUEST_RATE = WR_REQ / CYCLES\n+\n+\n+The average latency is calculated as::\n+\n+ FREQ_IN_GHZ = CYCLES / ELAPSED_TIME_IN_NS\n+ AVG_LATENCY_IN_CYCLES = RD_CUM_OUTS / RD_REQ\n+ AVERAGE_LATENCY_IN_NS = AVG_LATENCY_IN_CYCLES / FREQ_IN_GHZ\n+\n+The PMU events can be filtered based on the traffic source and destination.\n+The source filter indicates the PCIE devices that will be monitored. The\n+destination filter specifies the destination memory type, e.g. local system\n+memory (CMEM), local GPU memory (GMEM), or remote memory. The local/remote\n+classification of the destination filter is based on the home socket of the\n+address, not where the data actually resides. These filters can be found in\n+/sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>_rc_<pcie-rc-id>/format/.\n+\n+The list of event filters:\n+\n+* Source filter:\n+\n+ * src_rp_mask: bitmask of root ports that will be monitored. Each bit in this\n+ bitmask represents the RP index in the RC. If the bit is set, all devices under\n+ the associated RP will be monitored. 
E.g. \"src_rp_mask=0xF\" will monitor\n+ devices in root port 0 to 3.\n+ * src_bdf: the BDF that will be monitored. This is a 16-bit value that\n+ follows the formula: (bus << 8) + (device << 3) + (function). For example, the\n+ value of BDF 27:01.1 is 0x2709.\n+ * src_bdf_en: enable the BDF filter. If this is set, the BDF filter value in\n+ \"src_bdf\" is used to filter the traffic.\n+\n+ Note that Root-Port and BDF filters are mutually exclusive and the PMU in\n+ each RC can only have one BDF filter shared by all of its counters. If BDF filter\n+ is enabled, the BDF filter value will be applied to all events.\n+\n+* Destination filter:\n+\n+ * dst_loc_cmem: if set, count events to local system memory (CMEM) address\n+ * dst_loc_gmem: if set, count events to local GPU memory (GMEM) address\n+ * dst_loc_pcie_p2p: if set, count events to local PCIE peer address\n+ * dst_loc_pcie_cxl: if set, count events to local CXL memory address\n+ * dst_rem: if set, count events to remote memory address\n+\n+If the source filter is not specified, the PMU will count events from all root\n+ports. 
If the destination filter is not specified, the PMU will count events\n+to all destinations.\n+\n+Example usage:\n+\n+* Count event id 0x0 from root port 0 of PCIE RC-0 on socket 0 targeting all\n+ destinations::\n+\n+ perf stat -a -e nvidia_pcie_pmu_0_rc_0/event=0x0,src_rp_mask=0x1/\n+\n+* Count event id 0x1 from root port 0 and 1 of PCIE RC-1 on socket 0 and\n+ targeting just local CMEM of socket 0::\n+\n+ perf stat -a -e nvidia_pcie_pmu_0_rc_1/event=0x1,src_rp_mask=0x3,dst_loc_cmem=0x1/\n+\n+* Count event id 0x2 from root port 0 of PCIE RC-2 on socket 1 targeting all\n+ destinations::\n+\n+ perf stat -a -e nvidia_pcie_pmu_1_rc_2/event=0x2,src_rp_mask=0x1/\n+\n+* Count event id 0x3 from root port 0 and 1 of PCIE RC-3 on socket 1 and\n+ targeting just local CMEM of socket 1::\n+\n+ perf stat -a -e nvidia_pcie_pmu_1_rc_3/event=0x3,src_rp_mask=0x3,dst_loc_cmem=0x1/\n+\n+* Count event id 0x4 from BDF 01:01.0 of PCIE RC-4 on socket 0 targeting all\n+ destinations::\n+\n+ perf stat -a -e nvidia_pcie_pmu_0_rc_4/event=0x4,src_bdf=0x0108,src_bdf_en=0x1/\n+\n+Mapping the RC# to lspci segment number can be non-trivial; hence a new NVIDIA\n+Designated Vendor Specific Capability (DVSEC) register is added into the PCIE config space\n+for each RP. This DVSEC has vendor id \"10de\" and DVSEC id of \"0x4\". 
The DVSEC register\n+contains the following information to map PCIE devices under the RP back to its RC# :\n+\n+ - Bus# (byte 0xc) : bus number as reported by the lspci output\n+ - Segment# (byte 0xd) : segment number as reported by the lspci output\n+ - RP# (byte 0xe) : port number as reported by LnkCap attribute from lspci for a device with Root Port capability\n+ - RC# (byte 0xf): root complex number associated with the RP\n+ - Socket# (byte 0x10): socket number associated with the RP\n+\n+Example script for mapping lspci BDF to RC# and socket#::\n+\n+ #!/bin/bash\n+ while read bdf rest; do\n+ dvsec4_reg=$(lspci -vv -s $bdf | awk '\n+ /Designated Vendor-Specific: Vendor=10de ID=0004/ {\n+ match($0, /\\[([0-9a-fA-F]+)/, arr);\n+ print \"0x\" arr[1];\n+ exit\n+ }\n+ ')\n+ if [ -n \"$dvsec4_reg\" ]; then\n+ bus=$(setpci -s $bdf $(printf '0x%x' $((${dvsec4_reg} + 0xc))).b)\n+ segment=$(setpci -s $bdf $(printf '0x%x' $((${dvsec4_reg} + 0xd))).b)\n+ rp=$(setpci -s $bdf $(printf '0x%x' $((${dvsec4_reg} + 0xe))).b)\n+ rc=$(setpci -s $bdf $(printf '0x%x' $((${dvsec4_reg} + 0xf))).b)\n+ socket=$(setpci -s $bdf $(printf '0x%x' $((${dvsec4_reg} + 0x10))).b)\n+ echo \"$bdf: Bus=$bus, Segment=$segment, RP=$rp, RC=$rc, Socket=$socket\"\n+ fi\n+ done < <(lspci -d 10de:)\n+\n+Example output::\n+\n+ 0001:00:00.0: Bus=00, Segment=01, RP=00, RC=00, Socket=00\n+ 0002:80:00.0: Bus=80, Segment=02, RP=01, RC=01, Socket=00\n+ 0002:a0:00.0: Bus=a0, Segment=02, RP=02, RC=01, Socket=00\n+ 0002:c0:00.0: Bus=c0, Segment=02, RP=03, RC=01, Socket=00\n+ 0002:e0:00.0: Bus=e0, Segment=02, RP=04, RC=01, Socket=00\n+ 0003:00:00.0: Bus=00, Segment=03, RP=00, RC=02, Socket=00\n+ 0004:00:00.0: Bus=00, Segment=04, RP=00, RC=03, Socket=00\n+ 0005:00:00.0: Bus=00, Segment=05, RP=00, RC=04, Socket=00\n+ 0005:40:00.0: Bus=40, Segment=05, RP=01, RC=04, Socket=00\n+ 0005:c0:00.0: Bus=c0, Segment=05, RP=02, RC=04, Socket=00\n+ 0006:00:00.0: Bus=00, Segment=06, RP=00, RC=05, Socket=00\n+ 0009:00:00.0: Bus=00, 
Segment=09, RP=00, RC=00, Socket=01\n+ 000a:80:00.0: Bus=80, Segment=0a, RP=01, RC=01, Socket=01\n+ 000a:a0:00.0: Bus=a0, Segment=0a, RP=02, RC=01, Socket=01\n+ 000a:e0:00.0: Bus=e0, Segment=0a, RP=03, RC=01, Socket=01\n+ 000b:00:00.0: Bus=00, Segment=0b, RP=00, RC=02, Socket=01\n+ 000c:00:00.0: Bus=00, Segment=0c, RP=00, RC=03, Socket=01\n+ 000d:00:00.0: Bus=00, Segment=0d, RP=00, RC=04, Socket=01\n+ 000d:40:00.0: Bus=40, Segment=0d, RP=01, RC=04, Socket=01\n+ 000d:c0:00.0: Bus=c0, Segment=0d, RP=02, RC=04, Socket=01\n+ 000e:00:00.0: Bus=00, Segment=0e, RP=00, RC=05, Socket=01\ndiff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c\nindex c67667097a3c..42f11f37bddf 100644\n--- a/drivers/perf/arm_cspmu/nvidia_cspmu.c\n+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c\n@@ -8,6 +8,7 @@\n \n #include <linux/io.h>\n #include <linux/module.h>\n+#include <linux/property.h>\n #include <linux/topology.h>\n \n #include \"arm_cspmu.h\"\n@@ -28,6 +29,19 @@\n #define NV_UCF_FILTER_DST GENMASK_ULL(11, 8)\n #define NV_UCF_FILTER_DEFAULT (NV_UCF_FILTER_SRC | NV_UCF_FILTER_DST)\n \n+#define NV_PCIE_V2_PORT_COUNT 8ULL\n+#define NV_PCIE_V2_FILTER_ID_MASK GENMASK_ULL(24, 0)\n+#define NV_PCIE_V2_FILTER_PORT GENMASK_ULL(NV_PCIE_V2_PORT_COUNT - 1, 0)\n+#define NV_PCIE_V2_FILTER_BDF_VAL GENMASK_ULL(23, NV_PCIE_V2_PORT_COUNT)\n+#define NV_PCIE_V2_FILTER_BDF_EN BIT(24)\n+#define NV_PCIE_V2_FILTER_BDF_VAL_EN GENMASK_ULL(24, NV_PCIE_V2_PORT_COUNT)\n+#define NV_PCIE_V2_FILTER_DEFAULT NV_PCIE_V2_FILTER_PORT\n+\n+#define NV_PCIE_V2_DST_COUNT 5ULL\n+#define NV_PCIE_V2_FILTER2_ID_MASK GENMASK_ULL(4, 0)\n+#define NV_PCIE_V2_FILTER2_DST GENMASK_ULL(NV_PCIE_V2_DST_COUNT - 1, 0)\n+#define NV_PCIE_V2_FILTER2_DEFAULT NV_PCIE_V2_FILTER2_DST\n+\n #define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0)\n \n #define NV_PRODID_MASK\t(PMIIDR_PRODUCTID | PMIIDR_VARIANT | PMIIDR_REVISION)\n@@ -162,6 +176,16 @@ static struct attribute *ucf_pmu_event_attrs[] = {\n \tNULL,\n };\n 
\n+static struct attribute *pcie_v2_pmu_event_attrs[] = {\n+\tARM_CSPMU_EVENT_ATTR(rd_bytes,\t\t0x0),\n+\tARM_CSPMU_EVENT_ATTR(wr_bytes,\t\t0x1),\n+\tARM_CSPMU_EVENT_ATTR(rd_req,\t\t0x2),\n+\tARM_CSPMU_EVENT_ATTR(wr_req,\t\t0x3),\n+\tARM_CSPMU_EVENT_ATTR(rd_cum_outs,\t0x4),\n+\tARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),\n+\tNULL,\n+};\n+\n static struct attribute *generic_pmu_event_attrs[] = {\n \tARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),\n \tNULL,\n@@ -202,6 +226,19 @@ static struct attribute *ucf_pmu_format_attrs[] = {\n \tNULL,\n };\n \n+static struct attribute *pcie_v2_pmu_format_attrs[] = {\n+\tARM_CSPMU_FORMAT_EVENT_ATTR,\n+\tARM_CSPMU_FORMAT_ATTR(src_rp_mask, \"config1:0-7\"),\n+\tARM_CSPMU_FORMAT_ATTR(src_bdf, \"config1:8-23\"),\n+\tARM_CSPMU_FORMAT_ATTR(src_bdf_en, \"config1:24\"),\n+\tARM_CSPMU_FORMAT_ATTR(dst_loc_cmem, \"config2:0\"),\n+\tARM_CSPMU_FORMAT_ATTR(dst_loc_gmem, \"config2:1\"),\n+\tARM_CSPMU_FORMAT_ATTR(dst_loc_pcie_p2p, \"config2:2\"),\n+\tARM_CSPMU_FORMAT_ATTR(dst_loc_pcie_cxl, \"config2:3\"),\n+\tARM_CSPMU_FORMAT_ATTR(dst_rem, \"config2:4\"),\n+\tNULL,\n+};\n+\n static struct attribute *generic_pmu_format_attrs[] = {\n \tARM_CSPMU_FORMAT_EVENT_ATTR,\n \tARM_CSPMU_FORMAT_FILTER_ATTR,\n@@ -233,6 +270,32 @@ nv_cspmu_get_name(const struct arm_cspmu *cspmu)\n \treturn ctx->name;\n }\n \n+#if defined(CONFIG_ACPI)\n+static int nv_cspmu_get_inst_id(const struct arm_cspmu *cspmu, u32 *id)\n+{\n+\tstruct fwnode_handle *fwnode;\n+\tstruct acpi_device *adev;\n+\tint ret;\n+\n+\tadev = arm_cspmu_acpi_dev_get(cspmu);\n+\tif (!adev)\n+\t\treturn -ENODEV;\n+\n+\tfwnode = acpi_fwnode_handle(adev);\n+\tret = fwnode_property_read_u32(fwnode, \"instance_id\", id);\n+\tif (ret)\n+\t\tdev_err(cspmu->dev, \"Failed to get instance ID\\n\");\n+\n+\tacpi_dev_put(adev);\n+\treturn ret;\n+}\n+#else\n+static int nv_cspmu_get_inst_id(const struct arm_cspmu *cspmu, u32 *id)\n+{\n+\treturn -EINVAL;\n+}\n+#endif\n+\n static u32 
nv_cspmu_event_filter(const struct perf_event *event)\n {\n \tconst struct nv_cspmu_ctx *ctx =\n@@ -278,6 +341,20 @@ static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu,\n \t}\n }\n \n+static void nv_cspmu_reset_ev_filter(struct arm_cspmu *cspmu,\n+\t\t\t\t const struct perf_event *event)\n+{\n+\tconst struct nv_cspmu_ctx *ctx =\n+\t\tto_nv_cspmu_ctx(to_arm_cspmu(event->pmu));\n+\tconst u32 offset = 4 * event->hw.idx;\n+\n+\tif (ctx->get_filter)\n+\t\twritel(0, cspmu->base0 + PMEVFILTR + offset);\n+\n+\tif (ctx->get_filter2)\n+\t\twritel(0, cspmu->base0 + PMEVFILT2R + offset);\n+}\n+\n static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu,\n \t\t\t\t const struct perf_event *event)\n {\n@@ -308,9 +385,103 @@ static u32 ucf_pmu_event_filter(const struct perf_event *event)\n \treturn ret;\n }\n \n+static u32 pcie_v2_pmu_bdf_val_en(u32 filter)\n+{\n+\tconst u32 bdf_en = FIELD_GET(NV_PCIE_V2_FILTER_BDF_EN, filter);\n+\n+\t/* Returns both BDF value and enable bit if BDF filtering is enabled. */\n+\tif (bdf_en)\n+\t\treturn FIELD_GET(NV_PCIE_V2_FILTER_BDF_VAL_EN, filter);\n+\n+\t/* Ignore the BDF value if BDF filter is not enabled. */\n+\treturn 0;\n+}\n+\n+static u32 pcie_v2_pmu_event_filter(const struct perf_event *event)\n+{\n+\tu32 filter, lead_filter, lead_bdf;\n+\tstruct perf_event *leader;\n+\tconst struct nv_cspmu_ctx *ctx =\n+\t\tto_nv_cspmu_ctx(to_arm_cspmu(event->pmu));\n+\n+\tfilter = event->attr.config1 & ctx->filter_mask;\n+\tif (filter != 0)\n+\t\treturn filter;\n+\n+\tleader = event->group_leader;\n+\n+\t/* Use leader's filter value if its BDF filtering is enabled. */\n+\tif (event != leader) {\n+\t\tlead_filter = pcie_v2_pmu_event_filter(leader);\n+\t\tlead_bdf = pcie_v2_pmu_bdf_val_en(lead_filter);\n+\t\tif (lead_bdf != 0)\n+\t\t\treturn lead_filter;\n+\t}\n+\n+\t/* Otherwise, return default filter value. 
*/\n+\treturn ctx->filter_default_val;\n+}\n+\n+static int pcie_v2_pmu_validate_event(struct arm_cspmu *cspmu,\n+\t\t\t\t struct perf_event *new_ev)\n+{\n+\t/*\n+\t * Make sure the events are using same BDF filter since the PCIE-SRC PMU\n+\t * only supports one common BDF filter setting for all of the counters.\n+\t */\n+\n+\tint idx;\n+\tu32 new_filter, new_rp, new_bdf, new_lead_filter, new_lead_bdf;\n+\tstruct perf_event *leader, *new_leader;\n+\n+\tif (cspmu->impl.ops.is_cycle_counter_event(new_ev))\n+\t\treturn 0;\n+\n+\tnew_leader = new_ev->group_leader;\n+\n+\tnew_filter = pcie_v2_pmu_event_filter(new_ev);\n+\tnew_lead_filter = pcie_v2_pmu_event_filter(new_leader);\n+\n+\tnew_bdf = pcie_v2_pmu_bdf_val_en(new_filter);\n+\tnew_lead_bdf = pcie_v2_pmu_bdf_val_en(new_lead_filter);\n+\n+\tnew_rp = FIELD_GET(NV_PCIE_V2_FILTER_PORT, new_filter);\n+\n+\tif (new_rp != 0 && new_bdf != 0) {\n+\t\tdev_err(cspmu->dev,\n+\t\t\t\"RP and BDF filtering are mutually exclusive\\n\");\n+\t\treturn -EINVAL;\n+\t}\n+\n+\tif (new_bdf != new_lead_bdf) {\n+\t\tdev_err(cspmu->dev,\n+\t\t\t\"sibling and leader BDF value should be equal\\n\");\n+\t\treturn -EINVAL;\n+\t}\n+\n+\t/* Compare BDF filter on existing events. 
*/\n+\tidx = find_first_bit(cspmu->hw_events.used_ctrs,\n+\t\t\t cspmu->cycle_counter_logical_idx);\n+\n+\tif (idx != cspmu->cycle_counter_logical_idx) {\n+\t\tleader = cspmu->hw_events.events[idx]->group_leader;\n+\n+\t\tconst u32 lead_filter = pcie_v2_pmu_event_filter(leader);\n+\t\tconst u32 lead_bdf = pcie_v2_pmu_bdf_val_en(lead_filter);\n+\n+\t\tif (new_lead_bdf != lead_bdf) {\n+\t\t\tdev_err(cspmu->dev, \"only one BDF value is supported\\n\");\n+\t\t\treturn -EINVAL;\n+\t\t}\n+\t}\n+\n+\treturn 0;\n+}\n+\n enum nv_cspmu_name_fmt {\n \tNAME_FMT_GENERIC,\n-\tNAME_FMT_SOCKET\n+\tNAME_FMT_SOCKET,\n+\tNAME_FMT_SOCKET_INST\n };\n \n struct nv_cspmu_match {\n@@ -430,6 +601,27 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {\n \t\t.init_data = NULL\n \t },\n \t},\n+\t{\n+\t .prodid = 0x10301000,\n+\t .prodid_mask = NV_PRODID_MASK,\n+\t .name_pattern = \"nvidia_pcie_pmu_%u_rc_%u\",\n+\t .name_fmt = NAME_FMT_SOCKET_INST,\n+\t .template_ctx = {\n+\t\t.event_attr = pcie_v2_pmu_event_attrs,\n+\t\t.format_attr = pcie_v2_pmu_format_attrs,\n+\t\t.filter_mask = NV_PCIE_V2_FILTER_ID_MASK,\n+\t\t.filter_default_val = NV_PCIE_V2_FILTER_DEFAULT,\n+\t\t.filter2_mask = NV_PCIE_V2_FILTER2_ID_MASK,\n+\t\t.filter2_default_val = NV_PCIE_V2_FILTER2_DEFAULT,\n+\t\t.get_filter = pcie_v2_pmu_event_filter,\n+\t\t.get_filter2 = nv_cspmu_event_filter2,\n+\t\t.init_data = NULL\n+\t },\n+\t .ops = {\n+\t\t.validate_event = pcie_v2_pmu_validate_event,\n+\t\t.reset_ev_filter = nv_cspmu_reset_ev_filter,\n+\t }\n+\t},\n \t{\n \t .prodid = 0,\n \t .prodid_mask = 0,\n@@ -453,7 +645,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {\n static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,\n \t\t\t\t const struct nv_cspmu_match *match)\n {\n-\tchar *name;\n+\tchar *name = NULL;\n \tstruct device *dev = cspmu->dev;\n \n \tstatic atomic_t pmu_generic_idx = {0};\n@@ -467,13 +659,20 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,\n \t\t\t\t socket);\n 
\t\tbreak;\n \t}\n+\tcase NAME_FMT_SOCKET_INST: {\n+\t\tconst int cpu = cpumask_first(&cspmu->associated_cpus);\n+\t\tconst int socket = cpu_to_node(cpu);\n+\t\tu32 inst_id;\n+\n+\t\tif (!nv_cspmu_get_inst_id(cspmu, &inst_id))\n+\t\t\tname = devm_kasprintf(dev, GFP_KERNEL,\n+\t\t\t\t\tmatch->name_pattern, socket, inst_id);\n+\t\tbreak;\n+\t}\n \tcase NAME_FMT_GENERIC:\n \t\tname = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,\n \t\t\t\t atomic_fetch_inc(&pmu_generic_idx));\n \t\tbreak;\n-\tdefault:\n-\t\tname = NULL;\n-\t\tbreak;\n \t}\n \n \treturn name;\n@@ -514,8 +713,10 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)\n \tcspmu->impl.ctx = ctx;\n \n \t/* NVIDIA specific callbacks. */\n+\tSET_OP(validate_event, impl_ops, match, NULL);\n \tSET_OP(set_cc_filter, impl_ops, match, nv_cspmu_set_cc_filter);\n \tSET_OP(set_ev_filter, impl_ops, match, nv_cspmu_set_ev_filter);\n+\tSET_OP(reset_ev_filter, impl_ops, match, NULL);\n \tSET_OP(get_event_attrs, impl_ops, match, nv_cspmu_get_event_attrs);\n \tSET_OP(get_format_attrs, impl_ops, match, nv_cspmu_get_format_attrs);\n \tSET_OP(get_name, impl_ops, match, nv_cspmu_get_name);\n", "prefixes": [ "v2", "4/8" ] }