get:
Show a patch.

patch:
Partially update a patch (only the fields provided are changed).

put:
Update a patch.

GET /api/patches/1525731/
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 1525731,
    "url": "http://patchwork.ozlabs.org/api/patches/1525731/",
    "web_url": "http://patchwork.ozlabs.org/project/openvswitch/patch/125980e292af65c1581c5011d55b5508fc8b2302.1631094144.git.grive@u256.net/",
    "project": {
        "id": 47,
        "url": "http://patchwork.ozlabs.org/api/projects/47/",
        "name": "Open vSwitch",
        "link_name": "openvswitch",
        "list_id": "ovs-dev.openvswitch.org",
        "list_email": "ovs-dev@openvswitch.org",
        "web_url": "http://openvswitch.org/",
        "scm_url": "git@github.com:openvswitch/ovs.git",
        "webscm_url": "https://github.com/openvswitch/ovs",
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<125980e292af65c1581c5011d55b5508fc8b2302.1631094144.git.grive@u256.net>",
    "list_archive_url": null,
    "date": "2021-09-08T09:47:50",
    "name": "[ovs-dev,v5,26/27] dpif-netdev: Use one or more offload threads",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "3304258df3129122bcaa81107a7dfc59978869c4",
    "submitter": {
        "id": 78795,
        "url": "http://patchwork.ozlabs.org/api/people/78795/",
        "name": "Gaëtan Rivet",
        "email": "grive@u256.net"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/openvswitch/patch/125980e292af65c1581c5011d55b5508fc8b2302.1631094144.git.grive@u256.net/mbox/",
    "series": [
        {
            "id": 261424,
            "url": "http://patchwork.ozlabs.org/api/series/261424/",
            "web_url": "http://patchwork.ozlabs.org/project/openvswitch/list/?series=261424",
            "date": "2021-09-08T09:47:24",
            "name": "dpif-netdev: Parallel offload processing",
            "version": 5,
            "mbox": "http://patchwork.ozlabs.org/series/261424/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/1525731/comments/",
    "check": "success",
    "checks": "http://patchwork.ozlabs.org/api/patches/1525731/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<ovs-dev-bounces@openvswitch.org>",
        "X-Original-To": [
            "incoming@patchwork.ozlabs.org",
            "ovs-dev@openvswitch.org"
        ],
        "Delivered-To": [
            "patchwork-incoming@bilbo.ozlabs.org",
            "ovs-dev@lists.linuxfoundation.org"
        ],
        "Authentication-Results": [
            "ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n unprotected) header.d=u256.net header.i=@u256.net header.a=rsa-sha256\n header.s=fm2 header.b=QRcLcd9T;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n unprotected) header.d=messagingengine.com header.i=@messagingengine.com\n header.a=rsa-sha256 header.s=fm3 header.b=hMAuRKog;\n\tdkim-atps=neutral",
            "ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=openvswitch.org\n (client-ip=2605:bc80:3010::133; helo=smtp2.osuosl.org;\n envelope-from=ovs-dev-bounces@openvswitch.org; receiver=<UNKNOWN>)"
        ],
        "Received": [
            "from smtp2.osuosl.org (smtp2.osuosl.org [IPv6:2605:bc80:3010::133])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest\n SHA256)\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 4H4HS900JMz9sW8\n\tfor <incoming@patchwork.ozlabs.org>; Wed,  8 Sep 2021 19:50:16 +1000 (AEST)",
            "from localhost (localhost [127.0.0.1])\n\tby smtp2.osuosl.org (Postfix) with ESMTP id 51D2A40878;\n\tWed,  8 Sep 2021 09:50:14 +0000 (UTC)",
            "from smtp2.osuosl.org ([127.0.0.1])\n\tby localhost (smtp2.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id MpDioMCdyYOQ; Wed,  8 Sep 2021 09:50:12 +0000 (UTC)",
            "from lists.linuxfoundation.org (lf-lists.osuosl.org\n [IPv6:2605:bc80:3010:104::8cd3:938])\n\tby smtp2.osuosl.org (Postfix) with ESMTPS id 7C8B440888;\n\tWed,  8 Sep 2021 09:50:11 +0000 (UTC)",
            "from lf-lists.osuosl.org (localhost [127.0.0.1])\n\tby lists.linuxfoundation.org (Postfix) with ESMTP id 53664C0011;\n\tWed,  8 Sep 2021 09:50:11 +0000 (UTC)",
            "from smtp2.osuosl.org (smtp2.osuosl.org [IPv6:2605:bc80:3010::133])\n by lists.linuxfoundation.org (Postfix) with ESMTP id 220F5C001D\n for <ovs-dev@openvswitch.org>; Wed,  8 Sep 2021 09:50:10 +0000 (UTC)",
            "from localhost (localhost [127.0.0.1])\n by smtp2.osuosl.org (Postfix) with ESMTP id 03328404F4\n for <ovs-dev@openvswitch.org>; Wed,  8 Sep 2021 09:48:45 +0000 (UTC)",
            "from smtp2.osuosl.org ([127.0.0.1])\n by localhost (smtp2.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n with ESMTP id el4LwzzmhhNF for <ovs-dev@openvswitch.org>;\n Wed,  8 Sep 2021 09:48:43 +0000 (UTC)",
            "from wout3-smtp.messagingengine.com (wout3-smtp.messagingengine.com\n [64.147.123.19])\n by smtp2.osuosl.org (Postfix) with ESMTPS id 4A18F40731\n for <ovs-dev@openvswitch.org>; Wed,  8 Sep 2021 09:48:43 +0000 (UTC)",
            "from compute4.internal (compute4.nyi.internal [10.202.2.44])\n by mailout.west.internal (Postfix) with ESMTP id B934732009F7;\n Wed,  8 Sep 2021 05:48:42 -0400 (EDT)",
            "from mailfrontend2 ([10.202.2.163])\n by compute4.internal (MEProxy); Wed, 08 Sep 2021 05:48:42 -0400",
            "by mail.messagingengine.com (Postfix) with ESMTPA; Wed,\n 8 Sep 2021 05:48:41 -0400 (EDT)"
        ],
        "X-Virus-Scanned": [
            "amavisd-new at osuosl.org",
            "amavisd-new at osuosl.org"
        ],
        "X-Greylist": "from auto-whitelisted by SQLgrey-1.8.0",
        "DKIM-Signature": [
            "v=1; a=rsa-sha256; c=relaxed/relaxed; d=u256.net; h=from\n :to:cc:subject:date:message-id:in-reply-to:references\n :mime-version:content-transfer-encoding; s=fm2; bh=EsgrR2UjfL3t9\n yZ9XSzvixadedj5B4G/MQItn6DmU1I=; b=QRcLcd9TXpWJSFP7KEpyUYe4RFXEm\n 3LvO0sSxmAtCy0+h7RKcn+NGwC0krr44yPDoPYE4JhN1i8LGTmGC3Okn52FJPDW7\n aYqeEzF6tyHV+8ks+ein2PeeOE2tzY4Dfp5hLyC+Ha4qJu+sOnleoR8FGOVc0qV4\n 5u9miKc68/gmHwZ/5KFjZ6ONMJ7qzqdA2n2qkhBuBPDf7lGOH0HbaN4dFFfgy0ui\n miImPlP+yYDWtgsDJYnFU3xAchduzUIXvjQZwJ93HD9Wb1STaJArklFmys1yGZ/4\n g5dASMURVk2VmxE6Y4iAY67mSouj5ypQFR75AB+vEXf3WA/JcPBay7rTw==",
            "v=1; a=rsa-sha256; c=relaxed/relaxed; d=\n messagingengine.com; h=cc:content-transfer-encoding:date:from\n :in-reply-to:message-id:mime-version:references:subject:to\n :x-me-proxy:x-me-proxy:x-me-sender:x-me-sender:x-sasl-enc; s=\n fm3; bh=EsgrR2UjfL3t9yZ9XSzvixadedj5B4G/MQItn6DmU1I=; b=hMAuRKog\n lkq5K2WI46gO012kIXjlSRGm6iOfXBCFApKQx/JFRKM5KzCRKHC733agxEOfz3Vw\n J+MSy2Iudpk2pAk5ut3YifhUL2dgZP65v66/+A9V7imHVWpmq6aYpA121dvgZZo3\n pz4jghsQPPuNfWqnwl4M1CeoBaRDTp2m28KTH4J+vC/JIheHTbaRJ5THs+tGeuSc\n 9/gegDroAfGqc0yUt/MNDud+G5X8nUIBqGqaBPyKS13j76CvztwxCSNZekP/okNR\n /A797DtVSMO35wUAdHxbwGfxOikTSihbUOwEC85pQOoDd2wh6m3pBik8C8eu3SaZ\n 0e4pu2lCD5hm4A=="
        ],
        "X-ME-Sender": "<xms:-oY4YcXbJE_e_cjMnUNb_YbALm_4-EfeiaH-vS-2GwZwOWQofNpxdQ>\n <xme:-oY4YQmttoedmCERNyVU2qOrOUsFF3IqXmw9yNVzPnM93DYqpjIBnccGlwp1yHw9X\n ZAQ2_ZSUHgdg-SbwIA>",
        "X-ME-Received": "\n <xmr:-oY4YQYb6jEi4q-gXedfusKJ259inDQEtVkmizu3Qc-SmSrfPplpISehz3QtQHb4ccWCQoM12ycQBY25h2VtjmMRHQ>",
        "X-ME-Proxy-Cause": "\n gggruggvucftvghtrhhoucdtuddrgedvtddrudefjedgudekucetufdoteggodetrfdotf\n fvucfrrhhofhhilhgvmecuhfgrshhtofgrihhlpdfqfgfvpdfurfetoffkrfgpnffqhgen\n uceurghilhhouhhtmecufedttdenucesvcftvggtihhpihgvnhhtshculddquddttddmne\n cujfgurhephffvufffkffojghfggfgsedtkeertdertddtnecuhfhrohhmpefirggvthgr\n nhcutfhivhgvthcuoehgrhhivhgvsehuvdehiedrnhgvtheqnecuggftrfgrthhtvghrnh\n epkeelhfffgfekudfggfduleeukefghefgtdfhvdekuefhffeutdetveevudeivdfgnecu\n ffhomhgrihhnpehmvggrnhdrthhothgrlhdpshhtugguvghvrdhtohhtrghlnecuvehluh\n hsthgvrhfuihiivgeptdenucfrrghrrghmpehmrghilhhfrhhomhepghhrihhvvgesuhdv\n heeirdhnvght",
        "X-ME-Proxy": "<xmx:-oY4YbVRYPcvGn5MdIxNltLgbrWGxLInUtRANsYZjIwVQpqxIuTYXA>\n <xmx:-oY4YWksi2Lt-L6gnOiC9sRB49vXvz-frHAZ1lX8Enu1Ec1-2NIZng>\n <xmx:-oY4YQfb2n6S27LK6DXBHhr3DjQfxe6enfdzU0K399ednaCDyW9FXw>\n <xmx:-oY4Yav-qQsismx30HDVuWFGN5pcsKbd30aWKYiIM5a6ZEsTr7kYNA>",
        "From": "Gaetan Rivet <grive@u256.net>",
        "To": "ovs-dev@openvswitch.org",
        "Date": "Wed,  8 Sep 2021 11:47:50 +0200",
        "Message-Id": "\n <125980e292af65c1581c5011d55b5508fc8b2302.1631094144.git.grive@u256.net>",
        "X-Mailer": "git-send-email 2.31.1",
        "In-Reply-To": "<cover.1631094144.git.grive@u256.net>",
        "References": "<cover.1631094144.git.grive@u256.net>",
        "MIME-Version": "1.0",
        "Cc": "Eli Britstein <elibr@nvidia.com>,\n Maxime Coquelin <maxime.coquelin@redhat.com>",
        "Subject": "[ovs-dev] [PATCH v5 26/27] dpif-netdev: Use one or more offload\n\tthreads",
        "X-BeenThere": "ovs-dev@openvswitch.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "<ovs-dev.openvswitch.org>",
        "List-Unsubscribe": "<https://mail.openvswitch.org/mailman/options/ovs-dev>,\n <mailto:ovs-dev-request@openvswitch.org?subject=unsubscribe>",
        "List-Archive": "<http://mail.openvswitch.org/pipermail/ovs-dev/>",
        "List-Post": "<mailto:ovs-dev@openvswitch.org>",
        "List-Help": "<mailto:ovs-dev-request@openvswitch.org?subject=help>",
        "List-Subscribe": "<https://mail.openvswitch.org/mailman/listinfo/ovs-dev>,\n <mailto:ovs-dev-request@openvswitch.org?subject=subscribe>",
        "Content-Type": "text/plain; charset=\"us-ascii\"",
        "Content-Transfer-Encoding": "7bit",
        "Errors-To": "ovs-dev-bounces@openvswitch.org",
        "Sender": "\"dev\" <ovs-dev-bounces@openvswitch.org>"
    },
    "content": "Read the user configuration in the netdev-offload module to modify the\nnumber of threads used to manage hardware offload requests.\n\nThis allows processing insertion, deletion and modification\nconcurrently.\n\nThe offload thread structure was modified to contain all needed\nelements. This structure is multiplied by the number of requested\nthreads and used separately.\n\nSigned-off-by: Gaetan Rivet <grive@u256.net>\nReviewed-by: Eli Britstein <elibr@nvidia.com>\nReviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>\n---\n lib/dpif-netdev.c         | 290 ++++++++++++++++++++++++--------------\n lib/netdev-offload-dpdk.c |   7 +-\n 2 files changed, 193 insertions(+), 104 deletions(-)",
    "diff": "diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c\nindex 30547c0ec..cdeb11811 100644\n--- a/lib/dpif-netdev.c\n+++ b/lib/dpif-netdev.c\n@@ -374,25 +374,47 @@ struct dp_offload_thread_item {\n };\n \n struct dp_offload_thread {\n-    struct mpsc_queue queue;\n-    atomic_uint64_t enqueued_item;\n-    struct cmap megaflow_to_mark;\n-    struct cmap mark_to_flow;\n-    struct mov_avg_cma cma;\n-    struct mov_avg_ema ema;\n+    PADDED_MEMBERS(CACHE_LINE_SIZE,\n+        struct mpsc_queue queue;\n+        atomic_uint64_t enqueued_item;\n+        struct cmap megaflow_to_mark;\n+        struct cmap mark_to_flow;\n+        struct mov_avg_cma cma;\n+        struct mov_avg_ema ema;\n+    );\n };\n+static struct dp_offload_thread *dp_offload_threads;\n+static void *dp_netdev_flow_offload_main(void *arg);\n \n-static struct dp_offload_thread dp_offload_thread = {\n-    .queue = MPSC_QUEUE_INITIALIZER(&dp_offload_thread.queue),\n-    .megaflow_to_mark = CMAP_INITIALIZER,\n-    .mark_to_flow = CMAP_INITIALIZER,\n-    .enqueued_item = ATOMIC_VAR_INIT(0),\n-    .cma = MOV_AVG_CMA_INITIALIZER,\n-    .ema = MOV_AVG_EMA_INITIALIZER(100),\n-};\n+static void\n+dp_netdev_offload_init(void)\n+{\n+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;\n+    unsigned int nb_offload_thread = netdev_offload_thread_nb();\n+    unsigned int tid;\n+\n+    if (!ovsthread_once_start(&once)) {\n+        return;\n+    }\n+\n+    dp_offload_threads = xcalloc(nb_offload_thread,\n+                                 sizeof *dp_offload_threads);\n \n-static struct ovsthread_once offload_thread_once\n-    = OVSTHREAD_ONCE_INITIALIZER;\n+    for (tid = 0; tid < nb_offload_thread; tid++) {\n+        struct dp_offload_thread *thread;\n+\n+        thread = &dp_offload_threads[tid];\n+        mpsc_queue_init(&thread->queue);\n+        cmap_init(&thread->megaflow_to_mark);\n+        cmap_init(&thread->mark_to_flow);\n+        atomic_init(&thread->enqueued_item, 0);\n+        
mov_avg_cma_init(&thread->cma);\n+        mov_avg_ema_init(&thread->ema, 100);\n+        ovs_thread_create(\"hw_offload\", dp_netdev_flow_offload_main, thread);\n+    }\n+\n+    ovsthread_once_done(&once);\n+}\n \n #define XPS_TIMEOUT 500000LL    /* In microseconds. */\n \n@@ -2409,11 +2431,12 @@ megaflow_to_mark_associate(const ovs_u128 *mega_ufid, uint32_t mark)\n {\n     size_t hash = dp_netdev_flow_hash(mega_ufid);\n     struct megaflow_to_mark_data *data = xzalloc(sizeof(*data));\n+    unsigned int tid = netdev_offload_thread_id();\n \n     data->mega_ufid = *mega_ufid;\n     data->mark = mark;\n \n-    cmap_insert(&dp_offload_thread.megaflow_to_mark,\n+    cmap_insert(&dp_offload_threads[tid].megaflow_to_mark,\n                 CONST_CAST(struct cmap_node *, &data->node), hash);\n }\n \n@@ -2423,11 +2446,12 @@ megaflow_to_mark_disassociate(const ovs_u128 *mega_ufid)\n {\n     size_t hash = dp_netdev_flow_hash(mega_ufid);\n     struct megaflow_to_mark_data *data;\n+    unsigned int tid = netdev_offload_thread_id();\n \n     CMAP_FOR_EACH_WITH_HASH (data, node, hash,\n-                             &dp_offload_thread.megaflow_to_mark) {\n+                             &dp_offload_threads[tid].megaflow_to_mark) {\n         if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) {\n-            cmap_remove(&dp_offload_thread.megaflow_to_mark,\n+            cmap_remove(&dp_offload_threads[tid].megaflow_to_mark,\n                         CONST_CAST(struct cmap_node *, &data->node), hash);\n             ovsrcu_postpone(free, data);\n             return;\n@@ -2443,9 +2467,10 @@ megaflow_to_mark_find(const ovs_u128 *mega_ufid)\n {\n     size_t hash = dp_netdev_flow_hash(mega_ufid);\n     struct megaflow_to_mark_data *data;\n+    unsigned int tid = netdev_offload_thread_id();\n \n     CMAP_FOR_EACH_WITH_HASH (data, node, hash,\n-                             &dp_offload_thread.megaflow_to_mark) {\n+                             &dp_offload_threads[tid].megaflow_to_mark) {\n      
   if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) {\n             return data->mark;\n         }\n@@ -2460,9 +2485,10 @@ megaflow_to_mark_find(const ovs_u128 *mega_ufid)\n static void\n mark_to_flow_associate(const uint32_t mark, struct dp_netdev_flow *flow)\n {\n+    unsigned int tid = netdev_offload_thread_id();\n     dp_netdev_flow_ref(flow);\n \n-    cmap_insert(&dp_offload_thread.mark_to_flow,\n+    cmap_insert(&dp_offload_threads[tid].mark_to_flow,\n                 CONST_CAST(struct cmap_node *, &flow->mark_node),\n                 hash_int(mark, 0));\n     flow->mark = mark;\n@@ -2474,10 +2500,11 @@ mark_to_flow_associate(const uint32_t mark, struct dp_netdev_flow *flow)\n static bool\n flow_mark_has_no_ref(uint32_t mark)\n {\n+    unsigned int tid = netdev_offload_thread_id();\n     struct dp_netdev_flow *flow;\n \n     CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0),\n-                             &dp_offload_thread.mark_to_flow) {\n+                             &dp_offload_threads[tid].mark_to_flow) {\n         if (flow->mark == mark) {\n             return false;\n         }\n@@ -2493,6 +2520,7 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd,\n     const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);\n     struct cmap_node *mark_node = CONST_CAST(struct cmap_node *,\n                                              &flow->mark_node);\n+    unsigned int tid = netdev_offload_thread_id();\n     uint32_t mark = flow->mark;\n     int ret = 0;\n \n@@ -2502,7 +2530,8 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd,\n         return EINVAL;\n     }\n \n-    cmap_remove(&dp_offload_thread.mark_to_flow, mark_node, hash_int(mark, 0));\n+    cmap_remove(&dp_offload_threads[tid].mark_to_flow,\n+                mark_node, hash_int(mark, 0));\n     flow->mark = INVALID_FLOW_MARK;\n \n     /*\n@@ -2539,12 +2568,21 @@ mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,\n                   const uint32_t 
mark)\n {\n     struct dp_netdev_flow *flow;\n+    unsigned int tid;\n+    size_t hash;\n \n-    CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0),\n-                             &dp_offload_thread.mark_to_flow) {\n-        if (flow->mark == mark && flow->pmd_id == pmd->core_id &&\n-            flow->dead == false) {\n-            return flow;\n+    if (dp_offload_threads == NULL) {\n+        return NULL;\n+    }\n+\n+    hash = hash_int(mark, 0);\n+    for (tid = 0; tid < netdev_offload_thread_nb(); tid++) {\n+        CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash,\n+                                 &dp_offload_threads[tid].mark_to_flow) {\n+            if (flow->mark == mark && flow->pmd_id == pmd->core_id &&\n+                flow->dead == false) {\n+                return flow;\n+            }\n         }\n     }\n \n@@ -2609,10 +2647,25 @@ dp_netdev_free_offload(struct dp_offload_thread_item *offload)\n }\n \n static void\n-dp_netdev_append_offload(struct dp_offload_thread_item *offload)\n+dp_netdev_append_offload(struct dp_offload_thread_item *offload,\n+                         unsigned int tid)\n+{\n+    dp_netdev_offload_init();\n+\n+    mpsc_queue_insert(&dp_offload_threads[tid].queue, &offload->node);\n+    atomic_count_inc64(&dp_offload_threads[tid].enqueued_item);\n+}\n+\n+static void\n+dp_netdev_offload_flow_enqueue(struct dp_offload_thread_item *item)\n {\n-    mpsc_queue_insert(&dp_offload_thread.queue, &offload->node);\n-    atomic_count_inc64(&dp_offload_thread.enqueued_item);\n+    struct dp_offload_flow_item *flow_offload = &item->data->flow;\n+    unsigned int tid;\n+\n+    ovs_assert(item->type == DP_OFFLOAD_FLOW);\n+\n+    tid = netdev_offload_ufid_to_thread_id(flow_offload->flow->mega_ufid);\n+    dp_netdev_append_offload(item, tid);\n }\n \n static int\n@@ -2751,8 +2804,8 @@ dp_offload_flush(struct dp_offload_thread_item *item)\n \n     ovs_barrier_block(flush->barrier);\n \n-    /* Allow the other thread to take again the port 
lock, before\n-     * continuing offload operations in this thread.\n+    /* Allow the initiator thread to take again the port lock,\n+     * before continuing offload operations in this thread.\n      */\n     ovs_barrier_block(flush->barrier);\n }\n@@ -2762,8 +2815,9 @@ dp_offload_flush(struct dp_offload_thread_item *item)\n #define DP_NETDEV_OFFLOAD_QUIESCE_INTERVAL_US (10 * 1000) /* 10 ms */\n \n static void *\n-dp_netdev_flow_offload_main(void *data OVS_UNUSED)\n+dp_netdev_flow_offload_main(void *arg)\n {\n+    struct dp_offload_thread *ofl_thread = arg;\n     struct dp_offload_thread_item *offload;\n     struct mpsc_queue_node *node;\n     struct mpsc_queue *queue;\n@@ -2772,7 +2826,7 @@ dp_netdev_flow_offload_main(void *data OVS_UNUSED)\n     long long int now;\n     uint64_t backoff;\n \n-    queue = &dp_offload_thread.queue;\n+    queue = &ofl_thread->queue;\n     mpsc_queue_acquire(queue);\n \n     while (true) {\n@@ -2787,7 +2841,7 @@ dp_netdev_flow_offload_main(void *data OVS_UNUSED)\n         next_rcu = time_usec() + DP_NETDEV_OFFLOAD_QUIESCE_INTERVAL_US;\n         MPSC_QUEUE_FOR_EACH_POP (node, queue) {\n             offload = CONTAINER_OF(node, struct dp_offload_thread_item, node);\n-            atomic_count_dec64(&dp_offload_thread.enqueued_item);\n+            atomic_count_dec64(&ofl_thread->enqueued_item);\n \n             switch (offload->type) {\n             case DP_OFFLOAD_FLOW:\n@@ -2803,8 +2857,8 @@ dp_netdev_flow_offload_main(void *data OVS_UNUSED)\n             now = time_usec();\n \n             latency_us = now - offload->timestamp;\n-            mov_avg_cma_update(&dp_offload_thread.cma, latency_us);\n-            mov_avg_ema_update(&dp_offload_thread.ema, latency_us);\n+            mov_avg_cma_update(&ofl_thread->cma, latency_us);\n+            mov_avg_ema_update(&ofl_thread->ema, latency_us);\n \n             dp_netdev_free_offload(offload);\n \n@@ -2828,16 +2882,10 @@ queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,\n {\n     
struct dp_offload_thread_item *offload;\n \n-    if (ovsthread_once_start(&offload_thread_once)) {\n-        mpsc_queue_init(&dp_offload_thread.queue);\n-        ovs_thread_create(\"hw_offload\", dp_netdev_flow_offload_main, NULL);\n-        ovsthread_once_done(&offload_thread_once);\n-    }\n-\n     offload = dp_netdev_alloc_flow_offload(pmd, flow,\n                                            DP_NETDEV_FLOW_OFFLOAD_OP_DEL);\n     offload->timestamp = pmd->ctx.now;\n-    dp_netdev_append_offload(offload);\n+    dp_netdev_offload_flow_enqueue(offload);\n }\n \n static void\n@@ -2918,12 +2966,6 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,\n         return;\n     }\n \n-    if (ovsthread_once_start(&offload_thread_once)) {\n-        mpsc_queue_init(&dp_offload_thread.queue);\n-        ovs_thread_create(\"hw_offload\", dp_netdev_flow_offload_main, NULL);\n-        ovsthread_once_done(&offload_thread_once);\n-    }\n-\n     if (flow->mark != INVALID_FLOW_MARK) {\n         op = DP_NETDEV_FLOW_OFFLOAD_OP_MOD;\n     } else {\n@@ -2938,7 +2980,7 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,\n     flow_offload->orig_in_port = orig_in_port;\n \n     item->timestamp = pmd->ctx.now;\n-    dp_netdev_append_offload(item);\n+    dp_netdev_offload_flow_enqueue(item);\n }\n \n static void\n@@ -2967,25 +3009,24 @@ dp_netdev_offload_flush_enqueue(struct dp_netdev *dp,\n                                 struct netdev *netdev,\n                                 struct ovs_barrier *barrier)\n {\n-    struct dp_offload_thread_item *item;\n-    struct dp_offload_flush_item *flush;\n+    unsigned int tid;\n+    long long int now_us = time_usec();\n \n-    if (ovsthread_once_start(&offload_thread_once)) {\n-        mpsc_queue_init(&dp_offload_thread.queue);\n-        ovs_thread_create(\"hw_offload\", dp_netdev_flow_offload_main, NULL);\n-        ovsthread_once_done(&offload_thread_once);\n-    }\n+    for (tid = 0; tid < netdev_offload_thread_nb(); tid++) {\n+        
struct dp_offload_thread_item *item;\n+        struct dp_offload_flush_item *flush;\n \n-    item = xmalloc(sizeof *item + sizeof *flush);\n-    item->type = DP_OFFLOAD_FLUSH;\n-    item->timestamp = time_usec();\n+        item = xmalloc(sizeof *item + sizeof *flush);\n+        item->type = DP_OFFLOAD_FLUSH;\n+        item->timestamp = now_us;\n \n-    flush = &item->data->flush;\n-    flush->dp = dp;\n-    flush->netdev = netdev;\n-    flush->barrier = barrier;\n+        flush = &item->data->flush;\n+        flush->dp = dp;\n+        flush->netdev = netdev;\n+        flush->barrier = barrier;\n \n-    dp_netdev_append_offload(item);\n+        dp_netdev_append_offload(item, tid);\n+    }\n }\n \n /* Blocking call that will wait on the offload thread to\n@@ -3004,13 +3045,17 @@ dp_netdev_offload_flush(struct dp_netdev *dp,\n                         struct dp_netdev_port *port)\n     OVS_REQ_WRLOCK(dp->port_rwlock)\n {\n-    /* The flush mutex only serves to protect the static memory barrier.\n+    /* The flush mutex serves to exclude mutual access to the static\n+     * barrier, and to prevent multiple flush orders to several threads.\n+     *\n      * The memory barrier needs to go beyond the function scope as\n-     * the other thread can resume from blocking after this function\n+     * the other threads can resume from blocking after this function\n      * already finished.\n-     * As the barrier is made static, then it will be shared by\n-     * calls to this function, and it needs to be protected from\n-     * concurrent use.\n+     *\n+     * Additionally, because the flush operation is blocking, it would\n+     * deadlock if multiple offload threads were blocking on several\n+     * different barriers. 
Only allow a single flush order in the offload\n+     * queue at a time.\n      */\n     static struct ovs_mutex flush_mutex = OVS_MUTEX_INITIALIZER;\n     static struct ovs_barrier barrier OVS_GUARDED_BY(flush_mutex);\n@@ -3023,8 +3068,8 @@ dp_netdev_offload_flush(struct dp_netdev *dp,\n     ovs_rwlock_unlock(&dp->port_rwlock);\n     ovs_mutex_lock(&flush_mutex);\n \n-    /* This thread and the offload thread. */\n-    ovs_barrier_init(&barrier, 2);\n+    /* This thread and the offload threads. */\n+    ovs_barrier_init(&barrier, 1 + netdev_offload_thread_nb());\n \n     netdev = netdev_ref(port->netdev);\n     dp_netdev_offload_flush_enqueue(dp, netdev, &barrier);\n@@ -3032,7 +3077,7 @@ dp_netdev_offload_flush(struct dp_netdev *dp,\n     netdev_close(netdev);\n \n     /* Take back the datapath port lock before allowing the offload\n-     * thread to proceed further. The port deletion must complete first,\n+     * threads to proceed further. The port deletion must complete first,\n      * to ensure no further offloads are inserted after the flush.\n      *\n      * Some offload provider (e.g. 
DPDK) keeps a netdev reference with\n@@ -4352,60 +4397,99 @@ dpif_netdev_offload_stats_get(struct dpif *dpif,\n         DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_MEAN,\n         DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_STDDEV,\n     };\n-    const char *names[] = {\n+    struct {\n+        const char *name;\n+        uint64_t total;\n+    } hwol_stats[] = {\n         [DP_NETDEV_HW_OFFLOADS_STATS_ENQUEUED] =\n-            \"                Enqueued offloads\",\n+            { \"                Enqueued offloads\", 0 },\n         [DP_NETDEV_HW_OFFLOADS_STATS_INSERTED] =\n-            \"                Inserted offloads\",\n+            { \"                Inserted offloads\", 0 },\n         [DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_MEAN] =\n-            \"  Cumulative Average latency (us)\",\n+            { \"  Cumulative Average latency (us)\", 0 },\n         [DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_STDDEV] =\n-            \"   Cumulative Latency stddev (us)\",\n+            { \"   Cumulative Latency stddev (us)\", 0 },\n         [DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_MEAN] =\n-            \" Exponential Average latency (us)\",\n+            { \" Exponential Average latency (us)\", 0 },\n         [DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_STDDEV] =\n-            \"  Exponential Latency stddev (us)\",\n+            { \"  Exponential Latency stddev (us)\", 0 },\n     };\n     struct dp_netdev *dp = get_dp_netdev(dpif);\n     struct dp_netdev_port *port;\n-    uint64_t nb_offloads;\n+    unsigned int nb_thread;\n+    uint64_t *port_nb_offloads;\n+    uint64_t *nb_offloads;\n+    unsigned int tid;\n     size_t i;\n \n     if (!netdev_is_flow_api_enabled()) {\n         return EINVAL;\n     }\n \n-    stats->size = ARRAY_SIZE(names);\n+    nb_thread = netdev_offload_thread_nb();\n+    /* nb_thread counters for the overall total as well. 
*/\n+    stats->size = ARRAY_SIZE(hwol_stats) * (nb_thread + 1);\n     stats->counters = xcalloc(stats->size, sizeof *stats->counters);\n \n-    nb_offloads = 0;\n+    nb_offloads = xcalloc(nb_thread, sizeof *nb_offloads);\n+    port_nb_offloads = xcalloc(nb_thread, sizeof *port_nb_offloads);\n \n     ovs_rwlock_rdlock(&dp->port_rwlock);\n     HMAP_FOR_EACH (port, node, &dp->ports) {\n-        uint64_t port_nb_offloads = 0;\n-\n+        memset(port_nb_offloads, 0, nb_thread * sizeof *port_nb_offloads);\n         /* Do not abort on read error from a port, just report 0. */\n-        if (!netdev_flow_get_n_flows(port->netdev, &port_nb_offloads)) {\n-            nb_offloads += port_nb_offloads;\n+        if (!netdev_flow_get_n_flows(port->netdev, port_nb_offloads)) {\n+            for (i = 0; i < nb_thread; i++) {\n+                nb_offloads[i] += port_nb_offloads[i];\n+            }\n         }\n     }\n     ovs_rwlock_unlock(&dp->port_rwlock);\n \n-    atomic_read_relaxed(&dp_offload_thread.enqueued_item,\n-        &stats->counters[DP_NETDEV_HW_OFFLOADS_STATS_ENQUEUED].value);\n-    stats->counters[DP_NETDEV_HW_OFFLOADS_STATS_INSERTED].value = nb_offloads;\n-    stats->counters[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_MEAN].value =\n-        mov_avg_cma(&dp_offload_thread.cma);\n-    stats->counters[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_STDDEV].value =\n-        mov_avg_cma_std_dev(&dp_offload_thread.cma);\n-    stats->counters[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_MEAN].value =\n-        mov_avg_ema(&dp_offload_thread.ema);\n-    stats->counters[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_STDDEV].value =\n-        mov_avg_ema_std_dev(&dp_offload_thread.ema);\n-\n-    for (i = 0; i < ARRAY_SIZE(names); i++) {\n+    free(port_nb_offloads);\n+\n+    for (tid = 0; tid < nb_thread; tid++) {\n+        uint64_t counts[ARRAY_SIZE(hwol_stats)];\n+        size_t idx = ((tid + 1) * ARRAY_SIZE(hwol_stats));\n+\n+        memset(counts, 0, sizeof counts);\n+        
counts[DP_NETDEV_HW_OFFLOADS_STATS_INSERTED] = nb_offloads[tid];\n+        if (dp_offload_threads != NULL) {\n+            atomic_read_relaxed(&dp_offload_threads[tid].enqueued_item,\n+                                &counts[DP_NETDEV_HW_OFFLOADS_STATS_ENQUEUED]);\n+\n+            counts[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_MEAN] =\n+                mov_avg_cma(&dp_offload_threads[tid].cma);\n+            counts[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_STDDEV] =\n+                mov_avg_cma_std_dev(&dp_offload_threads[tid].cma);\n+\n+            counts[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_MEAN] =\n+                mov_avg_ema(&dp_offload_threads[tid].ema);\n+            counts[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_STDDEV] =\n+                mov_avg_ema_std_dev(&dp_offload_threads[tid].ema);\n+        }\n+\n+        for (i = 0; i < ARRAY_SIZE(hwol_stats); i++) {\n+            snprintf(stats->counters[idx + i].name,\n+                     sizeof(stats->counters[idx + i].name),\n+                     \"  [%3u] %s\", tid, hwol_stats[i].name);\n+            stats->counters[idx + i].value = counts[i];\n+            hwol_stats[i].total += counts[i];\n+        }\n+    }\n+\n+    free(nb_offloads);\n+\n+    /* Do an average of the average for the aggregate. 
*/\n+    hwol_stats[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_MEAN].total /= nb_thread;\n+    hwol_stats[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_STDDEV].total /= nb_thread;\n+    hwol_stats[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_MEAN].total /= nb_thread;\n+    hwol_stats[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_STDDEV].total /= nb_thread;\n+\n+    for (i = 0; i < ARRAY_SIZE(hwol_stats); i++) {\n         snprintf(stats->counters[i].name, sizeof(stats->counters[i].name),\n-                 \"%s\", names[i]);\n+                 \"  Total %s\", hwol_stats[i].name);\n+        stats->counters[i].value = hwol_stats[i].total;\n     }\n \n     return 0;\ndiff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c\nindex ac4739b71..312686f11 100644\n--- a/lib/netdev-offload-dpdk.c\n+++ b/lib/netdev-offload-dpdk.c\n@@ -62,6 +62,7 @@ struct ufid_to_rte_flow_data {\n     struct dpif_flow_stats stats;\n     struct netdev *physdev;\n     struct ovs_mutex lock;\n+    unsigned int creation_tid;\n     bool dead;\n };\n \n@@ -240,6 +241,7 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid, struct netdev *netdev,\n     data->physdev = netdev != physdev ? 
netdev_ref(physdev) : physdev;\n     data->rte_flow = rte_flow;\n     data->actions_offloaded = actions_offloaded;\n+    data->creation_tid = netdev_offload_thread_id();\n     ovs_mutex_init(&data->lock);\n \n     cmap_insert(map, CONST_CAST(struct cmap_node *, &data->node), hash);\n@@ -2265,6 +2267,7 @@ netdev_offload_dpdk_flow_flush(struct netdev *netdev)\n {\n     struct cmap *map = offload_data_map(netdev);\n     struct ufid_to_rte_flow_data *data;\n+    unsigned int tid = netdev_offload_thread_id();\n \n     if (!map) {\n         return -1;\n@@ -2274,7 +2277,9 @@ netdev_offload_dpdk_flow_flush(struct netdev *netdev)\n         if (data->netdev != netdev && data->physdev != netdev) {\n             continue;\n         }\n-        netdev_offload_dpdk_flow_destroy(data);\n+        if (data->creation_tid == tid) {\n+            netdev_offload_dpdk_flow_destroy(data);\n+        }\n     }\n \n     return 0;\n",
    "prefixes": [
        "ovs-dev",
        "v5",
        "26/27"
    ]
}