Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2216441/?format=api
{ "id": 2216441, "url": "http://patchwork.ozlabs.org/api/patches/2216441/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netfilter-devel/patch/20260326125153.685915-6-pablo@netfilter.org/", "project": { "id": 26, "url": "http://patchwork.ozlabs.org/api/projects/26/?format=api", "name": "Netfilter Development", "link_name": "netfilter-devel", "list_id": "netfilter-devel.vger.kernel.org", "list_email": "netfilter-devel@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260326125153.685915-6-pablo@netfilter.org>", "list_archive_url": null, "date": "2026-03-26T12:51:46", "name": "[net,05/12] netfilter: nft_set_rbtree: revisit array resize logic", "commit_ref": null, "pull_url": null, "state": "handled-elsewhere", "archived": true, "hash": "4afab945c9000244b133cdd0ba8ac526588a0ed9", "submitter": { "id": 1315, "url": "http://patchwork.ozlabs.org/api/people/1315/?format=api", "name": "Pablo Neira Ayuso", "email": "pablo@netfilter.org" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/netfilter-devel/patch/20260326125153.685915-6-pablo@netfilter.org/mbox/", "series": [ { "id": 497584, "url": "http://patchwork.ozlabs.org/api/series/497584/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netfilter-devel/list/?series=497584", "date": "2026-03-26T12:51:41", "name": "[net,01/12] netfilter: nft_set_pipapo_avx2: don't return non-matching entry on expiry", "version": 3, "mbox": "http://patchwork.ozlabs.org/series/497584/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2216441/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2216441/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "\n <netfilter-devel+bounces-11444-incoming=patchwork.ozlabs.org@vger.kernel.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "netfilter-devel@vger.kernel.org" ], "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=netfilter.org header.i=@netfilter.org\n header.a=rsa-sha256 header.s=2025 header.b=vUS0uSbF;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org\n (client-ip=2600:3c0a:e001:db::12fc:5321; helo=sea.lore.kernel.org;\n envelope-from=netfilter-devel+bounces-11444-incoming=patchwork.ozlabs.org@vger.kernel.org;\n receiver=patchwork.ozlabs.org)", "smtp.subspace.kernel.org;\n\tdkim=pass (2048-bit key) header.d=netfilter.org header.i=@netfilter.org\n header.b=\"vUS0uSbF\"", "smtp.subspace.kernel.org;\n arc=none smtp.client-ip=217.70.190.124", "smtp.subspace.kernel.org;\n dmarc=none (p=none dis=none) header.from=netfilter.org", "smtp.subspace.kernel.org;\n spf=pass smtp.mailfrom=netfilter.org" ], "Received": [ "from sea.lore.kernel.org (sea.lore.kernel.org\n [IPv6:2600:3c0a:e001:db::12fc:5321])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fhP2x3tsyz1yGD\n\tfor <incoming@patchwork.ozlabs.org>; Thu, 26 Mar 2026 23:58:17 +1100 (AEDT)", "from smtp.subspace.kernel.org (conduit.subspace.kernel.org\n [100.90.174.1])\n\tby sea.lore.kernel.org (Postfix) with ESMTP id A13C430F25B0\n\tfor <incoming@patchwork.ozlabs.org>; Thu, 26 Mar 2026 12:52:11 +0000 (UTC)", "from localhost.localdomain (localhost.localdomain [127.0.0.1])\n\tby smtp.subspace.kernel.org (Postfix) with ESMTP id 55F0726ED33;\n\tThu, 26 Mar 2026 12:52:07 +0000 (UTC)", "from mail.netfilter.org (mail.netfilter.org [217.70.190.124])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby smtp.subspace.kernel.org (Postfix) with ESMTPS id 5829923EA8B;\n\tThu, 26 Mar 2026 12:52:05 +0000 (UTC)", "from localhost.localdomain (mail-agni [217.70.190.124])\n\tby mail.netfilter.org (Postfix) with ESMTPSA id 5193560273;\n\tThu, 26 Mar 2026 13:52:03 +0100 (CET)" ], "ARC-Seal": "i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;\n\tt=1774529526; cv=none;\n b=tBW+h4y7e9npYZD61U48Vn+QokwonbWVQrbIWev5iBjNbYTQ7YGw7BbM6qSbohMZJjqw35bdcFP5IuSxaqF9JEP4pfcVbtnZNSx50MD4vcZKOdgIMj+4JJoXO57kGr6BnH1ZFhH4dSYBCojZ6cD0BheZxny7y0J1SVoqdv/zvPA=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=subspace.kernel.org;\n\ts=arc-20240116; t=1774529526; c=relaxed/simple;\n\tbh=bSSQeRLAt8ztWJBujreX42RmbME573EsxO32BJwsp4Y=;\n\th=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version;\n b=i7koPfSsfGwGqHmSWk1GTnA8zZkzYoJX0ju+Gl6jQuSsxUIRxsNd3M+02xT3W2zzyFs9To7r3SekTlwpEoxcpm317RtxsZwsK1wcJWjtzRDYHi1HYhLgVDY6nlc0Iqsa85YatcpU8R0R6K4bPaWL92NnKZV+EX6BObwz3ppxsVE=", "ARC-Authentication-Results": "i=1; smtp.subspace.kernel.org;\n dmarc=none (p=none dis=none) header.from=netfilter.org;\n spf=pass smtp.mailfrom=netfilter.org;\n dkim=pass (2048-bit key) header.d=netfilter.org header.i=@netfilter.org\n header.b=vUS0uSbF; arc=none smtp.client-ip=217.70.190.124", "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=netfilter.org;\n\ts=2025; t=1774529523;\n\tbh=WNxZdluDIh5L9aujGAa+6rLbauRkSSIt5jI038ei7zg=;\n\th=From:To:Cc:Subject:Date:In-Reply-To:References:From;\n\tb=vUS0uSbF28zD9gIlxnfbBYZfC6uiVwLcp9iMcdqy+80c0+ulIhlbuCO+LYYCYizqH\n\t 3PpLlMoFmlJOLPfcCOZhExJ78QLaFG+Q8os7ZbgNbKWv3NNj5jmPLPdfaBBQSp21rE\n\t OKoFtTNhGkQP7Fx4jKjOsIQeUT41IVSfUxx8Kw3WY9T42NT+4XlnnGubOwc7+LElt4\n\t FocTNw/mMkvJ8NoB3DaQsQPcXy8IKC0L487J9IPpcfqi+wNPsvsy5yq9puCptCaN6i\n\t 22Ins5Sg3u5H52qRXu3xEZCew5su/fEtbckm4AITrbH0dP81AFR3I6zap3t8LCZ/nA\n\t JtQYxbPTQnBFA==", "From": "Pablo Neira Ayuso <pablo@netfilter.org>", "To": "netfilter-devel@vger.kernel.org", "Cc": "davem@davemloft.net,\n\tnetdev@vger.kernel.org,\n\tkuba@kernel.org,\n\tpabeni@redhat.com,\n\tedumazet@google.com,\n\tfw@strlen.de,\n\thorms@kernel.org", "Subject": "[PATCH net 05/12] netfilter: nft_set_rbtree: revisit array resize\n logic", "Date": "Thu, 26 Mar 2026 13:51:46 +0100", "Message-ID": "<20260326125153.685915-6-pablo@netfilter.org>", "X-Mailer": "git-send-email 2.47.3", "In-Reply-To": "<20260326125153.685915-1-pablo@netfilter.org>", "References": "<20260326125153.685915-1-pablo@netfilter.org>", "Precedence": "bulk", "X-Mailing-List": "netfilter-devel@vger.kernel.org", "List-Id": "<netfilter-devel.vger.kernel.org>", "List-Subscribe": "<mailto:netfilter-devel+subscribe@vger.kernel.org>", "List-Unsubscribe": "<mailto:netfilter-devel+unsubscribe@vger.kernel.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit" }, "content": "Chris Arges reports high memory consumption with thousands of\ncontainers, this patch revisits the array allocation logic.\n\nFor anonymous sets, start by 16 slots (which takes 256 bytes on x86_64).\nExpand it by x2 until threshold of 512 slots is reached, over that\nthreshold, expand it by x1.5.\n\nFor non-anonymous set, start by 1024 slots in the array (which takes 16\nKbytes initially on x86_64). Expand it by x1.5.\n\nUse set->ndeact to subtract deactivated elements when calculating the\nnumber of the slots in the array, otherwise the array size array gets\nincreased artifically. Add special case shrink logic to deal with flush\nset too.\n\nThe shrink logic is skipped by anonymous sets.\n\nUse check_add_overflow() to calculate the new array size.\n\nAdd a WARN_ON_ONCE check to make sure elements fit into the new array\nsize.\n\nReported-by: Chris Arges <carges@cloudflare.com>\nFixes: 7e43e0a1141d (\"netfilter: nft_set_rbtree: translate rbtree to array for binary search\")\nSigned-off-by: Florian Westphal <fw@strlen.de>\nSigned-off-by: Pablo Neira Ayuso <pablo@netfilter.org>\n---\n net/netfilter/nft_set_rbtree.c | 92 +++++++++++++++++++++++++++-------\n 1 file changed, 75 insertions(+), 17 deletions(-)", "diff": "diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c\nindex fe8bd497d74a..737c339decd0 100644\n--- a/net/netfilter/nft_set_rbtree.c\n+++ b/net/netfilter/nft_set_rbtree.c\n@@ -572,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals)\n \treturn array;\n }\n \n-#define NFT_ARRAY_EXTRA_SIZE\t10240\n-\n /* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider\n * packed representation coming from userspace for anonymous sets too.\n */\n static u32 nft_array_elems(const struct nft_set *set)\n {\n-\tu32 nelems = atomic_read(&set->nelems);\n+\tu32 nelems = atomic_read(&set->nelems) - set->ndeact;\n \n \t/* Adjacent intervals are represented with a single start element in\n \t * anonymous sets, use the current element counter as is.\n@@ -595,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set)\n \treturn (nelems / 2) + 2;\n }\n \n-static int nft_array_may_resize(const struct nft_set *set)\n+#define NFT_ARRAY_INITIAL_SIZE\t\t1024\n+#define NFT_ARRAY_INITIAL_ANON_SIZE\t16\n+#define NFT_ARRAY_INITIAL_ANON_THRESH\t(8192U / sizeof(struct nft_array_interval))\n+\n+static int nft_array_may_resize(const struct nft_set *set, bool flush)\n {\n-\tu32 nelems = nft_array_elems(set), new_max_intervals;\n+\tu32 initial_intervals, max_intervals, new_max_intervals, delta;\n+\tu32 shrinked_max_intervals, nelems = nft_array_elems(set);\n \tstruct nft_rbtree *priv = nft_set_priv(set);\n \tstruct nft_array *array;\n \n-\tif (!priv->array_next) {\n-\t\tarray = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE);\n-\t\tif (!array)\n-\t\t\treturn -ENOMEM;\n+\tif (nft_set_is_anonymous(set))\n+\t\tinitial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE;\n+\telse\n+\t\tinitial_intervals = NFT_ARRAY_INITIAL_SIZE;\n+\n+\tif (priv->array_next) {\n+\t\tmax_intervals = priv->array_next->max_intervals;\n+\t\tnew_max_intervals = priv->array_next->max_intervals;\n+\t} else {\n+\t\tif (priv->array) {\n+\t\t\tmax_intervals = priv->array->max_intervals;\n+\t\t\tnew_max_intervals = priv->array->max_intervals;\n+\t\t} else {\n+\t\t\tmax_intervals = 0;\n+\t\t\tnew_max_intervals = initial_intervals;\n+\t\t}\n+\t}\n \n-\t\tpriv->array_next = array;\n+\tif (nft_set_is_anonymous(set))\n+\t\tgoto maybe_grow;\n+\n+\tif (flush) {\n+\t\t/* Set flush just started, nelems still report elements.*/\n+\t\tnelems = 0;\n+\t\tnew_max_intervals = NFT_ARRAY_INITIAL_SIZE;\n+\t\tgoto realloc_array;\n \t}\n \n-\tif (nelems < priv->array_next->max_intervals)\n-\t\treturn 0;\n+\tif (check_add_overflow(new_max_intervals, new_max_intervals,\n+\t\t\t &shrinked_max_intervals))\n+\t\treturn -EOVERFLOW;\n+\n+\tshrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3);\n \n-\tnew_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE;\n-\tif (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)\n+\tif (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE &&\n+\t nelems < shrinked_max_intervals) {\n+\t\tnew_max_intervals = shrinked_max_intervals;\n+\t\tgoto realloc_array;\n+\t}\n+maybe_grow:\n+\tif (nelems > new_max_intervals) {\n+\t\tif (nft_set_is_anonymous(set) &&\n+\t\t new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) {\n+\t\t\tnew_max_intervals <<= 1;\n+\t\t} else {\n+\t\t\tdelta = new_max_intervals >> 1;\n+\t\t\tif (check_add_overflow(new_max_intervals, delta,\n+\t\t\t\t\t &new_max_intervals))\n+\t\t\t\treturn -EOVERFLOW;\n+\t\t}\n+\t}\n+\n+realloc_array:\n+\tif (WARN_ON_ONCE(nelems > new_max_intervals))\n \t\treturn -ENOMEM;\n \n+\tif (priv->array_next) {\n+\t\tif (max_intervals == new_max_intervals)\n+\t\t\treturn 0;\n+\n+\t\tif (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)\n+\t\t\treturn -ENOMEM;\n+\t} else {\n+\t\tarray = nft_array_alloc(new_max_intervals);\n+\t\tif (!array)\n+\t\t\treturn -ENOMEM;\n+\n+\t\tpriv->array_next = array;\n+\t}\n+\n \treturn 0;\n }\n \n@@ -630,7 +688,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,\n \n \tnft_rbtree_maybe_reset_start_cookie(priv, tstamp);\n \n-\tif (nft_array_may_resize(set) < 0)\n+\tif (nft_array_may_resize(set, false) < 0)\n \t\treturn -ENOMEM;\n \n \tdo {\n@@ -741,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,\n \t nft_rbtree_interval_null(set, this))\n \t\tpriv->start_rbe_cookie = 0;\n \n-\tif (nft_array_may_resize(set) < 0)\n+\tif (nft_array_may_resize(set, false) < 0)\n \t\treturn NULL;\n \n \twhile (parent != NULL) {\n@@ -811,7 +869,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,\n \n \tswitch (iter->type) {\n \tcase NFT_ITER_UPDATE_CLONE:\n-\t\tif (nft_array_may_resize(set) < 0) {\n+\t\tif (nft_array_may_resize(set, true) < 0) {\n \t\t\titer->err = -ENOMEM;\n \t\t\tbreak;\n \t\t}\n", "prefixes": [ "net", "05/12" ] }