Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2228374/?format=api
{ "id": 2228374, "url": "http://patchwork.ozlabs.org/api/patches/2228374/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netfilter-devel/patch/20260426102645.22229-1-ja@ssi.bg/", "project": { "id": 26, "url": "http://patchwork.ozlabs.org/api/projects/26/?format=api", "name": "Netfilter Development", "link_name": "netfilter-devel", "list_id": "netfilter-devel.vger.kernel.org", "list_email": "netfilter-devel@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260426102645.22229-1-ja@ssi.bg>", "list_archive_url": null, "date": "2026-04-26T10:26:45", "name": "[PATCHv5,net] ipvs: fix races around est_mutex and est_cpulist", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "73aff994c95ca9cebc497bda249d2fd8cd5295e7", "submitter": { "id": 2825, "url": "http://patchwork.ozlabs.org/api/people/2825/?format=api", "name": "Julian Anastasov", "email": "ja@ssi.bg" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/netfilter-devel/patch/20260426102645.22229-1-ja@ssi.bg/mbox/", "series": [ { "id": 501522, "url": "http://patchwork.ozlabs.org/api/series/501522/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netfilter-devel/list/?series=501522", "date": "2026-04-26T10:26:45", "name": "[PATCHv5,net] ipvs: fix races around est_mutex and est_cpulist", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/501522/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2228374/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2228374/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "\n <netfilter-devel+bounces-12197-incoming=patchwork.ozlabs.org@vger.kernel.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "netfilter-devel@vger.kernel.org" ], "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (4096-bit key;\n unprotected) header.d=ssi.bg header.i=@ssi.bg header.a=rsa-sha256\n header.s=ssi header.b=KZU7eP1s;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org\n (client-ip=172.105.105.114; helo=tor.lore.kernel.org;\n envelope-from=netfilter-devel+bounces-12197-incoming=patchwork.ozlabs.org@vger.kernel.org;\n receiver=patchwork.ozlabs.org)", "smtp.subspace.kernel.org;\n\tdkim=pass (4096-bit key) header.d=ssi.bg header.i=@ssi.bg header.b=\"KZU7eP1s\"", "smtp.subspace.kernel.org;\n arc=none smtp.client-ip=193.238.174.39", "smtp.subspace.kernel.org;\n dmarc=pass (p=reject dis=none) header.from=ssi.bg", "smtp.subspace.kernel.org;\n spf=pass smtp.mailfrom=ssi.bg" ], "Received": [ "from tor.lore.kernel.org (tor.lore.kernel.org [172.105.105.114])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g3NDm6bMpz1xvV\n\tfor <incoming@patchwork.ozlabs.org>; Sun, 26 Apr 2026 20:27:36 +1000 (AEST)", "from smtp.subspace.kernel.org (conduit.subspace.kernel.org\n [100.90.174.1])\n\tby tor.lore.kernel.org (Postfix) with ESMTP id 9DA36300C81A\n\tfor <incoming@patchwork.ozlabs.org>; Sun, 26 Apr 2026 10:27:33 +0000 (UTC)", "from localhost.localdomain (localhost.localdomain [127.0.0.1])\n\tby smtp.subspace.kernel.org (Postfix) with ESMTP id 6789733D6F9;\n\tSun, 26 Apr 2026 10:27:31 +0000 (UTC)", "from mx.ssi.bg (mx.ssi.bg [193.238.174.39])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby smtp.subspace.kernel.org (Postfix) with ESMTPS id 1309235898;\n\tSun, 26 Apr 2026 10:27:24 +0000 (UTC)", "from mx.ssi.bg (localhost [127.0.0.1])\n\tby mx.ssi.bg (Potsfix) with ESMTP id 08E722126E;\n\tSun, 26 Apr 2026 13:27:22 +0300 (EEST)", "from box.ssi.bg (box.ssi.bg [193.238.174.46])\n\tby mx.ssi.bg (Potsfix) with ESMTPS;\n\tSun, 26 Apr 2026 13:27:19 +0300 (EEST)", "from ja.ssi.bg (unknown [213.16.62.126])\n\tby box.ssi.bg (Potsfix) with ESMTPSA id 5C40E608B6;\n\tSun, 26 Apr 2026 13:27:17 +0300 (EEST)", "from ja.home.ssi.bg (localhost.localdomain [127.0.0.1])\n\tby ja.ssi.bg (8.18.1/8.18.1) with ESMTP id 63QARGKQ022326;\n\tSun, 26 Apr 2026 13:27:16 +0300", "(from root@localhost)\n\tby ja.home.ssi.bg (8.18.1/8.18.1/Submit) id 63QARDtj022324;\n\tSun, 26 Apr 2026 13:27:13 +0300" ], "ARC-Seal": "i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;\n\tt=1777199249; cv=none;\n b=iPd13AmhtabuQbSHJxE1QBWXmds3ex5ZXmQ46ZjwVFO3jFyyIBhDc53Kk+taTN5gq5kVZ3VrAMTVMNsHWIA311gTxkKPPQLhyuGiLrWkuVc6VLiaxTDz04fg4OnclNd3UJqKOHOn9brnoqmzMkWMla9Rux5QIrqmKP2WTVv4lz4=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=subspace.kernel.org;\n\ts=arc-20240116; t=1777199249; c=relaxed/simple;\n\tbh=HlT9A6EPq0yH4dQsWWYd+U6hkrvtjNKxI2fdiUUAZsk=;\n\th=From:To:Cc:Subject:Date:Message-ID:MIME-Version;\n b=mvQpw4vJfYLuaDNfQlzzpF/VU32eXnCRFigbzakQEnicRlxD2lnUOzYR6gMEFaCi7nim94bI6eyv/9Rzg0IIpkjj8z92qWfHUJUSVvUUI9ufAkBGXd5om/M74lgM2wMRiH8MU2MSy1JjIoBEE+TjGo/uS9aQIqhnMIpGeYmhLG0=", "ARC-Authentication-Results": "i=1; smtp.subspace.kernel.org;\n dmarc=pass (p=reject dis=none) header.from=ssi.bg;\n spf=pass smtp.mailfrom=ssi.bg;\n dkim=pass (4096-bit key) header.d=ssi.bg header.i=@ssi.bg header.b=KZU7eP1s;\n arc=none smtp.client-ip=193.238.174.39", "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=ssi.bg; h=cc:cc\n\t:content-transfer-encoding:date:from:from:message-id\n\t:mime-version:reply-to:subject:subject:to:to; s=ssi; bh=ccIm0JDx\n\tMAeRMY14iNDxVlwCSQEpj1DoZs4JGPINtYk=; b=KZU7eP1sRRG6r3HTGB06w/tL\n\tYRA8rP0gIU7f5+X4lEX2Fh3BQz3VGdzrRioVQQsXIcmKEBAsYi+oe+843pI7M+mP\n\tZLoymHBk0Bk9COFu6QxsCwkZAl173jdBOuvEEu9w2210GM2RoiuLf9H1c4wQ+RXJ\n\tEthS0duOldqvhDBInOCsu4aNaZHDdiZEQDPwVDvNGfNzQ1/R42zJDAg1HpxJLp3W\n\tdnpvEkngQXO2Pnb3XoV9zMKhYWJIm9R65ETgihoizQ46NGRJFRp880cy5dIMJ4IL\n\tiAB3Fhp2UOgdXdQ35xMGW/elx3Ytj2dKp0pj5beeggGGPtbnS79IuZz3+nCBJ4Bj\n\tJ6bTXGc9JEi23FnV17rhcWI50aGoXCCPZ9EOxSWPpBfxgEZXHTTqWY0txHsLvpCN\n\tpxAS68olS5Ja6ggevg8zt54CGfDLBUYSbDPlTcSOLJLAiWcf01KD5ioURpp0vjlM\n\to3se5q+pWm75faW8Rt2uqZsePlcEzBQ9G3htijVQawr2WPeV0LImRfYGmUxe/DdR\n\tGjhg5d6I1xAPhVZQODY7EmhoWUgjGiWj7o6btSewL/hVpxEgdAOK5SpFd4dXer28\n\tEPu+CdDjbOEyEBTQJF2+q+KPrETcShGpUwAEkAnSgQzzhEbzWtQtmW3If1mliBex\n\tJ8/+HXCT8/jZ6sSUUqw=", "From": "Julian Anastasov <ja@ssi.bg>", "To": "Simon Horman <horms@verge.net.au>", "Cc": "Pablo Neira Ayuso <pablo@netfilter.org>, Florian Westphal <fw@strlen.de>,\n lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org", "Subject": "[PATCHv5 net] ipvs: fix races around est_mutex and est_cpulist", "Date": "Sun, 26 Apr 2026 13:26:45 +0300", "Message-ID": "<20260426102645.22229-1-ja@ssi.bg>", "X-Mailer": "git-send-email 2.53.0", "Precedence": "bulk", "X-Mailing-List": "netfilter-devel@vger.kernel.org", "List-Id": "<netfilter-devel.vger.kernel.org>", "List-Subscribe": "<mailto:netfilter-devel+subscribe@vger.kernel.org>", "List-Unsubscribe": "<mailto:netfilter-devel+unsubscribe@vger.kernel.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit" }, "content": "Sashiko reports for races and possible crash around\nthe usage of est_cpulist_valid and sysctl_est_cpulist.\nThe problem is that we do not lock est_mutex in some\nplaces which can lead to wrong write ordering and\nas result problems when calling cpumask_weight()\nand cpumask_empty().\n\nFix them by moving the est_max_threads read/write under\nlocked est_mutex. Do the same for one ip_vs_est_reload_start()\ncall to protect the cpumask_empty() usage of sysctl_est_cpulist.\n\nTo remove the chance of deadlock while stopping the\nestimation kthreads, keep the data structure for kthread 0\neven after last estimator is removed and do not hold mutexes\nwhile stopping this task. Now we will use a new flag 'needed'\nto know when kthread 0 should run. The kthreads above 0\ndo not use mutexes, so stop them under est_mutex because\ntheir kthread data still can be destroyed if they do not\nserve estimators. Now all kthreads will be started by\nthe est_reload_work to properly serialize the stop/start\nfor kthread 0.\n\nReduce the use of service_mutex in ip_vs_est_calc_phase()\nbecause under est_mutex we can safely walk est_kt_arr to\nstop the kthreads above slot 0.\n\nAs ip_vs_stop_estimator() for tot_stats should be called\nunder service_mutex, do it early in the netns exit path\nin ip_vs_flush() to avoid locking the mutex again later.\nIt still should be called in ip_vs_control_net_cleanup_sysctl()\nwhen we are called during netns init error. Use -2 for ktid\nas indicator if estimator was already stopped.\n\nFinally, fix use-after-free for kd->est_row in\nip_vs_est_calc_phase(). est->ktrow should simply switch to\na delay value while estimator is linked to est_temp_list.\n\nLink: https://sashiko.dev/#/patchset/20260331165015.2777765-1-longman%40redhat.com\nLink: https://sashiko.dev/#/patchset/20260420171308.87192-1-ja%40ssi.bg\nLink: https://sashiko.dev/#/patchset/20260422125123.40658-1-ja%40ssi.bg\nLink: https://sashiko.dev/#/patchset/20260424175858.54752-1-ja%40ssi.bg\nLink: https://sashiko.dev/#/patchset/20260425103918.7447-1-ja%40ssi.bg\nFixes: f0be83d54217 (\"ipvs: add est_cpulist and est_nice sysctl vars\")\nSigned-off-by: Julian Anastasov <ja@ssi.bg>\n---\n include/net/ip_vs.h | 11 ++++-\n net/netfilter/ipvs/ip_vs_ctl.c | 51 +++++++++++++++++----\n net/netfilter/ipvs/ip_vs_est.c | 83 ++++++++++++++++++++--------------\n 3 files changed, 100 insertions(+), 45 deletions(-)", "diff": "diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h\nindex 72d325c81313..d28ad8a0541f 100644\n--- a/include/net/ip_vs.h\n+++ b/include/net/ip_vs.h\n@@ -491,6 +491,7 @@ struct ip_vs_est_kt_data {\n \tDECLARE_BITMAP(avail, IPVS_EST_NTICKS);\t/* tick has space for ests */\n \tunsigned long\t\test_timer;\t/* estimation timer (jiffies) */\n \tstruct ip_vs_stats\t*calc_stats;\t/* Used for calculation */\n+\tint\t\t\tneeded;\t\t/* task is needed */\n \tint\t\t\ttick_len[IPVS_EST_NTICKS];\t/* est count */\n \tint\t\t\tid;\t\t/* ktid per netns */\n \tint\t\t\tchain_max;\t/* max ests per tick chain */\n@@ -1884,11 +1885,19 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats);\n void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats);\n void ip_vs_zero_estimator(struct ip_vs_stats *stats);\n void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);\n-void ip_vs_est_reload_start(struct netns_ipvs *ipvs);\n+void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart);\n int ip_vs_est_kthread_start(struct netns_ipvs *ipvs,\n \t\t\t struct ip_vs_est_kt_data *kd);\n void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd);\n \n+static inline void ip_vs_stop_estimator_tot_stats(struct netns_ipvs *ipvs)\n+{\n+#ifdef CONFIG_SYSCTL\n+\tip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s);\n+\tipvs->tot_stats->s.est.ktid = -2;\n+#endif\n+}\n+\n static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs)\n {\n #ifdef CONFIG_SYSCTL\ndiff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c\nindex caec516856e9..18b89f096d83 100644\n--- a/net/netfilter/ipvs/ip_vs_ctl.c\n+++ b/net/netfilter/ipvs/ip_vs_ctl.c\n@@ -261,12 +261,28 @@ static void est_reload_work_handler(struct work_struct *work)\n \t\tif (!kd)\n \t\t\tcontinue;\n \t\t/* New config ? Stop kthread tasks */\n-\t\tif (genid != genid_done)\n-\t\t\tip_vs_est_kthread_stop(kd);\n+\t\tif (genid != genid_done) {\n+\t\t\tif (!id) {\n+\t\t\t\t/* Only we can stop kt 0 but not under mutex */\n+\t\t\t\tmutex_unlock(&ipvs->est_mutex);\n+\t\t\t\tip_vs_est_kthread_stop(kd);\n+\t\t\t\tmutex_lock(&ipvs->est_mutex);\n+\t\t\t\tif (!READ_ONCE(ipvs->enable))\n+\t\t\t\t\tgoto unlock;\n+\t\t\t\t/* kd for kt 0 is never destroyed */\n+\t\t\t} else {\n+\t\t\t\tip_vs_est_kthread_stop(kd);\n+\t\t\t}\n+\t\t}\n \t\tif (!kd->task && !ip_vs_est_stopped(ipvs)) {\n+\t\t\tbool start;\n+\n \t\t\t/* Do not start kthreads above 0 in calc phase */\n-\t\t\tif ((!id || !ipvs->est_calc_phase) &&\n-\t\t\t ip_vs_est_kthread_start(ipvs, kd) < 0)\n+\t\t\tif (id)\n+\t\t\t\tstart = !ipvs->est_calc_phase;\n+\t\t\telse\n+\t\t\t\tstart = kd->needed;\n+\t\t\tif (start && ip_vs_est_kthread_start(ipvs, kd) < 0)\n \t\t\t\trepeat = true;\n \t\t}\n \t}\n@@ -1812,11 +1828,16 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,\n \t*svc_p = svc;\n \n \tif (!READ_ONCE(ipvs->enable)) {\n+\t\tmutex_lock(&ipvs->est_mutex);\n+\n \t\t/* Now there is a service - full throttle */\n \t\tWRITE_ONCE(ipvs->enable, 1);\n \n+\t\tipvs->est_max_threads = ip_vs_est_max_threads(ipvs);\n+\n \t\t/* Start estimation for first time */\n-\t\tip_vs_est_reload_start(ipvs);\n+\t\tip_vs_est_reload_start(ipvs, true);\n+\t\tmutex_unlock(&ipvs->est_mutex);\n \t}\n \n \treturn 0;\n@@ -2092,6 +2113,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)\n \t\t\tt = p;\n \t\t}\n \t}\n+\t/* Stop the tot_stats estimator early under service_mutex\n+\t * to avoid locking it again later.\n+\t */\n+\tif (cleanup)\n+\t\tip_vs_stop_estimator_tot_stats(ipvs);\n \treturn 0;\n }\n \n@@ -2337,7 +2363,7 @@ static int ipvs_proc_est_cpumask_set(const struct ctl_table *table,\n \t/* est_max_threads may depend on cpulist size */\n \tipvs->est_max_threads = ip_vs_est_max_threads(ipvs);\n \tipvs->est_calc_phase = 1;\n-\tip_vs_est_reload_start(ipvs);\n+\tip_vs_est_reload_start(ipvs, true);\n \n unlock:\n \tmutex_unlock(&ipvs->est_mutex);\n@@ -2417,7 +2443,7 @@ static int ipvs_proc_est_nice(const struct ctl_table *table, int write,\n \t\t\tmutex_lock(&ipvs->est_mutex);\n \t\t\tif (*valp != val) {\n \t\t\t\t*valp = val;\n-\t\t\t\tip_vs_est_reload_start(ipvs);\n+\t\t\t\tip_vs_est_reload_start(ipvs, true);\n \t\t\t}\n \t\t\tmutex_unlock(&ipvs->est_mutex);\n \t\t}\n@@ -2444,7 +2470,7 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write,\n \t\tmutex_lock(&ipvs->est_mutex);\n \t\tif (*valp != val) {\n \t\t\t*valp = val;\n-\t\t\tip_vs_est_reload_start(ipvs);\n+\t\t\tip_vs_est_reload_start(ipvs, true);\n \t\t}\n \t\tmutex_unlock(&ipvs->est_mutex);\n \t}\n@@ -4994,7 +5020,14 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)\n \tcancel_delayed_work_sync(&ipvs->defense_work);\n \tcancel_work_sync(&ipvs->defense_work.work);\n \tunregister_net_sysctl_table(ipvs->sysctl_hdr);\n-\tip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s);\n+\tif (ipvs->tot_stats->s.est.ktid != -2) {\n+\t\t/* Not stopped yet? This happens only on netns init error and\n+\t\t * we even do not need to lock the service_mutex for this case.\n+\t\t */\n+\t\tmutex_lock(&ipvs->service_mutex);\n+\t\tip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s);\n+\t\tmutex_unlock(&ipvs->service_mutex);\n+\t}\n \n \tif (ipvs->est_cpulist_valid)\n \t\tfree_cpumask_var(ipvs->sysctl_est_cpulist);\ndiff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c\nindex 433ba3cab58c..ab09f5182951 100644\n--- a/net/netfilter/ipvs/ip_vs_est.c\n+++ b/net/netfilter/ipvs/ip_vs_est.c\n@@ -68,6 +68,11 @@\n and the limit of estimators per kthread\n - est_add_ktid: ktid where to add new ests, can point to empty slot where\n we should add kt data\n+ - data protected by service_mutex: est_temp_list, est_add_ktid,\n+ est_kt_count(R/W), est_kt_arr(R/W), est_genid_done, kd->needed(R/W)\n+ - data protected by est_mutex: est_genid, est_max_threads, sysctl_est_cpulist,\n+ est_cpulist_valid, sysctl_est_nice, est_stopped, sysctl_run_estimation,\n+ est_kt_count(R), est_kt_arr(R), kd->needed(R), kd->task (id > 0)\n */\n \n static struct lock_class_key __ipvs_est_key;\n@@ -227,14 +232,17 @@ static int ip_vs_estimation_kthread(void *data)\n }\n \n /* Schedule stop/start for kthread tasks */\n-void ip_vs_est_reload_start(struct netns_ipvs *ipvs)\n+void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart)\n {\n+\tlockdep_assert_held(&ipvs->est_mutex);\n+\n \t/* Ignore reloads before first service is added */\n \tif (!READ_ONCE(ipvs->enable))\n \t\treturn;\n \tip_vs_est_stopped_recalc(ipvs);\n-\t/* Bump the kthread configuration genid */\n-\tatomic_inc(&ipvs->est_genid);\n+\t/* Bump the kthread configuration genid if stopping is requested */\n+\tif (restart)\n+\t\tatomic_inc(&ipvs->est_genid);\n \tqueue_delayed_work(system_long_wq, &ipvs->est_reload_work, 0);\n }\n \n@@ -304,12 +312,17 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)\n \tvoid *arr = NULL;\n \tint i;\n \n-\tif ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&\n-\t READ_ONCE(ipvs->enable) && ipvs->est_max_threads)\n-\t\treturn -EINVAL;\n-\n \tmutex_lock(&ipvs->est_mutex);\n \n+\t/* Allow kt 0 data to be created before the services are added\n+\t * and limit the kthreads when services are present.\n+\t */\n+\tif ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&\n+\t READ_ONCE(ipvs->enable) && ipvs->est_max_threads) {\n+\t\tret = -EINVAL;\n+\t\tgoto out;\n+\t}\n+\n \tfor (i = 0; i < id; i++) {\n \t\tif (!ipvs->est_kt_arr[i])\n \t\t\tbreak;\n@@ -333,6 +346,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)\n \tkd->est_timer = jiffies;\n \tkd->id = id;\n \tip_vs_est_set_params(ipvs, kd);\n+\tkd->needed = 1;\n \n \t/* Pre-allocate stats used in calc phase */\n \tif (!id && !kd->calc_stats) {\n@@ -341,12 +355,8 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)\n \t\t\tgoto out;\n \t}\n \n-\t/* Start kthread tasks only when services are present */\n-\tif (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {\n-\t\tret = ip_vs_est_kthread_start(ipvs, kd);\n-\t\tif (ret < 0)\n-\t\t\tgoto out;\n-\t}\n+\t/* Request kthread to be started */\n+\tip_vs_est_reload_start(ipvs, false);\n \n \tif (arr)\n \t\tipvs->est_kt_count++;\n@@ -482,12 +492,11 @@ static int ip_vs_enqueue_estimator(struct netns_ipvs *ipvs,\n /* Start estimation for stats */\n int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)\n {\n+\tstruct ip_vs_est_kt_data *kd = ipvs->est_kt_count > 0 ?\n+\t\t\t\t ipvs->est_kt_arr[0] : NULL;\n \tstruct ip_vs_estimator *est = &stats->est;\n \tint ret;\n \n-\tif (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))\n-\t\tipvs->est_max_threads = ip_vs_est_max_threads(ipvs);\n-\n \test->ktid = -1;\n \test->ktrow = IPVS_EST_NTICKS - 1;\t/* Initial delay */\n \n@@ -496,8 +505,15 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)\n \t * will not allocate much memory, just for kt 0.\n \t */\n \tret = 0;\n-\tif (!ipvs->est_kt_count || !ipvs->est_kt_arr[0])\n+\tif (!kd) {\n \t\tret = ip_vs_est_add_kthread(ipvs);\n+\t} else if (!kd->needed) {\n+\t\tmutex_lock(&ipvs->est_mutex);\n+\t\t/* We have job for the kt 0 task */\n+\t\tkd->needed = 1;\n+\t\tip_vs_est_reload_start(ipvs, true);\n+\t\tmutex_unlock(&ipvs->est_mutex);\n+\t}\n \tif (ret >= 0)\n \t\thlist_add_head(&est->list, &ipvs->est_temp_list);\n \telse\n@@ -578,16 +594,14 @@ void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)\n \t}\n \n end_kt0:\n-\t/* kt 0 is freed after all other kthreads and chains are empty */\n+\t/* kt 0 task is stopped after all other kt slots and chains are empty */\n \tif (ipvs->est_kt_count == 1 && hlist_empty(&ipvs->est_temp_list)) {\n \t\tkd = ipvs->est_kt_arr[0];\n-\t\tif (!kd || !kd->est_count) {\n+\t\tif (kd && !kd->est_count) {\n \t\t\tmutex_lock(&ipvs->est_mutex);\n-\t\t\tif (kd) {\n-\t\t\t\tip_vs_est_kthread_destroy(kd);\n-\t\t\t\tipvs->est_kt_arr[0] = NULL;\n-\t\t\t}\n-\t\t\tipvs->est_kt_count--;\n+\t\t\t/* Keep the kt0 data but request kthread_stop */\n+\t\t\tkd->needed = 0;\n+\t\t\tip_vs_est_reload_start(ipvs, true);\n \t\t\tmutex_unlock(&ipvs->est_mutex);\n \t\t\tipvs->est_add_ktid = 0;\n \t\t}\n@@ -647,9 +661,9 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)\n \tu64 val;\n \n \tINIT_HLIST_HEAD(&chain);\n-\tmutex_lock(&ipvs->service_mutex);\n+\tmutex_lock(&ipvs->est_mutex);\n \tkd = ipvs->est_kt_arr[0];\n-\tmutex_unlock(&ipvs->service_mutex);\n+\tmutex_unlock(&ipvs->est_mutex);\n \ts = kd ? kd->calc_stats : NULL;\n \tif (!s)\n \t\tgoto out;\n@@ -748,16 +762,16 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)\n \tif (!ip_vs_est_calc_limits(ipvs, &chain_max))\n \t\treturn;\n \n-\tmutex_lock(&ipvs->service_mutex);\n-\n \t/* Stop all other tasks, so that we can immediately move the\n \t * estimators to est_temp_list without RCU grace period\n \t */\n \tmutex_lock(&ipvs->est_mutex);\n \tfor (id = 1; id < ipvs->est_kt_count; id++) {\n \t\t/* netns clean up started, abort */\n-\t\tif (!READ_ONCE(ipvs->enable))\n-\t\t\tgoto unlock2;\n+\t\tif (kthread_should_stop() || !READ_ONCE(ipvs->enable)) {\n+\t\t\tmutex_unlock(&ipvs->est_mutex);\n+\t\t\treturn;\n+\t\t}\n \t\tkd = ipvs->est_kt_arr[id];\n \t\tif (!kd)\n \t\t\tcontinue;\n@@ -765,9 +779,11 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)\n \t}\n \tmutex_unlock(&ipvs->est_mutex);\n \n+\tmutex_lock(&ipvs->service_mutex);\n+\n \t/* Move all estimators to est_temp_list but carefully,\n \t * all estimators and kthread data can be released while\n-\t * we reschedule. Even for kthread 0.\n+\t * we reschedule.\n \t */\n \tstep = 0;\n \n@@ -849,9 +865,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)\n \tip_vs_stop_estimator(ipvs, stats);\n \t/* Tasks are stopped, move without RCU grace period */\n \test->ktid = -1;\n-\test->ktrow = row - kd->est_row;\n-\tif (est->ktrow < 0)\n-\t\test->ktrow += IPVS_EST_NTICKS;\n+\test->ktrow = delay;\n \thlist_add_head(&est->list, &ipvs->est_temp_list);\n \t/* kd freed ? */\n \tif (last)\n@@ -889,7 +903,6 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)\n \tif (genid == atomic_read(&ipvs->est_genid))\n \t\tipvs->est_calc_phase = 0;\n \n-unlock2:\n \tmutex_unlock(&ipvs->est_mutex);\n \n unlock:\n", "prefixes": [ "PATCHv5", "net" ] }