Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/808604/?format=api
{ "id": 808604, "url": "http://patchwork.ozlabs.org/api/patches/808604/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/patch/150425796850.22227.6149901788992481211.stgit@firesoul/", "project": { "id": 7, "url": "http://patchwork.ozlabs.org/api/projects/7/?format=api", "name": "Linux network development", "link_name": "netdev", "list_id": "netdev.vger.kernel.org", "list_email": "netdev@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<150425796850.22227.6149901788992481211.stgit@firesoul>", "list_archive_url": null, "date": "2017-09-01T09:26:08", "name": "[net,1/2] Revert \"net: use lib/percpu_counter API for fragmentation mem accounting\"", "commit_ref": null, "pull_url": null, "state": "accepted", "archived": true, "hash": "d238d3fece4fa2ab4f7257a50c39a2b0f71e1bef", "submitter": { "id": 13625, "url": "http://patchwork.ozlabs.org/api/people/13625/?format=api", "name": "Jesper Dangaard Brouer", "email": "brouer@redhat.com" }, "delegate": { "id": 34, "url": "http://patchwork.ozlabs.org/api/users/34/?format=api", "username": "davem", "first_name": "David", "last_name": "Miller", "email": "davem@davemloft.net" }, "mbox": "http://patchwork.ozlabs.org/project/netdev/patch/150425796850.22227.6149901788992481211.stgit@firesoul/mbox/", "series": [ { "id": 984, "url": "http://patchwork.ozlabs.org/api/series/984/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/list/?series=984", "date": "2017-09-01T09:26:03", "name": "net: revert lib/percpu_counter API for fragmentation mem accounting", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/984/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/808604/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/808604/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<netdev-owner@vger.kernel.org>", "X-Original-To": "patchwork-incoming@ozlabs.org", "Delivered-To": "patchwork-incoming@ozlabs.org", "Authentication-Results": [ "ozlabs.org;\n\tspf=none (mailfrom) smtp.mailfrom=vger.kernel.org\n\t(client-ip=209.132.180.67; helo=vger.kernel.org;\n\tenvelope-from=netdev-owner@vger.kernel.org;\n\treceiver=<UNKNOWN>)", "ext-mx07.extmail.prod.ext.phx2.redhat.com;\n\tdmarc=none (p=none dis=none) header.from=redhat.com", "ext-mx07.extmail.prod.ext.phx2.redhat.com;\n\tspf=fail smtp.mailfrom=brouer@redhat.com" ], "Received": [ "from vger.kernel.org (vger.kernel.org [209.132.180.67])\n\tby ozlabs.org (Postfix) with ESMTP id 3xkDQC6qRDz9sPk\n\tfor <patchwork-incoming@ozlabs.org>;\n\tFri, 1 Sep 2017 19:26:27 +1000 (AEST)", "(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S1751686AbdIAJ0N (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);\n\tFri, 1 Sep 2017 05:26:13 -0400", "from mx1.redhat.com ([209.132.183.28]:60344 \"EHLO mx1.redhat.com\"\n\trhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP\n\tid S1751581AbdIAJ0K (ORCPT <rfc822;netdev@vger.kernel.org>);\n\tFri, 1 Sep 2017 05:26:10 -0400", "from smtp.corp.redhat.com\n\t(int-mx06.intmail.prod.int.phx2.redhat.com [10.5.11.16])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby mx1.redhat.com (Postfix) with ESMTPS id 011AFC047B9D;\n\tFri, 1 Sep 2017 09:26:10 +0000 (UTC)", "from firesoul.localdomain (ovpn-200-42.brq.redhat.com\n\t[10.40.200.42])\n\tby smtp.corp.redhat.com (Postfix) with ESMTP id 68DCE6A578;\n\tFri, 1 Sep 2017 09:26:09 +0000 (UTC)", "from [192.168.5.1] (localhost [IPv6:::1])\n\tby firesoul.localdomain (Postfix) with ESMTP id 8DFAB3318E71C;\n\tFri, 1 Sep 2017 11:26:08 +0200 (CEST)" ], "DMARC-Filter": "OpenDMARC Filter v1.3.2 mx1.redhat.com 011AFC047B9D", "Subject": "[PATCH net 1/2] Revert \"net: use lib/percpu_counter API for\n\tfragmentation mem accounting\"", "From": "Jesper Dangaard Brouer <brouer@redhat.com>", "To": "netdev@vger.kernel.org", "Cc": "mkubecek@suse.cz, Florian Westphal <fw@strlen.de>,\n\tliujian56@huawei.com, Jesper Dangaard Brouer <brouer@redhat.com>", "Date": "Fri, 01 Sep 2017 11:26:08 +0200", "Message-ID": "<150425796850.22227.6149901788992481211.stgit@firesoul>", "In-Reply-To": "<150425790711.22227.12264977619066874632.stgit@firesoul>", "References": "<150425790711.22227.12264977619066874632.stgit@firesoul>", "User-Agent": "StGit/0.17.1-dirty", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=\"utf-8\"", "Content-Transfer-Encoding": "7bit", "X-Scanned-By": "MIMEDefang 2.79 on 10.5.11.16", "X-Greylist": "Sender IP whitelisted, not delayed by milter-greylist-4.5.16\n\t(mx1.redhat.com [10.5.110.31]);\n\tFri, 01 Sep 2017 09:26:10 +0000 (UTC)", "Sender": "netdev-owner@vger.kernel.org", "Precedence": "bulk", "List-ID": "<netdev.vger.kernel.org>", "X-Mailing-List": "netdev@vger.kernel.org" }, "content": "This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2.\n\nThere is a bug in fragmentation codes use of the percpu_counter API,\nthat can cause issues on systems with many CPUs.\n\nThe frag_mem_limit() just reads the global counter (fbc->count),\nwithout considering other CPUs can have upto batch size (130K) that\nhaven't been subtracted yet. Due to the 3MBytes lower thresh limit,\nthis become dangerous at >=24 CPUs (3*1024*1024/130000=24).\n\nThe correct API usage would be to use __percpu_counter_compare() which\ndoes the right thing, and takes into account the number of (online)\nCPUs and batch size, to account for this and call __percpu_counter_sum()\nwhen needed.\n\nWe choose to revert the use of the lib/percpu_counter API for frag\nmemory accounting for several reasons:\n\n1) On systems with CPUs > 24, the heavier fully locked\n __percpu_counter_sum() is always invoked, which will be more\n expensive than the atomic_t that is reverted to.\n\nGiven systems with more than 24 CPUs are becoming common this doesn't\nseem like a good option. To mitigate this, the batch size could be\ndecreased and thresh be increased.\n\n2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX\n CPU, before SKBs are pushed into sockets on remote CPUs. Given\n NICs can only hash on L2 part of the IP-header, the NIC-RXq's will\n likely be limited. Thus, a fair chance that atomic add+dec happen\n on the same CPU.\n\nRevert note that commit 1d6119baf061 (\"net: fix percpu memory leaks\")\nremoved init_frag_mem_limit() and instead use inet_frags_init_net().\nAfter this revert, inet_frags_uninit_net() becomes empty.\n\nFixes: 6d7b857d541e (\"net: use lib/percpu_counter API for fragmentation mem accounting\")\nFixes: 1d6119baf061 (\"net: fix percpu memory leaks\")\nSigned-off-by: Jesper Dangaard Brouer <brouer@redhat.com>\n---\n include/net/inet_frag.h | 30 +++++++++---------------------\n net/ipv4/inet_fragment.c | 4 +---\n 2 files changed, 10 insertions(+), 24 deletions(-)", "diff": "diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h\nindex 6fdcd2427776..fa635aa6d0b9 100644\n--- a/include/net/inet_frag.h\n+++ b/include/net/inet_frag.h\n@@ -1,14 +1,9 @@\n #ifndef __NET_FRAG_H__\n #define __NET_FRAG_H__\n \n-#include <linux/percpu_counter.h>\n-\n struct netns_frags {\n-\t/* The percpu_counter \"mem\" need to be cacheline aligned.\n-\t * mem.count must not share cacheline with other writers\n-\t */\n-\tstruct percpu_counter mem ____cacheline_aligned_in_smp;\n-\n+\t/* Keep atomic mem on separate cachelines in structs that include it */\n+\tatomic_t\t\tmem ____cacheline_aligned_in_smp;\n \t/* sysctls */\n \tint\t\t\ttimeout;\n \tint\t\t\thigh_thresh;\n@@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *);\n \n static inline int inet_frags_init_net(struct netns_frags *nf)\n {\n-\treturn percpu_counter_init(&nf->mem, 0, GFP_KERNEL);\n+\tatomic_set(&nf->mem, 0);\n+\treturn 0;\n }\n static inline void inet_frags_uninit_net(struct netns_frags *nf)\n {\n-\tpercpu_counter_destroy(&nf->mem);\n }\n \n void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);\n@@ -140,31 +135,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q)\n \n /* Memory Tracking Functions. */\n \n-/* The default percpu_counter batch size is not big enough to scale to\n- * fragmentation mem acct sizes.\n- * The mem size of a 64K fragment is approx:\n- * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes\n- */\n-static unsigned int frag_percpu_counter_batch = 130000;\n-\n static inline int frag_mem_limit(struct netns_frags *nf)\n {\n-\treturn percpu_counter_read(&nf->mem);\n+\treturn atomic_read(&nf->mem);\n }\n \n static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)\n {\n-\tpercpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);\n+\tatomic_sub(i, &nf->mem);\n }\n \n static inline void add_frag_mem_limit(struct netns_frags *nf, int i)\n {\n-\tpercpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);\n+\tatomic_add(i, &nf->mem);\n }\n \n-static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)\n+static inline int sum_frag_mem_limit(struct netns_frags *nf)\n {\n-\treturn percpu_counter_sum_positive(&nf->mem);\n+\treturn atomic_read(&nf->mem);\n }\n \n /* RFC 3168 support :\ndiff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c\nindex 96e95e83cc61..af74d0433453 100644\n--- a/net/ipv4/inet_fragment.c\n+++ b/net/ipv4/inet_fragment.c\n@@ -234,10 +234,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)\n \tcond_resched();\n \n \tif (read_seqretry(&f->rnd_seqlock, seq) ||\n-\t percpu_counter_sum(&nf->mem))\n+\t sum_frag_mem_limit(nf))\n \t\tgoto evict_again;\n-\n-\tpercpu_counter_destroy(&nf->mem);\n }\n EXPORT_SYMBOL(inet_frags_exit_net);\n \n", "prefixes": [ "net", "1/2" ] }