Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/806918/?format=api
{ "id": 806918, "url": "http://patchwork.ozlabs.org/api/patches/806918/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/1503972142-289376-4-git-send-email-pasha.tatashin@oracle.com/", "project": { "id": 2, "url": "http://patchwork.ozlabs.org/api/projects/2/?format=api", "name": "Linux PPC development", "link_name": "linuxppc-dev", "list_id": "linuxppc-dev.lists.ozlabs.org", "list_email": "linuxppc-dev@lists.ozlabs.org", "web_url": "https://github.com/linuxppc/wiki/wiki", "scm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git", "webscm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/", "list_archive_url": "https://lore.kernel.org/linuxppc-dev/", "list_archive_url_format": "https://lore.kernel.org/linuxppc-dev/{}/", "commit_url_format": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id={}" }, "msgid": "<1503972142-289376-4-git-send-email-pasha.tatashin@oracle.com>", "list_archive_url": "https://lore.kernel.org/linuxppc-dev/1503972142-289376-4-git-send-email-pasha.tatashin@oracle.com/", "date": "2017-08-29T02:02:14", "name": "[v7,03/11] mm: deferred_init_memmap improvements", "commit_ref": null, "pull_url": null, "state": "not-applicable", "archived": false, "hash": "179cd48f72e5c43cdd4affdff0a40799cfd97dd1", "submitter": { "id": 71010, "url": "http://patchwork.ozlabs.org/api/people/71010/?format=api", "name": "Pavel Tatashin", "email": "pasha.tatashin@oracle.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/1503972142-289376-4-git-send-email-pasha.tatashin@oracle.com/mbox/", "series": [ { "id": 286, "url": "http://patchwork.ozlabs.org/api/series/286/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=286", "date": "2017-08-29T02:02:21", "name": "complete deferred page initialization", "version": 7, "mbox": "http://patchwork.ozlabs.org/series/286/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/806918/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/806918/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>", "X-Original-To": [ "patchwork-incoming@ozlabs.org", "linuxppc-dev@lists.ozlabs.org" ], "Delivered-To": [ "patchwork-incoming@ozlabs.org", "linuxppc-dev@lists.ozlabs.org" ], "Received": [ "from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xhC4w5L9gz9s75\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 29 Aug 2017 12:19:28 +1000 (AEST)", "from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3xhC4w4TNtzDqXj\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 29 Aug 2017 12:19:28 +1000 (AEST)", "from aserp1040.oracle.com (aserp1040.oracle.com [141.146.126.69])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3xhBkw6g2qzDqYR\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tTue, 29 Aug 2017 12:03:52 +1000 (AEST)", "from userv0022.oracle.com (userv0022.oracle.com [156.151.31.74])\n\tby aserp1040.oracle.com (Sentrion-MTA-4.3.2/Sentrion-MTA-4.3.2) with\n\tESMTP id v7T22WtG011005\n\t(version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256\n\tverify=OK); Tue, 29 Aug 2017 02:02:32 GMT", "from aserv0121.oracle.com (aserv0121.oracle.com [141.146.126.235])\n\tby userv0022.oracle.com (8.14.4/8.14.4) with ESMTP id\n\tv7T22VDx005125\n\t(version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-GCM-SHA384 bits=256\n\tverify=OK); Tue, 29 Aug 2017 02:02:31 GMT", "from abhmp0011.oracle.com (abhmp0011.oracle.com [141.146.116.17])\n\tby aserv0121.oracle.com (8.14.4/8.13.8) with ESMTP id\n\tv7T22V2i011301; Tue, 29 Aug 2017 02:02:31 GMT", "from ca-ldom-ol-build-1.us.oracle.com (/10.129.68.23)\n\tby default (Oracle Beehive Gateway v4.0)\n\twith ESMTP ; Mon, 28 Aug 2017 19:02:30 -0700" ], "From": "Pavel Tatashin <pasha.tatashin@oracle.com>", "To": "linux-kernel@vger.kernel.org, sparclinux@vger.kernel.org,\n\tlinux-mm@kvack.org, linuxppc-dev@lists.ozlabs.org,\n\tlinux-s390@vger.kernel.org, linux-arm-kernel@lists.infradead.org,\n\tx86@kernel.org, kasan-dev@googlegroups.com, borntraeger@de.ibm.com,\n\theiko.carstens@de.ibm.com, davem@davemloft.net, willy@infradead.org, \n\tmhocko@kernel.org, ard.biesheuvel@linaro.org, will.deacon@arm.com,\n\tcatalin.marinas@arm.com, sam@ravnborg.org, mgorman@techsingularity.net,\n\tSteven.Sistare@oracle.com, daniel.m.jordan@oracle.com,\n\tbob.picco@oracle.com", "Subject": "[PATCH v7 03/11] mm: deferred_init_memmap improvements", "Date": "Mon, 28 Aug 2017 22:02:14 -0400", "Message-Id": "<1503972142-289376-4-git-send-email-pasha.tatashin@oracle.com>", "X-Mailer": "git-send-email 1.7.1", "In-Reply-To": "<1503972142-289376-1-git-send-email-pasha.tatashin@oracle.com>", "References": "<1503972142-289376-1-git-send-email-pasha.tatashin@oracle.com>", "X-Source-IP": "userv0022.oracle.com [156.151.31.74]", "X-BeenThere": "linuxppc-dev@lists.ozlabs.org", "X-Mailman-Version": "2.1.23", "Precedence": "list", "List-Id": "Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>", "List-Unsubscribe": "<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>", "List-Archive": "<http://lists.ozlabs.org/pipermail/linuxppc-dev/>", "List-Post": "<mailto:linuxppc-dev@lists.ozlabs.org>", "List-Help": "<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>", "List-Subscribe": "<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>", "Errors-To": "linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org", "Sender": "\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>" }, "content": "This patch fixes two issues in deferred_init_memmap", "diff": "=====\nIn deferred_init_memmap() where all deferred struct pages are initialized\nwe have a check like this:\n\nif (page->flags) {\n\tVM_BUG_ON(page_zone(page) != zone);\n\tgoto free_range;\n}\n\nThis way we are checking if the current deferred page has already been\ninitialized. It works, because memory for struct pages has been zeroed, and\nthe only way flags are not zero if it went through __init_single_page()\nbefore. But, once we change the current behavior and won't zero the memory\nin memblock allocator, we cannot trust anything inside \"struct page\"es\nuntil they are initialized. This patch fixes this.\n\nThe deferred_init_memmap() is re-written to loop through only free memory\nranges provided by memblock.\n\n=====\nThis patch fixes another existing issue on systems that have holes in\nzones i.e CONFIG_HOLES_IN_ZONE is defined.\n\nIn for_each_mem_pfn_range() we have code like this:\n\nif (!pfn_valid_within(pfn)\n\tgoto free_range;\n\nNote: 'page' is not set to NULL and is not incremented but 'pfn' advances.\nThus means if deferred struct pages are enabled on systems with these kind\nof holes, linux would get memory corruptions. I have fixed this issue by\ndefining a new macro that performs all the necessary operations when we\nfree the current set of pages.\n\nSigned-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>\nReviewed-by: Steven Sistare <steven.sistare@oracle.com>\nReviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>\nReviewed-by: Bob Picco <bob.picco@oracle.com>\n---\n mm/page_alloc.c | 161 +++++++++++++++++++++++++++-----------------------------\n 1 file changed, 78 insertions(+), 83 deletions(-)\n\ndiff --git a/mm/page_alloc.c b/mm/page_alloc.c\nindex 7a58eb5757e3..c170ac569aec 100644\n--- a/mm/page_alloc.c\n+++ b/mm/page_alloc.c\n@@ -1409,14 +1409,17 @@ void clear_zone_contiguous(struct zone *zone)\n }\n \n #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT\n-static void __init deferred_free_range(struct page *page,\n-\t\t\t\t\tunsigned long pfn, int nr_pages)\n+static void __init deferred_free_range(unsigned long pfn,\n+\t\t\t\t unsigned long nr_pages)\n {\n-\tint i;\n+\tstruct page *page;\n+\tunsigned long i;\n \n-\tif (!page)\n+\tif (!nr_pages)\n \t\treturn;\n \n+\tpage = pfn_to_page(pfn);\n+\n \t/* Free a large naturally-aligned chunk if possible */\n \tif (nr_pages == pageblock_nr_pages &&\n \t (pfn & (pageblock_nr_pages - 1)) == 0) {\n@@ -1442,19 +1445,82 @@ static inline void __init pgdat_init_report_one_done(void)\n \t\tcomplete(&pgdat_init_all_done_comp);\n }\n \n+#define DEFERRED_FREE(nr_free, free_base_pfn, page)\t\t\t\\\n+({\t\t\t\t\t\t\t\t\t\\\n+\tunsigned long nr = (nr_free);\t\t\t\t\t\\\n+\t\t\t\t\t\t\t\t\t\\\n+\tdeferred_free_range((free_base_pfn), (nr));\t\t\t\\\n+\t(free_base_pfn) = 0;\t\t\t\t\t\t\\\n+\t(nr_free) = 0;\t\t\t\t\t\t\t\\\n+\tpage = NULL;\t\t\t\t\t\t\t\\\n+\tnr;\t\t\t\t\t\t\t\t\\\n+})\n+\n+static unsigned long deferred_init_range(int nid, int zid, unsigned long pfn,\n+\t\t\t\t\t unsigned long end_pfn)\n+{\n+\tstruct mminit_pfnnid_cache nid_init_state = { };\n+\tunsigned long nr_pgmask = pageblock_nr_pages - 1;\n+\tunsigned long free_base_pfn = 0;\n+\tunsigned long nr_pages = 0;\n+\tunsigned long nr_free = 0;\n+\tstruct page *page = NULL;\n+\n+\tfor (; pfn < end_pfn; pfn++) {\n+\t\t/*\n+\t\t * First we check if pfn is valid on architectures where it is\n+\t\t * possible to have holes within pageblock_nr_pages. On systems\n+\t\t * where it is not possible, this function is optimized out.\n+\t\t *\n+\t\t * Then, we check if a current large page is valid by only\n+\t\t * checking the validity of the head pfn.\n+\t\t *\n+\t\t * meminit_pfn_in_nid is checked on systems where pfns can\n+\t\t * interleave within a node: a pfn is between start and end\n+\t\t * of a node, but does not belong to this memory node.\n+\t\t *\n+\t\t * Finally, we minimize pfn page lookups and scheduler checks by\n+\t\t * performing it only once every pageblock_nr_pages.\n+\t\t */\n+\t\tif (!pfn_valid_within(pfn)) {\n+\t\t\tnr_pages += DEFERRED_FREE(nr_free, free_base_pfn, page);\n+\t\t} else if (!(pfn & nr_pgmask) && !pfn_valid(pfn)) {\n+\t\t\tnr_pages += DEFERRED_FREE(nr_free, free_base_pfn, page);\n+\t\t} else if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {\n+\t\t\tnr_pages += DEFERRED_FREE(nr_free, free_base_pfn, page);\n+\t\t} else if (page && (pfn & nr_pgmask)) {\n+\t\t\tpage++;\n+\t\t\t__init_single_page(page, pfn, zid, nid);\n+\t\t\tnr_free++;\n+\t\t} else {\n+\t\t\tnr_pages += DEFERRED_FREE(nr_free, free_base_pfn, page);\n+\t\t\tpage = pfn_to_page(pfn);\n+\t\t\t__init_single_page(page, pfn, zid, nid);\n+\t\t\tfree_base_pfn = pfn;\n+\t\t\tnr_free = 1;\n+\t\t\tcond_resched();\n+\t\t}\n+\t}\n+\t/* Free the last block of pages to allocator */\n+\tnr_pages += DEFERRED_FREE(nr_free, free_base_pfn, page);\n+\n+\treturn nr_pages;\n+}\n+\n /* Initialise remaining memory on a node */\n static int __init deferred_init_memmap(void *data)\n {\n \tpg_data_t *pgdat = data;\n \tint nid = pgdat->node_id;\n-\tstruct mminit_pfnnid_cache nid_init_state = { };\n \tunsigned long start = jiffies;\n \tunsigned long nr_pages = 0;\n-\tunsigned long walk_start, walk_end;\n-\tint i, zid;\n+\tunsigned long spfn, epfn;\n+\tphys_addr_t spa, epa;\n+\tint zid;\n \tstruct zone *zone;\n \tunsigned long first_init_pfn = pgdat->first_deferred_pfn;\n \tconst struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);\n+\tu64 i;\n \n \tif (first_init_pfn == ULONG_MAX) {\n \t\tpgdat_init_report_one_done();\n@@ -1476,83 +1542,12 @@ static int __init deferred_init_memmap(void *data)\n \t\tif (first_init_pfn < zone_end_pfn(zone))\n \t\t\tbreak;\n \t}\n+\tfirst_init_pfn = max(zone->zone_start_pfn, first_init_pfn);\n \n-\tfor_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) {\n-\t\tunsigned long pfn, end_pfn;\n-\t\tstruct page *page = NULL;\n-\t\tstruct page *free_base_page = NULL;\n-\t\tunsigned long free_base_pfn = 0;\n-\t\tint nr_to_free = 0;\n-\n-\t\tend_pfn = min(walk_end, zone_end_pfn(zone));\n-\t\tpfn = first_init_pfn;\n-\t\tif (pfn < walk_start)\n-\t\t\tpfn = walk_start;\n-\t\tif (pfn < zone->zone_start_pfn)\n-\t\t\tpfn = zone->zone_start_pfn;\n-\n-\t\tfor (; pfn < end_pfn; pfn++) {\n-\t\t\tif (!pfn_valid_within(pfn))\n-\t\t\t\tgoto free_range;\n-\n-\t\t\t/*\n-\t\t\t * Ensure pfn_valid is checked every\n-\t\t\t * pageblock_nr_pages for memory holes\n-\t\t\t */\n-\t\t\tif ((pfn & (pageblock_nr_pages - 1)) == 0) {\n-\t\t\t\tif (!pfn_valid(pfn)) {\n-\t\t\t\t\tpage = NULL;\n-\t\t\t\t\tgoto free_range;\n-\t\t\t\t}\n-\t\t\t}\n-\n-\t\t\tif (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {\n-\t\t\t\tpage = NULL;\n-\t\t\t\tgoto free_range;\n-\t\t\t}\n-\n-\t\t\t/* Minimise pfn page lookups and scheduler checks */\n-\t\t\tif (page && (pfn & (pageblock_nr_pages - 1)) != 0) {\n-\t\t\t\tpage++;\n-\t\t\t} else {\n-\t\t\t\tnr_pages += nr_to_free;\n-\t\t\t\tdeferred_free_range(free_base_page,\n-\t\t\t\t\t\tfree_base_pfn, nr_to_free);\n-\t\t\t\tfree_base_page = NULL;\n-\t\t\t\tfree_base_pfn = nr_to_free = 0;\n-\n-\t\t\t\tpage = pfn_to_page(pfn);\n-\t\t\t\tcond_resched();\n-\t\t\t}\n-\n-\t\t\tif (page->flags) {\n-\t\t\t\tVM_BUG_ON(page_zone(page) != zone);\n-\t\t\t\tgoto free_range;\n-\t\t\t}\n-\n-\t\t\t__init_single_page(page, pfn, zid, nid);\n-\t\t\tif (!free_base_page) {\n-\t\t\t\tfree_base_page = page;\n-\t\t\t\tfree_base_pfn = pfn;\n-\t\t\t\tnr_to_free = 0;\n-\t\t\t}\n-\t\t\tnr_to_free++;\n-\n-\t\t\t/* Where possible, batch up pages for a single free */\n-\t\t\tcontinue;\n-free_range:\n-\t\t\t/* Free the current block of pages to allocator */\n-\t\t\tnr_pages += nr_to_free;\n-\t\t\tdeferred_free_range(free_base_page, free_base_pfn,\n-\t\t\t\t\t\t\t\tnr_to_free);\n-\t\t\tfree_base_page = NULL;\n-\t\t\tfree_base_pfn = nr_to_free = 0;\n-\t\t}\n-\t\t/* Free the last block of pages to allocator */\n-\t\tnr_pages += nr_to_free;\n-\t\tdeferred_free_range(free_base_page, free_base_pfn, nr_to_free);\n-\n-\t\tfirst_init_pfn = max(end_pfn, first_init_pfn);\n+\tfor_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {\n+\t\tspfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));\n+\t\tepfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));\n+\t\tnr_pages += deferred_init_range(nid, zid, spfn, epfn);\n \t}\n \n \t/* Sanity check that the next zone really is unpopulated */\n", "prefixes": [ "v7", "03/11" ] }