Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/815403/?format=api
{ "id": 815403, "url": "http://patchwork.ozlabs.org/api/patches/815403/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/1505815439-18720-3-git-send-email-wei.guo.simon@gmail.com/", "project": { "id": 2, "url": "http://patchwork.ozlabs.org/api/projects/2/?format=api", "name": "Linux PPC development", "link_name": "linuxppc-dev", "list_id": "linuxppc-dev.lists.ozlabs.org", "list_email": "linuxppc-dev@lists.ozlabs.org", "web_url": "https://github.com/linuxppc/wiki/wiki", "scm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git", "webscm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/", "list_archive_url": "https://lore.kernel.org/linuxppc-dev/", "list_archive_url_format": "https://lore.kernel.org/linuxppc-dev/{}/", "commit_url_format": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id={}" }, "msgid": "<1505815439-18720-3-git-send-email-wei.guo.simon@gmail.com>", "list_archive_url": "https://lore.kernel.org/linuxppc-dev/1505815439-18720-3-git-send-email-wei.guo.simon@gmail.com/", "date": "2017-09-19T10:03:58", "name": "[v1,2/3] powerpc: enhance memcmp() with VMX instruction for long bytes comparision", "commit_ref": null, "pull_url": null, "state": "superseded", "archived": true, "hash": "3f3fa40a9b9140b451644b56fc4130625c398227", "submitter": { "id": 68632, "url": "http://patchwork.ozlabs.org/api/people/68632/?format=api", "name": "Simon Guo", "email": "wei.guo.simon@gmail.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/1505815439-18720-3-git-send-email-wei.guo.simon@gmail.com/mbox/", "series": [ { "id": 3823, "url": "http://patchwork.ozlabs.org/api/series/3823/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=3823", "date": "2017-09-19T10:03:56", "name": "powerpc: memcmp() optimization", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/3823/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/815403/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/815403/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>", "X-Original-To": [ "patchwork-incoming@ozlabs.org", "linuxppc-dev@lists.ozlabs.org" ], "Delivered-To": [ "patchwork-incoming@ozlabs.org", "linuxppc-dev@lists.ozlabs.org" ], "Received": [ "from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xxJXR4MsVz9ryr\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 19 Sep 2017 20:10:15 +1000 (AEST)", "from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3xxJXR1wWlzDqXp\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 19 Sep 2017 20:10:15 +1000 (AEST)", "from mail-pf0-x244.google.com (mail-pf0-x244.google.com\n\t[IPv6:2607:f8b0:400e:c00::244])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3xxJPm5XPhzDqY8\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tTue, 19 Sep 2017 20:04:28 +1000 (AEST)", "by mail-pf0-x244.google.com with SMTP id h4so1330964pfk.0\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tTue, 19 Sep 2017 03:04:28 -0700 (PDT)", "from simonLocalRHEL7.x64 ([112.73.6.48])\n\tby smtp.gmail.com with ESMTPSA id\n\tf10sm2911307pgr.67.2017.09.19.03.04.23\n\t(version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);\n\tTue, 19 Sep 2017 03:04:25 -0700 (PDT)" ], "Authentication-Results": [ "ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"Iq+UxZGU\"; dkim-atps=neutral", "lists.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"Iq+UxZGU\"; dkim-atps=neutral", "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=gmail.com\n\t(client-ip=2607:f8b0:400e:c00::244; helo=mail-pf0-x244.google.com;\n\tenvelope-from=wei.guo.simon@gmail.com; receiver=<UNKNOWN>)", "lists.ozlabs.org; dkim=pass (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"Iq+UxZGU\"; dkim-atps=neutral" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;\n\th=from:to:cc:subject:date:message-id:in-reply-to:references;\n\tbh=otCs2JA55f6p+dBC1vyk3Wt1uS728oyObvPuTC8UrP4=;\n\tb=Iq+UxZGUbj2EwkOM5qY6SxRaNcehep6LOnv0dgktO0JKxoEGXa8D74UTSb1UEwGJWU\n\tDmu7rpPzl/sg4skjlbceCUg8s594ALQApNIS9kKD82HX4etlkYxJ4SHfLNWQnmWO+TGg\n\tECCyb1GGwDOsoFDLDzdO7PuHUTgqhT36lkuSPRZPDdvVcJGXycUrdq0urd9cMhm7Fipf\n\twq1DIhbTXOmSRVcnOFs5JQp3n8BFqglnJSAGzco1LhKonsmWmYKG037MBcqFkwXlY6NQ\n\th7FedhJ7fjyNF2246YUulWCTJrCp6x+y11ugVJ9V9IgSUCQOUO81K+OD/2+ozsnYztN4\n\tP09Q==", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to\n\t:references;\n\tbh=otCs2JA55f6p+dBC1vyk3Wt1uS728oyObvPuTC8UrP4=;\n\tb=umSVA18VkZKJCI3vEfRGDEb6VoEfvTj2nIueavFfaEZ4Zk2sQqzKmd6HUGy6P1/xZ2\n\tOe4kzP5bWrSzh1eC0s1RU94Q0+kCQGCCwe6GK1F3j8sTiTBX9fGgD5S3kgCYKTZd/tjr\n\tDO3EE1WBKKFKIGcGIYafW2ap84/jRtPgn5H3qiqgNLwBiNhm3SrlbdcUniTO3V6dt1i5\n\tQAAOjZPrcy1E9ZmlxAEzIVQZbFgPnL80D2c6/0744DXiI+ONtvjMo7AxP0apj0ek8KsE\n\t/5aPQ9KJ6VZTjG8LLuRCZXClfdgfAevAPcg30OgtM+AISvGgPRcr6e7Ev+egsekZe+ZV\n\thmJw==", "X-Gm-Message-State": "AHPjjUgz8xdt84FJ2R9LLno2lsmHbyahoCA+O5KZOyaA3ulWXsHcYs/D\n\tJfFU48rRXkevR6bODZWvad+FCw==", "X-Google-Smtp-Source": "AOwi7QCN695BigZsCOoClIyT27Ukmv8QNaNfjhhnoVNUGxDKLxiUNVEL4HQxShWjC5HZkrZxz7qXdA==", "X-Received": "by 10.101.80.140 with SMTP id r12mr791632pgp.267.1505815466400; \n\tTue, 19 Sep 2017 03:04:26 -0700 (PDT)", "From": "wei.guo.simon@gmail.com", "To": "linuxppc-dev@lists.ozlabs.org", "Subject": "[PATCH v1 2/3] powerpc: enhance memcmp() with VMX instruction for\n\tlong bytes comparision", "Date": "Tue, 19 Sep 2017 18:03:58 +0800", "Message-Id": "<1505815439-18720-3-git-send-email-wei.guo.simon@gmail.com>", "X-Mailer": "git-send-email 1.8.3.1", "In-Reply-To": "<1505815439-18720-1-git-send-email-wei.guo.simon@gmail.com>", "References": "<1505815439-18720-1-git-send-email-wei.guo.simon@gmail.com>", "X-BeenThere": "linuxppc-dev@lists.ozlabs.org", "X-Mailman-Version": "2.1.24", "Precedence": "list", "List-Id": "Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>", "List-Unsubscribe": "<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>", "List-Archive": "<http://lists.ozlabs.org/pipermail/linuxppc-dev/>", "List-Post": "<mailto:linuxppc-dev@lists.ozlabs.org>", "List-Help": "<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>", "List-Subscribe": "<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>", "Cc": "\"Naveen N. Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\tSimon Guo <wei.guo.simon@gmail.com>", "Errors-To": "linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org", "Sender": "\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>" }, "content": "From: Simon Guo <wei.guo.simon@gmail.com>\n\nThis patch add VMX primitives to do memcmp() in case the compare size\nexceeds 4K bytes.\n\nTest result with following test program:\n------\ntools/testing/selftests/powerpc/stringloops# cat memcmp.c\n\nint test_memcmp(const void *s1, const void *s2, size_t n);\n\nstatic int testcase(void)\n{\n\tchar *s1;\n\tchar *s2;\n\tunsigned long i;\n\n\ts1 = memalign(128, SIZE);\n\tif (!s1) {\n\t\tperror(\"memalign\");\n\t\texit(1);\n\t}\n\n\ts2 = memalign(128, SIZE);\n\tif (!s2) {\n\t\tperror(\"memalign\");\n\t\texit(1);\n\t}\n\n\tfor (i = 0; i < SIZE; i++) {\n\t\ts1[i] = i & 0xff;\n\t\ts2[i] = i & 0xff;\n\t}\n\tfor (i = 0; i < ITERATIONS; i++)\n\t\ttest_memcmp(s1, s2, SIZE);\n\n\treturn 0;\n}\n\nint main(void)\n{\n\treturn test_harness(testcase, \"memcmp\");\n}\n\n------\nWithout VMX patch:\n 5.085776331 seconds time elapsed ( +- 0.28% )\nWith VMX patch:\n 4.584002052 seconds time elapsed ( +- 0.02% )\n\n\t\tThere is ~10% improvement.\n\nHowever I am not aware whether there is use case in kernel for memcmp on\nlarge size yet.\n\nSigned-off-by: Simon Guo <wei.guo.simon@gmail.com>\n---\n arch/powerpc/include/asm/asm-prototypes.h | 2 +-\n arch/powerpc/lib/copypage_power7.S | 2 +-\n arch/powerpc/lib/memcmp_64.S | 79 +++++++++++++++++++++++++++++++\n arch/powerpc/lib/memcpy_power7.S | 2 +-\n arch/powerpc/lib/vmx-helper.c | 2 +-\n 5 files changed, 83 insertions(+), 4 deletions(-)", "diff": "diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h\nindex 7330150..e6530d8 100644\n--- a/arch/powerpc/include/asm/asm-prototypes.h\n+++ b/arch/powerpc/include/asm/asm-prototypes.h\n@@ -49,7 +49,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval,\n /* VMX copying */\n int enter_vmx_usercopy(void);\n int exit_vmx_usercopy(void);\n-int enter_vmx_copy(void);\n+int enter_vmx_ops(void);\n void * exit_vmx_copy(void *dest);\n \n /* Traps */\ndiff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S\nindex ca5fc8f..9e7729e 100644\n--- a/arch/powerpc/lib/copypage_power7.S\n+++ b/arch/powerpc/lib/copypage_power7.S\n@@ -60,7 +60,7 @@ _GLOBAL(copypage_power7)\n \tstd\tr4,-STACKFRAMESIZE+STK_REG(R30)(r1)\n \tstd\tr0,16(r1)\n \tstdu\tr1,-STACKFRAMESIZE(r1)\n-\tbl\tenter_vmx_copy\n+\tbl\tenter_vmx_ops\n \tcmpwi\tr3,0\n \tld\tr0,STACKFRAMESIZE+16(r1)\n \tld\tr3,STK_REG(R31)(r1)\ndiff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S\nindex 6dbafdb..b86a1d3 100644\n--- a/arch/powerpc/lib/memcmp_64.S\n+++ b/arch/powerpc/lib/memcmp_64.S\n@@ -153,6 +153,13 @@ _GLOBAL(memcmp)\n \tblr\n \n .Llong:\n+#ifdef CONFIG_ALTIVEC\n+\t/* Try to use vmx loop if length is larger than 4K */\n+\tcmpldi cr6,r5,4096\n+\tbgt\tcr6,.Lvmx_cmp\n+\n+.Llong_novmx_cmp:\n+#endif\n \tli\toff8,8\n \tli\toff16,16\n \tli\toff24,24\n@@ -310,4 +317,76 @@ _GLOBAL(memcmp)\n 8:\n \tblr\n \n+#ifdef CONFIG_ALTIVEC\n+.Lvmx_cmp:\n+\tmflr r0\n+\tstd r3,-STACKFRAMESIZE+STK_REG(R31)(r1)\n+\tstd r4,-STACKFRAMESIZE+STK_REG(R30)(r1)\n+\tstd r5,-STACKFRAMESIZE+STK_REG(R29)(r1)\n+\tstd r0,16(r1)\n+\tstdu r1,-STACKFRAMESIZE(r1)\n+\tbl enter_vmx_ops\n+\tcmpwi cr1,r3,0\n+\tld r0,STACKFRAMESIZE+16(r1)\n+\tld r3,STK_REG(R31)(r1)\n+\tld r4,STK_REG(R30)(r1)\n+\tld r5,STK_REG(R29)(r1)\n+\taddi\tr1,r1,STACKFRAMESIZE\n+\tmtlr r0\n+\tbeq cr1,.Llong_novmx_cmp\n+\n+3:\n+\t/* Enter with src/dst address 8 bytes aligned, and len is\n+\t * no less than 4KB. Need to align with 16 bytes further.\n+\t */\n+\tandi.\trA,r3,8\n+\tbeq\t4f\n+\tLD\trA,0,r3\n+\tLD\trB,0,r4\n+\tcmpld\tcr0,rA,rB\n+\tbne\tcr0,.LcmpAB_lightweight\n+\n+\taddi\tr3,r3,8\n+\taddi\tr4,r4,8\n+\n+4:\n+\t/* compare 32 bytes for each loop */\n+\tsrdi\tr0,r5,5\n+\tmtctr\tr0\n+\tandi.\tr5,r5,31\n+\tli\toff16,16\n+5:\n+\tlvx \tv0,0,r3\n+\tlvx \tv1,0,r4\n+\tvcmpequd. v0,v0,v1\n+\tbf\t24,7f\n+\tlvx \tv0,off16,r3\n+\tlvx \tv1,off16,r4\n+\tvcmpequd. v0,v0,v1\n+\tbf\t24,6f\n+\taddi\tr3,r3,32\n+\taddi\tr4,r4,32\n+\tbdnz\t5b\n+\n+\tcmpdi\tr5,0\n+\tbeq\t.Lzero\n+\tb\t.Lshort\n+\n+6:\n+\taddi\tr3,r3,16\n+\taddi\tr4,r4,16\n+\n+7:\n+\tLD\trA,0,r3\n+\tLD\trB,0,r4\n+\tcmpld\tcr0,rA,rB\n+\tbne\tcr0,.LcmpAB_lightweight\n+\n+\tli\toff8,8\n+\tLD\trA,off8,r3\n+\tLD\trB,off8,r4\n+\tcmpld\tcr0,rA,rB\n+\tbne\tcr0,.LcmpAB_lightweight\n+\tb\t.Lzero\n+#endif\n EXPORT_SYMBOL(memcmp)\ndiff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S\nindex 193909a..682e386 100644\n--- a/arch/powerpc/lib/memcpy_power7.S\n+++ b/arch/powerpc/lib/memcpy_power7.S\n@@ -230,7 +230,7 @@ _GLOBAL(memcpy_power7)\n \tstd\tr5,-STACKFRAMESIZE+STK_REG(R29)(r1)\n \tstd\tr0,16(r1)\n \tstdu\tr1,-STACKFRAMESIZE(r1)\n-\tbl\tenter_vmx_copy\n+\tbl\tenter_vmx_ops\n \tcmpwi\tcr1,r3,0\n \tld\tr0,STACKFRAMESIZE+16(r1)\n \tld\tr3,STK_REG(R31)(r1)\ndiff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c\nindex bf925cd..923a9ab 100644\n--- a/arch/powerpc/lib/vmx-helper.c\n+++ b/arch/powerpc/lib/vmx-helper.c\n@@ -53,7 +53,7 @@ int exit_vmx_usercopy(void)\n \treturn 0;\n }\n \n-int enter_vmx_copy(void)\n+int enter_vmx_ops(void)\n {\n \tif (in_interrupt())\n \t\treturn 0;\n", "prefixes": [ "v1", "2/3" ] }