[{"id":1773307,"web_url":"http://patchwork.ozlabs.org/comment/1773307/","msgid":"<20170921005426.GC3387@simonLocalRHEL7.x64>","date":"2017-09-21T00:54:26","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","submitter":{"id":68632,"url":"http://patchwork.ozlabs.org/api/people/68632/","name":"Simon Guo","email":"wei.guo.simon@gmail.com"},"content":"Hi,\nOn Thu, Sep 21, 2017 at 07:34:39AM +0800, wei.guo.simon@gmail.com wrote:\n> From: Simon Guo <wei.guo.simon@gmail.com>\n> \n> This patch add VMX primitives to do memcmp() in case the compare size\n> exceeds 4K bytes.\n> \n> Test result with following test program(replace the \"^>\" with \"\"):\n> ------\nI missed the exit_vmx_ops() part and need to rework on v3.\n\nThanks,\n- Simon","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xz49X2772z9s82\n\tfor <patchwork-incoming@ozlabs.org>;\n\tFri, 22 Sep 2017 17:00:00 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3xz49X0yzgzDsPB\n\tfor <patchwork-incoming@ozlabs.org>;\n\tFri, 22 Sep 2017 17:00:00 +1000 (AEST)","from mail-io0-x243.google.com (mail-io0-x243.google.com\n\t[IPv6:2607:f8b0:4001:c06::243])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3xz47g3Pf7zDsM7\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tFri, 22 Sep 2017 16:58:23 +1000 (AEST)","by mail-io0-x243.google.com with SMTP id j26so575282iod.0\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tThu, 21 Sep 2017 23:58:23 -0700 (PDT)","from localhost ([112.73.6.48]) by smtp.gmail.com with ESMTPSA id\n\tj10sm1020375ioo.3.2017.09.21.23.58.19\n\t(version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);\n\tThu, 21 Sep 2017 23:58:20 -0700 (PDT)"],"Authentication-Results":["ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"juKismE8\"; dkim-atps=neutral","lists.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"juKismE8\"; dkim-atps=neutral","ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=gmail.com\n\t(client-ip=2607:f8b0:4001:c06::243; helo=mail-io0-x243.google.com;\n\tenvelope-from=wei.guo.simon@gmail.com; receiver=<UNKNOWN>)","lists.ozlabs.org; dkim=pass (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"juKismE8\"; dkim-atps=neutral"],"DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;\n\th=date:from:to:cc:subject:message-id:references:mime-version\n\t:content-disposition:in-reply-to:user-agent;\n\tbh=EHqbdHG+nZumjUgBhhcEcenGWKBFgXt2prHtSNGtWaY=;\n\tb=juKismE8l+OiLLAbTmlE1DbIjVCFU2FtWA1d4s6CQ5SDxvwqAjJUkE+1zp5+AVZIPu\n\tFa/TLy3OAV/tbttho7NKlzx9rs2W0DQCJxf8H7eEAJ1Le/AK3a83AFd+sRmdsvKjPE13\n\tE3JPMAsUHFVYo3a3OcfxkJP4CHJ3v+dauYRX1iAZ58c7MJFD4MlHR5O+6fVlqCGKbJH+\n\tDBSLEeLi/u2KOE9Y1lO1dA2tEbH7kxJbNv532mg5ZNr6+Wtk3ZY6xp4CDeBry2raLCz3\n\tyZMBiC7LwzWkQ68oT1A8a2W+jTKbBZoyC2s2Zg+fjNUk1IAdfYkDoCbCA/r/UHeCE49J\n\t5XqA==","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:date:from:to:cc:subject:message-id:references\n\t:mime-version:content-disposition:in-reply-to:user-agent;\n\tbh=EHqbdHG+nZumjUgBhhcEcenGWKBFgXt2prHtSNGtWaY=;\n\tb=KwsPAasTNV7V7Jfq93kpVYUUm7f3e6rMSYn746FrbmPISWV1ihDu+3wQIVrmoX4Q7H\n\tlXmv/Ngy7mtLSBESy2YA1dCPlWx0KlT4xaDwrbgF63eMy8p+BbqpQsvozEvHQOHVJvDJ\n\ttrzQT//1buyBArE6SguVeyhdgkmeNGBnwsJk0Wnykg/lXxYy0gORKieJUKsVeNMgfDfU\n\txmknppFYnsKrhHpxOyy/NLpcWJtXjdrGJhE/2X/Gmd0a483ALLvxthQ0BpzGvhseV7/M\n\t7Nbp2Es+pE/ThQj0PCSd9XVCVGaET7Rct2gBXJYJOIG0yNS7SnGLRtIza0vMPrWl3IVO\n\tbm5Q==","X-Gm-Message-State":"AHPjjUgwTcZUz84CMUArcPmlIzFKvpaJ5+RWFbZUccVL7EZ42aBxGJwK\n\tWSfV4hlXcn5PWGpnPaWbZqnPmQ==","X-Google-Smtp-Source":"AOwi7QBEb3M35ZjS7y8bkFEM/EZmb9BEEFaYteDG4YuJ5snThy7POvxznq5ZOuy4B38+uSEBGBfsXw==","X-Received":"by 10.107.12.195 with SMTP id 64mr6935769iom.157.1506063500872; \n\tThu, 21 Sep 2017 23:58:20 -0700 (PDT)","Date":"Thu, 21 Sep 2017 08:54:26 +0800","From":"Simon Guo <wei.guo.simon@gmail.com>","To":"linuxppc-dev@lists.ozlabs.org","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","Message-ID":"<20170921005426.GC3387@simonLocalRHEL7.x64>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>","MIME-Version":"1.0","Content-Type":"text/plain; charset=us-ascii","Content-Disposition":"inline","In-Reply-To":"<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>","User-Agent":"Mutt/1.5.21 (2010-09-15)","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\tDavid Laight <David.Laight@ACULAB.COM>","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1773629,"web_url":"http://patchwork.ozlabs.org/comment/1773629/","msgid":"<1506089208.1155.32.camel@gmail.com>","date":"2017-09-22T14:06:48","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","submitter":{"id":64457,"url":"http://patchwork.ozlabs.org/api/people/64457/","name":"Cyril Bur","email":"cyrilbur@gmail.com"},"content":"On Thu, 2017-09-21 at 07:34 +0800, wei.guo.simon@gmail.com wrote:\n> From: Simon Guo <wei.guo.simon@gmail.com>\n> \n> This patch add VMX primitives to do memcmp() in case the compare size\n> exceeds 4K bytes.\n> \n\nHi Simon,\n\nSorry I didn't see this sooner, I've actually been working on a kernel\nversion of glibc commit dec4a7105e (powerpc: Improve memcmp performance\nfor POWER8) unfortunately I've been distracted and it still isn't done.\nI wonder if we can consolidate our efforts here. One thing I did come\nacross in my testing is that for memcmp() that will fail early (I\nhaven't narrowed down the the optimal number yet) the cost of enabling\nVMX actually turns out to be a performance regression, as such I've\nadded a small check of the first 64 bytes to the start before enabling\nVMX to ensure the penalty is worth taking.\n\nAlso, you should consider doing 4K and greater, KSM (Kernel Samepage\nMerging) uses PAGE_SIZE which can be as small as 4K.\n\nCyril\n\n> Test result with following test program(replace the \"^>\" with \"\"):\n> ------\n> > # cat tools/testing/selftests/powerpc/stringloops/memcmp.c\n> > #include <malloc.h>\n> > #include <stdlib.h>\n> > #include <string.h>\n> > #include <time.h>\n> > #include \"utils.h\"\n> > #define SIZE (1024 * 1024 * 900)\n> > #define ITERATIONS 40\n> \n> int test_memcmp(const void *s1, const void *s2, size_t n);\n> \n> static int testcase(void)\n> {\n>         char *s1;\n>         char *s2;\n>         unsigned long i;\n> \n>         s1 = memalign(128, SIZE);\n>         if (!s1) {\n>                 perror(\"memalign\");\n>                 exit(1);\n>         }\n> \n>         s2 = memalign(128, SIZE);\n>         if (!s2) {\n>                 perror(\"memalign\");\n>                 exit(1);\n>         }\n> \n>         for (i = 0; i < SIZE; i++)  {\n>                 s1[i] = i & 0xff;\n>                 s2[i] = i & 0xff;\n>         }\n>         for (i = 0; i < ITERATIONS; i++) {\n> \t\tint ret = test_memcmp(s1, s2, SIZE);\n> \n> \t\tif (ret) {\n> \t\t\tprintf(\"return %d at[%ld]! should have returned zero\\n\", ret, i);\n> \t\t\tabort();\n> \t\t}\n> \t}\n> \n>         return 0;\n> }\n> \n> int main(void)\n> {\n>         return test_harness(testcase, \"memcmp\");\n> }\n> ------\n> Without VMX patch:\n>        7.435191479 seconds time elapsed                                          ( +- 0.51% )\n> With VMX patch:\n>        6.802038938 seconds time elapsed                                          ( +- 0.56% )\n> \t\tThere is ~+8% improvement.\n> \n> However I am not aware whether there is use case in kernel for memcmp on\n> large size yet.\n> \n> Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>\n> ---\n>  arch/powerpc/include/asm/asm-prototypes.h |  2 +-\n>  arch/powerpc/lib/copypage_power7.S        |  2 +-\n>  arch/powerpc/lib/memcmp_64.S              | 82 +++++++++++++++++++++++++++++++\n>  arch/powerpc/lib/memcpy_power7.S          |  2 +-\n>  arch/powerpc/lib/vmx-helper.c             |  2 +-\n>  5 files changed, 86 insertions(+), 4 deletions(-)\n> \n> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h\n> index 7330150..e6530d8 100644\n> --- a/arch/powerpc/include/asm/asm-prototypes.h\n> +++ b/arch/powerpc/include/asm/asm-prototypes.h\n> @@ -49,7 +49,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval,\n>  /* VMX copying */\n>  int enter_vmx_usercopy(void);\n>  int exit_vmx_usercopy(void);\n> -int enter_vmx_copy(void);\n> +int enter_vmx_ops(void);\n>  void * exit_vmx_copy(void *dest);\n>  \n>  /* Traps */\n> diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S\n> index ca5fc8f..9e7729e 100644\n> --- a/arch/powerpc/lib/copypage_power7.S\n> +++ b/arch/powerpc/lib/copypage_power7.S\n> @@ -60,7 +60,7 @@ _GLOBAL(copypage_power7)\n>  \tstd\tr4,-STACKFRAMESIZE+STK_REG(R30)(r1)\n>  \tstd\tr0,16(r1)\n>  \tstdu\tr1,-STACKFRAMESIZE(r1)\n> -\tbl\tenter_vmx_copy\n> +\tbl\tenter_vmx_ops\n>  \tcmpwi\tr3,0\n>  \tld\tr0,STACKFRAMESIZE+16(r1)\n>  \tld\tr3,STK_REG(R31)(r1)\n> diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S\n> index 6dccfb8..40218fc 100644\n> --- a/arch/powerpc/lib/memcmp_64.S\n> +++ b/arch/powerpc/lib/memcmp_64.S\n> @@ -162,6 +162,13 @@ _GLOBAL(memcmp)\n>  \tblr\n>  \n>  .Llong:\n> +#ifdef CONFIG_ALTIVEC\n> +\t/* Try to use vmx loop if length is larger than 4K */\n> +\tcmpldi  cr6,r5,4096\n> +\tbgt\tcr6,.Lvmx_cmp\n> +\n> +.Llong_novmx_cmp:\n> +#endif\n>  \tli\toff8,8\n>  \tli\toff16,16\n>  \tli\toff24,24\n> @@ -319,4 +326,79 @@ _GLOBAL(memcmp)\n>  8:\n>  \tblr\n>  \n> +#ifdef CONFIG_ALTIVEC\n> +.Lvmx_cmp:\n> +\tmflr    r0\n> +\tstd     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)\n> +\tstd     r4,-STACKFRAMESIZE+STK_REG(R30)(r1)\n> +\tstd     r5,-STACKFRAMESIZE+STK_REG(R29)(r1)\n> +\tstd     r0,16(r1)\n> +\tstdu    r1,-STACKFRAMESIZE(r1)\n> +\tbl      enter_vmx_ops\n> +\tcmpwi   cr1,r3,0\n> +\tld      r0,STACKFRAMESIZE+16(r1)\n> +\tld      r3,STK_REG(R31)(r1)\n> +\tld      r4,STK_REG(R30)(r1)\n> +\tld      r5,STK_REG(R29)(r1)\n> +\taddi\tr1,r1,STACKFRAMESIZE\n> +\tmtlr    r0\n> +\tbeq     cr1,.Llong_novmx_cmp\n> +\n> +3:\n> +\t/* Enter with src/dst address 8 bytes aligned, and len is\n> +\t * no less than 4KB. Need to align with 16 bytes further.\n> +\t */\n> +\tandi.\trA,r3,8\n> +\tbeq\t4f\n> +\tLD\trA,0,r3\n> +\tLD\trB,0,r4\n> +\tcmpld\tcr0,rA,rB\n> +\tbne\tcr0,.LcmpAB_lightweight\n> +\n> +\taddi\tr3,r3,8\n> +\taddi\tr4,r4,8\n> +\taddi\tr5,r5,-8\n> +\n> +4:\n> +\t/* compare 32 bytes for each loop */\n> +\tsrdi\tr0,r5,5\n> +\tmtctr\tr0\n> +\tandi.\tr5,r5,31\n> +\tli\toff16,16\n> +\n> +.balign 16\n> +5:\n> +\tlvx \tv0,0,r3\n> +\tlvx \tv1,0,r4\n> +\tvcmpequd. v0,v0,v1\n> +\tbf\t24,7f\n> +\tlvx \tv0,off16,r3\n> +\tlvx \tv1,off16,r4\n> +\tvcmpequd. v0,v0,v1\n> +\tbf\t24,6f\n> +\taddi\tr3,r3,32\n> +\taddi\tr4,r4,32\n> +\tbdnz\t5b\n> +\n> +\tcmpdi\tr5,0\n> +\tbeq\t.Lzero\n> +\tb\t.L8bytes_aligned\n> +\n> +6:\n> +\taddi\tr3,r3,16\n> +\taddi\tr4,r4,16\n> +\n> +7:\n> +\tLD\trA,0,r3\n> +\tLD\trB,0,r4\n> +\tcmpld\tcr0,rA,rB\n> +\tbne\tcr0,.LcmpAB_lightweight\n> +\n> +\tli\toff8,8\n> +\tLD\trA,off8,r3\n> +\tLD\trB,off8,r4\n> +\tcmpld\tcr0,rA,rB\n> +\tbne\tcr0,.LcmpAB_lightweight\n> +\tb\t.Lzero\n> +#endif\n>  EXPORT_SYMBOL(memcmp)\n> diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S\n> index 193909a..682e386 100644\n> --- a/arch/powerpc/lib/memcpy_power7.S\n> +++ b/arch/powerpc/lib/memcpy_power7.S\n> @@ -230,7 +230,7 @@ _GLOBAL(memcpy_power7)\n>  \tstd\tr5,-STACKFRAMESIZE+STK_REG(R29)(r1)\n>  \tstd\tr0,16(r1)\n>  \tstdu\tr1,-STACKFRAMESIZE(r1)\n> -\tbl\tenter_vmx_copy\n> +\tbl\tenter_vmx_ops\n>  \tcmpwi\tcr1,r3,0\n>  \tld\tr0,STACKFRAMESIZE+16(r1)\n>  \tld\tr3,STK_REG(R31)(r1)\n> diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c\n> index bf925cd..923a9ab 100644\n> --- a/arch/powerpc/lib/vmx-helper.c\n> +++ b/arch/powerpc/lib/vmx-helper.c\n> @@ -53,7 +53,7 @@ int exit_vmx_usercopy(void)\n>  \treturn 0;\n>  }\n>  \n> -int enter_vmx_copy(void)\n> +int enter_vmx_ops(void)\n>  {\n>  \tif (in_interrupt())\n>  \t\treturn 0;","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xzFnV5mZlz9t3h\n\tfor <patchwork-incoming@ozlabs.org>;\n\tSat, 23 Sep 2017 00:13:18 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3xzFnV4f3pzDsMr\n\tfor <patchwork-incoming@ozlabs.org>;\n\tSat, 23 Sep 2017 00:13:18 +1000 (AEST)","from mail-pg0-x243.google.com (mail-pg0-x243.google.com\n\t[IPv6:2607:f8b0:400e:c05::243])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3xzFf85J1FzDsMK\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tSat, 23 Sep 2017 00:06:56 +1000 (AEST)","by mail-pg0-x243.google.com with SMTP id j16so703513pga.2\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tFri, 22 Sep 2017 07:06:56 -0700 (PDT)","from camb691 ([220.240.15.54])\n\tby smtp.googlemail.com with ESMTPSA id\n\te69sm7188910pfc.79.2017.09.22.07.06.51\n\t(version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256);\n\tFri, 22 Sep 2017 07:06:53 -0700 (PDT)"],"Authentication-Results":["ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"r9Nn+/sm\"; dkim-atps=neutral","lists.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"r9Nn+/sm\"; dkim-atps=neutral","ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=gmail.com\n\t(client-ip=2607:f8b0:400e:c05::243; helo=mail-pg0-x243.google.com;\n\tenvelope-from=cyrilbur@gmail.com; receiver=<UNKNOWN>)","lists.ozlabs.org; dkim=pass (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"r9Nn+/sm\"; dkim-atps=neutral"],"DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;\n\th=message-id:subject:from:to:cc:date:in-reply-to:references\n\t:mime-version:content-transfer-encoding;\n\tbh=Q4iJ9MHzH4hFXCLYGh3SI0ezq8qkpD2iNcptaba8iXU=;\n\tb=r9Nn+/smR6y+1vh1iCVFbYcgCf+MiN3LBNqlnK7gOCj/73XmdT5XYLO3TgERhjgZV8\n\tsY2D7CEV7rV/8zgeuZgWI3Gvk65CbLr/bu8AfFsBIpG7CDfVJNH6QEVpkrVpgmx7bmcV\n\t1JnaEYXpATd+XdcHkbznCv5H776gJZd7ilTecbWcatX4Top88wA4Z45DKVc9ISI9bb8w\n\tGrXZmn5gSOgs2oiYlQBu0/VOx2/ahzaWQdYcZ3VZpd0aiq9M8JKkRhFHEqwy03m4dWEJ\n\tF+S2Y2xOAO+wZ0Nss6N5rSdtXqwSgTaxCu/0m9X25Z3wPDhpHcrPDCGhuNu1TXY4S5vG\n\tdZNA==","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:message-id:subject:from:to:cc:date:in-reply-to\n\t:references:mime-version:content-transfer-encoding;\n\tbh=Q4iJ9MHzH4hFXCLYGh3SI0ezq8qkpD2iNcptaba8iXU=;\n\tb=jaNqdDBLBe9PDvAqmxcCCZb6AgOT8AL5V7c3m5wSaQt2zapw/t9tfqHIi76V3sZHzc\n\t88DRqUZaEcKmZGxsEO5KltH50SjdWEkMg9Fc7znZdvX8dpJHIsf6iwndOEU6zJN7SyNU\n\tWj5eZnNRCsdzmWkKBocax+X2yeHNIBrDe7he9pOVV2TWfgWWaKp+fQc5xwZtR/D2/iYs\n\tqRxCRbWI46sd5I8PTxBpxYA6iB475XqgKDKk43i2BJsSRoh+Ik8Yu99FmJvo/pqzV/F2\n\t52OcMp9CKR6X+Tw8QkcGiyWQRnq8qNBXNkYEgzPeiHIru9xxqO090L5EbgAT9Z4bGJDP\n\tAV8A==","X-Gm-Message-State":"AHPjjUgQe4Nt+z8caXb/JD36oylZ8P3pMOPNx/YEp95a7ciD8vFvx9rn\n\tgmMAxKbt9BGBsO+R2yTMzm0=","X-Google-Smtp-Source":"AOwi7QBF1NZyXS0rcZvphx9PGarzla2irKbxoQa2prNeuq02DK6/d9SpQ8JQjhHGqynG18JE4odjvQ==","X-Received":"by 10.99.169.25 with SMTP id u25mr9431359pge.363.1506089213997; \n\tFri, 22 Sep 2017 07:06:53 -0700 (PDT)","Message-ID":"<1506089208.1155.32.camel@gmail.com>","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","From":"Cyril Bur <cyrilbur@gmail.com>","To":"wei.guo.simon@gmail.com, linuxppc-dev@lists.ozlabs.org","Date":"Sat, 23 Sep 2017 00:06:48 +1000","In-Reply-To":"<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>","Content-Type":"text/plain; charset=\"UTF-8\"","X-Mailer":"Evolution 3.24.5 ","Mime-Version":"1.0","Content-Transfer-Encoding":"7bit","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\tDavid Laight <David.Laight@ACULAB.COM>","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1774395,"web_url":"http://patchwork.ozlabs.org/comment/1774395/","msgid":"<20170923211843.GA10899@simonLocalRHEL7.x64>","date":"2017-09-23T21:18:43","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","submitter":{"id":68632,"url":"http://patchwork.ozlabs.org/api/people/68632/","name":"Simon Guo","email":"wei.guo.simon@gmail.com"},"content":"Hi Cyril,\nOn Sat, Sep 23, 2017 at 12:06:48AM +1000, Cyril Bur wrote:\n> On Thu, 2017-09-21 at 07:34 +0800, wei.guo.simon@gmail.com wrote:\n> > From: Simon Guo <wei.guo.simon@gmail.com>\n> > \n> > This patch add VMX primitives to do memcmp() in case the compare size\n> > exceeds 4K bytes.\n> > \n> \n> Hi Simon,\n> \n> Sorry I didn't see this sooner, I've actually been working on a kernel\n> version of glibc commit dec4a7105e (powerpc: Improve memcmp performance\n> for POWER8) unfortunately I've been distracted and it still isn't done.\nThanks for sync with me. Let's consolidate our effort together :)\n\nI have a quick check on glibc commit dec4a7105e. \nLooks the aligned case comparison with VSX is launched without rN size\nlimitation, which means it will have a VSX reg load penalty even when the \nlength is 9 bytes.\n\nIt did some optimization when src/dest addrs don't have the same offset \non 8 bytes alignment boundary. I need to read more closely.\n\n> I wonder if we can consolidate our efforts here. One thing I did come\n> across in my testing is that for memcmp() that will fail early (I\n> haven't narrowed down the the optimal number yet) the cost of enabling\n> VMX actually turns out to be a performance regression, as such I've\n> added a small check of the first 64 bytes to the start before enabling\n> VMX to ensure the penalty is worth taking.\nWill there still be a penalty if the 65th byte differs?  \n\n> \n> Also, you should consider doing 4K and greater, KSM (Kernel Samepage\n> Merging) uses PAGE_SIZE which can be as small as 4K.\nCurrently the VMX will only be applied when size exceeds 4K. Are you\nsuggesting a bigger threshold than 4K?\n\nWe can sync more offline for v3.\n\nThanks,\n- Simon","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y0pzz3Wlgz9sNr\n\tfor <patchwork-incoming@ozlabs.org>;\n\tMon, 25 Sep 2017 13:12:47 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y0pzz2G7YzDsPS\n\tfor <patchwork-incoming@ozlabs.org>;\n\tMon, 25 Sep 2017 13:12:47 +1000 (AEST)","from mail-pf0-x242.google.com (mail-pf0-x242.google.com\n\t[IPv6:2607:f8b0:400e:c00::242])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y0pyL19XNzDr4N\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tMon, 25 Sep 2017 13:11:21 +1000 (AEST)","by mail-pf0-x242.google.com with SMTP id i23so3062613pfi.2\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tSun, 24 Sep 2017 20:11:21 -0700 (PDT)","from localhost ([218.82.63.235]) by smtp.gmail.com with ESMTPSA id\n\tg16sm9100604pgn.62.2017.09.24.20.11.17\n\t(version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);\n\tSun, 24 Sep 2017 20:11:18 -0700 (PDT)"],"Authentication-Results":["ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"rTf7GQEF\"; dkim-atps=neutral","lists.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"rTf7GQEF\"; dkim-atps=neutral","ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=gmail.com\n\t(client-ip=2607:f8b0:400e:c00::242; helo=mail-pf0-x242.google.com;\n\tenvelope-from=wei.guo.simon@gmail.com; receiver=<UNKNOWN>)","lists.ozlabs.org; dkim=pass (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"rTf7GQEF\"; dkim-atps=neutral"],"DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;\n\th=date:from:to:cc:subject:message-id:references:mime-version\n\t:content-disposition:in-reply-to:user-agent;\n\tbh=ZAk2VFQ/mcA4HVxshSO0Aetwajg7Pe/rMJEhIketZWg=;\n\tb=rTf7GQEFASUYyKfaWORLJyAcO0hkRBrStjzJQIZT94Q+gGnvHfBaeoiCsieh+FeP5N\n\t7A2fCz6Pca9/dW7oRsmo6ByZCJZswyQ1sogDeUxqktEHn8mgqyKwKZfgn/IDaPUKsiEY\n\tnFlek9ensjh8sn4+W+JVeXIdr3EeO8xI99n/wri+NVI8DvgMD9bvzmXma0u1djZS4O6M\n\tDxeEIu5YdezZW6HuT7dItkGlYjN31orhrL6N9Dpng8iHAS94guddwHO/dUeoaxWuN3Y/\n\tcr490CJ8w4xu5/RcyHZXYqAZVmDEWedQccC0cKC7XCy5buW6KF1jd9QrnTBGfi8+o1NC\n\tDJgA==","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:date:from:to:cc:subject:message-id:references\n\t:mime-version:content-disposition:in-reply-to:user-agent;\n\tbh=ZAk2VFQ/mcA4HVxshSO0Aetwajg7Pe/rMJEhIketZWg=;\n\tb=Fjj8u5HnIT+NZOvaeFcE0Vi2oMUtp8lHo/tJ/dU00MQ/1DCa39ylm5sRhtcqTpE7OT\n\tbw1sum4/u8ESLHToy0atS59Ng+zzzQ6NPzinjClimZAJ4qisaMfqHCjz0qie/dsR+/8J\n\tVCs4p5Cl5sxYau85oEJ/KlN7HeaJjy71aL12anO4pQfcO4if85s+7FpErfdU82RXyEii\n\tVn+TUTXPTfwMb4ouFIJZMznHcxoEp/gX5uMRm14iSjvAc6+lFv6jrg31XLUsSSsjxQ/l\n\tGlEVVntkyMswKR4j/75u/G4Dqmz2eC+ABjWL2yFjZH3NWzKmhHILVsqmu5OelrfAblyb\n\tbsTA==","X-Gm-Message-State":"AHPjjUgYN5gHEBpMZO9pcXYgUCrFmdyXMOJRZrKAd7yHwGJ3g0FKBQyz\n\tL2BKw8pMrS3LRDVjj9vufJU=","X-Google-Smtp-Source":"AOwi7QBuzzOJSTiPjm/kR7m84nJ4JFMhp2+N14OHIoSoQsJwFOUSI68bM1T9zrLGdJjOxtZ8m+aKnQ==","X-Received":"by 10.99.186.69 with SMTP id l5mr6235389pgu.160.1506309078830;\n\tSun, 24 Sep 2017 20:11:18 -0700 (PDT)","Date":"Sun, 24 Sep 2017 05:18:43 +0800","From":"Simon Guo <wei.guo.simon@gmail.com>","To":"Cyril Bur <cyrilbur@gmail.com>","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","Message-ID":"<20170923211843.GA10899@simonLocalRHEL7.x64>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>","MIME-Version":"1.0","Content-Type":"text/plain; charset=us-ascii","Content-Disposition":"inline","In-Reply-To":"<1506089208.1155.32.camel@gmail.com>","User-Agent":"Mutt/1.5.21 (2010-09-15)","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\tDavid Laight <David.Laight@ACULAB.COM>, linuxppc-dev@lists.ozlabs.org","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1775057,"web_url":"http://patchwork.ozlabs.org/comment/1775057/","msgid":"<1506383986.2918.4.camel@gmail.com>","date":"2017-09-25T23:59:46","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","submitter":{"id":64457,"url":"http://patchwork.ozlabs.org/api/people/64457/","name":"Cyril Bur","email":"cyrilbur@gmail.com"},"content":"On Sun, 2017-09-24 at 05:18 +0800, Simon Guo wrote:\n> Hi Cyril,\n> On Sat, Sep 23, 2017 at 12:06:48AM +1000, Cyril Bur wrote:\n> > On Thu, 2017-09-21 at 07:34 +0800, wei.guo.simon@gmail.com wrote:\n> > > From: Simon Guo <wei.guo.simon@gmail.com>\n> > > \n> > > This patch add VMX primitives to do memcmp() in case the compare size\n> > > exceeds 4K bytes.\n> > > \n> > \n> > Hi Simon,\n> > \n> > Sorry I didn't see this sooner, I've actually been working on a kernel\n> > version of glibc commit dec4a7105e (powerpc: Improve memcmp performance\n> > for POWER8) unfortunately I've been distracted and it still isn't done.\n> \n> Thanks for sync with me. Let's consolidate our effort together :)\n> \n> I have a quick check on glibc commit dec4a7105e. \n> Looks the aligned case comparison with VSX is launched without rN size\n> limitation, which means it will have a VSX reg load penalty even when the \n> length is 9 bytes.\n> \n\nThis was written for userspace which doesn't have to explicitly enable\nVMX in order to use it - we need to be smarter in the kernel.\n\n> It did some optimization when src/dest addrs don't have the same offset \n> on 8 bytes alignment boundary. I need to read more closely.\n> \n> > I wonder if we can consolidate our efforts here. One thing I did come\n> > across in my testing is that for memcmp() that will fail early (I\n> > haven't narrowed down the the optimal number yet) the cost of enabling\n> > VMX actually turns out to be a performance regression, as such I've\n> > added a small check of the first 64 bytes to the start before enabling\n> > VMX to ensure the penalty is worth taking.\n> \n> Will there still be a penalty if the 65th byte differs?  \n> \n\nI haven't benchmarked it exactly, my rationale for 64 bytes was that it\nis the stride of the vectorised copy loop so, if we know we'll fail\nbefore even completing one iteration of the vectorized loop there isn't\nany point using the vector regs.\n\n> > \n> > Also, you should consider doing 4K and greater, KSM (Kernel Samepage\n> > Merging) uses PAGE_SIZE which can be as small as 4K.\n> \n> Currently the VMX will only be applied when size exceeds 4K. Are you\n> suggesting a bigger threshold than 4K?\n> \n\nEqual to or greater than 4K, KSM will benefit.\n\n> We can sync more offline for v3.\n> \n> Thanks,\n> - Simon","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y1Lj45fhdz9sRm\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 26 Sep 2017 10:01:44 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y1Lj44W8RzDsPX\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 26 Sep 2017 10:01:44 +1000 (AEST)","from mail-pg0-x241.google.com (mail-pg0-x241.google.com\n\t[IPv6:2607:f8b0:400e:c05::241])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y1Lfy0NmRzDsM9\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tTue, 26 Sep 2017 09:59:53 +1000 (AEST)","by mail-pg0-x241.google.com with SMTP id j16so5727862pga.2\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tMon, 25 Sep 2017 16:59:53 -0700 (PDT)","from camb691.ozlabs.ibm.com ([122.99.82.10])\n\tby smtp.googlemail.com with ESMTPSA id\n\tw134sm12911512pfd.186.2017.09.25.16.59.49\n\t(version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256);\n\tMon, 25 Sep 2017 16:59:50 -0700 (PDT)"],"Authentication-Results":["ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"dXkwQBKt\"; dkim-atps=neutral","lists.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"dXkwQBKt\"; dkim-atps=neutral","ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=gmail.com\n\t(client-ip=2607:f8b0:400e:c05::241; helo=mail-pg0-x241.google.com;\n\tenvelope-from=cyrilbur@gmail.com; receiver=<UNKNOWN>)","lists.ozlabs.org; dkim=pass (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"dXkwQBKt\"; dkim-atps=neutral"],"DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;\n\th=message-id:subject:from:to:cc:date:in-reply-to:references\n\t:mime-version:content-transfer-encoding;\n\tbh=uwoH5yQLfYLp49CpVKlwcigJzj7aqqSJdc3eKl7Dx00=;\n\tb=dXkwQBKt1jSJjZ9qvOPfd/ZymjdeDXhuu82TjwJ8v0B4tVg3Fe/aWRLsRKkXioLRoj\n\toT4oDL9311DgFQdvRJmJsfcGZBFeOF5EDyJezHij+hQr7+9fwECQ7b3l46w6EVW/KxXr\n\tV/WpbPtdm4/q0HPJbGY10VqXzqj0ZyOhL5SozOv6O3QUytAx82je0HeoKYwlIQB5QVDR\n\tq7cvnWyf7LaQKjLsugyaqZbkJIJ+uoEXLnRjwe4kRfCsPKO703vtWoirrM0VQRqLC+gh\n\tmkwzBm48ZXK25h6Pgu1LbJgFQn6XyeLCvSBid7oiSruwrw+ArQClOMt6iuDuUU81GfK2\n\tShSQ==","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:message-id:subject:from:to:cc:date:in-reply-to\n\t:references:mime-version:content-transfer-encoding;\n\tbh=uwoH5yQLfYLp49CpVKlwcigJzj7aqqSJdc3eKl7Dx00=;\n\tb=BEB5jtWgZEsKTnNzIdyaUZoKJA7oMX2PoebNXvJhl8O8IoNl7TaBiRh091J0EGgMoW\n\tZgzQVcsJaxwAB8CXB/oAQR9FG79Z0sljEbCSBgNko4lQlbFxI5fLOE9J2hmbnkvzOqTG\n\t+Rkr+WPAbUN4KNcyNA4GxyqQCj3D05B1tyI87wRpEJvHOrD1o1RIyWwUb8oRh1efLYnd\n\tgb+NmhoGydYmV5oDOxZ3ABf+iEl0FtgeRg1Ry+CdLsLDuuD+ig/0FtwVCmMnHZqWrQFi\n\tn7yRd+XQOKSgglBdm/GWB7Cz3AVZ/i7dYmn0T8pXgNoJOlccPt5Hz2OJ8vntRNS6e0qP\n\trQWQ==","X-Gm-Message-State":"AHPjjUgyJcbZGva0wYmKEqQ8NT3Xqh9MyFWDhGnHL8b41Ij+8x9y0V/j\n\tVC7lQldkDP+IsysVkW0swvY=","X-Google-Smtp-Source":"AOwi7QDXWCpcAAFwb7IVcXVacKm7+oVg5cdruhCo6fGwpw+H4cKvC2YT86GLmYV6T1ZgXZBy6r8Evg==","X-Received":"by 10.98.210.138 with SMTP id c132mr8925972pfg.331.1506383991558;\n\tMon, 25 Sep 2017 16:59:51 -0700 (PDT)","Message-ID":"<1506383986.2918.4.camel@gmail.com>","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","From":"Cyril Bur <cyrilbur@gmail.com>","To":"Simon Guo <wei.guo.simon@gmail.com>","Date":"Tue, 26 Sep 2017 09:59:46 +1000","In-Reply-To":"<20170923211843.GA10899@simonLocalRHEL7.x64>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>\n\t<20170923211843.GA10899@simonLocalRHEL7.x64>","Content-Type":"text/plain; charset=\"UTF-8\"","X-Mailer":"Evolution 3.24.5 ","Mime-Version":"1.0","Content-Transfer-Encoding":"7bit","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\tDavid Laight <David.Laight@ACULAB.COM>, linuxppc-dev@lists.ozlabs.org","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1775177,"web_url":"http://patchwork.ozlabs.org/comment/1775177/","msgid":"<87k20mf2fn.fsf@concordia.ellerman.id.au>","date":"2017-09-26T05:34:36","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","submitter":{"id":46580,"url":"http://patchwork.ozlabs.org/api/people/46580/","name":"Michael Ellerman","email":"mpe@ellerman.id.au"},"content":"Cyril Bur <cyrilbur@gmail.com> writes:\n\n> On Sun, 2017-09-24 at 05:18 +0800, Simon Guo wrote:\n>> Hi Cyril,\n>> On Sat, Sep 23, 2017 at 12:06:48AM +1000, Cyril Bur wrote:\n>> > On Thu, 2017-09-21 at 07:34 +0800, wei.guo.simon@gmail.com wrote:\n>> > > From: Simon Guo <wei.guo.simon@gmail.com>\n>> > > \n>> > > This patch add VMX primitives to do memcmp() in case the compare size\n>> > > exceeds 4K bytes.\n>> > \n>> > Sorry I didn't see this sooner, I've actually been working on a kernel\n>> > version of glibc commit dec4a7105e (powerpc: Improve memcmp performance\n>> > for POWER8) unfortunately I've been distracted and it still isn't done.\n>> \n>> Thanks for sync with me. Let's consolidate our effort together :)\n>> \n>> I have a quick check on glibc commit dec4a7105e. \n>> Looks the aligned case comparison with VSX is launched without rN size\n>> limitation, which means it will have a VSX reg load penalty even when the \n>> length is 9 bytes.\n>> \n>\n> This was written for userspace which doesn't have to explicitly enable\n> VMX in order to use it - we need to be smarter in the kernel.\n\nWell the kernel has to do it for them after a trap, which is actually\neven more expensive, so arguably the glibc code should be smarter too\nand the threshold before using VMX should probably be higher than in the\nkernel (to cover the cost of the trap).\n\nBut I digress :)\n\ncheers","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y1V6h22gDz9t30\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 26 Sep 2017 15:35:56 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y1V6h01x9zDsPT\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 26 Sep 2017 15:35:56 +1000 (AEST)","from ozlabs.org (bilbo.ozlabs.org [103.22.144.67])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y1V583l78zDsPC\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tTue, 26 Sep 2017 15:34:36 +1000 (AEST)","from authenticated.ozlabs.org (localhost [127.0.0.1])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128\n\tbits)) (No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPSA id 3y1V582Bkpz9t30;\n\tTue, 26 Sep 2017 15:34:36 +1000 (AEST)"],"From":"Michael Ellerman <mpe@ellerman.id.au>","To":"Cyril Bur <cyrilbur@gmail.com>, Simon Guo <wei.guo.simon@gmail.com>","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","In-Reply-To":"<1506383986.2918.4.camel@gmail.com>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>\n\t<20170923211843.GA10899@simonLocalRHEL7.x64>\n\t<1506383986.2918.4.camel@gmail.com>","User-Agent":"Notmuch/0.21 (https://notmuchmail.org)","Date":"Tue, 26 Sep 2017 15:34:36 +1000","Message-ID":"<87k20mf2fn.fsf@concordia.ellerman.id.au>","MIME-Version":"1.0","Content-Type":"text/plain","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"raji@linux.vnet.ibm.com,\n\t\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>, \n\tDavid Laight <David.Laight@ACULAB.COM>, linuxppc-dev@lists.ozlabs.org","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1775385,"web_url":"http://patchwork.ozlabs.org/comment/1775385/","msgid":"<20170926112623.GZ8421@gate.crashing.org>","date":"2017-09-26T11:26:24","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","submitter":{"id":134,"url":"http://patchwork.ozlabs.org/api/people/134/","name":"Segher Boessenkool","email":"segher@kernel.crashing.org"},"content":"On Tue, Sep 26, 2017 at 03:34:36PM +1000, Michael Ellerman wrote:\n> Cyril Bur <cyrilbur@gmail.com> writes:\n> > This was written for userspace which doesn't have to explicitly enable\n> > VMX in order to use it - we need to be smarter in the kernel.\n> \n> Well the kernel has to do it for them after a trap, which is actually\n> even more expensive, so arguably the glibc code should be smarter too\n> and the threshold before using VMX should probably be higher than in the\n> kernel (to cover the cost of the trap).\n\nA lot of userspace code uses V*X, more and more with newer CPUs and newer\ncompiler versions.  If you already paid the price for using vector\nregisters you do not need to again :-)\n\n> But I digress :)\n\nYeah sorry :-)\n\n\nSegher","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y1dxJ5mVRz9tXP\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 26 Sep 2017 21:28:20 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y1dxJ3g0pzDsPw\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 26 Sep 2017 21:28:20 +1000 (AEST)","from gate.crashing.org (gate.crashing.org [63.228.1.57])\n\t(using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y1dvZ0XcVzDsPC\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tTue, 26 Sep 2017 21:26:49 +1000 (AEST)","from gate.crashing.org (localhost.localdomain [127.0.0.1])\n\tby gate.crashing.org (8.14.1/8.13.8) with ESMTP id v8QBQRJK016424;\n\tTue, 26 Sep 2017 06:26:28 -0500","(from segher@localhost)\n\tby gate.crashing.org (8.14.1/8.14.1/Submit) id v8QBQO5g016420;\n\tTue, 26 Sep 2017 06:26:24 -0500"],"Authentication-Results":"ozlabs.org; spf=permerror (mailfrom)\n\tsmtp.mailfrom=gate.crashing.org (client-ip=63.228.1.57;\n\thelo=gate.crashing.org; \n\tenvelope-from=segher@gate.crashing.org; receiver=<UNKNOWN>)","Date":"Tue, 26 Sep 2017 06:26:24 -0500","From":"Segher Boessenkool <segher@kernel.crashing.org>","To":"Michael Ellerman <mpe@ellerman.id.au>","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","Message-ID":"<20170926112623.GZ8421@gate.crashing.org>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>\n\t<20170923211843.GA10899@simonLocalRHEL7.x64>\n\t<1506383986.2918.4.camel@gmail.com>\n\t<87k20mf2fn.fsf@concordia.ellerman.id.au>","Mime-Version":"1.0","Content-Type":"text/plain; charset=us-ascii","Content-Disposition":"inline","In-Reply-To":"<87k20mf2fn.fsf@concordia.ellerman.id.au>","User-Agent":"Mutt/1.4.2.3i","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"Simon Guo <wei.guo.simon@gmail.com>, raji@linux.vnet.ibm.com,\n\tDavid Laight <David.Laight@ACULAB.COM>,\n\t\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\tlinuxppc-dev@lists.ozlabs.org, Cyril Bur <cyrilbur@gmail.com>","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1776013,"web_url":"http://patchwork.ozlabs.org/comment/1776013/","msgid":"<877ewkg6am.fsf@concordia.ellerman.id.au>","date":"2017-09-27T03:38:09","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","submitter":{"id":46580,"url":"http://patchwork.ozlabs.org/api/people/46580/","name":"Michael Ellerman","email":"mpe@ellerman.id.au"},"content":"Segher Boessenkool <segher@kernel.crashing.org> writes:\n\n> On Tue, Sep 26, 2017 at 03:34:36PM +1000, Michael Ellerman wrote:\n>> Cyril Bur <cyrilbur@gmail.com> writes:\n>> > This was written for userspace which doesn't have to explicitly enable\n>> > VMX in order to use it - we need to be smarter in the kernel.\n>> \n>> Well the kernel has to do it for them after a trap, which is actually\n>> even more expensive, so arguably the glibc code should be smarter too\n>> and the threshold before using VMX should probably be higher than in the\n>> kernel (to cover the cost of the trap).\n>\n> A lot of userspace code uses V*X, more and more with newer CPUs and newer\n> compiler versions.  If you already paid the price for using vector\n> registers you do not need to again :-)\n\nTrue, but you don't know if you've paid the price already.\n\nYou also pay the price on every context switch (more state to switch),\nso it's not free even once enabled. Which is why the kernel will\neventually turn it off if it's unused again.\n\nBut now that I've actually looked at the glibc version, it does do some\nchecks for minimum length before doing any vector instructions, so\nthat's probably all we want. The exact trade off between checking some\nbytes without vector vs turning on vector depends on your input data, so\nit's tricky to tune in general.\n\n>> But I digress :)\n>\n> Yeah sorry :-)\n\n:)\n\ncheers","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y23TT0QBvz9t4Z\n\tfor <patchwork-incoming@ozlabs.org>;\n\tWed, 27 Sep 2017 13:39:09 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y23TS6h5hzDsPp\n\tfor <patchwork-incoming@ozlabs.org>;\n\tWed, 27 Sep 2017 13:39:08 +1000 (AEST)","from ozlabs.org (ozlabs.org [IPv6:2401:3900:2:1::2])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y23SL0W6xzDqNm\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tWed, 27 Sep 2017 13:38:10 +1000 (AEST)","from authenticated.ozlabs.org (localhost [127.0.0.1])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128\n\tbits)) (No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPSA id 3y23SK5y5hz9t4b;\n\tWed, 27 Sep 2017 13:38:09 +1000 (AEST)"],"From":"Michael Ellerman <mpe@ellerman.id.au>","To":"Segher Boessenkool <segher@kernel.crashing.org>","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","In-Reply-To":"<20170926112623.GZ8421@gate.crashing.org>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>\n\t<20170923211843.GA10899@simonLocalRHEL7.x64>\n\t<1506383986.2918.4.camel@gmail.com>\n\t<87k20mf2fn.fsf@concordia.ellerman.id.au>\n\t<20170926112623.GZ8421@gate.crashing.org>","User-Agent":"Notmuch/0.21 (https://notmuchmail.org)","Date":"Wed, 27 Sep 2017 13:38:09 +1000","Message-ID":"<877ewkg6am.fsf@concordia.ellerman.id.au>","MIME-Version":"1.0","Content-Type":"text/plain","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"Simon Guo <wei.guo.simon@gmail.com>, raji@linux.vnet.ibm.com,\n\tDavid Laight <David.Laight@ACULAB.COM>,\n\t\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\tlinuxppc-dev@lists.ozlabs.org, Cyril Bur <cyrilbur@gmail.com>","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1776183,"web_url":"http://patchwork.ozlabs.org/comment/1776183/","msgid":"<20170927092758.GL8421@gate.crashing.org>","date":"2017-09-27T09:27:58","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","submitter":{"id":134,"url":"http://patchwork.ozlabs.org/api/people/134/","name":"Segher Boessenkool","email":"segher@kernel.crashing.org"},"content":"On Wed, Sep 27, 2017 at 01:38:09PM +1000, Michael Ellerman wrote:\n> Segher Boessenkool <segher@kernel.crashing.org> writes:\n> > A lot of userspace code uses V*X, more and more with newer CPUs and newer\n> > compiler versions.  If you already paid the price for using vector\n> > registers you do not need to again :-)\n> \n> True, but you don't know if you've paid the price already.\n> \n> You also pay the price on every context switch (more state to switch),\n> so it's not free even once enabled. Which is why the kernel will\n> eventually turn it off if it's unused again.\n\nYup.  But my point is that because user space code uses vector registers\nmore and more, the penalty for user space code to use vector registers\neven more keeps shrinking.\n\n> But now that I've actually looked at the glibc version, it does do some\n> checks for minimum length before doing any vector instructions, so\n> that's probably all we want. The exact trade off between checking some\n> bytes without vector vs turning on vector depends on your input data, so\n> it's tricky to tune in general.\n\nYou also need nasty code to deal with the start and end of strings, with\nconditional branches and whatnot, which quickly overwhelms the benefit\nof using vector registers at all.  This tradeoff also changes with newer\nISA versions.\n\nThings have to become *really* cheap before it will be good to often use\nvector registers in the kernel though.\n\n\nSegher","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y2CFs6fGJz9tXp\n\tfor <patchwork-incoming@ozlabs.org>;\n\tWed, 27 Sep 2017 19:29:37 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y2CFs5pJ2zDsSb\n\tfor <patchwork-incoming@ozlabs.org>;\n\tWed, 27 Sep 2017 19:29:37 +1000 (AEST)","from gate.crashing.org (gate.crashing.org [63.228.1.57])\n\t(using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y2CDM5XDYzDsPQ\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tWed, 27 Sep 2017 19:28:19 +1000 (AEST)","from gate.crashing.org (localhost.localdomain [127.0.0.1])\n\tby gate.crashing.org (8.14.1/8.13.8) with ESMTP id v8R9RxPh020971;\n\tWed, 27 Sep 2017 04:27:59 -0500","(from segher@localhost)\n\tby gate.crashing.org (8.14.1/8.14.1/Submit) id v8R9RwdZ020970;\n\tWed, 27 Sep 2017 04:27:58 -0500"],"Authentication-Results":"ozlabs.org; spf=permerror (mailfrom)\n\tsmtp.mailfrom=gate.crashing.org (client-ip=63.228.1.57;\n\thelo=gate.crashing.org; \n\tenvelope-from=segher@gate.crashing.org; receiver=<UNKNOWN>)","Date":"Wed, 27 Sep 2017 04:27:58 -0500","From":"Segher Boessenkool <segher@kernel.crashing.org>","To":"Michael Ellerman <mpe@ellerman.id.au>","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","Message-ID":"<20170927092758.GL8421@gate.crashing.org>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>\n\t<20170923211843.GA10899@simonLocalRHEL7.x64>\n\t<1506383986.2918.4.camel@gmail.com>\n\t<87k20mf2fn.fsf@concordia.ellerman.id.au>\n\t<20170926112623.GZ8421@gate.crashing.org>\n\t<877ewkg6am.fsf@concordia.ellerman.id.au>","Mime-Version":"1.0","Content-Type":"text/plain; charset=us-ascii","Content-Disposition":"inline","In-Reply-To":"<877ewkg6am.fsf@concordia.ellerman.id.au>","User-Agent":"Mutt/1.4.2.3i","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"Simon Guo <wei.guo.simon@gmail.com>, raji@linux.vnet.ibm.com,\n\tDavid Laight <David.Laight@ACULAB.COM>,\n\t\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\tlinuxppc-dev@lists.ozlabs.org, Cyril Bur <cyrilbur@gmail.com>","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1776190,"web_url":"http://patchwork.ozlabs.org/comment/1776190/","msgid":"<063D6719AE5E284EB5DD2968C1650D6DD0082414@AcuExch.aculab.com>","date":"2017-09-27T09:43:44","subject":"RE: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","submitter":{"id":6689,"url":"http://patchwork.ozlabs.org/api/people/6689/","name":"David Laight","email":"David.Laight@ACULAB.COM"},"content":"From: Segher Boessenkool\n> Sent: 27 September 2017 10:28\n...\n> You also need nasty code to deal with the start and end of strings, with\n> conditional branches and whatnot, which quickly overwhelms the benefit\n> of using vector registers at all.  This tradeoff also changes with newer\n> ISA versions.\n\nThe goal posts keep moving.\nFor instance with modern intel x86 cpus 'rep movsb' is by far the fastest\nway to copy data (from cached memory).\n\n> Things have to become *really* cheap before it will be good to often use\n> vector registers in the kernel though.\n\nI've had thoughts about this in the past.\nIf the vector registers belong to the current process then you might\nget away with just saving the ones you want to use.\nIf they belong to a different process then you also need to tell the\nFPU save code where you've saved the registers.\nThen the IPI code can recover all the correct values.\n\nOn X86 all the AVX registers are caller saved, the system call\nentry could issue the instruction that invalidates them all.\nKernel code running in the context of a user process could then\nuse the registers without saving them.\nIt would only need to set a mark to ensure they are invalidated\nagain on return to user (might be cheap enough to do anyway).\nDunno about PPC though.\n\n\tDavid","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y2Cbc1slTz9sBd\n\tfor <patchwork-incoming@ozlabs.org>;\n\tWed, 27 Sep 2017 19:45:00 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y2Cbc0tyxzDsQ9\n\tfor <patchwork-incoming@ozlabs.org>;\n\tWed, 27 Sep 2017 19:45:00 +1000 (AEST)","from smtp-out6.electric.net (smtp-out6.electric.net\n\t[192.162.217.183])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y2CZK3Kb5zDsPQ\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tWed, 27 Sep 2017 19:43:51 +1000 (AEST)","from 1dx8sZ-0002FF-V9 by out6a.electric.net with emc1-ok (Exim\n\t4.87) (envelope-from <David.Laight@ACULAB.COM>)\n\tid 1dx8sa-0002Ql-Va; Wed, 27 Sep 2017 02:43:44 -0700","by emcmailer; Wed, 27 Sep 2017 02:43:44 -0700","from [156.67.243.126] (helo=AcuExch.aculab.com)\n\tby out6a.electric.net with esmtps (TLSv1:AES128-SHA:128) (Exim 4.87)\n\t(envelope-from <David.Laight@ACULAB.COM>)\n\tid 1dx8sZ-0002FF-V9; Wed, 27 Sep 2017 02:43:43 -0700","from ACUEXCH.Aculab.com ([::1]) by AcuExch.aculab.com ([::1]) with\n\tmapi id 14.03.0123.003; Wed, 27 Sep 2017 10:43:44 +0100"],"Authentication-Results":"ozlabs.org;\n\tspf=softfail (mailfrom) smtp.mailfrom=aculab.com\n\t(client-ip=192.162.217.183; helo=smtp-out6.electric.net;\n\tenvelope-from=david.laight@aculab.com; receiver=<UNKNOWN>)","From":"David Laight <David.Laight@ACULAB.COM>","To":"'Segher Boessenkool' <segher@kernel.crashing.org>, Michael Ellerman\n\t<mpe@ellerman.id.au>","Subject":"RE: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","Thread-Topic":"[PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","Thread-Index":"AQHTM2UX+37OVPEjQUScD5WZwJQviqLA4IEAgAILAoCAA1GpAIAAXY0AgABiSwCAAQ+BgIAAYb0AgAASorA=","Date":"Wed, 27 Sep 2017 09:43:44 +0000","Message-ID":"<063D6719AE5E284EB5DD2968C1650D6DD0082414@AcuExch.aculab.com>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>\n\t<20170923211843.GA10899@simonLocalRHEL7.x64>\n\t<1506383986.2918.4.camel@gmail.com>\n\t<87k20mf2fn.fsf@concordia.ellerman.id.au>\n\t<20170926112623.GZ8421@gate.crashing.org>\n\t<877ewkg6am.fsf@concordia.ellerman.id.au>\n\t<20170927092758.GL8421@gate.crashing.org>","In-Reply-To":"<20170927092758.GL8421@gate.crashing.org>","Accept-Language":"en-GB, en-US","Content-Language":"en-US","X-MS-Has-Attach":"","X-MS-TNEF-Correlator":"","x-originating-ip":"[10.202.99.200]","Content-Type":"text/plain; charset=\"Windows-1252\"","Content-Transfer-Encoding":"quoted-printable","MIME-Version":"1.0","X-Outbound-IP":"156.67.243.126","X-Env-From":"David.Laight@ACULAB.COM","X-Proto":"esmtps","X-Revdns":"","X-HELO":"AcuExch.aculab.com","X-TLS":"TLSv1:AES128-SHA:128","X-Authenticated_ID":"","X-PolicySMART":"3396946, 3397078","X-Virus-Status":["Scanned by VirusSMART (c)","Scanned by VirusSMART (s)"],"X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"\"raji@linux.vnet.ibm.com\" <raji@linux.vnet.ibm.com>,\n\t\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\t\"linuxppc-dev@lists.ozlabs.org\" <linuxppc-dev@lists.ozlabs.org>,\n\tSimon Guo <wei.guo.simon@gmail.com>, Cyril Bur <cyrilbur@gmail.com>","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1776733,"web_url":"http://patchwork.ozlabs.org/comment/1776733/","msgid":"<20170927162243.GA2752@simonLocalRHEL7.x64>","date":"2017-09-27T16:22:44","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","submitter":{"id":68632,"url":"http://patchwork.ozlabs.org/api/people/68632/","name":"Simon Guo","email":"wei.guo.simon@gmail.com"},"content":"Hi Michael,\nOn Wed, Sep 27, 2017 at 01:38:09PM +1000, Michael Ellerman wrote:\n> Segher Boessenkool <segher@kernel.crashing.org> writes:\n> \n> > On Tue, Sep 26, 2017 at 03:34:36PM +1000, Michael Ellerman wrote:\n> >> Cyril Bur <cyrilbur@gmail.com> writes:\n> >> > This was written for userspace which doesn't have to explicitly enable\n> >> > VMX in order to use it - we need to be smarter in the kernel.\n> >> \n> >> Well the kernel has to do it for them after a trap, which is actually\n> >> even more expensive, so arguably the glibc code should be smarter too\n> >> and the threshold before using VMX should probably be higher than in the\n> >> kernel (to cover the cost of the trap).\n> >\n> > A lot of userspace code uses V*X, more and more with newer CPUs and newer\n> > compiler versions.  If you already paid the price for using vector\n> > registers you do not need to again :-)\n> \n> True, but you don't know if you've paid the price already.\n> \n> You also pay the price on every context switch (more state to switch),\n> so it's not free even once enabled. Which is why the kernel will\n> eventually turn it off if it's unused again.\n> \n> But now that I've actually looked at the glibc version, it does do some\n> checks for minimum length before doing any vector instructions, so\n\nLooks the glibc version will use VSX instruction and lead to trap in a \n9 bytes size cmp with src/dst 16 bytes aligned. \n 132         /* Now both rSTR1 and rSTR2 are aligned to QW.  */\n 133         .align  4\n 134 L(qw_align):\n 135         vspltisb        v0, 0\n 136         srdi.   r6, rN, 6\n 137         li      r8, 16\n 138         li      r10, 32\n 139         li      r11, 48\n 140         ble     cr0, L(lessthan64)\n 141         mtctr   r6\n 142         vspltisb        v8, 0\n 143         vspltisb        v6, 0\n 144         /* Aligned vector loop.  */\n 145         .align  4\nline 135 is the VSX instruction causing trap. Did I miss anything?\n\n> that's probably all we want. The exact trade off between checking some\n> bytes without vector vs turning on vector depends on your input data, so\n> it's tricky to tune in general.\n\nDiscussed offline with Cyril. The plan is to use (>=4KB) as the minimum len \nbefore vector regs steps at v3. Cyril will consolidate his existing work on \nKSM optimization later, which is probably making 64bytes comparison-ahead to \ndetermine whether it is an early or late matching pattern.\n\nCyril has also some other valuable comments and I will rework on v3.\n\nIs it OK for you?\n\nThanks,\n- Simon","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y2fxD6jDsz9t33\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 28 Sep 2017 13:16:48 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y2fxD5CP3zDsPv\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 28 Sep 2017 13:16:48 +1000 (AEST)","from mail-pf0-x241.google.com (mail-pf0-x241.google.com\n\t[IPv6:2607:f8b0:400e:c00::241])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y2fvR5XgdzDsM9\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tThu, 28 Sep 2017 13:15:14 +1000 (AEST)","by mail-pf0-x241.google.com with SMTP id f84so252621pfj.3\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tWed, 27 Sep 2017 20:15:14 -0700 (PDT)","from localhost ([101.224.174.51]) by smtp.gmail.com with ESMTPSA id\n\td69sm527577pfl.50.2017.09.27.20.15.11\n\t(version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);\n\tWed, 27 Sep 2017 20:15:11 -0700 (PDT)"],"Authentication-Results":["ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"qaDsFVIJ\"; dkim-atps=neutral","lists.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"qaDsFVIJ\"; dkim-atps=neutral","ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=gmail.com\n\t(client-ip=2607:f8b0:400e:c00::241; helo=mail-pf0-x241.google.com;\n\tenvelope-from=wei.guo.simon@gmail.com; receiver=<UNKNOWN>)","lists.ozlabs.org; dkim=pass (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"qaDsFVIJ\"; dkim-atps=neutral"],"DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;\n\th=date:from:to:cc:subject:message-id:references:mime-version\n\t:content-disposition:in-reply-to:user-agent;\n\tbh=8yu6xjeVbPZg3pSz6EIkgqVxKY0M3JSu1i0o9VeKezs=;\n\tb=qaDsFVIJDrJhtaZXejJjdXdg+2MQJXl6+MdDOr5GEsUamRqwMHv1VzGn6uHc2AenKW\n\t67TvPQzYLbjos51+2aCx9ZX3+dUAC5vsdENd40sbhqe5Dy3gn06G2gKXdrfgt25jqK1l\n\tP5yPGbKG6reyEQVrmP6XQad7Jn1AYK+Y2Vcdjl/cYOyT+VHnpuV/DRwTRdQe9h32XZUp\n\tHr5Y9hBeJJDwZBomgooX0q4mZ+xzJQb+O+/rN+NVYzGIUA9bCL4RQmUcXsoQdOftXF6h\n\taseiIwvtkZHnKHXIFwuAopJUkFRlmPtzNmqaFCmMqScPSgIvWQjL6pPlHbpj//br2frM\n\tZIMg==","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:date:from:to:cc:subject:message-id:references\n\t:mime-version:content-disposition:in-reply-to:user-agent;\n\tbh=8yu6xjeVbPZg3pSz6EIkgqVxKY0M3JSu1i0o9VeKezs=;\n\tb=nEjuOrIZ4dBrkD7RlD2W7T/3YEz8dv0qoRFZfND39Iyv/Y+oOr/u/iipczkVbWOEul\n\trBzvFia4bfEj5+qKeQPTkhFjr/GSZ/FFxCnqLe7Szitot2jnG8GB3SUSj9u6lfXSktun\n\t3PoK2L6aFozdMUKTXUDaQMrIGvJB70CmiKNCsS5hbYBClRJSbEkqJ3/pvzxUmJBztt7H\n\tc2OmOctoFBoh92807msEvW4T1z55xNCii7eqCaXQxAwWtCIRJpCsgfpegjOUOzcXdl55\n\t2KCHDnE05Gsq5lhPIgoKprz51LEiuXod9juKdhS85YjJobwoIMSTHJz/F5jQj6ngrUz9\n\tvIPA==","X-Gm-Message-State":"AHPjjUj6ruDKDOvPB+Mz28NkVHakS6G3iDKCpP+agIq7aD+1seR7quiT\n\t6CBFiTAAjTMEWKiBp5cxe4c=","X-Google-Smtp-Source":"AOwi7QAiGsjwQ0tnsKFbxMRZ/yQBXa582zr9a/wLoeQL09poT1tqejlN+xLRGdt6NbgNXSVDkjYY2w==","X-Received":"by 10.99.121.141 with SMTP id u135mr3005829pgc.262.1506568512305;\n\tWed, 27 Sep 2017 20:15:12 -0700 (PDT)","Date":"Thu, 28 Sep 2017 00:22:44 +0800","From":"Simon Guo <wei.guo.simon@gmail.com>","To":"Michael Ellerman <mpe@ellerman.id.au>","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","Message-ID":"<20170927162243.GA2752@simonLocalRHEL7.x64>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>\n\t<20170923211843.GA10899@simonLocalRHEL7.x64>\n\t<1506383986.2918.4.camel@gmail.com>\n\t<87k20mf2fn.fsf@concordia.ellerman.id.au>\n\t<20170926112623.GZ8421@gate.crashing.org>\n\t<877ewkg6am.fsf@concordia.ellerman.id.au>","MIME-Version":"1.0","Content-Type":"text/plain; charset=us-ascii","Content-Disposition":"inline","In-Reply-To":"<877ewkg6am.fsf@concordia.ellerman.id.au>","User-Agent":"Mutt/1.5.21 (2010-09-15)","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"raji@linux.vnet.ibm.com, David Laight <David.Laight@ACULAB.COM>,\n\t\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\tlinuxppc-dev@lists.ozlabs.org, Cyril Bur <cyrilbur@gmail.com>","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1776777,"web_url":"http://patchwork.ozlabs.org/comment/1776777/","msgid":"<20170927183346.GB2752@simonLocalRHEL7.x64>","date":"2017-09-27T18:33:46","subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","submitter":{"id":68632,"url":"http://patchwork.ozlabs.org/api/people/68632/","name":"Simon Guo","email":"wei.guo.simon@gmail.com"},"content":"On Wed, Sep 27, 2017 at 09:43:44AM +0000, David Laight wrote:\n> From: Segher Boessenkool\n> > Sent: 27 September 2017 10:28\n> ...\n> > You also need nasty code to deal with the start and end of strings, with\n> > conditional branches and whatnot, which quickly overwhelms the benefit\n> > of using vector registers at all.  This tradeoff also changes with newer\n> > ISA versions.\n> \n> The goal posts keep moving.\n> For instance with modern intel x86 cpus 'rep movsb' is by far the fastest\n> way to copy data (from cached memory).\n> \n> > Things have to become *really* cheap before it will be good to often use\n> > vector registers in the kernel though.\n> \n> I've had thoughts about this in the past.\n> If the vector registers belong to the current process then you might\n> get away with just saving the ones you want to use.\n> If they belong to a different process then you also need to tell the\n> FPU save code where you've saved the registers.\n> Then the IPI code can recover all the correct values.\n> \n> On X86 all the AVX registers are caller saved, the system call\n> entry could issue the instruction that invalidates them all.\n> Kernel code running in the context of a user process could then\n> use the registers without saving them.\n> It would only need to set a mark to ensure they are invalidated\n> again on return to user (might be cheap enough to do anyway).\n> Dunno about PPC though.\n\nI am not aware of any ppc instruction which can set a \"mark\" or provide \nany high granularity flag against single or subgroup of vec regs' validity.\nBut ppc experts may want to correct me.\n\nThanks,\n- Simon","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y2jrJ3mrCz9t5C\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 28 Sep 2017 15:27:44 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y2jrJ2hxRzDsQf\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 28 Sep 2017 15:27:44 +1000 (AEST)","from mail-pg0-x243.google.com (mail-pg0-x243.google.com\n\t[IPv6:2607:f8b0:400e:c05::243])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y2jpd4pBdzDsPp\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tThu, 28 Sep 2017 15:26:16 +1000 (AEST)","by mail-pg0-x243.google.com with SMTP id u18so680777pgo.1\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tWed, 27 Sep 2017 22:26:16 -0700 (PDT)","from localhost ([101.224.174.51]) by smtp.gmail.com with ESMTPSA id\n\tu8sm983277pgq.52.2017.09.27.22.26.13\n\t(version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);\n\tWed, 27 Sep 2017 22:26:13 -0700 (PDT)"],"Authentication-Results":["ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"Hz5CytHJ\"; dkim-atps=neutral","lists.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"Hz5CytHJ\"; dkim-atps=neutral","ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=gmail.com\n\t(client-ip=2607:f8b0:400e:c05::243; helo=mail-pg0-x243.google.com;\n\tenvelope-from=wei.guo.simon@gmail.com; receiver=<UNKNOWN>)","lists.ozlabs.org; dkim=pass (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"Hz5CytHJ\"; dkim-atps=neutral"],"DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;\n\th=date:from:to:cc:subject:message-id:references:mime-version\n\t:content-disposition:in-reply-to:user-agent;\n\tbh=Ouqpl4FX4alSXATmY2MQS1HfAm6bi27s1wp/5y9GkwU=;\n\tb=Hz5CytHJSR1rSDT2lNS8mPFn6ySYUOayYo84FTbi77tsrSM14uJNhBf2H84LJzLoH4\n\tomc6Zul62/m6OhEQ4Bp2EVMoXkbQyds10tI9tQr1bYirgzBVsdzulw2IqNXiVJsTpAUn\n\tT5yuneI1qJkVwvStElRMX+df1xT5NenZzMRH3uhm1HX4QBF3jzctlX+t1poCejc8ClZm\n\tcUtSIziLhxIIJeapyV6u4LDXT+ECOM5ydZ1/DLSzdUHazFul2cvC6ar1YpcdTI9TBu/k\n\tUM1e0q/Lg/W6AVQ1ctVFJRXNyzICEo5C9TuF3WAqGo2hGQ7S8j2KSf5V90aYNQ3KBUvL\n\tftmA==","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:date:from:to:cc:subject:message-id:references\n\t:mime-version:content-disposition:in-reply-to:user-agent;\n\tbh=Ouqpl4FX4alSXATmY2MQS1HfAm6bi27s1wp/5y9GkwU=;\n\tb=sMtmgaWCl6/w8ZYT8RkT+yaPq9k4MxqZDj3xkeFlbPSzpUtnOwtV8K/DefWGYtM/dc\n\t28rSUd5kdcawHYKEiC0l9P0HtBJ6p2HJeRoGFEdtrAZ24b/d+41ncItxsPEHb2R6yjj+\n\tHkxnwz/M2Zz9DmdnLDUupXu4tFW4f7/s3ypXRcqeqFQXDbPL3/grDUTosagNpmGsnYvm\n\t6wotZIfLIsoOlTbTGvydhfUja76L+mvFR2sC983SX1oePq1VqJnZAxdwjngUchkj/K8l\n\t3GEjv0m62oWYKXZQY/eS8eHZyyF+Xmr1SxClEio2cOvsZpSZEaskAue4bn9QwcwOXQdE\n\tEThw==","X-Gm-Message-State":"AHPjjUiJnqcB0n/2lVUrvQS1KZgvQkCsAEaztZrLg+GJ0Lk8U02am8kN\n\tIYcRFfM4qnhBzRI/ZDSRf/g=","X-Google-Smtp-Source":"AOwi7QD1lDsnoLw0uS43Wm4eyiXdrzaMbYDRx6urd3Uo5bvXyJMD4Z4NjE9FDZaU9bsbBSfvsy6fsQ==","X-Received":"by 10.99.66.196 with SMTP id p187mr3326706pga.0.1506576374744;\n\tWed, 27 Sep 2017 22:26:14 -0700 (PDT)","Date":"Thu, 28 Sep 2017 02:33:46 +0800","From":"Simon Guo <wei.guo.simon@gmail.com>","To":"David Laight <David.Laight@ACULAB.COM>","Subject":"Re: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX instruction\n\tfor long bytes comparision","Message-ID":"<20170927183346.GB2752@simonLocalRHEL7.x64>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>\n\t<20170923211843.GA10899@simonLocalRHEL7.x64>\n\t<1506383986.2918.4.camel@gmail.com>\n\t<87k20mf2fn.fsf@concordia.ellerman.id.au>\n\t<20170926112623.GZ8421@gate.crashing.org>\n\t<877ewkg6am.fsf@concordia.ellerman.id.au>\n\t<20170927092758.GL8421@gate.crashing.org>\n\t<063D6719AE5E284EB5DD2968C1650D6DD0082414@AcuExch.aculab.com>","MIME-Version":"1.0","Content-Type":"text/plain; charset=us-ascii","Content-Disposition":"inline","In-Reply-To":"<063D6719AE5E284EB5DD2968C1650D6DD0082414@AcuExch.aculab.com>","User-Agent":"Mutt/1.5.21 (2010-09-15)","X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"\"raji@linux.vnet.ibm.com\" <raji@linux.vnet.ibm.com>,\n\t\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\t\"linuxppc-dev@lists.ozlabs.org\" <linuxppc-dev@lists.ozlabs.org>,\n\tCyril Bur <cyrilbur@gmail.com>","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}},{"id":1776906,"web_url":"http://patchwork.ozlabs.org/comment/1776906/","msgid":"<063D6719AE5E284EB5DD2968C1650D6DD0083059@AcuExch.aculab.com>","date":"2017-09-28T09:24:17","subject":"RE: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","submitter":{"id":6689,"url":"http://patchwork.ozlabs.org/api/people/6689/","name":"David Laight","email":"David.Laight@ACULAB.COM"},"content":"From: Simon Guo\n> Sent: 27 September 2017 19:34\n...\n> > On X86 all the AVX registers are caller saved, the system call\n> > entry could issue the instruction that invalidates them all.\n> > Kernel code running in the context of a user process could then\n> > use the registers without saving them.\n> > It would only need to set a mark to ensure they are invalidated\n> > again on return to user (might be cheap enough to do anyway).\n> > Dunno about PPC though.\n> \n> I am not aware of any ppc instruction which can set a \"mark\" or provide\n> any high granularity flag against single or subgroup of vec regs' validity.\n> But ppc experts may want to correct me.\n\nI was just thinking of a software flag.\n\n\tDavid","headers":{"Return-Path":"<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>","X-Original-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Delivered-To":["patchwork-incoming@ozlabs.org","linuxppc-dev@lists.ozlabs.org"],"Received":["from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\t(using TLSv1.2 with cipher ADH-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3y2q7N2gDnz9t3B\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 28 Sep 2017 19:26:08 +1000 (AEST)","from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 3y2q7N1bVpzDsRN\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 28 Sep 2017 19:26:08 +1000 (AEST)","from smtp-out4.electric.net (smtp-out4.electric.net\n\t[192.162.216.181])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256\n\tbits)) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 3y2q5M1WfJzDsPp\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tThu, 28 Sep 2017 19:24:22 +1000 (AEST)","from 1dxV3I-0000pl-VD by out4b.electric.net with emc1-ok (Exim\n\t4.87) (envelope-from <David.Laight@ACULAB.COM>)\n\tid 1dxV3J-0000vi-VM; Thu, 28 Sep 2017 02:24:17 -0700","by emcmailer; Thu, 28 Sep 2017 02:24:17 -0700","from [156.67.243.126] (helo=AcuExch.aculab.com)\n\tby out4b.electric.net with esmtps (TLSv1:AES128-SHA:128) (Exim 4.87)\n\t(envelope-from <David.Laight@ACULAB.COM>)\n\tid 1dxV3I-0000pl-VD; Thu, 28 Sep 2017 02:24:16 -0700","from ACUEXCH.Aculab.com ([::1]) by AcuExch.aculab.com ([::1]) with\n\tmapi id 14.03.0123.003; Thu, 28 Sep 2017 10:24:18 +0100"],"Authentication-Results":"ozlabs.org;\n\tspf=softfail (mailfrom) smtp.mailfrom=aculab.com\n\t(client-ip=192.162.216.181; helo=smtp-out4.electric.net;\n\tenvelope-from=david.laight@aculab.com; receiver=<UNKNOWN>)","From":"David Laight <David.Laight@ACULAB.COM>","To":"'Simon Guo' <wei.guo.simon@gmail.com>","Subject":"RE: [PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","Thread-Topic":"[PATCH v2 2/3] powerpc/64: enhance memcmp() with VMX\n\tinstruction for long bytes comparision","Thread-Index":"AQHTM2UX+37OVPEjQUScD5WZwJQviqLA4IEAgAILAoCAA1GpAIAAXY0AgABiSwCAAQ+BgIAAYb0AgAASorCAAIXcAIABCV9g","Date":"Thu, 28 Sep 2017 09:24:17 +0000","Message-ID":"<063D6719AE5E284EB5DD2968C1650D6DD0083059@AcuExch.aculab.com>","References":"<1505950480-14830-1-git-send-email-wei.guo.simon@gmail.com>\n\t<1505950480-14830-3-git-send-email-wei.guo.simon@gmail.com>\n\t<1506089208.1155.32.camel@gmail.com>\n\t<20170923211843.GA10899@simonLocalRHEL7.x64>\n\t<1506383986.2918.4.camel@gmail.com>\n\t<87k20mf2fn.fsf@concordia.ellerman.id.au>\n\t<20170926112623.GZ8421@gate.crashing.org>\n\t<877ewkg6am.fsf@concordia.ellerman.id.au>\n\t<20170927092758.GL8421@gate.crashing.org>\n\t<063D6719AE5E284EB5DD2968C1650D6DD0082414@AcuExch.aculab.com>\n\t<20170927183346.GB2752@simonLocalRHEL7.x64>","In-Reply-To":"<20170927183346.GB2752@simonLocalRHEL7.x64>","Accept-Language":"en-GB, en-US","Content-Language":"en-US","X-MS-Has-Attach":"","X-MS-TNEF-Correlator":"","x-originating-ip":"[10.202.99.200]","Content-Type":"text/plain; charset=\"Windows-1252\"","Content-Transfer-Encoding":"quoted-printable","MIME-Version":"1.0","X-Outbound-IP":"156.67.243.126","X-Env-From":"David.Laight@ACULAB.COM","X-Proto":"esmtps","X-Revdns":"","X-HELO":"AcuExch.aculab.com","X-TLS":"TLSv1:AES128-SHA:128","X-Authenticated_ID":"","X-PolicySMART":"3396946, 3397078","X-Virus-Status":["Scanned by VirusSMART (c)","Scanned by VirusSMART (s)"],"X-BeenThere":"linuxppc-dev@lists.ozlabs.org","X-Mailman-Version":"2.1.24","Precedence":"list","List-Id":"Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>","List-Unsubscribe":"<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>","List-Archive":"<http://lists.ozlabs.org/pipermail/linuxppc-dev/>","List-Post":"<mailto:linuxppc-dev@lists.ozlabs.org>","List-Help":"<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>","List-Subscribe":"<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>","Cc":"\"raji@linux.vnet.ibm.com\" <raji@linux.vnet.ibm.com>,\n\t\"Naveen N.  Rao\" <naveen.n.rao@linux.vnet.ibm.com>,\n\t\"linuxppc-dev@lists.ozlabs.org\" <linuxppc-dev@lists.ozlabs.org>,\n\tCyril Bur <cyrilbur@gmail.com>","Errors-To":"linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org","Sender":"\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>"}}]