Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/806763/?format=api
{ "id": 806763, "url": "http://patchwork.ozlabs.org/api/patches/806763/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/patch/3b753f75-ada4-6f84-a497-8fce45187ae9@linux.vnet.ibm.com/", "project": { "id": 17, "url": "http://patchwork.ozlabs.org/api/projects/17/?format=api", "name": "GNU Compiler Collection", "link_name": "gcc", "list_id": "gcc-patches.gcc.gnu.org", "list_email": "gcc-patches@gcc.gnu.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<3b753f75-ada4-6f84-a497-8fce45187ae9@linux.vnet.ibm.com>", "list_archive_url": null, "date": "2017-08-28T20:56:21", "name": "[rs6000] Fix PR81833 (incorrect code gen for vec_msum)", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "63a953bfae72d3e5f700bc938e678642208c101c", "submitter": { "id": 6459, "url": "http://patchwork.ozlabs.org/api/people/6459/?format=api", "name": "Bill Schmidt", "email": "wschmidt@linux.vnet.ibm.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/3b753f75-ada4-6f84-a497-8fce45187ae9@linux.vnet.ibm.com/mbox/", "series": [ { "id": 257, "url": "http://patchwork.ozlabs.org/api/series/257/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/list/?series=257", "date": "2017-08-28T20:56:21", "name": "[rs6000] Fix PR81833 (incorrect code gen for vec_msum)", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/257/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/806763/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/806763/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<gcc-patches-return-461050-incoming=patchwork.ozlabs.org@gcc.gnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": [ "patchwork-incoming@bilbo.ozlabs.org", "mailing list gcc-patches@gcc.gnu.org" ], "Authentication-Results": [ "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org\n\t(client-ip=209.132.180.131; helo=sourceware.org;\n\tenvelope-from=gcc-patches-return-461050-incoming=patchwork.ozlabs.org@gcc.gnu.org;\n\treceiver=<UNKNOWN>)", "ozlabs.org; dkim=pass (1024-bit key;\n\tunprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org\n\theader.b=\"tso8t3UD\"; dkim-atps=neutral", "sourceware.org; auth=none" ], "Received": [ "from sourceware.org (server1.sourceware.org [209.132.180.131])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256\n\tbits)) (No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xh3wr2j9Rz9sN7\n\tfor <incoming@patchwork.ozlabs.org>;\n\tTue, 29 Aug 2017 06:56:48 +1000 (AEST)", "(qmail 125500 invoked by alias); 28 Aug 2017 20:56:40 -0000", "(qmail 125489 invoked by uid 89); 28 Aug 2017 20:56:39 -0000", "from mx0a-001b2d01.pphosted.com (HELO mx0a-001b2d01.pphosted.com)\n\t(148.163.156.1) by sourceware.org\n\t(qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP;\n\tMon, 28 Aug 2017 20:56:29 +0000", "from pps.filterd (m0098399.ppops.net [127.0.0.1])\tby\n\tmx0a-001b2d01.pphosted.com (8.16.0.21/8.16.0.21) with SMTP id\n\tv7SKs3n9065978\tfor <gcc-patches@gcc.gnu.org>;\n\tMon, 28 Aug 2017 16:56:25 -0400", "from e14.ny.us.ibm.com (e14.ny.us.ibm.com [129.33.205.204])\tby\n\tmx0a-001b2d01.pphosted.com with ESMTP id\n\t2cmp7nphmu-1\t(version=TLSv1.2 cipher=AES256-SHA bits=256\n\tverify=NOT)\tfor <gcc-patches@gcc.gnu.org>;\n\tMon, 28 Aug 2017 16:56:25 -0400", "from localhost\tby e14.ny.us.ibm.com with IBM ESMTP SMTP Gateway:\n\tAuthorized Use Only! Violators will be prosecuted\tfor\n\t<gcc-patches@gcc.gnu.org> from <wschmidt@linux.vnet.ibm.com>;\n\tMon, 28 Aug 2017 16:56:24 -0400", "from b01cxnp22035.gho.pok.ibm.com (9.57.198.25)\tby\n\te14.ny.us.ibm.com (146.89.104.201) with IBM ESMTP SMTP\n\tGateway: Authorized Use Only! Violators will be prosecuted;\n\tMon, 28 Aug 2017 16:56:23 -0400", "from b01ledav006.gho.pok.ibm.com (b01ledav006.gho.pok.ibm.com\n\t[9.57.199.111])\tby b01cxnp22035.gho.pok.ibm.com\n\t(8.14.9/8.14.9/NCO v10.0) with ESMTP id v7SKuMrL33030162;\n\tMon, 28 Aug 2017 20:56:22 GMT", "from b01ledav006.gho.pok.ibm.com (unknown [127.0.0.1])\tby IMSVA\n\t(Postfix) with ESMTP id 94173AC03F;\n\tMon, 28 Aug 2017 16:56:46 -0400 (EDT)", "from bigmac.rchland.ibm.com (unknown [9.10.86.161])\tby\n\tb01ledav006.gho.pok.ibm.com (Postfix) with ESMTP id\n\t55B64AC040; Mon, 28 Aug 2017 16:56:46 -0400 (EDT)" ], "DomainKey-Signature": "a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id\n\t:list-unsubscribe:list-archive:list-post:list-help:sender:to:cc\n\t:from:subject:date:mime-version:content-type\n\t:content-transfer-encoding:message-id; q=dns; s=default; b=FXcvD\n\tSaqzTk18yVJGig8SPpBjRheLJ8jIOMNnpZsDv12Gyi7wYvpJZzpl2XzLtO6i7yng\n\tW1tCzR0rh41OfkCmN3t11HBmfAoN5HfRWD531p8gRzX0FrJkB9DnNy/v6Z3wLFZE\n\tezEn2TzxlUGH/Sotd38CnZwsvBTfo/u1OUgAv0=", "DKIM-Signature": "v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id\n\t:list-unsubscribe:list-archive:list-post:list-help:sender:to:cc\n\t:from:subject:date:mime-version:content-type\n\t:content-transfer-encoding:message-id; s=default; bh=S9BUTfbqyW7\n\tiHZjYH0UpsMzqIwE=; b=tso8t3UD1aKQ9814pOjxJuG76NH4Tj4qW0yYKSqEwIz\n\t0vDRsffTSUWg1xCFeW7o0CIGDUuFFdFspKeMmZKykOn0h1aA205S60SFceyNDrlG\n\thkPqj2oI1VBziWirkU6y/97bkwGOGYjtIee0UOosL1pGU14BMqC8Wv7sVPp50nV0\n\t=", "Mailing-List": "contact gcc-patches-help@gcc.gnu.org; run by ezmlm", "Precedence": "bulk", "List-Id": "<gcc-patches.gcc.gnu.org>", "List-Unsubscribe": "<mailto:gcc-patches-unsubscribe-incoming=patchwork.ozlabs.org@gcc.gnu.org>", "List-Archive": "<http://gcc.gnu.org/ml/gcc-patches/>", "List-Post": "<mailto:gcc-patches@gcc.gnu.org>", "List-Help": "<mailto:gcc-patches-help@gcc.gnu.org>", "Sender": "gcc-patches-owner@gcc.gnu.org", "X-Virus-Found": "No", "X-Spam-SWARE-Status": "No, score=-10.4 required=5.0 tests=AWL, BAYES_00,\n\tGIT_PATCH_2, GIT_PATCH_3, KAM_ASCII_DIVIDERS,\n\tKAM_LAZY_DOMAIN_SECURITY,\n\tRCVD_IN_DNSWL_LOW autolearn=ham version=3.3.2 spammy=", "X-HELO": "mx0a-001b2d01.pphosted.com", "To": "GCC Patches <gcc-patches@gcc.gnu.org>", "Cc": "Segher Boessenkool <segher@kernel.crashing.org>,\n\tDavid Edelsohn <dje.gcc@gmail.com>", "From": "Bill Schmidt <wschmidt@linux.vnet.ibm.com>", "Subject": "[PATCH, rs6000] Fix PR81833 (incorrect code gen for vec_msum)", "Date": "Mon, 28 Aug 2017 15:56:21 -0500", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12;\n\trv:52.0) Gecko/20100101 Thunderbird/52.3.0", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=utf-8", "Content-Transfer-Encoding": "7bit", "X-TM-AS-GCONF": "00", "x-cbid": "17082820-0052-0000-0000-000002572059", "X-IBM-SpamModules-Scores": "", "X-IBM-SpamModules-Versions": "BY=3.00007629; HX=3.00000241; KW=3.00000007;\n\tPH=3.00000004; SC=3.00000226; SDB=6.00908961; UDB=6.00455809;\n\tIPR=6.00689212; BA=6.00005557; NDR=6.00000001; ZLA=6.00000005;\n\tZF=6.00000009; ZB=6.00000000; ZP=6.00000000; ZH=6.00000000;\n\tZU=6.00000002; MB=3.00016907; XFM=3.00000015;\n\tUTC=2017-08-28 20:56:24", "X-IBM-AV-DETECTION": "SAVI=unused REMOTE=unused XFE=unused", "x-cbparentid": "17082820-0053-0000-0000-000051D17A4D", "Message-Id": "<3b753f75-ada4-6f84-a497-8fce45187ae9@linux.vnet.ibm.com>", "X-Proofpoint-Virus-Version": "vendor=fsecure engine=2.50.10432:, ,\n\tdefinitions=2017-08-28_12:, , signatures=0", "X-Proofpoint-Spam-Details": "rule=outbound_notspam policy=outbound score=0\n\tspamscore=0 suspectscore=0 malwarescore=0 phishscore=0\n\tadultscore=0 bulkscore=0 classifier=spam adjust=0 reason=mlx\n\tscancount=1 engine=8.0.1-1707230000\n\tdefinitions=main-1708280328", "X-IsSubscribed": "yes" }, "content": "Hi, \n\nPR81833 identifies a problem with the little-endian vector multiply-sum\ninstructions. The original implementation is quite poor (and I am allowed\nto say that, since it was mine). This patch fixes the code properly.\n\nThe revised code still uses UNSPECs for these ops, which is not strictly\nnecessary, although descriptive rtl for them would be pretty complex. I've\nput in a FIXME to make note of that for a future cleanup.\n\nBootstrapped and tested on powerpc64le-linux-gnu with no regressions. I am\ncurrently testing on powerpc64-linux-gnu for 32- and 64-bit. Provided that\ntesting succeeds, is this ok for trunk, and for eventual backport to all\nsupported releases?\n\nThanks,\nBill\n\n\n[gcc]\n\n2017-08-28 Bill Schmidt <wschmidt@linux.vnet.ibm.com>\n\n\tPR target/81833\n\t* config/rs6000/altivec.md (altivec_vsum2sws): Convert from a\n\tdefine_insn to a define_expand.\n\t(altivec_vsum2sws_direct): New define_insn.\n\t(altivec_vsumsws): Convert from a define_insn to a define_expand.\n\n[gcc/testsuite]\n\n2017-08-28 Bill Schmidt <wschmidt@linux.vnet.ibm.com>\n\n\tPR target/81833\n\t* gcc.target/powerpc/pr81833.c: New file.", "diff": "Index: gcc/config/rs6000/altivec.md\n===================================================================\n--- gcc/config/rs6000/altivec.md\t(revision 251369)\n+++ gcc/config/rs6000/altivec.md\t(working copy)\n@@ -1804,51 +1804,61 @@\n \"vsum4s<VI_char>s %0,%1,%2\"\n [(set_attr \"type\" \"veccomplex\")])\n \n-;; FIXME: For the following two patterns, the scratch should only be\n-;; allocated for !VECTOR_ELT_ORDER_BIG, and the instructions should\n-;; be emitted separately.\n-(define_insn \"altivec_vsum2sws\"\n- [(set (match_operand:V4SI 0 \"register_operand\" \"=v\")\n- (unspec:V4SI [(match_operand:V4SI 1 \"register_operand\" \"v\")\n- (match_operand:V4SI 2 \"register_operand\" \"v\")]\n-\t\t UNSPEC_VSUM2SWS))\n- (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))\n- (clobber (match_scratch:V4SI 3 \"=v\"))]\n+(define_expand \"altivec_vsum2sws\"\n+ [(use (match_operand:V4SI 0 \"register_operand\"))\n+ (use (match_operand:V4SI 1 \"register_operand\"))\n+ (use (match_operand:V4SI 2 \"register_operand\"))]\n \"TARGET_ALTIVEC\"\n {\n if (VECTOR_ELT_ORDER_BIG)\n- return \"vsum2sws %0,%1,%2\";\n+ emit_insn (gen_altivec_vsum2sws_direct (operands[0], operands[1],\n+ operands[2]));\n else\n- return \"vsldoi %3,%2,%2,12\\n\\tvsum2sws %3,%1,%3\\n\\tvsldoi %0,%3,%3,4\";\n-}\n- [(set_attr \"type\" \"veccomplex\")\n- (set (attr \"length\")\n- (if_then_else\n- (match_test \"VECTOR_ELT_ORDER_BIG\")\n- (const_string \"4\")\n- (const_string \"12\")))])\n+ {\n+ rtx tmp1 = gen_reg_rtx (V4SImode);\n+ rtx tmp2 = gen_reg_rtx (V4SImode);\n+ emit_insn (gen_altivec_vsldoi_v4si (tmp1, operands[2],\n+ operands[2], GEN_INT (12)));\n+ emit_insn (gen_altivec_vsum2sws_direct (tmp2, operands[1], tmp1));\n+ emit_insn (gen_altivec_vsldoi_v4si (operands[0], tmp2, tmp2,\n+ GEN_INT (4)));\n+ }\n+ DONE;\n+})\n \n-(define_insn \"altivec_vsumsws\"\n+; FIXME: This can probably be expressed without an UNSPEC.\n+(define_insn \"altivec_vsum2sws_direct\"\n [(set (match_operand:V4SI 0 \"register_operand\" \"=v\")\n (unspec:V4SI [(match_operand:V4SI 1 \"register_operand\" \"v\")\n- (match_operand:V4SI 2 \"register_operand\" \"v\")]\n-\t\t UNSPEC_VSUMSWS))\n- (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))\n- (clobber (match_scratch:V4SI 3 \"=v\"))]\n+\t (match_operand:V4SI 2 \"register_operand\" \"v\")]\n+\t\t UNSPEC_VSUM2SWS))]\n \"TARGET_ALTIVEC\"\n+ \"vsum2sws %0,%1,%2\"\n+ [(set_attr \"type\" \"veccomplex\")\n+ (set_attr \"length\" \"4\")])\n+\n+(define_expand \"altivec_vsumsws\"\n+ [(use (match_operand:V4SI 0 \"register_operand\"))\n+ (use (match_operand:V4SI 1 \"register_operand\"))\n+ (use (match_operand:V4SI 2 \"register_operand\"))]\n+ \"TARGET_ALTIVEC\"\n {\n if (VECTOR_ELT_ORDER_BIG)\n- return \"vsumsws %0,%1,%2\";\n+ emit_insn (gen_altivec_vsumsws_direct (operands[0], operands[1],\n+ operands[2]));\n else\n- return \"vspltw %3,%2,0\\n\\tvsumsws %3,%1,%3\\n\\tvsldoi %0,%3,%3,12\";\n-}\n- [(set_attr \"type\" \"veccomplex\")\n- (set (attr \"length\")\n- (if_then_else\n- (match_test \"(VECTOR_ELT_ORDER_BIG)\")\n- (const_string \"4\")\n- (const_string \"12\")))])\n+ {\n+ rtx tmp1 = gen_reg_rtx (V4SImode);\n+ rtx tmp2 = gen_reg_rtx (V4SImode);\n+ emit_insn (gen_altivec_vspltw_direct (tmp1, operands[2], const0_rtx));\n+ emit_insn (gen_altivec_vsumsws_direct (tmp2, operands[1], tmp1));\n+ emit_insn (gen_altivec_vsldoi_v4si (operands[0], tmp2, tmp2,\n+ GEN_INT (12)));\n+ }\n+ DONE;\n+})\n \n+; FIXME: This can probably be expressed without an UNSPEC.\n (define_insn \"altivec_vsumsws_direct\"\n [(set (match_operand:V4SI 0 \"register_operand\" \"=v\")\n (unspec:V4SI [(match_operand:V4SI 1 \"register_operand\" \"v\")\nIndex: gcc/testsuite/gcc.target/powerpc/pr81833.c\n===================================================================\n--- gcc/testsuite/gcc.target/powerpc/pr81833.c\t(nonexistent)\n+++ gcc/testsuite/gcc.target/powerpc/pr81833.c\t(working copy)\n@@ -0,0 +1,54 @@\n+/* PR81833: This used to fail due to improper implementation of vec_msum. */\n+\n+/* { dg-do run {target { lp64 } } } */\n+/* { dg-require-effective-target powerpc_altivec_ok } */\n+\n+#include <altivec.h>\n+\n+#define vec_u8 vector unsigned char\n+#define vec_s8 vector signed char\n+#define vec_u16 vector unsigned short\n+#define vec_s16 vector signed short\n+#define vec_u32 vector unsigned int\n+#define vec_s32 vector signed int\n+#define vec_f vector float\n+\n+#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8 (0)\n+\n+#define zero_u8v (vec_u8) zerov\n+#define zero_s8v (vec_s8) zerov\n+#define zero_u16v (vec_u16) zerov\n+#define zero_s16v (vec_s16) zerov\n+#define zero_u32v (vec_u32) zerov\n+#define zero_s32v (vec_s32) zerov\n+\n+signed int __attribute__((noinline))\n+scalarproduct_int16_vsx (const signed short *v1, const signed short *v2,\n+\t\t\t int order)\n+{\n+ int i;\n+ LOAD_ZERO;\n+ register vec_s16 vec1;\n+ register vec_s32 res = vec_splat_s32 (0), t;\n+ signed int ires;\n+\n+ for (i = 0; i < order; i += 8) {\n+ vec1 = vec_vsx_ld (0, v1);\n+ t = vec_msum (vec1, vec_ld (0, v2), zero_s32v);\n+ res = vec_sums (t, res);\n+ v1 += 8;\n+ v2 += 8;\n+ }\n+ res = vec_splat (res, 3);\n+ vec_ste (res, 0, &ires);\n+\n+ return ires;\n+}\n+\n+int main(void)\n+{\n+ const signed short test_vec[] = { 1, 1, 1, 1, 1, 1, 1, 1 };\n+ if (scalarproduct_int16_vsx (test_vec, test_vec, 8) != 8)\n+ __builtin_abort ();\n+ return 0;\n+}\n", "prefixes": [ "rs6000" ] }