Patch Detail

GET /api/patches/2216468/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 2216468,
    "url": "http://patchwork.ozlabs.org/api/patches/2216468/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/gcc/patch/DHCRTLO0OG2I.36YG6NATGX5GE@gmail.com/",
    "project": {
        "id": 17,
        "url": "http://patchwork.ozlabs.org/api/projects/17/?format=api",
        "name": "GNU Compiler Collection",
        "link_name": "gcc",
        "list_id": "gcc-patches.gcc.gnu.org",
        "list_email": "gcc-patches@gcc.gnu.org",
        "web_url": null,
        "scm_url": null,
        "webscm_url": null,
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<DHCRTLO0OG2I.36YG6NATGX5GE@gmail.com>",
    "list_archive_url": null,
    "date": "2026-03-26T14:07:32",
    "name": "vect: Version for unsigned overflow. [PR121908]",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "e67a7e3b02d31cc7375455be6277152b4c031d72",
    "submitter": {
        "id": 86205,
        "url": "http://patchwork.ozlabs.org/api/people/86205/?format=api",
        "name": "Robin Dapp",
        "email": "rdapp.gcc@gmail.com"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/DHCRTLO0OG2I.36YG6NATGX5GE@gmail.com/mbox/",
    "series": [
        {
            "id": 497595,
            "url": "http://patchwork.ozlabs.org/api/series/497595/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/gcc/list/?series=497595",
            "date": "2026-03-26T14:07:32",
            "name": "vect: Version for unsigned overflow. [PR121908]",
            "version": 1,
            "mbox": "http://patchwork.ozlabs.org/series/497595/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/2216468/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/2216468/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>",
        "X-Original-To": [
            "incoming@patchwork.ozlabs.org",
            "gcc-patches@gcc.gnu.org"
        ],
        "Delivered-To": [
            "patchwork-incoming@legolas.ozlabs.org",
            "gcc-patches@gcc.gnu.org"
        ],
        "Authentication-Results": [
            "legolas.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=gmail.com header.i=@gmail.com header.a=rsa-sha256\n header.s=20251104 header.b=C1+/4asB;\n\tdkim-atps=neutral",
            "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org\n (client-ip=2620:52:6:3111::32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)",
            "sourceware.org;\n\tdkim=pass (2048-bit key,\n unprotected) header.d=gmail.com header.i=@gmail.com header.a=rsa-sha256\n header.s=20251104 header.b=C1+/4asB",
            "sourceware.org;\n dmarc=pass (p=none dis=none) header.from=gmail.com",
            "sourceware.org; spf=pass smtp.mailfrom=gmail.com",
            "server2.sourceware.org;\n arc=none smtp.remote-ip=209.85.221.49"
        ],
        "Received": [
            "from vm01.sourceware.org (vm01.sourceware.org\n [IPv6:2620:52:6:3111::32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fhQfq1PZvz1y1x\n\tfor <incoming@patchwork.ozlabs.org>; Fri, 27 Mar 2026 01:10:59 +1100 (AEDT)",
            "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id 4D6C84BA23DB\n\tfor <incoming@patchwork.ozlabs.org>; Thu, 26 Mar 2026 14:10:57 +0000 (GMT)",
            "from mail-wr1-f49.google.com (mail-wr1-f49.google.com\n [209.85.221.49])\n by sourceware.org (Postfix) with ESMTPS id F3D144BA23EF\n for <gcc-patches@gcc.gnu.org>; Thu, 26 Mar 2026 14:07:35 +0000 (GMT)",
            "by mail-wr1-f49.google.com with SMTP id\n ffacd0b85a97d-43b4d734678so880602f8f.1\n for <gcc-patches@gcc.gnu.org>; Thu, 26 Mar 2026 07:07:35 -0700 (PDT)",
            "from localhost (ip-149-172-150-237.um42.pools.vodafone-ip.de.\n [149.172.150.237]) by smtp.gmail.com with ESMTPSA id\n 5b1f17b1804b1-48722c6b105sm66798385e9.1.2026.03.26.07.07.33\n (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128);\n Thu, 26 Mar 2026 07:07:33 -0700 (PDT)"
        ],
        "DKIM-Filter": [
            "OpenDKIM Filter v2.11.0 sourceware.org 4D6C84BA23DB",
            "OpenDKIM Filter v2.11.0 sourceware.org F3D144BA23EF"
        ],
        "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org F3D144BA23EF",
        "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org F3D144BA23EF",
        "ARC-Seal": "i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1774534056; cv=none;\n b=YpXdHPeY7vCe1+fzxODg48x9F1K0Wym2Lmv4Qx2JduiWN93Fq7lFEa7ZfbBXyHLQq4lEK36UNXqwd11rwaJby5XJUGOGYRyxLhbq+pX7kOVurBmGU9I7bnhtMh7SUh4zMGtqDPgsKB+3GtP+UHeAnAbaOHlwwO+1Rdg/VNsGtnM=",
        "ARC-Message-Signature": "i=1; a=rsa-sha256; d=sourceware.org; s=key;\n t=1774534056; c=relaxed/simple;\n bh=/lQoTrpUW9yX5B1TGiB/o4u5AjvlDTpUSPGEdcHrDjM=;\n h=DKIM-Signature:Mime-Version:Date:Message-Id:To:From:Subject;\n b=gRgVdItONlAVMRBV41Rzy53vFHqERX+9LUamsLDhUnlMaBIqOhRX5IFYHapg0agCmxOm7P4VSxKLiwWtjXWsrhxjb7hyK9rU+1alrG2nC6pTTAYJQ0bdowXaVhIdfBRiC4MpEDtv48uPCIZty4prFPYU+va5QHpq+MgkEn+G8B0=",
        "ARC-Authentication-Results": "i=1; server2.sourceware.org",
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=gmail.com; s=20251104; t=1774534054; x=1775138854; darn=gcc.gnu.org;\n h=subject:from:to:cc:message-id:date:content-transfer-encoding\n :mime-version:from:to:cc:subject:date:message-id:reply-to;\n bh=qIr6hcpF3YAnoML2rt24YI4+B46lQb1eXdVWf2B51l4=;\n b=C1+/4asBnAb+1F8QD3Ynsu8G4wFyQl/yMDmfYOWPkhe0z/5Ixl9Nm2TeaoiycJmM+/\n RFXKIGBtqJPPs5FrG6IHkQ2JBaSu5AdTVkldyifqxKGx28wLKOxvX8BEGoaxK3nev77j\n rkQ3gwIDTWnYOLj59qKWdlfQwj4K9KavJZcr82d5v7Qak1mDtJmw1zLUIkQGzkipy9hi\n aeab03jwSc2b3p0B/9zZRp2o73SnVgjLHJ/5DoXETRzIvmoom86PJQHQFkZ1LBxNLpZh\n rrNpM/v274/3dN3KOwe0X8wxD+2yeW+jV4KWPFbRJrlAH4SA6/0NxuejJSj8y+9Fgh8T\n VJcA==",
        "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20251104; t=1774534054; x=1775138854;\n h=subject:from:to:cc:message-id:date:content-transfer-encoding\n :mime-version:x-gm-gg:x-gm-message-state:from:to:cc:subject:date\n :message-id:reply-to;\n bh=qIr6hcpF3YAnoML2rt24YI4+B46lQb1eXdVWf2B51l4=;\n b=WXcjvUSKUvuJW+Bt0SoYFyk9rQwZ72bg70Gz8HyVObt/Fma06rABZMcdlZvr92KZJ0\n o2UHDhiZE4/40s++7xvMpgaPBxuN38NPpa+D/ZUpXfBRPLurvvW+Zgu3X6t11/WLbyKM\n 0xPdDQtHar+GWqbYTM+iJYDJdVm33evLOVAG2QnpNKa4yEy5W96VByB7Id3MP9lnLQ3V\n idVsQWxPIstR0CkD827LmRhh9x00cXRGbxYbvRdzrvmo2xuf8IQhGplX7JwxBvTri2yf\n BkzcrCseZZqbzTayzMM1uU5erNTdlM/MbAW57U4feu54ArlK2+7y7YK5lnq8tw58YgVu\n bGSQ==",
        "X-Gm-Message-State": "AOJu0Yzicn+F2od/s7aIi70OIukNNev9akxFI2xcgYCth6Z1lq/4yysy\n H7APD6Um+NkTIUJryctTk5RICfrrLPrdyQmXt/tMSdmlkVsSJb3ywdd/ikrcFg==",
        "X-Gm-Gg": "ATEYQzxNO5//3OjNDErk8HwrxtJ7T2/OSYo/eoNBVes5UxmasrVh+YZh9bH/N8ONhIT\n xK7yBM6vmK4bwZee0tcPDjD/H0bcPK3k2OmkRQUNJtoWOYzfHnLmeyLvSBkwN+3jJatnHHvC7vV\n hr6XxgbRpV0F6Hx7lEiHH+ZHQ/BbAE7l0RgQrhTYVtX55UwfHx5fFWWFM+5OXzPosREcqLdPH1C\n ugxjSAgv42TV7fXmAiJXqoVLX6I0gfcsnee4IlGxrH32tmLUSjcUvoL+dvmn75RsnJ2X8t1e4JT\n +ExfrCe+WvA8Bpe0czht14vVFXWRjC2gDTuuMCZNSy+wgW1T0JcxIvmv+iQjHKgjPWQ9eaWlcf4\n /zcqEcaDzyuMCoEWSB2RpiLyLgYX9R2OgoGTzXVuUCW+B7AoYz1mpcowTWuGwN/xGN0YTGsjwIh\n 50XxOMHY36acDzbahhVMH0q3avMg1bASoD6hqmOBZOf+wBscLSg/01PvfUT1YdM+mYfQpVgYwUg\n MuUDERmpCA=",
        "X-Received": "by 2002:a05:600c:3b12:b0:485:3b00:f92e with SMTP id\n 5b1f17b1804b1-48715fc370fmr117634715e9.2.1774534053882;\n Thu, 26 Mar 2026 07:07:33 -0700 (PDT)",
        "Mime-Version": "1.0",
        "Content-Transfer-Encoding": "quoted-printable",
        "Content-Type": "text/plain; charset=UTF-8",
        "Date": "Thu, 26 Mar 2026 15:07:32 +0100",
        "Message-Id": "<DHCRTLO0OG2I.36YG6NATGX5GE@gmail.com>",
        "Cc": "\"Robin Dapp\" <rdapp.gcc@gmail.com>",
        "To": "\"gcc-patches\" <gcc-patches@gcc.gnu.org>",
        "From": "\"Robin Dapp\" <rdapp.gcc@gmail.com>",
        "Subject": "[PATCH] vect: Version for unsigned overflow. [PR121908]",
        "X-BeenThere": "gcc-patches@gcc.gnu.org",
        "X-Mailman-Version": "2.1.30",
        "Precedence": "list",
        "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>",
        "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>",
        "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>",
        "List-Post": "<mailto:gcc-patches@gcc.gnu.org>",
        "List-Help": "<mailto:gcc-patches-request@gcc.gnu.org?subject=help>",
        "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>",
        "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org"
    },
    "content": "Hi,\n\n(Obviously for GCC 17)\n\nThis patch adds versioning to ensure that the loop control IV does\nnot overflow when vectorizing.  It does so by adding a no-overflow\nassumption in the vectorizer as well as a new loop constraint\nLOOP_C_NOWRAP.  In niter, we can make use of this constraint to\nprove that a specific candidate IV does not overflow.\n\nA drawback of this approach is that we might version loops too eagerly.\nBefore dataref analysis has been done we don't know whether the control\nIV's no-overflow property is actually needed.  In situations where the\ndataref just depends on a pointer IV that does not wrap, we only realize\nthat after having decided to perform versioning.  This leads to a\npessimization and additional loop prolog overhead.\n\nI experimented with keeping a second \"assumptions_without_nowrap\"\naround.  Just before the actual versioning, we can then do another\ndataref check, excluding all the nowrap control IVs, clearing\nSCEV caches etc.  If that check still results in \"no overflow\", we know\nthat the assumption was redundant and fall back to\n\"assumptions_without_nowrap\".  What makes this difficult is having a\nsecond niter assumption throughout the analysis flow and needing to\nduplicate or enhance several checks against the original assumptions to\noperate on assumptions_without_nowrap as well.\nMy gut feeling is that this isn't worth it, so I left that part out of\nthe patch.\n\nAnother difference to before is that the default assumption\nnow, obviously, is that no overflow happens at runtime.  If it does\nhappen, we fall back to scalar code.  Before, we would vectorize using\ngather/scatter that can handle overflow.  Given that gather/scatter is\nusually slow and a wrapping of their indices would likely make it even\nslower, I'd say the new assumption is reasonable.\nThere's still tension regarding interpretation of a user's intent of\ncourse.  
If somebody deliberately wrote a wrapping IV, expecting\ngather/scatter vectorization, we pessimize now.  A better solution could\nbe to fall back to gather/scatter instead of scalar code right away\nand, depending on costs, either vectorize or not.  Vectorizing twice\nis another can of worms that I didn't want to open.\n\nFor SPECint2017 on x86 we add assumptions to 107 loops in total, the bulk\nof which are in xalan, 10 in 502.gcc, and 14 in 557.xz.\n\nPerformance, unfortunately, is a bit disappointing.  On my x86 test\nmachine (Sandy Bridge) xz regresses a bit (2-3%).  Same on aarch64\nwithout SVE, improvements are small at best:\n\nInput 1 degrades by 4%, input 2 improves by 12%, input 3 regresses by 7%.\nInput 2 has a longer window size, while input 1 and 3 have 64 bytes and\ntypically \"break early\" for <= 10 iterations.\n\nMy suspicion is that the additional prolog assumption checking eats away\nprofitability.  We even statically estimate 11 iters for profitability.\nOn top, we need an epilogue, and with early break we currently re-do the\nfull vector iteration in scalar.  
I expect fully-masking architectures to\nfare better but IMHO these experiments confirm the difficulty of costing\nsmall early-break loops.\n\nBootstrapped and regtested on x86, power10, and aarch64.\nRegtested on riscv64.\n\nRegards\n Robin\n\n\tPR tree-optimization/121908\n\ngcc/ChangeLog:\n\n\t* cfgloop.h (struct GTY):  Add condition.\n\t(LOOP_C_NOWRAP): New constraint.\n\t* tree-ssa-loop-niter.cc (number_of_iterations_ne):\n\tUse LOOP_C_NOWRAP.\n\t(record_control_iv): Record condition.\n\t(loop_exits_before_overflow): Use LOOP_C_NOWRAP.\n\t* tree-vect-loop.cc (vect_get_loop_niters): Add no-overflow\n\tassumption and set LOOP_C_NOWRAP.\n\t* tree-vectorizer.cc (vect_free_loop_info_assumptions): Clear\n\tLOOP_C_NOWRAP.\n\t(try_vectorize_loop_1): Ditto.\n\t(pass_vectorize::execute): Ditto.\n\ngcc/testsuite/ChangeLog:\n\n\t* gcc.dg/tree-ssa/scev-8.c: Don't vectorize.\n\t* gcc.dg/vect/vect-unsigned-assump-1.c: New test.\n\t* gcc.dg/vect/vect-unsigned-assump-2.c: New test.\n\t* gcc.dg/vect/vect-unsigned-assump-3.c: New test.\n\t* gcc.dg/vect/vect-unsigned-assump-4.c: New test.\n\t* gcc.dg/vect/vect-unsigned-assump-5.c: New test.\n\t* gcc.dg/vect/vect-unsigned-assump-6.c: New test.\n\t* gcc.dg/vect/vect-unsigned-assump-7.c: New test.\n\t* gcc.dg/vect/vect-unsigned-assump-8.c: New test.\n---\n gcc/cfgloop.h                                 |  4 ++\n gcc/testsuite/gcc.dg/tree-ssa/scev-8.c        |  2 +-\n .../gcc.dg/vect/vect-unsigned-assump-1.c      | 22 ++++++\n .../gcc.dg/vect/vect-unsigned-assump-2.c      | 24 +++++++\n .../gcc.dg/vect/vect-unsigned-assump-3.c      | 20 ++++++\n .../gcc.dg/vect/vect-unsigned-assump-4.c      | 16 +++++\n .../gcc.dg/vect/vect-unsigned-assump-5.c      | 14 ++++\n .../gcc.dg/vect/vect-unsigned-assump-6.c      | 13 ++++\n .../gcc.dg/vect/vect-unsigned-assump-7.c      | 17 +++++\n .../gcc.dg/vect/vect-unsigned-assump-8.c      | 16 +++++\n gcc/tree-ssa-loop-niter.cc                    | 60 +++++++++++++++-\n gcc/tree-vect-loop.cc             
            | 69 +++++++++++++++++++\n gcc/tree-vectorizer.cc                        | 11 ++-\n 13 files changed, 284 insertions(+), 4 deletions(-)\n create mode 100644 gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-1.c\n create mode 100644 gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-2.c\n create mode 100644 gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-3.c\n create mode 100644 gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-4.c\n create mode 100644 gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-5.c\n create mode 100644 gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-6.c\n create mode 100644 gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-7.c\n create mode 100644 gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-8.c",
    "diff": "diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h\nindex de8d97c6141..9678b1363a2 100644\n--- a/gcc/cfgloop.h\n+++ b/gcc/cfgloop.h\n@@ -113,6 +113,7 @@ enum loop_estimation\n struct GTY ((chain_next (\"%h.next\"))) control_iv {\n   tree base;\n   tree step;\n+  HOST_WIDE_INT condition;\n   struct control_iv *next;\n };\n \n@@ -281,6 +282,9 @@ public:\n #define LOOP_C_INFINITE\t\t(1 << 0)\n /* Set if the loop is known to be finite without any assumptions.  */\n #define LOOP_C_FINITE\t\t(1 << 1)\n+/* Set if the loop is known to be non-wrapping under assumptions,\n+   thus only valid in vectorizer context where the loop is versioned.  */\n+#define LOOP_C_NOWRAP\t\t(1 << 2)\n \n /* Set C to the LOOP constraint.  */\n inline void\ndiff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c\nindex a5b2ff71958..abdbf0250dd 100644\n--- a/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c\n+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c\n@@ -1,5 +1,5 @@\n /* { dg-do compile } */\n-/* { dg-options \"-O2 -fdump-tree-ivopts-details\" } */\n+/* { dg-options \"-O2 -fno-tree-vectorize -fdump-tree-ivopts-details\" } */\n \n int *a;\n \ndiff --git a/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-1.c b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-1.c\nnew file mode 100644\nindex 00000000000..5d9c297c921\n--- /dev/null\n+++ b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-1.c\n@@ -0,0 +1,22 @@\n+/* { dg-do compile } */\n+/* { dg-require-effective-target vect_int } */\n+/* { dg-require-effective-target vect_early_break } */\n+\n+#define uint8_t unsigned char\n+#define uint32_t unsigned int\n+\n+int foo (const uint8_t *const cur, uint32_t len, uint32_t len_limit,\n+         uint32_t pos, uint32_t cur_match)\n+{\n+  const uint32_t delta = pos - cur_match;\n+  const uint8_t *pb = cur - delta;\n+\n+  while (++len != len_limit)\n+    if (pb[len] != cur[len])\n+      break;\n+\n+  return len;\n+}\n+\n+/* { dg-final { scan-tree-dump \"vectorized 1 loops in 
function\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"adding no-overflow assumption\" \"vect\" } } */\ndiff --git a/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-2.c b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-2.c\nnew file mode 100644\nindex 00000000000..249afe0127a\n--- /dev/null\n+++ b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-2.c\n@@ -0,0 +1,24 @@\n+/* { dg-do compile } */\n+/* { dg-require-effective-target vect_int } */\n+/* { dg-require-effective-target vect_early_break } */\n+/* { dg-require-effective-target vect_load_lanes } */\n+\n+#define uint8_t unsigned char\n+#define uint32_t unsigned int\n+\n+int foo (const uint8_t *const cur, uint32_t len, uint32_t len_limit,\n+         uint32_t pos, uint32_t cur_match)\n+{\n+  const uint32_t delta = pos - cur_match;\n+  const uint8_t *pb = cur - delta;\n+\n+  /* We vectorize this with struct loads right now.  */\n+  while ((len += 2) != len_limit)\n+    if (pb[len] != cur[len])\n+      break;\n+\n+  return len;\n+}\n+\n+/* { dg-final { scan-tree-dump \"vectorized 1 loops in function\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"adding no-overflow assumption\" \"vect\" } } */\ndiff --git a/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-3.c b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-3.c\nnew file mode 100644\nindex 00000000000..5547c478ffb\n--- /dev/null\n+++ b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-3.c\n@@ -0,0 +1,20 @@\n+/* { dg-do compile } */\n+/* { dg-require-effective-target vect_int } */\n+/* { dg-require-effective-target vect_early_break } */\n+\n+#define uint8_t unsigned char\n+#define uint32_t unsigned int\n+\n+int foo (const uint8_t *const cur, uint32_t n)\n+{\n+  uint32_t i = 15;\n+\n+  while (i++ != n)\n+    if (cur[i - 15] != cur[i])\n+      break;\n+\n+  return i;\n+}\n+\n+/* { dg-final { scan-tree-dump \"vectorized 1 loops in function\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"adding no-overflow assumption\" \"vect\" } } */\ndiff --git 
a/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-4.c b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-4.c\nnew file mode 100644\nindex 00000000000..e5d4022a5bd\n--- /dev/null\n+++ b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-4.c\n@@ -0,0 +1,16 @@\n+/* { dg-do compile } */\n+/* { dg-additional-options \"-fno-builtin\" } */\n+/* { dg-require-effective-target vect_int } */\n+\n+/* We should vectorize this without additional assumptions.\n+   The data-ref uses a pointer-based access which never wraps.\n+   However, this is not implemented right now and we always version.\n+   Thus, the test is xfailed unconditionally.  */\n+\n+void f4 (int *p, unsigned start, unsigned end) {\n+  for (unsigned i = start; i != end; i++)\n+    *p++ = 0;\n+}\n+\n+/* { dg-final { scan-tree-dump \"Discarding tentative nowrap\" \"vect\" { xfail *-*-* } } } */\n+/* { dg-final { scan-tree-dump \"adding no-overflow assumption\" \"vect\" } } */\ndiff --git a/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-5.c b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-5.c\nnew file mode 100644\nindex 00000000000..05b0cb40cd3\n--- /dev/null\n+++ b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-5.c\n@@ -0,0 +1,14 @@\n+/* { dg-do compile } */\n+/* { dg-require-effective-target vect_int } */\n+/* { dg-require-effective-target vect_gather_load_ifn } */\n+\n+/* This should be vectorized without gather/scatter.  
*/\n+\n+void f5 (int *__restrict a, int *__restrict b, unsigned start, unsigned end) {\n+    for (unsigned i = start; i != end; i++)\n+      a[i] = b[i];\n+}\n+\n+/* { dg-final { scan-tree-dump-not \"gather\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"vectorized 1 loops in function\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"adding no-overflow assumption\" \"vect\" } } */\ndiff --git a/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-6.c b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-6.c\nnew file mode 100644\nindex 00000000000..e869e8095ea\n--- /dev/null\n+++ b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-6.c\n@@ -0,0 +1,13 @@\n+/* { dg-do compile } */\n+/* { dg-require-effective-target vect_int } */\n+/* { dg-require-effective-target vect_gather_load_ifn } */\n+\n+/* This should be vectorized without gather/scatter.  */\n+\n+void f6 (int *__restrict a, int *__restrict b, unsigned start, unsigned end) {\n+  for (unsigned i = start; i <= end; i++)\n+    a[i] = b[i];\n+}\n+\n+/* { dg-final { scan-tree-dump-not \"gather\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"vectorized 1 loops in function\" \"vect\" } } */\ndiff --git a/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-7.c b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-7.c\nnew file mode 100644\nindex 00000000000..0d588b6e19e\n--- /dev/null\n+++ b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-7.c\n@@ -0,0 +1,17 @@\n+/* { dg-do compile } */\n+/* { dg-require-effective-target vect_int } */\n+\n+/* This should be vectorized without gather/scatter.  
*/\n+\n+void f7 (int *__restrict dst, unsigned n) {\n+  unsigned i = 1;\n+  while (i != n)\n+    {\n+      dst[i] = i;\n+      i++;\n+    }\n+}\n+\n+/* { dg-final { scan-tree-dump-not \"gather\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"vectorized 1 loops in function\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"adding no-overflow assumption\" \"vect\" } } */\ndiff --git a/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-8.c b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-8.c\nnew file mode 100644\nindex 00000000000..cfa66a3807b\n--- /dev/null\n+++ b/gcc/testsuite/gcc.dg/vect/vect-unsigned-assump-8.c\n@@ -0,0 +1,16 @@\n+/* { dg-do compile } */\n+/* { dg-additional-options \"-fno-builtin\" } */\n+/* { dg-require-effective-target vect_int } */\n+\n+/* We should vectorize this without additional assumptions.\n+   The data-ref uses a pointer-based access which never wraps.\n+   However, this is not implemented right now and we always version.\n+   Thus, the test is xfailed unconditionally.  */\n+\n+void f8 (int *__restrict a, unsigned start, unsigned n) {\n+  for (unsigned i = start; i != n; i++)\n+    a[i - start] = 0;\n+}\n+\n+/* { dg-final { scan-tree-dump \"Discarding tentative nowrap\" \"vect\" { xfail *-*-* } } } */\n+/* { dg-final { scan-tree-dump \"adding no-overflow assumption\" \"vect\" } } */\ndiff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc\nindex 207bf8ccf60..bca1909d698 100644\n--- a/gcc/tree-ssa-loop-niter.cc\n+++ b/gcc/tree-ssa-loop-niter.cc\n@@ -1098,6 +1098,14 @@ number_of_iterations_ne (class loop *loop, tree type, affine_iv *iv,\n \t}\n     }\n \n+  /* If we know the loop does not wrap, we're done here.  */\n+  if (loop_constraint_set_p (loop, LOOP_C_NOWRAP))\n+    {\n+      niter->control.no_overflow = true;\n+      niter->niter = fold_build2 (EXACT_DIV_EXPR, niter_type, c, s);\n+      return true;\n+    }\n+\n   /* Let nsd (step, size of mode) = d.  If d does not divide c, the loop\n      is infinite.  
Otherwise, the number of iterations is\n      (inverse(s/d) * (c/d)) mod (size of mode/d).  */\n@@ -4145,6 +4153,7 @@ record_control_iv (class loop *loop, class tree_niter_desc *niter)\n   iv = ggc_alloc<control_iv> ();\n   iv->base = niter->control.base;\n   iv->step = niter->control.step;\n+  iv->condition = niter->cmp;\n   iv->next = loop->control_ivs;\n   loop->control_ivs = iv;\n \n@@ -5490,6 +5499,23 @@ loop_exits_before_overflow (tree base, tree step,\n \n \t     by proving the reverse conditions are false using loop's initial\n \t     condition.  */\n+\n+\t  /* Before proving the above we can try to prove\n+\t       base < civ_base ;; step > 0\n+\t       base > civ_base ;; step < 0\n+\t     (or rather its respective reverse condition).\n+\t     That's a stronger guarantee than\n+\t       civ_base = base + step.  */\n+\t  if (TREE_TYPE (expanded_base) == TREE_TYPE (civ->base))\n+\t    {\n+\t      e = fold_build2\n+\t\t(tree_int_cst_sign_bit (step) ? LT_EXPR : GT_EXPR,\n+\t\t boolean_type_node, expanded_base, civ->base);\n+\t      e = simplify_using_initial_conditions (loop, e);\n+\t      if (integer_zerop (e))\n+\t\treturn true;\n+\t    }\n+\n \t  if (POINTER_TYPE_P (TREE_TYPE (base)))\n \t    code = POINTER_PLUS_EXPR;\n \t  else\n@@ -5503,6 +5529,9 @@ loop_exits_before_overflow (tree base, tree step,\n \t    {\n \t      tree extreme;\n \n+\t      if (loop_constraint_set_p (loop, LOOP_C_NOWRAP))\n+\t\treturn true;\n+\n \t      if (tree_int_cst_sign_bit (step))\n \t\t{\n \t\t  code = LT_EXPR;\n@@ -5519,7 +5548,36 @@ loop_exits_before_overflow (tree base, tree step,\n \t      if (integer_zerop (e))\n \t\treturn true;\n \t    }\n-        }\n+\n+\t  /* There are also \"after stepping\" controls like\n+\n+\t       {civ_base, step} = {base - step, step}\n+\n+\t     where BASE is one step ahead of CIV_BASE.\n+\t     We would need to prove that BASE does not overflow in the last\n+\t     step.  
That's difficult without knowing the exact number of\n+\t     iterations.\n+\t     What we can do, though, is make use of the loop condition:\n+\t     As the candidate IV is one step ahead of the non-overflowing\n+\t     CIV, the critical point is when CIV reaches BOUND and the next\n+\t     step could overflow.\n+\t     If the loop condition guarantees that we only enter the loop\n+\t     body when CIV != BOUND, we are good.  */\n+\t  if (loop_constraint_set_p (loop, LOOP_C_NOWRAP)\n+\t      && (civ->condition == LT_EXPR\n+\t\t  || civ->condition == GT_EXPR\n+\t\t  || civ->condition == NE_EXPR)\n+\t      && !POINTER_TYPE_P (TREE_TYPE (base)))\n+\t    {\n+\t      code = MINUS_EXPR;\n+\t      stepped = fold_build2 (code, TREE_TYPE (base), base, step);\n+\t      expanded_stepped = fold_build2 (code, TREE_TYPE (base),\n+\t\t\t\t\t      expanded_base, step);\n+\t      if (operand_equal_p (stepped, civ->base, 0)\n+\t\t  || operand_equal_p (expanded_stepped, civ->base, 0))\n+\t\treturn true;\n+\t    }\n+\t}\n     }\n \n   return false;\ndiff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc\nindex 7121edb8d81..b86743d2a7d 100644\n--- a/gcc/tree-vect-loop.cc\n+++ b/gcc/tree-vect-loop.cc\n@@ -616,6 +616,75 @@ vect_get_loop_niters (class loop *loop, const_edge main_exit, tree *assumptions,\n \t    continue;\n        }\n \n+      /* For NE_EXPR number_of_iterations_ne will only add assumptions to\n+\t ensure the loop is finite.  In order to vectorize, we want stricter\n+\t boundary conditions, i.e. no overflow so we can use regular vector\n+\t loads/stores rather than gather/scatter.\n+\t Adding assumptions in number_of_iterations_ne to enforce no overflow\n+\t might penalize other passes that don't perform versioning and can't\n+\t use niters with assumptions.\n+\t Thus, we add the no-overflow condition here after the analysis is\n+\t done and NITER_DESC's properties are conservatively correct.  
*/\n+\n+      if (niter_desc.cmp == NE_EXPR\n+\t  && !niter_desc.control.no_overflow\n+\t  && niter_desc.control.base\n+\t  && niter_desc.control.step\n+\t  && niter_desc.bound\n+\t  && TYPE_OVERFLOW_WRAPS (TREE_TYPE (niter_desc.control.base))\n+\t  && TREE_CODE (niter_desc.control.step) == INTEGER_CST\n+\t  && !integer_zerop (niter_desc.control.step)\n+\t  && TREE_CODE (niter_desc.bound) != INTEGER_CST)\n+\t{\n+\t  tree nowrap;\n+\t  /* For \"before stepping\" controls, like\n+\t      { base + step, step }\n+\t     that loop-ch creates we don't just need to ensure that\n+\t\tbase + step <= bound, but also that\n+\t\tbase + step does not overflow.  */\n+\t  tree nowrap_first_step;\n+\t  if (tree_int_cst_sign_bit (niter_desc.control.step))\n+\t    {\n+\t      nowrap = fold_build2 (GE_EXPR, boolean_type_node,\n+\t\t\t\t    niter_desc.control.base, niter_desc.bound);\n+\t      nowrap_first_step\n+\t\t= fold_build2 (GE_EXPR, boolean_type_node,\n+\t\t\t       niter_desc.control.base,\n+\t\t\t       fold_build1 (NEGATE_EXPR, TREE_TYPE (niter),\n+\t\t\t\t\t    niter_desc.control.step));\n+\t    }\n+\t  else\n+\t    {\n+\t      nowrap = fold_build2 (LE_EXPR, boolean_type_node,\n+\t\t\t\t    niter_desc.control.base, niter_desc.bound);\n+\t      nowrap_first_step\n+\t\t= fold_build2 (GE_EXPR, boolean_type_node,\n+\t\t\t       fold_build2 (MINUS_EXPR, TREE_TYPE (niter),\n+\t\t\t\t\t    TYPE_MAX_VALUE (TREE_TYPE (niter)),\n+\t\t\t\t\t    niter_desc.control.step),\n+\t\t\t       niter_desc.control.base);\n+\t    }\n+\n+\t  /* Add both assumptions for versioning.  
*/\n+\t  niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,\n+\t\t\t\t\t   niter_assumptions,\n+\t\t\t\t\t   nowrap);\n+\t  niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,\n+\t\t\t\t\t   niter_assumptions,\n+\t\t\t\t\t   nowrap_first_step);\n+\t  if (dump_enabled_p ())\n+\t    dump_printf_loc (MSG_NOTE, vect_location, \"adding no-overflow \"\n+\t\t\t     \"assumption.\\n\");\n+\n+\t  /* Mark control IV accordingly.  */\n+\t  niter_desc.control.no_overflow = true;\n+\t}\n+\n+      /* Set LOOP_C_NOWRAP so we can use it in\n+\t loop_exits_before_overflow.  */\n+      if (niter_desc.control.no_overflow)\n+\tloop_constraint_set (loop, LOOP_C_NOWRAP);\n+\n       /* Loop assumptions are based off the normal exit.  */\n       *assumptions = niter_assumptions;\n       *number_of_iterationsm1 = niter;\ndiff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc\nindex b73243252c0..f64e3987699 100644\n--- a/gcc/tree-vectorizer.cc\n+++ b/gcc/tree-vectorizer.cc\n@@ -862,6 +862,7 @@ vect_free_loop_info_assumptions (class loop *loop)\n   loop->any_likely_upper_bound = false;\n   free_numbers_of_iterations_estimates (loop);\n   loop_constraint_clear (loop, LOOP_C_FINITE);\n+  loop_constraint_clear (loop, LOOP_C_NOWRAP);\n }\n \n /* If LOOP has been versioned during ifcvt, return the internal call\n@@ -1111,7 +1112,8 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,\n     {\n       /* Free existing information if loop is analyzed with some\n \t assumptions.  */\n-      if (loop_constraint_set_p (loop, LOOP_C_FINITE))\n+      if (loop_constraint_set_p (loop, LOOP_C_FINITE)\n+\t  || loop_constraint_set_p (loop, LOOP_C_NOWRAP))\n \tvect_free_loop_info_assumptions (loop);\n \n       /* If we applied if-conversion then try to vectorize the\n@@ -1176,7 +1178,8 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,\n     {\n       /* Free existing information if loop is analyzed with some\n \t assumptions.  
*/\n-      if (loop_constraint_set_p (loop, LOOP_C_FINITE))\n+      if (loop_constraint_set_p (loop, LOOP_C_FINITE)\n+\t  || loop_constraint_set_p (loop, LOOP_C_NOWRAP))\n \tvect_free_loop_info_assumptions (loop);\n       return ret;\n     }\n@@ -1431,6 +1434,10 @@ pass_vectorize::execute (function *fun)\n \n   vect_slp_fini ();\n \n+  /* LOOP_C_NOWRAP is only valid during vectorizer time, so clear it now.  */\n+  for (auto loop : loops_list (fun, 0))\n+    loop_constraint_clear (loop, LOOP_C_NOWRAP);\n+\n   return ret;\n }\n \n",
    "prefixes": []
}