Message ID | 20180220171712.GH5867@tucnak |
---|---|
State | New |
Headers | show |
Series | Fix pdftex miscompilation due to get_range_strlen (PR tree-optimization/84478) | expand |
On 02/20/2018 12:03 PM, Martin Sebor wrote: > On 02/20/2018 10:17 AM, Jakub Jelinek wrote: >> Hi! >> >> The following testcase distilled from pdftex is miscompiled on i?86, >> because gimple_fold_builtin_strlen sets incorrect value range on >> strlen call on SSA_NAME with def_stmt of PHI <"mu", something> where >> we can't determine anything about string length of something, so the >> right value range is [0, maximum_possible_strlen], but we actually used >> [2, INF]. >> >> get_range_strlen has many modes, for get_maxval_strlen we check return >> value of get_range_strlen, don't set fuzzy and everything seems correct. >> Otherwise we have the fuzzy mode which is used in 2 arg get_range_strlen >> overload, which is used for warnings and recently also for >> gimple_fold_builtin_strlen which sets value ranges from it. Unlike >> warnings, which can perhaps afford some heuristics and guessing, for >> gimple_fold_builtin_strlen we need to be 100% conservative. >> >> The thing that isn't handled conservatively is PHIs and COND_EXPR. >> The current code, if we can't figure one of the args out, for PHIs in >> fuzzy mode increases the *maxval value to +INF, but doesn't touch >> *minval, for COND_EXPR doesn't adjust the *minval/*maxval at all and just >> returns false. Unfortunately, changing that breaks > > It sounds like not setting *minlen is the problem then. Attached > is a slightly smaller patch that fixes the bug with no testsuite > regressions on x86_64-linux. How does it look to you? A safer and even more conservative alternative that should be equivalent to your approach while avoiding the sprintf regressions is to add another mode to the function and have it clear *minlen as an option. This lets the strlen code obtain the conservative lower bound without compromising the sprintf warnings. Martin PR tree-optimization/84478 - [8 Regression] pdftex miscompilation on i386 gcc/ChangeLog: PR tree-optimization/84478 * gimple-fold.h (get_range_strlen): Add argument. * gimple-fold.c (get_range_strlen): Set *MINLEN to zero. (get_range_strlen): Reset range on failure. * tree-ssa-strlen.c (maybe_diag_stxncpy_trunc): Add third argument to the call to get_range_strlen. gcc/testsuite/ChangeLog: PR tree-optimization/84478 * gcc.c-torture/execute/pr84478.c: New test. Index: gcc/gimple-fold.c =================================================================== --- gcc/gimple-fold.c (revision 257850) +++ gcc/gimple-fold.c (working copy) @@ -1290,6 +1290,9 @@ gimple_fold_builtin_memset (gimple_stmt_iterator * When FUZZY is set and the length of a string cannot be determined, the function instead considers as the maximum possible length the size of a character array it may refer to. + When STRICT_MIN is set the function will clear MINMAXLEN[0] if + the length of any of the referenced strings cannot be determined + (and thus can be zero). Set *FLEXP to true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound @@ -1297,7 +1300,7 @@ gimple_fold_builtin_memset (gimple_stmt_iterator * static bool get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, - bool fuzzy, bool *flexp) + bool fuzzy, bool strict_min, bool *flexp) { tree var, val = NULL_TREE; gimple *def_stmt; @@ -1320,7 +1323,8 @@ get_range_strlen (tree arg, tree length[2], bitmap if (TREE_CODE (aop0) == INDIRECT_REF && TREE_CODE (TREE_OPERAND (aop0, 0)) == SSA_NAME) return get_range_strlen (TREE_OPERAND (aop0, 0), - length, visited, type, fuzzy, flexp); + length, visited, type, fuzzy, + strict_min, flexp); } else if (TREE_CODE (TREE_OPERAND (op, 0)) == COMPONENT_REF && fuzzy) { @@ -1354,7 +1358,7 @@ get_range_strlen (tree arg, tree length[2], bitmap { if (TREE_CODE (arg) == ADDR_EXPR) return get_range_strlen (TREE_OPERAND (arg, 0), length, - visited, type, fuzzy, flexp); + visited, type, fuzzy, strict_min, flexp); if (TREE_CODE (arg) == ARRAY_REF) { @@ -1497,14 +1501,17 @@ get_range_strlen (tree arg, tree length[2], bitmap || gimple_assign_unary_nop_p (def_stmt)) { tree rhs = gimple_assign_rhs1 (def_stmt); - return get_range_strlen (rhs, length, visited, type, fuzzy, flexp); + return get_range_strlen (rhs, length, visited, type, fuzzy, + strict_min, flexp); } else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR) { tree op2 = gimple_assign_rhs2 (def_stmt); tree op3 = gimple_assign_rhs3 (def_stmt); - return get_range_strlen (op2, length, visited, type, fuzzy, flexp) - && get_range_strlen (op3, length, visited, type, fuzzy, flexp); + return (get_range_strlen (op2, length, visited, type, fuzzy, + strict_min, flexp) + && get_range_strlen (op3, length, visited, type, fuzzy, + strict_min, flexp)); } return false; @@ -1527,10 +1534,15 @@ get_range_strlen (tree arg, tree length[2], bitmap if (arg == gimple_phi_result (def_stmt)) continue; - if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp)) + if (!get_range_strlen (arg, length, visited, type, fuzzy, + strict_min, flexp)) { if (fuzzy) - *maxlen = build_all_ones_cst (size_type_node); + { + if (strict_min) + *minlen = ssize_int (0); + *maxlen = build_all_ones_cst (size_type_node); + } else return false; } @@ -1551,6 +1563,9 @@ get_range_strlen (tree arg, tree length[2], bitmap and array declared as 'char array[8]', MINMAXLEN[0] will be set to 3 and MINMAXLEN[1] to 7, the longest string that could be stored in array. + When STRICT_MIN is set the function will clear MINMAXLEN[0] if + the length of any of the referenced strings cannot be determined + (and thus can be zero). Return true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound @@ -1557,7 +1572,7 @@ get_range_strlen (tree arg, tree length[2], bitmap due to it being used as a poor-man's flexible array member. */ bool -get_range_strlen (tree arg, tree minmaxlen[2]) +get_range_strlen (tree arg, tree minmaxlen[2], bool strict_min /* = true */) { bitmap visited = NULL; @@ -1565,7 +1580,12 @@ bool minmaxlen[1] = NULL_TREE; bool flexarray = false; - get_range_strlen (arg, minmaxlen, &visited, 1, true, &flexarray); + if (!get_range_strlen (arg, minmaxlen, &visited, 1, true, strict_min, + &flexarray)) + { + minmaxlen[0] = NULL_TREE; + minmaxlen[1] = NULL_TREE; + } if (visited) BITMAP_FREE (visited); @@ -1580,7 +1600,7 @@ get_maxval_strlen (tree arg, int type) tree len[2] = { NULL_TREE, NULL_TREE }; bool dummy; - if (!get_range_strlen (arg, len, &visited, type, false, &dummy)) + if (!get_range_strlen (arg, len, &visited, type, false, true, &dummy)) len[1] = NULL_TREE; if (visited) BITMAP_FREE (visited); Index: gcc/gimple-fold.h =================================================================== --- gcc/gimple-fold.h (revision 257850) +++ gcc/gimple-fold.h (working copy) @@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. If not see extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL); extern tree canonicalize_constructor_val (tree, tree); extern tree get_symbol_constant_value (tree); -extern bool get_range_strlen (tree, tree[2]); +extern bool get_range_strlen (tree, tree[2], bool = true); extern tree get_maxval_strlen (tree, int); extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree); extern bool fold_stmt (gimple_stmt_iterator *); Index: gcc/testsuite/gcc.c-torture/execute/pr84478.c =================================================================== --- gcc/testsuite/gcc.c-torture/execute/pr84478.c (nonexistent) +++ gcc/testsuite/gcc.c-torture/execute/pr84478.c (working copy) @@ -0,0 +1,48 @@ +/* PR tree-optimization/84478 - pdftex miscompilation on i386 */ + +long poolptr; +unsigned char *strpool; +static const char *poolfilearr[] = { + "mu", +#define A "", +#define B A A A A A A A A A A +#define C B B B B B B B B B B +#define D C C C C C C C C C C + D C C C C C C C B B B A + ((void *)0) +}; + +__attribute__((noipa)) long +makestring (void) +{ + return 0; +} + +__attribute__((noipa)) +int +loadpoolstrings (long spare_size) +{ + const char *s; + long g = 0; + int i = 0, j = 0; + while ((s = poolfilearr[j++])) + { + int l = __builtin_strlen (s); + i += l; + if (i >= spare_size) return 0; + while (l-- > 0) strpool[poolptr++] = *s++; + g = makestring (); + } + return g; +} + +int +main () +{ + poolptr = 0; + strpool = __builtin_malloc (1000); + asm volatile ("" : : : "memory"); + volatile int r = loadpoolstrings (1000); + __builtin_free (strpool); + return 0; +}
On Tue, Feb 20, 2018 at 12:03:26PM -0700, Martin Sebor wrote: > PR tree-optimization/84478 - [8 Regression] pdftex miscompilation on i386 > > gcc/ChangeLog: > > PR tree-optimization/84478 > * gimple-fold.c (get_range_strlen): Set *MINLEN to zero. > (get_range_strlen): Reset range on failure. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/84478 > * gcc.c-torture/execute/pr84478.c: New test. > > Index: gcc/gimple-fold.c > =================================================================== > --- gcc/gimple-fold.c (revision 257796) > +++ gcc/gimple-fold.c (working copy) > @@ -1369,7 +1369,10 @@ get_range_strlen (tree arg, tree length[2], bitmap > tree eltype = TREE_TYPE (type); > if (TREE_CODE (type) != ARRAY_TYPE > || !INTEGRAL_TYPE_P (eltype)) > - return false; > + { > + *minlen = ssize_int (0); > + return false; > + } This is just one of the 13 spots where we return false, so this doesn't look safe or sufficient to me, even when you actually honor the return value in 2 argument get_range_strlen. You'd really need to do { if (fuzzy) - *maxlen = build_all_ones_cst (size_type_node); + { + *minlen = size_int (0); + *maxlen = build_all_ones_cst (size_type_node); + } else return false; } or just drop that if (fuzzy) stuff from there, but that breaks the warning tests. It would help if you explained why you think it is a good idea ignoring the other phi arguments if you have one (or more) where you can determine length. One variation of my patch could be instead of adding type 3 change fuzzy from bool to int, and use fuzzy == 1 for the strlen value ranges and fuzzy == 2 for the warning code (i.e. 2 operand get_range_strlen). Note, my patch passed regtest on both x86_64-linux and i686-linux. Jakub
On 02/20/2018 02:34 PM, Jakub Jelinek wrote: > On Tue, Feb 20, 2018 at 01:13:13PM -0700, Martin Sebor wrote: >> A safer and even more conservative alternative that should be >> equivalent to your approach while avoiding the sprintf regressions >> is to add another mode to the function and have it clear *minlen >> as an option. This lets the strlen code obtain the conservative >> lower bound without compromising the sprintf warnings. > > I fail to see what it would be good for to set *MINLEN to zero and > *MAXLEN to all ones for the non-warning use cases, we simply don't know > anything about it, both NULL_TREEs i.e. returning false is better. I'm > offering two alternate patches which use > fuzzy == 0 for the previous !fuzzy, fuzzy == 1 for conservatively correct > code that assumes strlen can't cross field/variable boundaries in > compliant programs and fuzzy == 2 which does that + whatever the warning > code wants. Additionally, I've rewritten the COND_EXPR handling, so that > it matches exactly the PHI handling. > > The first patch doesn't change the 2 argument get_range_strlen and changes > gimple_fold_builtin_strlen to use the 6 argument one, the second patch > changes also the 2 argument get_range_strlen similarly to what you've done > in your patch. > > Tested on x86_64-linux and i686-linux, ok for trunk if it passes > bootstrap/regtest? Which one? I don't have a preference as long as it doesn't introduce any test suite regressions. That's all I was trying to do in my approach (besides fixing the bug). Martin
> It would help if you explained why you think it is a good idea > ignoring the other phi arguments if you have one (or more) where you can > determine length. It's a heuristic that was meant just for the -Wformat-overflow warning. When making decisions that affect code generation it's obviously not correct to ignore the possibility that unknown arguments may be shorter than the minimum or longer than the maximum. The fuzzy argument was meant to differentiate between two got but I forgot about it when I added the fix for PR 83671. For GCC 8 I don't have a preference for how to fix this as long as it doesn't regress the warning tests. I think the ultimate solution (for GCC 9) may be to either disable the heuristic for code generation purposes (e.g., via another argument/flag) or provide a pointer argument to indicate to the caller that the minimum is based on known strings, and that the real minimum may be zero. Martin
On Tue, Feb 20, 2018 at 04:59:12PM -0700, Martin Sebor wrote: > > It would help if you explained why you think it is a good idea > > ignoring the other phi arguments if you have one (or more) where you can > > determine length. > > It's a heuristic that was meant just for the -Wformat-overflow > warning. When making decisions that affect code generation it's > obviously not correct to ignore the possibility that unknown > arguments may be shorter than the minimum or longer than > the maximum. The fuzzy argument was meant to differentiate > between two got but I forgot about it when I added the fix > for PR 83671. get_range_strlen (the 2 argument one) is right now called: 3 times from builtins.c for -Wstringop-overflow once from gimple-ssa-sprintf.c for -Wformat-overflow once from tree-ssa-strlen.c for -Wstringop-truncation once from gimple-fold.c for gimple_fold_builtin_strlen value ranges So, which of these do you want the heuristics for? All 3 warnings, just one of them, something else? In the 2 patches I've posted last all the 3 different warnings are treated the same (fuzzy == 2). Looking at strlenopt-40.c testcase, in the test you clearly rely on fuzzy argument being set for the value ranges case (gimple_fold_builtin_strlen), otherwise many of the subtests would fail. Jakub
On 02/20/2018 05:14 PM, Jakub Jelinek wrote: > On Tue, Feb 20, 2018 at 04:59:12PM -0700, Martin Sebor wrote: >>> It would help if you explained why you think it is a good idea >>> ignoring the other phi arguments if you have one (or more) where you can >>> determine length. >> >> It's a heuristic that was meant just for the -Wformat-overflow >> warning. When making decisions that affect code generation it's >> obviously not correct to ignore the possibility that unknown >> arguments may be shorter than the minimum or longer than >> the maximum. The fuzzy argument was meant to differentiate >> between two got but I forgot about it when I added the fix >> for PR 83671. > > get_range_strlen (the 2 argument one) is right now called: > 3 times from builtins.c for -Wstringop-overflow > once from gimple-ssa-sprintf.c for -Wformat-overflow > once from tree-ssa-strlen.c for -Wstringop-truncation Right. The warnings depend on the heuristic and on using a non-zero lower bound when some of the strings is unknown or has unknown length but other don't. > once from gimple-fold.c for gimple_fold_builtin_strlen value ranges Right. This depends on it too, except it needs to avoid using a nonzero lower bound when any of the strings is unknown/has unknown length. Let's fix that. > > So, which of these do you want the heuristics for? All 3 warnings, > just one of them, something else? In the 2 patches I've posted last > all the 3 different warnings are treated the same (fuzzy == 2). All of them. The bug is in how strlen is folded. There is nothing wrong with the warnings, so fix the bug and leave the warnings alone. My second patch did that and it passed all tests. (I forgot to include the change to gimple-ssa-sprintf.c but it was the same as the one in tree-ssa-strlen.c: call get_range_strlen with true as the third argument. Attached is the same patch with that bit added.) > Looking at strlenopt-40.c testcase, in the test you clearly rely on > fuzzy argument being set for the value ranges case > (gimple_fold_builtin_strlen), otherwise many of the > subtests would fail. Right, that needs to continue to work. It does with my patch. If your solution regresses any of the tests then use the second patch I posted. What am I missing? Martin PR tree-optimization/84478 - [8 Regression] pdftex miscompilation on i386 gcc/ChangeLog: PR tree-optimization/84478 * gimple-fold.h (get_range_strlen): Add argument. * gimple-fold.c (get_range_strlen): Set *MINLEN to zero. (get_range_strlen): Reset range on failure. * gimple-ssa-sprintf.c (get_string_length): Add third argument to the call to get_range_strlen. * tree-ssa-strlen.c (maybe_diag_stxncpy_trunc): Same. gcc/testsuite/ChangeLog: PR tree-optimization/84478 * gcc.c-torture/execute/pr84478.c: New test. Index: gcc/gimple-fold.c =================================================================== --- gcc/gimple-fold.c (revision 257859) +++ gcc/gimple-fold.c (working copy) @@ -1290,6 +1290,9 @@ gimple_fold_builtin_memset (gimple_stmt_iterator * When FUZZY is set and the length of a string cannot be determined, the function instead considers as the maximum possible length the size of a character array it may refer to. + When STRICT_MIN is set the function will clear MINMAXLEN[0] if + the length of any of the referenced strings cannot be determined + (and thus can be zero). Set *FLEXP to true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound @@ -1297,7 +1300,7 @@ gimple_fold_builtin_memset (gimple_stmt_iterator * static bool get_range_strlen (tree arg, tree length[2], bitmap *visited, int type, - bool fuzzy, bool *flexp) + bool fuzzy, bool strict_min, bool *flexp) { tree var, val = NULL_TREE; gimple *def_stmt; @@ -1320,7 +1323,8 @@ get_range_strlen (tree arg, tree length[2], bitmap if (TREE_CODE (aop0) == INDIRECT_REF && TREE_CODE (TREE_OPERAND (aop0, 0)) == SSA_NAME) return get_range_strlen (TREE_OPERAND (aop0, 0), - length, visited, type, fuzzy, flexp); + length, visited, type, fuzzy, + strict_min, flexp); } else if (TREE_CODE (TREE_OPERAND (op, 0)) == COMPONENT_REF && fuzzy) { @@ -1354,7 +1358,7 @@ get_range_strlen (tree arg, tree length[2], bitmap { if (TREE_CODE (arg) == ADDR_EXPR) return get_range_strlen (TREE_OPERAND (arg, 0), length, - visited, type, fuzzy, flexp); + visited, type, fuzzy, strict_min, flexp); if (TREE_CODE (arg) == ARRAY_REF) { @@ -1497,14 +1501,17 @@ get_range_strlen (tree arg, tree length[2], bitmap || gimple_assign_unary_nop_p (def_stmt)) { tree rhs = gimple_assign_rhs1 (def_stmt); - return get_range_strlen (rhs, length, visited, type, fuzzy, flexp); + return get_range_strlen (rhs, length, visited, type, fuzzy, + strict_min, flexp); } else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR) { tree op2 = gimple_assign_rhs2 (def_stmt); tree op3 = gimple_assign_rhs3 (def_stmt); - return get_range_strlen (op2, length, visited, type, fuzzy, flexp) - && get_range_strlen (op3, length, visited, type, fuzzy, flexp); + return (get_range_strlen (op2, length, visited, type, fuzzy, + strict_min, flexp) + && get_range_strlen (op3, length, visited, type, fuzzy, + strict_min, flexp)); } return false; @@ -1527,10 +1534,15 @@ get_range_strlen (tree arg, tree length[2], bitmap if (arg == gimple_phi_result (def_stmt)) continue; - if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp)) + if (!get_range_strlen (arg, length, visited, type, fuzzy, + strict_min, flexp)) { if (fuzzy) - *maxlen = build_all_ones_cst (size_type_node); + { + if (strict_min) + *minlen = ssize_int (0); + *maxlen = build_all_ones_cst (size_type_node); + } else return false; } @@ -1551,6 +1563,9 @@ get_range_strlen (tree arg, tree length[2], bitmap and array declared as 'char array[8]', MINMAXLEN[0] will be set to 3 and MINMAXLEN[1] to 7, the longest string that could be stored in array. + When STRICT_MIN is set the function will clear MINMAXLEN[0] if + the length of any of the referenced strings cannot be determined + (and thus can be zero). Return true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound @@ -1557,7 +1572,7 @@ get_range_strlen (tree arg, tree length[2], bitmap due to it being used as a poor-man's flexible array member. */ bool -get_range_strlen (tree arg, tree minmaxlen[2]) +get_range_strlen (tree arg, tree minmaxlen[2], bool strict_min /* = true */) { bitmap visited = NULL; @@ -1565,7 +1580,12 @@ bool minmaxlen[1] = NULL_TREE; bool flexarray = false; - get_range_strlen (arg, minmaxlen, &visited, 1, true, &flexarray); + if (!get_range_strlen (arg, minmaxlen, &visited, 1, true, strict_min, + &flexarray)) + { + minmaxlen[0] = NULL_TREE; + minmaxlen[1] = NULL_TREE; + } if (visited) BITMAP_FREE (visited); @@ -1580,7 +1600,7 @@ get_maxval_strlen (tree arg, int type) tree len[2] = { NULL_TREE, NULL_TREE }; bool dummy; - if (!get_range_strlen (arg, len, &visited, type, false, &dummy)) + if (!get_range_strlen (arg, len, &visited, type, false, true, &dummy)) len[1] = NULL_TREE; if (visited) BITMAP_FREE (visited); Index: gcc/gimple-fold.h =================================================================== --- gcc/gimple-fold.h (revision 257859) +++ gcc/gimple-fold.h (working copy) @@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. If not see extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL); extern tree canonicalize_constructor_val (tree, tree); extern tree get_symbol_constant_value (tree); -extern bool get_range_strlen (tree, tree[2]); +extern bool get_range_strlen (tree, tree[2], bool = true); extern tree get_maxval_strlen (tree, int); extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree); extern bool fold_stmt (gimple_stmt_iterator *); Index: gcc/gimple-ssa-sprintf.c =================================================================== --- gcc/gimple-ssa-sprintf.c (revision 257859) +++ gcc/gimple-ssa-sprintf.c (working copy) @@ -2075,7 +2075,7 @@ get_string_length (tree str) aren't known to point any such arrays result in LENRANGE[1] set to SIZE_MAX. */ tree lenrange[2]; - bool flexarray = get_range_strlen (str, lenrange); + bool flexarray = get_range_strlen (str, lenrange, true); if (lenrange [0] || lenrange [1]) {
On 02/20/2018 04:59 PM, Martin Sebor wrote: >> It would help if you explained why you think it is a good idea >> ignoring the other phi arguments if you have one (or more) where you can >> determine length. > > It's a heuristic that was meant just for the -Wformat-overflow > warning. When making decisions that affect code generation it's > obviously not correct to ignore the possibility that unknown > arguments may be shorter than the minimum or longer than > the maximum. The fuzzy argument was meant to differentiate > between two got but I forgot about it when I added the fix > for PR 83671. > > For GCC 8 I don't have a preference for how to fix this as long > as it doesn't regress the warning tests. > > I think the ultimate solution (for GCC 9) may be to either > disable the heuristic for code generation purposes (e.g., via > another argument/flag) or provide a pointer argument to indicate > to the caller that the minimum is based on known strings, and that > the real minimum may be zero. I'm still getting refamiliar with this code. But one thing that jumps out immediately is how much this reminds me of the discussion we had around 77608 -- where I argued that returning something that was not conservatively correct was just asking for long term problems. I realize we're talking about different routines, but the concerns are the same -- when we return something that is not conservatively correct it's easy for someone to mistakenly use those results for code generation purposes. The fuzzy stuff is in there to reduce the false positive rates and we're not *supposed* to be using fuzzy results for code generation purposes, but as I argued in 77608, it's easy to miss. I'll reiterate my desire to make this kind of mistake harder to make. Jeff
On 02/20/2018 05:14 PM, Jakub Jelinek wrote: > On Tue, Feb 20, 2018 at 04:59:12PM -0700, Martin Sebor wrote: >>> It would help if you explained why you think it is a good idea >>> ignoring the other phi arguments if you have one (or more) where you can >>> determine length. >> >> It's a heuristic that was meant just for the -Wformat-overflow >> warning. When making decisions that affect code generation it's >> obviously not correct to ignore the possibility that unknown >> arguments may be shorter than the minimum or longer than >> the maximum. The fuzzy argument was meant to differentiate >> between two got but I forgot about it when I added the fix >> for PR 83671. > > get_range_strlen (the 2 argument one) is right now called: > 3 times from builtins.c for -Wstringop-overflow > once from gimple-ssa-sprintf.c for -Wformat-overflow > once from tree-ssa-strlen.c for -Wstringop-truncation > once from gimple-fold.c for gimple_fold_builtin_strlen value ranges Presumably it's the last one that's causing problems and were we should focus our effort. > > So, which of these do you want the heuristics for? All 3 warnings, > just one of them, something else? In the 2 patches I've posted last > all the 3 different warnings are treated the same (fuzzy == 2). > > Looking at strlenopt-40.c testcase, in the test you clearly rely on > fuzzy argument being set for the value ranges case > (gimple_fold_builtin_strlen), otherwise many of the > subtests would fail. Which tests specifically? I did a real quick scan and didn't see anything in there that depended on incorrect behavior for the call from gimple_fold_builtin_strlen. BUt it was a quick scan and I could have missed something. jeff
On 02/20/2018 01:13 PM, Martin Sebor wrote: > On 02/20/2018 12:03 PM, Martin Sebor wrote: >> On 02/20/2018 10:17 AM, Jakub Jelinek wrote: >>> Hi! >>> >>> The following testcase distilled from pdftex is miscompiled on i?86, >>> because gimple_fold_builtin_strlen sets incorrect value range on >>> strlen call on SSA_NAME with def_stmt of PHI <"mu", something> where >>> we can't determine anything about string length of something, so the >>> right value range is [0, maximum_possible_strlen], but we actually used >>> [2, INF]. >>> >>> get_range_strlen has many modes, for get_maxval_strlen we check return >>> value of get_range_strlen, don't set fuzzy and everything seems correct. >>> Otherwise we have the fuzzy mode which is used in 2 arg get_range_strlen >>> overload, which is used for warnings and recently also for >>> gimple_fold_builtin_strlen which sets value ranges from it. Unlike >>> warnings, which can perhaps afford some heuristics and guessing, for >>> gimple_fold_builtin_strlen we need to be 100% conservative. >>> >>> The thing that isn't handled conservatively is PHIs and COND_EXPR. >>> The current code, if we can't figure one of the args out, for PHIs in >>> fuzzy mode increases the *maxval value to +INF, but doesn't touch >>> *minval, for COND_EXPR doesn't adjust the *minval/*maxval at all and >>> just >>> returns false. Unfortunately, changing that breaks >> >> It sounds like not setting *minlen is the problem then. Attached >> is a slightly smaller patch that fixes the bug with no testsuite >> regressions on x86_64-linux. How does it look to you? > > A safer and even more conservative alternative that should be > equivalent to your approach while avoiding the sprintf regressions > is to add another mode to the function and have it clear *minlen > as an option. This lets the strlen code obtain the conservative > lower bound without compromising the sprintf warnings. Adding another mode to this function seems wrong to me -- it's already a bit of a tangled mess to figure out. ISTM that either we allow fuzzy results or not. For warnings, fuzzy rules. For code generation fuzzy is strictly not allowed. Is there some reason that will not work? jeff
On 02/20/2018 12:03 PM, Martin Sebor wrote: >> The thing that isn't handled conservatively is PHIs and COND_EXPR. >> The current code, if we can't figure one of the args out, for PHIs in >> fuzzy mode increases the *maxval value to +INF, but doesn't touch >> *minval, for COND_EXPR doesn't adjust the *minval/*maxval at all and just >> returns false. Unfortunately, changing that breaks > > It sounds like not setting *minlen is the problem then. Attached > is a slightly smaller patch that fixes the bug with no testsuite > regressions on x86_64-linux. How does it look to you? What I don't like here is that we ultimately continue to use the two operand get_range_strlen from the folder. Meaning we're asking for fuzzy results in a code generation path. I'd lean more towards a solution that always gives conservatively correct results in the codegen path while allowing fuzzy on the warning paths. Jeff
On Tue, Feb 20, 2018 at 08:31:06PM -0700, Jeff Law wrote: > On 02/20/2018 05:14 PM, Jakub Jelinek wrote: > > On Tue, Feb 20, 2018 at 04:59:12PM -0700, Martin Sebor wrote: > >>> It would help if you explained why you think it is a good idea > >>> ignoring the other phi arguments if you have one (or more) where you can > >>> determine length. > >> > >> It's a heuristic that was meant just for the -Wformat-overflow > >> warning. When making decisions that affect code generation it's > >> obviously not correct to ignore the possibility that unknown > >> arguments may be shorter than the minimum or longer than > >> the maximum. The fuzzy argument was meant to differentiate > >> between two got but I forgot about it when I added the fix > >> for PR 83671. > > > > get_range_strlen (the 2 argument one) is right now called: > > 3 times from builtins.c for -Wstringop-overflow > > once from gimple-ssa-sprintf.c for -Wformat-overflow > > once from tree-ssa-strlen.c for -Wstringop-truncation > > once from gimple-fold.c for gimple_fold_builtin_strlen value ranges > Presumably it's the last one that's causing problems and were we should > focus our effort. Sure. And that is what my patches do, not change anything for the warning cases and fix up the non-warning one. > > Looking at strlenopt-40.c testcase, in the test you clearly rely on > > fuzzy argument being set for the value ranges case > > (gimple_fold_builtin_strlen), otherwise many of the > > subtests would fail. > Which tests specifically? I did a real quick scan and didn't see > anything in there that depended on incorrect behavior for the call from > gimple_fold_builtin_strlen. BUt it was a quick scan and I could have > missed something. elim_global_arrays, elim_pointer_to_arrays etc. Basically, calling strlen on an array of known fixed length and checking that the upper bound of it is at most the array size minus 1. Jakub
On 02/20/2018 08:25 PM, Jeff Law wrote: > On 02/20/2018 04:59 PM, Martin Sebor wrote: >>> It would help if you explained why you think it is a good idea >>> ignoring the other phi arguments if you have one (or more) where you can >>> determine length. >> >> It's a heuristic that was meant just for the -Wformat-overflow >> warning. When making decisions that affect code generation it's >> obviously not correct to ignore the possibility that unknown >> arguments may be shorter than the minimum or longer than >> the maximum. The fuzzy argument was meant to differentiate >> between two got but I forgot about it when I added the fix >> for PR 83671. >> >> For GCC 8 I don't have a preference for how to fix this as long >> as it doesn't regress the warning tests. >> >> I think the ultimate solution (for GCC 9) may be to either >> disable the heuristic for code generation purposes (e.g., via >> another argument/flag) or provide a pointer argument to indicate >> to the caller that the minimum is based on known strings, and that >> the real minimum may be zero. > I'm still getting refamiliar with this code. But one thing that jumps > out immediately is how much this reminds me of the discussion we had > around 77608 -- where I argued that returning something that was not > conservatively correct was just asking for long term problems. > > I realize we're talking about different routines, but the concerns are > the same -- when we return something that is not conservatively correct > it's easy for someone to mistakenly use those results for code > generation purposes. > > The fuzzy stuff is in there to reduce the false positive rates and we're > not *supposed* to be using fuzzy results for code generation purposes, > but as I argued in 77608, it's easy to miss. > > I'll reiterate my desire to make this kind of mistake harder to make. I agree. I'll take care of it in stage 1. Martin
On February 21, 2018 4:25:14 AM GMT+01:00, Jeff Law <law@redhat.com> wrote: >On 02/20/2018 04:59 PM, Martin Sebor wrote: >>> It would help if you explained why you think it is a good idea >>> ignoring the other phi arguments if you have one (or more) where you >can >>> determine length. >> >> It's a heuristic that was meant just for the -Wformat-overflow >> warning. When making decisions that affect code generation it's >> obviously not correct to ignore the possibility that unknown >> arguments may be shorter than the minimum or longer than >> the maximum. The fuzzy argument was meant to differentiate >> between two got but I forgot about it when I added the fix >> for PR 83671. >> >> For GCC 8 I don't have a preference for how to fix this as long >> as it doesn't regress the warning tests. >> >> I think the ultimate solution (for GCC 9) may be to either >> disable the heuristic for code generation purposes (e.g., via >> another argument/flag) or provide a pointer argument to indicate >> to the caller that the minimum is based on known strings, and that >> the real minimum may be zero. >I'm still getting refamiliar with this code. But one thing that jumps >out immediately is how much this reminds me of the discussion we had >around 77608 -- where I argued that returning something that was not >conservatively correct was just asking for long term problems. > >I realize we're talking about different routines, but the concerns are >the same -- when we return something that is not conservatively correct >it's easy for someone to mistakenly use those results for code >generation purposes. > >The fuzzy stuff is in there to reduce the false positive rates and >we're >not *supposed* to be using fuzzy results for code generation purposes, >but as I argued in 77608, it's easy to miss. > >I'll reiterate my desire to make this kind of mistake harder to make. Can you simply provide that interface via a separately named API rather than overloading one that is also supposed to be used by optimization? Richard. >Jeff
--- gcc/gimple-fold.c.jj 2018-02-19 19:57:03.424279589 +0100 +++ gcc/gimple-fold.c 2018-02-20 16:21:34.583020305 +0100 @@ -1283,10 +1283,11 @@ gimple_fold_builtin_memset (gimple_stmt_ value of ARG in LENGTH[0] and LENGTH[1], respectively. If ARG is an SSA name variable, follow its use-def chains. When TYPE == 0, if LENGTH[1] is not equal to the length we determine or - if we are unable to determine the length or value, return False. + if we are unable to determine the length or value, return false. VISITED is a bitmap of visited variables. TYPE is 0 if string length should be obtained, 1 for maximum string - length and 2 for maximum value ARG can have. + length and 2 for maximum value ARG can have, 3 is like 1, but provide + conservatively correct rather than optimistic answer. When FUZZY is set and the length of a string cannot be determined, the function instead considers as the maximum possible length the size of a character array it may refer to. @@ -1302,9 +1303,8 @@ get_range_strlen (tree arg, tree length[ tree var, val = NULL_TREE; gimple *def_stmt; - /* The minimum and maximum length. The MAXLEN pointer stays unchanged - but MINLEN may be cleared during the execution of the function. */ - tree *minlen = length; + /* The minimum and maximum length. */ + tree *const minlen = length; tree *const maxlen = length + 1; if (TREE_CODE (arg) != SSA_NAME) @@ -1445,12 +1445,11 @@ get_range_strlen (tree arg, tree length[ if (!val) return false; - if (minlen - && (!*minlen - || (type > 0 - && TREE_CODE (*minlen) == INTEGER_CST - && TREE_CODE (val) == INTEGER_CST - && tree_int_cst_lt (val, *minlen)))) + if (!*minlen + || (type > 0 + && TREE_CODE (*minlen) == INTEGER_CST + && TREE_CODE (val) == INTEGER_CST + && tree_int_cst_lt (val, *minlen))) *minlen = val; if (*maxlen) @@ -1503,18 +1502,16 @@ get_range_strlen (tree arg, tree length[ { tree op2 = gimple_assign_rhs2 (def_stmt); tree op3 = gimple_assign_rhs3 (def_stmt); - return get_range_strlen (op2, length, visited, type, fuzzy, flexp) - && get_range_strlen (op3, length, visited, type, fuzzy, flexp); + return (get_range_strlen (op2, length, visited, type, fuzzy, flexp) + && get_range_strlen (op3, length, visited, type, fuzzy, + flexp)); } return false; case GIMPLE_PHI: - { - /* All the arguments of the PHI node must have the same constant - length. */ - unsigned i; - - for (i = 0; i < gimple_phi_num_args (def_stmt); i++) + /* All the arguments of the PHI node must have the same constant + length. */ + for (unsigned i = 0; i < gimple_phi_num_args (def_stmt); i++) { tree arg = gimple_phi_arg (def_stmt, i)->def; @@ -1529,13 +1526,12 @@ get_range_strlen (tree arg, tree length[ if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp)) { - if (fuzzy) + if (fuzzy && type != 3) *maxlen = build_all_ones_cst (size_type_node); else return false; } } - } return true; default: @@ -1554,7 +1550,10 @@ get_range_strlen (tree arg, tree length[ Return true if the range of the string lengths has been obtained from the upper bound of an array at the end of a struct. Such an array may hold a string that's longer than its upper bound - due to it being used as a poor-man's flexible array member. */ + due to it being used as a poor-man's flexible array member. + + This function should be only used for warning code, as it doesn't + handle PHIs in a conservatively correct way. */ bool get_range_strlen (tree arg, tree minmaxlen[2]) @@ -3533,8 +3532,12 @@ gimple_fold_builtin_strlen (gimple_stmt_ wide_int minlen; wide_int maxlen; - tree lenrange[2]; - if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange) + tree lenrange[2] = { NULL_TREE, NULL_TREE }; + bitmap visited = NULL; + bool flexarray = false; + if (get_range_strlen (gimple_call_arg (stmt, 0), lenrange, &visited, + 3, true, &flexarray) + && !flexarray && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST) { @@ -3554,6 +3557,9 @@ gimple_fold_builtin_strlen (gimple_stmt_ maxlen = wi::to_wide (max_object_size (), prec) - 2; } + if (visited) + BITMAP_FREE (visited); + if (minlen == maxlen) { lenrange[0] = force_gimple_operand_gsi (gsi, lenrange[0], true, NULL, --- gcc/testsuite/gcc.c-torture/execute/pr84478.c.jj 2018-02-20 16:32:00.683086212 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr84478.c 2018-02-20 16:31:33.497081640 +0100 @@ -0,0 +1,49 @@ +/* PR tree-optimization/84478 */ + +long poolptr; +unsigned char *strpool; +static const char *poolfilearr[] = { + "mu", + "", +#define A "x", +#define B A "xx", A A "xxx", A A A A A +#define C B B B B B B B B B B +#define D C C C C C C C C C C + D C C C C C C C B B B + ((void *)0) +}; + +__attribute__((noipa)) long +makestring (void) +{ + return 1; +} + +__attribute__((noipa)) long +loadpoolstrings (long spare_size) +{ + const char *s; + long g = 0; + int i = 0, j = 0; + while ((s = poolfilearr[j++])) + { + int l = __builtin_strlen (s); + i += l; + if (i >= spare_size) return 0; + while (l-- > 0) strpool[poolptr++] = *s++; + g = makestring (); + } + return g; +} + +int +main () +{ + strpool = __builtin_malloc (4000); + if (!strpool) + return 0; + asm volatile ("" : : : "memory"); + volatile int r = loadpoolstrings (4000); + __builtin_free (strpool); + return 0; +}