diff mbox series

Fix native_encode_expr and sccvn caller (PR tree-optimization/85257)

Message ID 20180406201437.GL8577@tucnak
State New
Headers show
Series Fix native_encode_expr and sccvn caller (PR tree-optimization/85257) | expand

Commit Message

Jakub Jelinek April 6, 2018, 8:14 p.m. UTC
Hi!

On the following testcase, we try to read from a huge VECTOR_CST that
doesn't fit into 64 bytes and read completely random number out of it.

The issue is that native_encode_expr has 2 modes of operation, when
called with 3 arguments, it is supposed to encode the whole object or
nothing (i.e. return 0 on failure or the whole size on success), and
when called with 4 arguments, it can encode just a portion thereof (is given
offset at which to start and returns the actually encoded length from that
spot, which can be smaller than the whole object's size).

sccvn was using the first mode, unfortunately native_encode_vector had a bug
where it the length happened to be exactly on the boundary between two
VECTOR_CST elements, it could return smaller len (thus surprising callers
which assumed 0 or everything).  This is fixed by the first hunk.

Though, in sccvn case, using the 3 argument native_encode_expr is
unnecessary, we know the offset and size we want to interpret from it,
so the second 2 hunks optimize it; this way, we can read even from the
256-byte long vector with just 64-byte buffer and optimize the testcase.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-04-06  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/85257
	* fold-const.c (native_encode_vector): If not all elts could fit
	and off is -1, return 0 rather than offset.
	* tree-ssa-sccvn.c (vn_reference_lookup_3): Pass
	(offseti - offset2) / BITS_PER_UNIT as 4th argument to
	native_encode_expr.  Verify len * BITS_PER_UNIT >= maxsizei.  Don't
	adjust buffer in native_interpret_expr call.

	* gcc.dg/pr85257.c: New test.


	Jakub

Comments

Richard Biener April 7, 2018, 6:21 a.m. UTC | #1
On April 6, 2018 10:14:37 PM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>On the following testcase, we try to read from a huge VECTOR_CST that
>doesn't fit into 64 bytes and read completely random number out of it.
>
>The issue is that native_encode_expr has 2 modes of operation, when
>called with 3 arguments, it is supposed to encode the whole object or
>nothing (i.e. return 0 on failure or the whole size on success), and
>when called with 4 arguments, it can encode just a portion thereof (is
>given
>offset at which to start and returns the actually encoded length from
>that
>spot, which can be smaller than the whole object's size).
>
>sccvn was using the first mode, unfortunately native_encode_vector had
>a bug
>where it the length happened to be exactly on the boundary between two
>VECTOR_CST elements, it could return smaller len (thus surprising
>callers
>which assumed 0 or everything).  This is fixed by the first hunk.
>
>Though, in sccvn case, using the 3 argument native_encode_expr is
>unnecessary, we know the offset and size we want to interpret from it,
>so the second 2 hunks optimize it; this way, we can read even from the
>256-byte long vector with just 64-byte buffer and optimize the
>testcase.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK. 

Richard. 

>2018-04-06  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/85257
>	* fold-const.c (native_encode_vector): If not all elts could fit
>	and off is -1, return 0 rather than offset.
>	* tree-ssa-sccvn.c (vn_reference_lookup_3): Pass
>	(offseti - offset2) / BITS_PER_UNIT as 4th argument to
>	native_encode_expr.  Verify len * BITS_PER_UNIT >= maxsizei.  Don't
>	adjust buffer in native_interpret_expr call.
>
>	* gcc.dg/pr85257.c: New test.
>
>--- gcc/fold-const.c.jj	2018-04-06 13:23:30.622190581 +0200
>+++ gcc/fold-const.c	2018-04-06 17:00:52.810460085 +0200
>@@ -7307,7 +7307,7 @@ native_encode_vector (const_tree expr, u
> 	return 0;
>       offset += res;
>       if (offset >= len)
>-	return offset;
>+	return (off == -1 && i < count - 1) ? 0 : offset;
>       if (off != -1)
> 	off = 0;
>     }
>--- gcc/tree-ssa-sccvn.c.jj	2018-04-04 10:23:59.968294555 +0200
>+++ gcc/tree-ssa-sccvn.c	2018-04-06 17:07:44.633489528 +0200
>@@ -2038,8 +2038,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree
> 	  if (TREE_CODE (rhs) == SSA_NAME)
> 	    rhs = SSA_VAL (rhs);
> 	  len = native_encode_expr (gimple_assign_rhs1 (def_stmt),
>-				    buffer, sizeof (buffer));
>-	  if (len > 0)
>+				    buffer, sizeof (buffer),
>+				    (offseti - offset2) / BITS_PER_UNIT);
>+	  if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
> 	    {
> 	      tree type = vr->type;
> 	      /* Make sure to interpret in a type that has a range
>@@ -2048,10 +2049,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
> 		  && maxsizei != TYPE_PRECISION (vr->type))
> 		type = build_nonstandard_integer_type (maxsizei,
> 						       TYPE_UNSIGNED (type));
>-	      tree val = native_interpret_expr (type,
>-						buffer
>-						+ ((offseti - offset2)
>-						   / BITS_PER_UNIT),
>+	      tree val = native_interpret_expr (type, buffer,
> 						maxsizei / BITS_PER_UNIT);
> 	      /* If we chop off bits because the types precision doesn't
> 		 match the memory access size this is ok when optimizing
>--- gcc/testsuite/gcc.dg/pr85257.c.jj	2018-04-06 17:10:42.710500305
>+0200
>+++ gcc/testsuite/gcc.dg/pr85257.c	2018-04-06 17:10:11.621498423 +0200
>@@ -0,0 +1,20 @@
>+/* PR tree-optimization/85257 */
>+/* { dg-do run { target int128 } } */
>+/* { dg-options "-O2 -fno-tree-ccp" } */
>+
>+typedef __int128 V __attribute__ ((__vector_size__ (16 * sizeof
>(__int128))));
>+
>+__int128 __attribute__ ((noipa))
>+foo (void)
>+{
>+  V v = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
>+  return v[5];
>+}
>+
>+int
>+main ()
>+{
>+  if (foo () != 6)
>+    __builtin_abort ();
>+  return 0;
>+}
>
>	Jakub
diff mbox series

Patch

--- gcc/fold-const.c.jj	2018-04-06 13:23:30.622190581 +0200
+++ gcc/fold-const.c	2018-04-06 17:00:52.810460085 +0200
@@ -7307,7 +7307,7 @@  native_encode_vector (const_tree expr, u
 	return 0;
       offset += res;
       if (offset >= len)
-	return offset;
+	return (off == -1 && i < count - 1) ? 0 : offset;
       if (off != -1)
 	off = 0;
     }
--- gcc/tree-ssa-sccvn.c.jj	2018-04-04 10:23:59.968294555 +0200
+++ gcc/tree-ssa-sccvn.c	2018-04-06 17:07:44.633489528 +0200
@@ -2038,8 +2038,9 @@  vn_reference_lookup_3 (ao_ref *ref, tree
 	  if (TREE_CODE (rhs) == SSA_NAME)
 	    rhs = SSA_VAL (rhs);
 	  len = native_encode_expr (gimple_assign_rhs1 (def_stmt),
-				    buffer, sizeof (buffer));
-	  if (len > 0)
+				    buffer, sizeof (buffer),
+				    (offseti - offset2) / BITS_PER_UNIT);
+	  if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
 	    {
 	      tree type = vr->type;
 	      /* Make sure to interpret in a type that has a range
@@ -2048,10 +2049,7 @@  vn_reference_lookup_3 (ao_ref *ref, tree
 		  && maxsizei != TYPE_PRECISION (vr->type))
 		type = build_nonstandard_integer_type (maxsizei,
 						       TYPE_UNSIGNED (type));
-	      tree val = native_interpret_expr (type,
-						buffer
-						+ ((offseti - offset2)
-						   / BITS_PER_UNIT),
+	      tree val = native_interpret_expr (type, buffer,
 						maxsizei / BITS_PER_UNIT);
 	      /* If we chop off bits because the types precision doesn't
 		 match the memory access size this is ok when optimizing
--- gcc/testsuite/gcc.dg/pr85257.c.jj	2018-04-06 17:10:42.710500305 +0200
+++ gcc/testsuite/gcc.dg/pr85257.c	2018-04-06 17:10:11.621498423 +0200
@@ -0,0 +1,20 @@ 
+/* PR tree-optimization/85257 */
+/* { dg-do run { target int128 } } */
+/* { dg-options "-O2 -fno-tree-ccp" } */
+
+typedef __int128 V __attribute__ ((__vector_size__ (16 * sizeof (__int128))));
+
+__int128 __attribute__ ((noipa))
+foo (void)
+{
+  V v = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+  return v[5];
+}
+
+int
+main ()
+{
+  if (foo () != 6)
+    __builtin_abort ();
+  return 0;
+}