Message ID | 20130404181758.GV4201@tucnak.redhat.com |
---|---|
State | New |
Headers | show |
Jakub Jelinek <jakub@redhat.com> wrote: >Hi! > >As discussed on IRC, this patch allows us to recognize more patterns as >memset, see the testcase for what it can do. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Can you factor out a function that returns a proper QImode value if possible or NULL, and use it in both places? Thanks, Richard. >2013-04-04 Jakub Jelinek <jakub@redhat.com> > > * tree-loop-distribution.c (generate_memset_builtin): Only handle > integer_all_onesp as -1 val if TYPE_PRECISION is equal to mode >bitsize. > Use native_encode_expr if possible to compute val. > (classify_partition): Verify CONSTRUCTOR doesn't have any elts. > For QImode integers don't require anything about precision. Use > native_encode_expr to find out if the constant doesn't have repeated > bytes in it. > > * gcc.dg/pr56837.c: New test. > >--- gcc/tree-loop-distribution.c.jj 2013-04-04 15:03:28.000000000 +0200 >+++ gcc/tree-loop-distribution.c 2013-04-04 16:52:40.139875453 +0200 >@@ -331,11 +331,21 @@ generate_memset_builtin (struct loop *lo > || real_zerop (val) > || TREE_CODE (val) == CONSTRUCTOR) > val = integer_zero_node; >- else if (integer_all_onesp (val)) >+ else if (integer_all_onesp (val) >+ && (TYPE_PRECISION (TREE_TYPE (val)) >+ == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (val))))) > val = build_int_cst (integer_type_node, -1); > else > { >- if (TREE_CODE (val) == INTEGER_CST) >+ /* Handle constants like 0x15151515 and similarly >+ floating point constants etc. where all bytes are the same. 
*/ >+ unsigned char buf[64]; >+ int len; >+ if (CHAR_BIT == 8 >+ && BITS_PER_UNIT == 8 >+ && (len = native_encode_expr (val, buf, sizeof (buf))) != 0) >+ val = build_int_cst (integer_type_node, buf[0]); >+ else if (TREE_CODE (val) == INTEGER_CST) > val = fold_convert (integer_type_node, val); >else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE >(val))) > { >@@ -944,16 +954,28 @@ classify_partition (loop_p loop, struct > if (!(integer_zerop (rhs) > || real_zerop (rhs) > || (TREE_CODE (rhs) == CONSTRUCTOR >- && !TREE_CLOBBER_P (rhs)) >- || ((integer_all_onesp (rhs) >- || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) >- && (TYPE_MODE (TREE_TYPE (rhs)) >- == TYPE_MODE (unsigned_char_type_node)))) >- /* For stores of a non-zero value require that the precision >- of the value matches its actual size. */ >+ && !TREE_CLOBBER_P (rhs) >+ && CONSTRUCTOR_NELTS (rhs) == 0) >+ || (integer_all_onesp (rhs) > && (TYPE_PRECISION (TREE_TYPE (rhs)) >- == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs))))))) >- return; >+ == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs))))) >+ || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) >+ && (TYPE_MODE (TREE_TYPE (rhs)) >+ == TYPE_MODE (unsigned_char_type_node))))) >+ { >+ /* Handle constants like 0x15151515 and similarly >+ floating point constants etc. where all bytes are the same. */ >+ unsigned char buf[64]; >+ int i, len; >+ if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) >+ return; >+ len = native_encode_expr (rhs, buf, sizeof (buf)); >+ if (len == 0) >+ return; >+ for (i = 1; i < len; i++) >+ if (buf[i] != buf[0]) >+ return; >+ } > if (TREE_CODE (rhs) == SSA_NAME > && !SSA_NAME_IS_DEFAULT_DEF (rhs) > && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT >(rhs)))) >--- gcc/testsuite/gcc.dg/pr56837.c.jj 2013-04-04 17:37:58.458675152 >+0200 >+++ gcc/testsuite/gcc.dg/pr56837.c 2013-04-04 17:36:40.000000000 +0200 >@@ -0,0 +1,67 @@ >+/* Limit this test to selected targets with IEEE double, 8-byte long >long, >+ supported 4x int vectors, 4-byte int. 
*/ >+/* { dg-do compile { target { i?86-*-* x86_64-*-* powerpc*-*-* } } } >*/ >+/* { dg-options "-O3 -fdump-tree-optimized" } */ >+/* { dg-additional-options "-msse2" { target ia32 } } */ >+/* { dg-additional-options "-mvsx -maltivec" { target powerpc*-*-* } } >*/ >+ >+typedef int V __attribute__((__vector_size__ (16))); >+#define N 1024 >+double d[N]; >+long long int l[N]; >+_Bool b[N]; >+_Complex double c[N]; >+V v[N]; >+ >+void >+fd (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ d[i] = 747708026454360457216.0; >+} >+ >+void >+fl (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ l[i] = 0x7c7c7c7c7c7c7c7cULL; >+} >+ >+void >+fb (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ b[i] = 1; >+} >+ >+void >+fc (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ c[i] = 747708026454360457216.0 + 747708026454360457216.0i; >+} >+ >+void >+fv (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ v[i] = (V) { 0x12121212, 0x12121212, 0x12121212, 0x12121212 }; >+} >+ >+/* Look for >+ __builtin_memset (&d, 68, 8192); >+ __builtin_memset (&l, 124, 8192); >+ __builtin_memset (&b, 1, 1024); >+ __builtin_memset (&c, 68, 16384); >+ __builtin_memset (&v, 18, 16384); */ >+/* { dg-final { scan-tree-dump-times "memset ..d, 68, 8192.;" 1 >"optimized" } } */ >+/* { dg-final { scan-tree-dump-times "memset ..l, 124, 8192.;" 1 >"optimized" } } */ >+/* { dg-final { scan-tree-dump-times "memset ..b, 1, 1024.;" 1 >"optimized" } } */ >+/* { dg-final { scan-tree-dump-times "memset ..c, 68, 16384.;" 1 >"optimized" } } */ >+/* { dg-final { scan-tree-dump-times "memset ..v, 18, 16384.;" 1 >"optimized" } } */ >+/* { dg-final { cleanup-tree-dump "optimized" } } */ > > Jakub
--- gcc/tree-loop-distribution.c.jj 2013-04-04 15:03:28.000000000 +0200 +++ gcc/tree-loop-distribution.c 2013-04-04 16:52:40.139875453 +0200 @@ -331,11 +331,21 @@ generate_memset_builtin (struct loop *lo || real_zerop (val) || TREE_CODE (val) == CONSTRUCTOR) val = integer_zero_node; - else if (integer_all_onesp (val)) + else if (integer_all_onesp (val) + && (TYPE_PRECISION (TREE_TYPE (val)) + == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (val))))) val = build_int_cst (integer_type_node, -1); else { - if (TREE_CODE (val) == INTEGER_CST) + /* Handle constants like 0x15151515 and similarly + floating point constants etc. where all bytes are the same. */ + unsigned char buf[64]; + int len; + if (CHAR_BIT == 8 + && BITS_PER_UNIT == 8 + && (len = native_encode_expr (val, buf, sizeof (buf))) != 0) + val = build_int_cst (integer_type_node, buf[0]); + else if (TREE_CODE (val) == INTEGER_CST) val = fold_convert (integer_type_node, val); else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val))) { @@ -944,16 +954,28 @@ classify_partition (loop_p loop, struct if (!(integer_zerop (rhs) || real_zerop (rhs) || (TREE_CODE (rhs) == CONSTRUCTOR - && !TREE_CLOBBER_P (rhs)) - || ((integer_all_onesp (rhs) - || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) - && (TYPE_MODE (TREE_TYPE (rhs)) - == TYPE_MODE (unsigned_char_type_node)))) - /* For stores of a non-zero value require that the precision - of the value matches its actual size. */ + && !TREE_CLOBBER_P (rhs) + && CONSTRUCTOR_NELTS (rhs) == 0) + || (integer_all_onesp (rhs) && (TYPE_PRECISION (TREE_TYPE (rhs)) - == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs))))))) - return; + == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs))))) + || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) + && (TYPE_MODE (TREE_TYPE (rhs)) + == TYPE_MODE (unsigned_char_type_node))))) + { + /* Handle constants like 0x15151515 and similarly + floating point constants etc. where all bytes are the same. 
*/ + unsigned char buf[64]; + int i, len; + if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) + return; + len = native_encode_expr (rhs, buf, sizeof (buf)); + if (len == 0) + return; + for (i = 1; i < len; i++) + if (buf[i] != buf[0]) + return; + } if (TREE_CODE (rhs) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (rhs) && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) --- gcc/testsuite/gcc.dg/pr56837.c.jj 2013-04-04 17:37:58.458675152 +0200 +++ gcc/testsuite/gcc.dg/pr56837.c 2013-04-04 17:36:40.000000000 +0200 @@ -0,0 +1,67 @@ +/* Limit this test to selected targets with IEEE double, 8-byte long long, + supported 4x int vectors, 4-byte int. */ +/* { dg-do compile { target { i?86-*-* x86_64-*-* powerpc*-*-* } } } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ +/* { dg-additional-options "-msse2" { target ia32 } } */ +/* { dg-additional-options "-mvsx -maltivec" { target powerpc*-*-* } } */ + +typedef int V __attribute__((__vector_size__ (16))); +#define N 1024 +double d[N]; +long long int l[N]; +_Bool b[N]; +_Complex double c[N]; +V v[N]; + +void +fd (void) +{ + int i; + for (i = 0; i < N; i++) + d[i] = 747708026454360457216.0; +} + +void +fl (void) +{ + int i; + for (i = 0; i < N; i++) + l[i] = 0x7c7c7c7c7c7c7c7cULL; +} + +void +fb (void) +{ + int i; + for (i = 0; i < N; i++) + b[i] = 1; +} + +void +fc (void) +{ + int i; + for (i = 0; i < N; i++) + c[i] = 747708026454360457216.0 + 747708026454360457216.0i; +} + +void +fv (void) +{ + int i; + for (i = 0; i < N; i++) + v[i] = (V) { 0x12121212, 0x12121212, 0x12121212, 0x12121212 }; +} + +/* Look for + __builtin_memset (&d, 68, 8192); + __builtin_memset (&l, 124, 8192); + __builtin_memset (&b, 1, 1024); + __builtin_memset (&c, 68, 16384); + __builtin_memset (&v, 18, 16384); */ +/* { dg-final { scan-tree-dump-times "memset ..d, 68, 8192.;" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "memset ..l, 124, 8192.;" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "memset ..b, 1, 1024.;" 1 
"optimized" } } */ +/* { dg-final { scan-tree-dump-times "memset ..c, 68, 16384.;" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "memset ..v, 18, 16384.;" 1 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */