diff mbox

Fix bswap load optimization on big-endian (PR tree-optimization/65215)

Message ID 20150226190748.GF1746@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Feb. 26, 2015, 7:07 p.m. UTC
Hi!

If we cast a wider memory load on big endian to a narrower type and then
byteswap that, we can't load the narrower object from the
address of the larger original load, because it contains the MS bytes,
rather than LS bytes.

Fixed thusly, bootstrapped/regtested on x86_64-linux, i686-linux,
ppc64-linux and ppc64le-linux.  Ok for trunk?

2015-02-26  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/65215
	* tree-ssa-math-opts.c (find_bswap_or_nop_load): Return false
	for PDP endian targets.
	(perform_symbolic_merge, find_bswap_or_nop_1, find_bswap_or_nop):
	Fix up formatting issues.
	(bswap_replace): Likewise.  For BYTES_BIG_ENDIAN, if the final access
	size is smaller than the original, adjust MEM_REF offset by the
	difference of sizes.  Use is_gimple_mem_ref_addr instead of
	is_gimple_min_invariant test to avoid adding address temporaries.

	* gcc.c-torture/execute/pr65215-1.c: New test.
	* gcc.c-torture/execute/pr65215-2.c: New test.
	* gcc.c-torture/execute/pr65215-3.c: New test.
	* gcc.c-torture/execute/pr65215-4.c: New test.
	* gcc.c-torture/execute/pr65215-5.c: New test.


	Jakub

Comments

Richard Biener Feb. 26, 2015, 8:28 p.m. UTC | #1
On February 26, 2015 8:07:48 PM CET, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>If we cast a wider memory load on big endian to a narrower type and
>then
>byteswap that, we can't load the narrower object from the
>address of the larger original load, because it contains the MS bytes,
>rather than LS bytes.
>
>Fixed thusly, bootstrapped/regtested on x86_64-linux, i686-linux,
>ppc64-linux and ppc64le-linux.  Ok for trunk?

OK.

Thanks,
Richard.

>2015-02-26  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/65215
>	* tree-ssa-math-opts.c (find_bswap_or_nop_load): Return false
>	for PDP endian targets.
>	(perform_symbolic_merge, find_bswap_or_nop_1, find_bswap_or_nop):
>	Fix up formatting issues.
>	(bswap_replace): Likewise.  For BYTES_BIG_ENDIAN, if the final access
>	size is smaller than the original, adjust MEM_REF offset by the
>	difference of sizes.  Use is_gimple_mem_ref_addr instead of
>	is_gimple_min_invariant test to avoid adding address temporaries.
>
>	* gcc.c-torture/execute/pr65215-1.c: New test.
>	* gcc.c-torture/execute/pr65215-2.c: New test.
>	* gcc.c-torture/execute/pr65215-3.c: New test.
>	* gcc.c-torture/execute/pr65215-4.c: New test.
>	* gcc.c-torture/execute/pr65215-5.c: New test.
>
>--- gcc/tree-ssa-math-opts.c.jj	2015-01-28 21:24:56.000000000 +0100
>+++ gcc/tree-ssa-math-opts.c	2015-02-26 11:16:01.062024749 +0100
>@@ -1780,6 +1780,10 @@ find_bswap_or_nop_load (gimple stmt, tre
>   int unsignedp, volatilep;
>   tree offset, base_addr;
> 
>+  /* Not prepared to handle PDP endian.  */
>+  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
>+    return false;
>+
>   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
>     return false;
> 
>@@ -1860,8 +1864,8 @@ perform_symbolic_merge (gimple source_st
> 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
> 	return NULL;
> 
>-      if (!n1->offset != !n2->offset ||
>-          (n1->offset && !operand_equal_p (n1->offset, n2->offset,
>0)))
>+      if (!n1->offset != !n2->offset
>+	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
> 	return NULL;
> 
>       if (n1->bytepos < n2->bytepos)
>@@ -1912,8 +1916,8 @@ perform_symbolic_merge (gimple source_st
>       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
>       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
> 	{
>-	  unsigned marker =
>-	    (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
>+	  unsigned marker
>+	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
> 	  if (marker && marker != MARKER_BYTE_UNKNOWN)
> 	    toinc_n_ptr->n += inc;
> 	}
>@@ -2032,7 +2036,7 @@ find_bswap_or_nop_1 (gimple stmt, struct
> 	case RSHIFT_EXPR:
> 	case LROTATE_EXPR:
> 	case RROTATE_EXPR:
>-	  if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
>+	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
> 	    return NULL;
> 	  break;
> 	CASE_CONVERT:
>@@ -2104,12 +2108,12 @@ find_bswap_or_nop_1 (gimple stmt, struct
> 	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
> 	    return NULL;
> 
>-	  if (!n1.vuse != !n2.vuse ||
>-	  (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
>+	  if (!n1.vuse != !n2.vuse
>+	      || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
> 	    return NULL;
> 
>-	  source_stmt =
>-	    perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
>+	  source_stmt
>+	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2,
>n);
> 
> 	  if (!source_stmt)
> 	    return NULL;
>@@ -2153,12 +2157,12 @@ find_bswap_or_nop (gimple stmt, struct s
> in libgcc, and for initial shift/and operation of the src operand.  */
>   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
>   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
>-  source_stmt =  find_bswap_or_nop_1 (stmt, n, limit);
>+  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
> 
>   if (!source_stmt)
>     return NULL;
> 
>-  /* Find real size of result (highest non zero byte).  */
>+  /* Find real size of result (highest non-zero byte).  */
>   if (n->base_addr)
>     {
>       int rsize;
>@@ -2261,8 +2265,30 @@ bswap_replace (gimple cur_stmt, gimple s
>       tree load_offset_ptr, aligned_load_type;
>       gimple addr_stmt, load_stmt;
>       unsigned align;
>+      HOST_WIDE_INT load_offset = 0;
> 
>       align = get_object_alignment (src);
>+      /* If the new access is smaller than the original one, we need
>+	 to perform big endian adjustment.  */
>+      if (BYTES_BIG_ENDIAN)
>+	{
>+	  HOST_WIDE_INT bitsize, bitpos;
>+	  machine_mode mode;
>+	  int unsignedp, volatilep;
>+	  tree offset;
>+
>+	  get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
>+			       &unsignedp, &volatilep, false);
>+	  if (n->range < (unsigned HOST_WIDE_INT) bitsize)
>+	    {
>+	      load_offset = (bitsize - n->range) / BITS_PER_UNIT;
>+	      unsigned HOST_WIDE_INT l
>+		= (load_offset * BITS_PER_UNIT) & (align - 1);
>+	      if (l)
>+		align = l & -l;
>+	    }
>+	}
>+
>       if (bswap
> 	  && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
> 	  && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
>@@ -2274,10 +2300,10 @@ bswap_replace (gimple cur_stmt, gimple s
>       gsi_move_before (&gsi, &gsi_ins);
>       gsi = gsi_for_stmt (cur_stmt);
> 
>-      /*  Compute address to load from and cast according to the size
>-	  of the load.  */
>+      /* Compute address to load from and cast according to the size
>+	 of the load.  */
>       addr_expr = build_fold_addr_expr (unshare_expr (src));
>-      if (is_gimple_min_invariant (addr_expr))
>+      if (is_gimple_mem_ref_addr (addr_expr))
> 	addr_tmp = addr_expr;
>       else
> 	{
>@@ -2291,7 +2317,7 @@ bswap_replace (gimple cur_stmt, gimple s
>       aligned_load_type = load_type;
>       if (align < TYPE_ALIGN (load_type))
> 	aligned_load_type = build_aligned_type (load_type, align);
>-      load_offset_ptr = build_int_cst (n->alias_set, 0);
>+      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
>       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
> 			      load_offset_ptr);
> 
>@@ -2328,7 +2354,7 @@ bswap_replace (gimple cur_stmt, gimple s
> 	    {
> 	      fprintf (dump_file,
> 		       "%d bit load in target endianness found at: ",
>-		       (int)n->range);
>+		       (int) n->range);
> 	      print_gimple_stmt (dump_file, cur_stmt, 0, 0);
> 	    }
> 	  return true;
>@@ -2395,7 +2421,7 @@ bswap_replace (gimple cur_stmt, gimple s
>   if (dump_file)
>     {
>       fprintf (dump_file, "%d bit bswap implementation found at: ",
>-	       (int)n->range);
>+	       (int) n->range);
>       print_gimple_stmt (dump_file, cur_stmt, 0, 0);
>     }
> 
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-1.c.jj	2015-02-26
>10:46:29.102441519 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-1.c	2015-02-26
>10:44:39.000000000 +0100
>@@ -0,0 +1,24 @@
>+/* PR tree-optimization/65215 */
>+
>+static inline unsigned int
>+foo (unsigned int x)
>+{
>+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x
><< 24);
>+}
>+
>+__attribute__((noinline, noclone)) unsigned int
>+bar (unsigned long long *x)
>+{
>+  return foo (*x);
>+}
>+
>+int
>+main ()
>+{
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof
>(unsigned long long) != 8)
>+    return 0;
>+  unsigned long long l = foo (0xdeadbeefU) | 0xfeedbea800000000ULL;
>+  if (bar (&l) != 0xdeadbeefU)
>+    __builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-2.c.jj	2015-02-26
>10:46:31.524401403 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-2.c	2015-02-26
>10:45:15.000000000 +0100
>@@ -0,0 +1,24 @@
>+/* PR tree-optimization/65215 */
>+
>+static inline unsigned int
>+foo (unsigned int x)
>+{
>+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x
><< 24);
>+}
>+
>+__attribute__((noinline, noclone)) unsigned long long
>+bar (unsigned long long *x)
>+{
>+  return ((unsigned long long) foo (*x) << 32) | foo (*x >> 32);
>+}
>+
>+int
>+main ()
>+{
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof
>(unsigned long long) != 8)
>+    return 0;
>+  unsigned long long l = foo (0xfeedbea8U) | ((unsigned long long) foo
>(0xdeadbeefU) << 32);
>+  if (bar (&l) != 0xfeedbea8deadbeefULL)
>+    __builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-3.c.jj	2015-02-26
>10:46:33.463369288 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-3.c	2015-02-26
>10:45:37.000000000 +0100
>@@ -0,0 +1,31 @@
>+/* PR tree-optimization/65215 */
>+
>+struct S { unsigned long long l1 : 24, l2 : 8, l3 : 32; };
>+
>+static inline unsigned int
>+foo (unsigned int x)
>+{
>+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x
><< 24);
>+}
>+
>+__attribute__((noinline, noclone)) unsigned long long
>+bar (struct S *x)
>+{
>+  unsigned long long x1 = foo (((unsigned int) x->l1 << 8) | x->l2);
>+  unsigned long long x2 = foo (x->l3);
>+  return (x2 << 32) | x1;
>+}
>+
>+int
>+main ()
>+{
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof
>(unsigned long long) != 8)
>+    return 0;
>+  struct S s = { 0xdeadbeU, 0xefU, 0xfeedbea8U };
>+  unsigned long long l = bar (&s);
>+  if (foo (l >> 32) != s.l3
>+      || (foo (l) >> 8) != s.l1
>+      || (foo (l) & 0xff) != s.l2)
>+    __builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-4.c.jj	2015-02-26
>10:46:35.438336576 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-4.c	2015-02-26
>10:45:46.000000000 +0100
>@@ -0,0 +1,27 @@
>+/* PR tree-optimization/65215 */
>+
>+struct S { unsigned long long l1 : 48; };
>+
>+static inline unsigned int
>+foo (unsigned int x)
>+{
>+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x
><< 24);
>+}
>+
>+__attribute__((noinline, noclone)) unsigned int
>+bar (struct S *x)
>+{
>+  return foo (x->l1);
>+}
>+
>+int
>+main ()
>+{
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof
>(unsigned long long) != 8)
>+    return 0;
>+  struct S s;
>+  s.l1 = foo (0xdeadbeefU) | (0xfeedULL << 32);
>+  if (bar (&s) != 0xdeadbeefU)
>+    __builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr65215-5.c.jj	2015-02-26
>11:14:44.664298719 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65215-5.c	2015-02-26
>11:12:27.000000000 +0100
>@@ -0,0 +1,27 @@
>+/* PR tree-optimization/65215 */
>+
>+__attribute__((noinline, noclone)) unsigned int
>+foo (unsigned char *p)
>+{
>+  return ((unsigned int) p[0] << 24) | (p[1] << 16) | (p[2] << 8) |
>p[3];
>+}
>+
>+__attribute__((noinline, noclone)) unsigned int
>+bar (unsigned char *p)
>+{
>+  return ((unsigned int) p[3] << 24) | (p[2] << 16) | (p[1] << 8) |
>p[0];
>+}
>+
>+struct S { unsigned int a; unsigned char b[5]; };
>+
>+int
>+main ()
>+{
>+  struct S s = { 1, { 2, 3, 4, 5, 6 } };
>+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4)
>+    return 0;
>+  if (foo (&s.b[1]) != 0x03040506U
>+      || bar (&s.b[1]) != 0x06050403U)
>+    __builtin_abort ();
>+  return 0;
>+}
>
>	Jakub
diff mbox

Patch

--- gcc/tree-ssa-math-opts.c.jj	2015-01-28 21:24:56.000000000 +0100
+++ gcc/tree-ssa-math-opts.c	2015-02-26 11:16:01.062024749 +0100
@@ -1780,6 +1780,10 @@  find_bswap_or_nop_load (gimple stmt, tre
   int unsignedp, volatilep;
   tree offset, base_addr;
 
+  /* Not prepared to handle PDP endian.  */
+  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+    return false;
+
   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
     return false;
 
@@ -1860,8 +1864,8 @@  perform_symbolic_merge (gimple source_st
 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
 	return NULL;
 
-      if (!n1->offset != !n2->offset ||
-          (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
+      if (!n1->offset != !n2->offset
+	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
 	return NULL;
 
       if (n1->bytepos < n2->bytepos)
@@ -1912,8 +1916,8 @@  perform_symbolic_merge (gimple source_st
       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
 	{
-	  unsigned marker =
-	    (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
+	  unsigned marker
+	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
 	  if (marker && marker != MARKER_BYTE_UNKNOWN)
 	    toinc_n_ptr->n += inc;
 	}
@@ -2032,7 +2036,7 @@  find_bswap_or_nop_1 (gimple stmt, struct
 	case RSHIFT_EXPR:
 	case LROTATE_EXPR:
 	case RROTATE_EXPR:
-	  if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
+	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
 	    return NULL;
 	  break;
 	CASE_CONVERT:
@@ -2104,12 +2108,12 @@  find_bswap_or_nop_1 (gimple stmt, struct
 	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
 	    return NULL;
 
-	  if (!n1.vuse != !n2.vuse ||
-	  (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
+	  if (!n1.vuse != !n2.vuse
+	      || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
 	    return NULL;
 
-	  source_stmt =
-	    perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
+	  source_stmt
+	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
 
 	  if (!source_stmt)
 	    return NULL;
@@ -2153,12 +2157,12 @@  find_bswap_or_nop (gimple stmt, struct s
      in libgcc, and for initial shift/and operation of the src operand.  */
   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
-  source_stmt =  find_bswap_or_nop_1 (stmt, n, limit);
+  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
 
   if (!source_stmt)
     return NULL;
 
-  /* Find real size of result (highest non zero byte).  */
+  /* Find real size of result (highest non-zero byte).  */
   if (n->base_addr)
     {
       int rsize;
@@ -2261,8 +2265,30 @@  bswap_replace (gimple cur_stmt, gimple s
       tree load_offset_ptr, aligned_load_type;
       gimple addr_stmt, load_stmt;
       unsigned align;
+      HOST_WIDE_INT load_offset = 0;
 
       align = get_object_alignment (src);
+      /* If the new access is smaller than the original one, we need
+	 to perform big endian adjustment.  */
+      if (BYTES_BIG_ENDIAN)
+	{
+	  HOST_WIDE_INT bitsize, bitpos;
+	  machine_mode mode;
+	  int unsignedp, volatilep;
+	  tree offset;
+
+	  get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
+			       &unsignedp, &volatilep, false);
+	  if (n->range < (unsigned HOST_WIDE_INT) bitsize)
+	    {
+	      load_offset = (bitsize - n->range) / BITS_PER_UNIT;
+	      unsigned HOST_WIDE_INT l
+		= (load_offset * BITS_PER_UNIT) & (align - 1);
+	      if (l)
+		align = l & -l;
+	    }
+	}
+
       if (bswap
 	  && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
 	  && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
@@ -2274,10 +2300,10 @@  bswap_replace (gimple cur_stmt, gimple s
       gsi_move_before (&gsi, &gsi_ins);
       gsi = gsi_for_stmt (cur_stmt);
 
-      /*  Compute address to load from and cast according to the size
-	  of the load.  */
+      /* Compute address to load from and cast according to the size
+	 of the load.  */
       addr_expr = build_fold_addr_expr (unshare_expr (src));
-      if (is_gimple_min_invariant (addr_expr))
+      if (is_gimple_mem_ref_addr (addr_expr))
 	addr_tmp = addr_expr;
       else
 	{
@@ -2291,7 +2317,7 @@  bswap_replace (gimple cur_stmt, gimple s
       aligned_load_type = load_type;
       if (align < TYPE_ALIGN (load_type))
 	aligned_load_type = build_aligned_type (load_type, align);
-      load_offset_ptr = build_int_cst (n->alias_set, 0);
+      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
 			      load_offset_ptr);
 
@@ -2328,7 +2354,7 @@  bswap_replace (gimple cur_stmt, gimple s
 	    {
 	      fprintf (dump_file,
 		       "%d bit load in target endianness found at: ",
-		       (int)n->range);
+		       (int) n->range);
 	      print_gimple_stmt (dump_file, cur_stmt, 0, 0);
 	    }
 	  return true;
@@ -2395,7 +2421,7 @@  bswap_replace (gimple cur_stmt, gimple s
   if (dump_file)
     {
       fprintf (dump_file, "%d bit bswap implementation found at: ",
-	       (int)n->range);
+	       (int) n->range);
       print_gimple_stmt (dump_file, cur_stmt, 0, 0);
     }
 
--- gcc/testsuite/gcc.c-torture/execute/pr65215-1.c.jj	2015-02-26 10:46:29.102441519 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-1.c	2015-02-26 10:44:39.000000000 +0100
@@ -0,0 +1,24 @@ 
+/* PR tree-optimization/65215 */
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (unsigned long long *x)
+{
+  return foo (*x);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  unsigned long long l = foo (0xdeadbeefU) | 0xfeedbea800000000ULL;
+  if (bar (&l) != 0xdeadbeefU)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-2.c.jj	2015-02-26 10:46:31.524401403 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-2.c	2015-02-26 10:45:15.000000000 +0100
@@ -0,0 +1,24 @@ 
+/* PR tree-optimization/65215 */
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned long long
+bar (unsigned long long *x)
+{
+  return ((unsigned long long) foo (*x) << 32) | foo (*x >> 32);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  unsigned long long l = foo (0xfeedbea8U) | ((unsigned long long) foo (0xdeadbeefU) << 32);
+  if (bar (&l) != 0xfeedbea8deadbeefULL)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-3.c.jj	2015-02-26 10:46:33.463369288 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-3.c	2015-02-26 10:45:37.000000000 +0100
@@ -0,0 +1,31 @@ 
+/* PR tree-optimization/65215 */
+
+struct S { unsigned long long l1 : 24, l2 : 8, l3 : 32; };
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned long long
+bar (struct S *x)
+{
+  unsigned long long x1 = foo (((unsigned int) x->l1 << 8) | x->l2);
+  unsigned long long x2 = foo (x->l3);
+  return (x2 << 32) | x1;
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  struct S s = { 0xdeadbeU, 0xefU, 0xfeedbea8U };
+  unsigned long long l = bar (&s);
+  if (foo (l >> 32) != s.l3
+      || (foo (l) >> 8) != s.l1
+      || (foo (l) & 0xff) != s.l2)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-4.c.jj	2015-02-26 10:46:35.438336576 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-4.c	2015-02-26 10:45:46.000000000 +0100
@@ -0,0 +1,27 @@ 
+/* PR tree-optimization/65215 */
+
+struct S { unsigned long long l1 : 48; };
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (struct S *x)
+{
+  return foo (x->l1);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  struct S s;
+  s.l1 = foo (0xdeadbeefU) | (0xfeedULL << 32);
+  if (bar (&s) != 0xdeadbeefU)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-5.c.jj	2015-02-26 11:14:44.664298719 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-5.c	2015-02-26 11:12:27.000000000 +0100
@@ -0,0 +1,27 @@ 
+/* PR tree-optimization/65215 */
+
+__attribute__((noinline, noclone)) unsigned int
+foo (unsigned char *p)
+{
+  return ((unsigned int) p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (unsigned char *p)
+{
+  return ((unsigned int) p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
+}
+
+struct S { unsigned int a; unsigned char b[5]; };
+
+int
+main ()
+{
+  struct S s = { 1, { 2, 3, 4, 5, 6 } };
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4)
+    return 0;
+  if (foo (&s.b[1]) != 0x03040506U
+      || bar (&s.b[1]) != 0x06050403U)
+    __builtin_abort ();
+  return 0;
+}