
Fix unaligned access when predictive commoning (PR 71083)

Message ID AM4PR0701MB2162D9D641FA190732D625ADE41B0@AM4PR0701MB2162.eurprd07.prod.outlook.com
State New

Commit Message

Bernd Edlinger Aug. 8, 2016, 7:56 p.m. UTC
Hi!


As described in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71083
we generate unaligned accesses because tree-predcom rewrites
bitfield and packed accesses in a way that loses the proper
alignment information.
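
To illustrate what the lost alignment amounts to (a hypothetical
example mirroring the packed case in the test below, not code taken
from the PR): on a strict-alignment target the rewritten access
roughly behaves like the cast in bad_load, while keeping the
component reference, as good_load does, preserves the packed
(1-byte) alignment.

  struct __attribute__ ((packed)) s { char x; unsigned short base; };

  unsigned short
  bad_load (struct s *p)
  {
    /* A plain 2-byte load from an address that is only 1-byte
       aligned: this is what the dropped alignment info permits.  */
    return *(unsigned short *) ((char *) p + 1);
  }

  unsigned short
  good_load (struct s *p)
  {
    /* The component reference keeps the packed alignment.  */
    return p->base;
  }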

The attached patch fixes this by re-using the gimple memory
expression from the original access.

Because I was not sure whether any non-constant array references
would still be valid at the point where ref_at_iteration is expanded,
I set these to the array lower bound; the DR_OFFSET already contains
the folded value of all non-constant array references.

The patch compensates for the constant offset from the outer
reference to the inner reference, and replaces the inner
reference instead of the outer reference.


Boot-strapped and reg-tested on x86_64-linux-gnu.
OK for trunk?


Thanks
Bernd.
2016-08-08  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	PR tree-optimization/71083
	* tree-predcom.c (ref_at_iteration): Rewrite the inner reference.

testsuite:
2016-08-08  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	PR tree-optimization/71083
	* gcc.c-torture/execute/pr71083.c: New test.

Comments

Richard Biener Aug. 9, 2016, 7:29 a.m. UTC | #1
On Mon, 8 Aug 2016, Bernd Edlinger wrote:

> Hi!
> 
> 
> As described in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71083
> we generate unaligned accesses because tree-predcom rewrites
> bitfield and packed accesses in a way that loses the proper
> alignment information.
> 
> The attached patch fixes this by re-using the gimple memory
> expression from the original access.
> 
> Because I was not sure whether any non-constant array references
> would still be valid at the point where ref_at_iteration is expanded,
> I set these to the array lower bound; the DR_OFFSET already contains
> the folded value of all non-constant array references.
> 
> The patch compensates for the constant offset from the outer
> reference to the inner reference, and replaces the inner
> reference instead of the outer reference.
> 
> 
> Boot-strapped and reg-tested on x86_64-linux-gnu.
> OK for trunk?

I don't think what you do is correct.  Consider

 for (i)
  for (j)
   {
     ... = a[i][j];
   }

predcom considers innermost loops only, and thus the DRs will
be analyzed with respect to that, which means DR_BASE_ADDRESS
is &a[i][0], but you end up generating (*(&a) + i * ..)[0][0],
which is at best suspicious with respect to further analysis
by data-ref and TBAA.  Also note that this may get the alignment
wrong as well: changing [i] to [0] may lead to false alignment
(consider a [2][2] char array aligned to 4 bytes).
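
Concretely, for an illustrative declaration like

  /* The array itself is 4-byte aligned, so &a[0][0] is 4-byte
     aligned, but &a[1][0] sits 2 bytes in and is only 2-byte
     aligned.  Folding a[i][0] into a[0][0] therefore overstates
     the alignment whenever i is odd.  */
  char a[2][2] __attribute__ ((aligned (4)));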

Your patch goes halfway back to the code we had in the past
(looking at the 4.3 branch right now), which had numerous
issues (I don't remember them exactly).

I believe that if we'd handle bitfields by

  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (DR_REF (dr), 1)))
    ref = TREE_OPERAND (DR_REF (dr), 0);
  else
    ref = DR_REF (dr);
  unsigned align = get_object_alignment (ref);

and use the type / alignment of that ref for the built MEM_REF
(with coff adjusted by the split bitfield component-ref offset)
and build a COMPONENT_REF around the MEM_REF to handle the
bitfield part, this should work ok.
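
A rough, untested sketch of that direction (names reused from the
existing ref_at_iteration; it assumes the bitfield starts on a byte
boundary and that the containing COMPONENT_REF's byte offset is a
compile-time constant, which the coff adjustment below relies on):

  tree ref = DR_REF (dr);
  tree field = NULL_TREE;
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (ref, 1)))
    {
      field = TREE_OPERAND (ref, 1);
      ref = TREE_OPERAND (ref, 0);
      /* DR_INIT/DR_OFFSET already contain the byte-granular part of
         the bitfield's offset, so subtract it again for the MEM_REF
         built on the containing object.  */
      unsigned HOST_WIDE_INT boff
        = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field)) / BITS_PER_UNIT
          + tree_to_uhwi (component_ref_field_offset (DR_REF (dr)));
      coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
    }
  tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
  /* Carry the alignment of the underlying object on the MEM_REF type.  */
  tree type = build_aligned_type (TREE_TYPE (ref),
                                  get_object_alignment (ref));
  tree mem = build2 (MEM_REF, type, addr, alias_ptr);
  return field ? build3 (COMPONENT_REF, TREE_TYPE (DR_REF (dr)),
                         mem, field, NULL_TREE)
               : mem;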

Richard.

> 
> Thanks
> Bernd.
>

Patch

Index: gcc/tree-predcom.c
===================================================================
--- gcc/tree-predcom.c	(revision 239193)
+++ gcc/tree-predcom.c	(working copy)
@@ -1364,11 +1364,33 @@  replace_ref_with (gimple *stmt, tree new_tree, boo
 /* Returns a memory reference to DR in the ITER-th iteration of
    the loop it was analyzed in.  Append init stmts to STMTS.  */
 
-static tree 
+static tree
 ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
 {
   tree off = DR_OFFSET (dr);
   tree coff = DR_INIT (dr);
+  tree ref = unshare_expr (DR_REF (dr));
+  tree *exp = &ref;
+  while (handled_component_p (*exp))
+    {
+      switch (TREE_CODE (*exp))
+	{
+	  case ARRAY_REF:
+	  case ARRAY_RANGE_REF:
+	    if (!cst_and_fits_in_hwi (TREE_OPERAND (*exp, 1)))
+	      TREE_OPERAND (*exp, 1) = array_ref_low_bound (*exp);
+	    break;
+	  default:
+	    break;
+	}
+      exp = &TREE_OPERAND (*exp, 0);
+    }
+  HOST_WIDE_INT bitsize, bitpos;
+  tree offset;
+  machine_mode mode;
+  int unsignedp, reversep, volatilep = 0;
+  get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+		       &unsignedp, &reversep, &volatilep);
   if (iter == 0)
     ;
   else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
@@ -1377,27 +1399,16 @@  ref_at_iteration (data_reference_p dr, int iter, g
   else
     off = size_binop (PLUS_EXPR, off,
 		      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
+  if (offset)
+    off = size_binop (MINUS_EXPR, off, offset);
+  if (bitpos)
+    coff = size_binop (MINUS_EXPR, coff, ssize_int (bitpos / BITS_PER_UNIT));
   tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
   addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
 				 is_gimple_mem_ref_addr, NULL_TREE);
-  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
-  /* While data-ref analysis punts on bit offsets it still handles
-     bitfield accesses at byte boundaries.  Cope with that.  Note that
-     we cannot simply re-apply the outer COMPONENT_REF because the
-     byte-granular portion of it is already applied via DR_INIT and
-     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
-     start at offset zero.  */
-  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
-      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
-    {
-      tree field = TREE_OPERAND (DR_REF (dr), 1);
-      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
-		     build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
-			     addr, alias_ptr),
-		     DECL_SIZE (field), bitsize_zero_node);
-    }
-  else
-    return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
+  tree alias_ptr = fold_convert (reference_alias_ptr_type (*exp), coff);
+  *exp = fold_build2 (MEM_REF, TREE_TYPE (*exp), addr, alias_ptr);
+  return ref;
 }
 
 /* Get the initialization expression for the INDEX-th temporary variable
Index: gcc/testsuite/gcc.c-torture/execute/pr71083.c
===================================================================
--- gcc/testsuite/gcc.c-torture/execute/pr71083.c	(revision 0)
+++ gcc/testsuite/gcc.c-torture/execute/pr71083.c	(working copy)
@@ -0,0 +1,43 @@ 
+struct lock_chain {
+  unsigned int irq_context: 2,
+    depth: 6,
+    base: 24;
+};
+
+__attribute__((noinline, noclone))
+struct lock_chain * foo (struct lock_chain *chain)
+{
+  int i;
+  for (i = 0; i < 100; i++)
+    {
+      chain[i+1].base = chain[i].base;
+    }
+  return chain;
+}
+
+struct lock_chain1 {
+  char x;
+  unsigned short base;
+} __attribute__((packed));
+
+__attribute__((noinline, noclone))
+struct lock_chain1 * bar (struct lock_chain1 *chain)
+{
+  int i;
+  for (i = 0; i < 100; i++)
+    {
+      chain[i+1].base = chain[i].base;
+    }
+  return chain;
+}
+
+struct lock_chain test [101];
+struct lock_chain1 test1 [101];
+
+int
+main ()
+{
+  foo (test);
+  bar (test1);
+  return 0;
+}