diff mbox

[Ada] Improve run time performance for large array reset

Message ID 20140730102335.GA27601@adacore.com
State New
Headers show

Commit Message

Arnaud Charlet July 30, 2014, 10:23 a.m. UTC
This patch makes the compiler generate faster code to reset a large array of
integers to 0 by means of an aggregate with a single Others choice and, more
generally, to set a large array of storage units to a single value by the
same means, for example:

  type Arr is array (1 .. 10000) of Integer;
  A : Arr := (others => 0);

Tested on x86_64-pc-linux-gnu, committed on trunk

2014-07-30  Eric Botcazou  <ebotcazou@adacore.com>

	* exp_aggr.adb (Aggr_Assignment_OK_For_Backend): New predicate.
	(Expand_Array_Aggregate): Also enable in-place expansion for
	code generated by the compiler.  For an object declaration,
	set the kind of the object in addition to its type.  If an
	in-place assignment is to be generated and it can be directly
	done by the back-end, do not expand the aggregate.
	* fe.h (Is_Others_Aggregate): Declare.
	* gcc-interface/trans.c
	(gnat_to_gnu) <N_Assignment_Statement>: Add support for an
	aggregate with a single Others choice on the RHS by means of
	__builtin_memset.  Tidy up.
diff mbox

Patch

Index: fe.h
===================================================================
--- fe.h	(revision 213201)
+++ fe.h	(working copy)
@@ -202,6 +202,11 @@ 
 extern void Check_Elaboration_Code_Allowed (Node_Id);
 extern void Check_Implicit_Dynamic_Code_Allowed (Node_Id);
 
+/* sem_aggr:  */
+#define Is_Others_Aggregate    sem_aggr__is_others_aggregate
+
+extern Boolean Is_Others_Aggregate (Node_Id);
+
 /* sem_aux:  */
 
 #define Ancestor_Subtype               sem_aux__ancestor_subtype
Index: exp_aggr.adb
===================================================================
--- exp_aggr.adb	(revision 213216)
+++ exp_aggr.adb	(working copy)
@@ -3945,6 +3945,9 @@ 
       Aggr_Index_Typ : array (1 .. Aggr_Dimension) of Entity_Id;
       --  The type of each index
 
+      In_Place_Assign_OK_For_Declaration : Boolean := False;
+      --  True if we are to generate an in place assignment for a declaration
+
       Maybe_In_Place_OK : Boolean;
       --  If the type is neither controlled nor packed and the aggregate
       --  is the expression in an assignment, assignment in place may be
@@ -3955,6 +3958,9 @@ 
       --  If Others_Present (J) is True, then there is an others choice
       --  in one of the sub-aggregates of N at dimension J.
 
+      function Aggr_Assignment_OK_For_Backend (N : Node_Id) return Boolean;
+      --  Returns true if an aggregate assignment can be done by the back end
+
       procedure Build_Constrained_Type (Positional : Boolean);
       --  If the subtype is not static or unconstrained, build a constrained
       --  type using the computable sizes of the aggregate and its sub-
@@ -3991,6 +3997,108 @@ 
       --  built directly into the target of the assignment it must be free
       --  of side-effects.
 
+      ------------------------------------
+      -- Aggr_Assignment_OK_For_Backend --
+      ------------------------------------
+
+      --  Backend processing by Gigi/gcc is possible only if all the following
+      --  conditions are met:
+
+      --    1. N consists of a single OTHERS choice, possibly recursively
+
+      --    2. The component type is discrete
+
+      --    3. The component size is a multiple of Storage_Unit
+
+      --    4. The component size is exactly Storage_Unit or the expression is
+      --       an integer whose unsigned value is the binary concatenation of
+      --       K times its remainder modulo 2**Storage_Unit.
+
+      --  The ultimate goal is to generate a call to a fast memset routine
+      --  specifically optimized for the target.
+
+      function Aggr_Assignment_OK_For_Backend (N : Node_Id) return Boolean is
+         Ctyp      : Entity_Id;
+         Expr      : Node_Id := N;
+         Remainder : Uint;
+         Value     : Uint;
+         Nunits    : Nat;
+
+      begin
+         --  Recurse as far as possible to find the innermost component type
+
+         Ctyp := Etype (N);
+         while Is_Array_Type (Ctyp) loop
+            if Nkind (Expr) /= N_Aggregate
+              or else not Is_Others_Aggregate (Expr)
+            then
+               return False;
+            end if;
+
+            Expr := Expression (First (Component_Associations (Expr)));
+
+            for J in 1 .. Number_Dimensions (Ctyp) - 1 loop
+               if Nkind (Expr) /= N_Aggregate
+                 or else not Is_Others_Aggregate (Expr)
+               then
+                  return False;
+               end if;
+
+               Expr := Expression (First (Component_Associations (Expr)));
+            end loop;
+
+            Ctyp := Component_Type (Ctyp);
+         end loop;
+
+         if not Is_Discrete_Type (Ctyp)
+           or else RM_Size (Ctyp) mod System_Storage_Unit /= 0
+         then
+            return False;
+         end if;
+
+         --  The expression needs to be analyzed if True is returned
+
+         Analyze_And_Resolve (Expr, Ctyp);
+
+         Nunits := UI_To_Int (RM_Size (Ctyp) / System_Storage_Unit);
+         if Nunits = 1 then
+            return True;
+         end if;
+
+         if not Compile_Time_Known_Value (Expr) then
+            return False;
+         end if;
+
+         Value := Expr_Value (Expr);
+
+         if Has_Biased_Representation (Ctyp) then
+            Value := Value - Expr_Value (Type_Low_Bound (Ctyp));
+         end if;
+
+         --  0 and -1 immediately satisfy check #4
+
+         if Value = Uint_0 or else Value = Uint_Minus_1 then
+            return True;
+         end if;
+
+         --  We need to work with an unsigned value
+
+         if Value < 0 then
+            Value := Value + 2**(System_Storage_Unit * Nunits);
+         end if;
+
+         Remainder := Value rem 2**System_Storage_Unit;
+         for I in 1 .. Nunits - 1 loop
+            Value := Value / 2**System_Storage_Unit;
+
+            if Value rem 2**System_Storage_Unit /= Remainder then
+               return False;
+            end if;
+         end loop;
+
+         return True;
+      end Aggr_Assignment_OK_For_Backend;
+
       ----------------------------
       -- Build_Constrained_Type --
       ----------------------------
@@ -5065,7 +5173,6 @@ 
       else
          Maybe_In_Place_OK :=
           (Nkind (Parent (N)) = N_Assignment_Statement
-            and then Comes_From_Source (N)
             and then In_Place_Assign_OK)
 
           or else
@@ -5098,22 +5205,27 @@ 
          and then not Is_Bit_Packed_Array (Typ)
          and then not Has_Controlled_Component (Typ)
       then
+         In_Place_Assign_OK_For_Declaration := True;
          Tmp := Defining_Identifier (Parent (N));
          Set_No_Initialization (Parent (N));
          Set_Expression (Parent (N), Empty);
 
-         --  Set the type of the entity, for use in the analysis of the
-         --  subsequent indexed assignments. If the nominal type is not
+         --  Set kind and type of the entity, for use in the analysis
+         --  of the subsequent assignments. If the nominal type is not
          --  constrained, build a subtype from the known bounds of the
          --  aggregate. If the declaration has a subtype mark, use it,
          --  otherwise use the itype of the aggregate.
 
+         Set_Ekind (Tmp, E_Variable);
+
          if not Is_Constrained (Typ) then
             Build_Constrained_Type (Positional => False);
+
          elsif Is_Entity_Name (Object_Definition (Parent (N)))
            and then Is_Constrained (Entity (Object_Definition (Parent (N))))
          then
             Set_Etype (Tmp, Entity (Object_Definition (Parent (N))));
+
          else
             Set_Size_Known_At_Compile_Time (Typ, False);
             Set_Etype (Tmp, Typ);
@@ -5150,7 +5262,6 @@ 
 
       elsif Maybe_In_Place_OK
         and then Nkind (Name (Parent (N))) = N_Slice
-        and then Comes_From_Source (N)
         and then Is_Others_Aggregate (N)
       then
          Tmp := Name (Parent (N));
@@ -5214,13 +5325,39 @@ 
             Target := New_Copy (Tmp);
          end if;
 
-         Aggr_Code :=
-           Build_Array_Aggr_Code (N,
-             Ctype       => Ctyp,
-             Index       => First_Index (Typ),
-             Into        => Target,
-             Scalar_Comp => Is_Scalar_Type (Ctyp));
+         --  If we are to generate an in place assignment for a declaration or
+         --  an assignment statement, and the assignment can be done directly
+         --  by the back end, then do not expand further.
 
+         --  ??? We can also do that if in place expansion is not possible but
+         --  then we could go into an infinite recursion.
+
+         if (In_Place_Assign_OK_For_Declaration or else Maybe_In_Place_OK)
+           and then not AAMP_On_Target
+           and then VM_Target = No_VM
+           and then not Generate_SCIL
+           and then not Possible_Bit_Aligned_Component (Target)
+           and then Aggr_Assignment_OK_For_Backend (N)
+         then
+            if Maybe_In_Place_OK then
+               return;
+            end if;
+
+            Aggr_Code :=
+              New_List (
+                Make_Assignment_Statement (Loc,
+                  Name       => Target,
+                  Expression => New_Copy (N)));
+         else
+
+            Aggr_Code :=
+              Build_Array_Aggr_Code (N,
+                Ctype       => Ctyp,
+                Index       => First_Index (Typ),
+                Into        => Target,
+                Scalar_Comp => Is_Scalar_Type (Ctyp));
+         end if;
+
          --  Save the last assignment statement associated with the aggregate
          --  when building a controlled object. This reference is utilized by
          --  the finalization machinery when marking an object as successfully
Index: gcc-interface/trans.c
===================================================================
--- gcc-interface/trans.c	(revision 213201)
+++ gcc-interface/trans.c	(working copy)
@@ -2400,9 +2400,11 @@ 
       /* First compile all the different case choices for the current WHEN
 	 alternative.  */
       for (gnat_choice = First (Discrete_Choices (gnat_when));
-	   Present (gnat_choice); gnat_choice = Next (gnat_choice))
+	   Present (gnat_choice);
+	   gnat_choice = Next (gnat_choice))
 	{
 	  tree gnu_low = NULL_TREE, gnu_high = NULL_TREE;
+	  tree label = create_artificial_label (input_location);
 
 	  switch (Nkind (gnat_choice))
 	    {
@@ -2426,8 +2428,8 @@ 
 		{
 		  tree gnu_type = get_unpadded_type (Entity (gnat_choice));
 
-		  gnu_low = fold (TYPE_MIN_VALUE (gnu_type));
-		  gnu_high = fold (TYPE_MAX_VALUE (gnu_type));
+		  gnu_low = TYPE_MIN_VALUE (gnu_type);
+		  gnu_high = TYPE_MAX_VALUE (gnu_type);
 		  break;
 		}
 
@@ -2445,20 +2447,13 @@ 
 	      gcc_unreachable ();
 	    }
 
-	  /* If the case value is a subtype that raises Constraint_Error at
-	     run time because of a wrong bound, then gnu_low or gnu_high is
-	     not translated into an INTEGER_CST.  In such a case, we need
-	     to ensure that the when statement is not added in the tree,
-	     otherwise it will crash the gimplifier.  */
-	  if ((!gnu_low || TREE_CODE (gnu_low) == INTEGER_CST)
-	      && (!gnu_high || TREE_CODE (gnu_high) == INTEGER_CST))
-	    {
-	      add_stmt_with_node (build_case_label
-				  (gnu_low, gnu_high,
-				   create_artificial_label (input_location)),
-				  gnat_choice);
-	      choices_added_p = true;
-	    }
+	  /* Everything should be folded into constants at this point.  */
+	  gcc_assert (!gnu_low  || TREE_CODE (gnu_low)  == INTEGER_CST);
+	  gcc_assert (!gnu_high || TREE_CODE (gnu_high) == INTEGER_CST);
+
+	  add_stmt_with_node (build_case_label (gnu_low, gnu_high, label),
+			      gnat_choice);
+	  choices_added_p = true;
 	}
 
       /* This construct doesn't define a scope so we shouldn't push a binding
@@ -5713,16 +5708,27 @@ 
 	gnu_result = alloc_stmt_list ();
       break;
 
+    case N_Exception_Renaming_Declaration:
+      gnat_temp = Defining_Entity (gnat_node);
+      if (Renamed_Entity (gnat_temp) != Empty)
+        gnu_result
+          = gnat_to_gnu_entity (gnat_temp,
+                                gnat_to_gnu (Renamed_Entity (gnat_temp)), 1);
+      else
+        gnu_result = alloc_stmt_list ();
+      break;
+
     case N_Implicit_Label_Declaration:
       gnat_to_gnu_entity (Defining_Entity (gnat_node), NULL_TREE, 1);
       gnu_result = alloc_stmt_list ();
       break;
 
-    case N_Exception_Renaming_Declaration:
     case N_Number_Declaration:
+    case N_Subprogram_Renaming_Declaration:
     case N_Package_Renaming_Declaration:
-    case N_Subprogram_Renaming_Declaration:
       /* These are fully handled in the front end.  */
+      /* ??? For package renamings, find a way to use GENERIC namespaces so
+	 that we get proper debug information for them.  */
       gnu_result = alloc_stmt_list ();
       break;
 
@@ -6479,40 +6485,79 @@ 
 			 atomic_sync_required_p (Name (gnat_node)));
       else
 	{
-	  gnu_rhs
-	    = maybe_unconstrained_array (gnat_to_gnu (Expression (gnat_node)));
+	  const Node_Id gnat_expr = Expression (gnat_node);
+	  const Entity_Id gnat_type
+	    = Underlying_Type (Etype (Name (gnat_node)));
+	  const bool regular_array_type_p
+	    = (Is_Array_Type (gnat_type) && !Is_Bit_Packed_Array (gnat_type));
+	  const bool use_memset_p
+	    = (regular_array_type_p
+	       && Nkind (gnat_expr) == N_Aggregate
+	       && Is_Others_Aggregate (gnat_expr));
 
+	  /* If we'll use memset, we need to find the inner expression.  */
+	  if (use_memset_p)
+	    {
+	      Node_Id gnat_inner
+		= Expression (First (Component_Associations (gnat_expr)));
+	      while (Nkind (gnat_inner) == N_Aggregate
+		     && Is_Others_Aggregate (gnat_inner))
+		gnat_inner
+		  = Expression (First (Component_Associations (gnat_inner)));
+	      gnu_rhs = gnat_to_gnu (gnat_inner);
+	    }
+	  else
+	    gnu_rhs = maybe_unconstrained_array (gnat_to_gnu (gnat_expr));
+
 	  /* If range check is needed, emit code to generate it.  */
-	  if (Do_Range_Check (Expression (gnat_node)))
+	  if (Do_Range_Check (gnat_expr))
 	    gnu_rhs = emit_range_check (gnu_rhs, Etype (Name (gnat_node)),
 					gnat_node);
 
+	  /* If atomic synchronization is required, build an atomic store.  */
 	  if (atomic_sync_required_p (Name (gnat_node)))
 	    gnu_result = build_atomic_store (gnu_lhs, gnu_rhs);
+
+	  /* Or else, use memset when the conditions are met.  */
+	  else if (use_memset_p)
+	    {
+	      tree value = fold_convert (integer_type_node, gnu_rhs);
+	      tree to = gnu_lhs;
+	      tree type = TREE_TYPE (to);
+	      tree size
+	        = SUBSTITUTE_PLACEHOLDER_IN_EXPR (TYPE_SIZE_UNIT (type), to);
+	      tree to_ptr = build_fold_addr_expr (to);
+	      tree t = builtin_decl_implicit (BUILT_IN_MEMSET);
+	      if (TREE_CODE (value) == INTEGER_CST)
+		{
+		  tree mask
+		    = build_int_cst (integer_type_node,
+				     ((HOST_WIDE_INT) 1 << BITS_PER_UNIT) - 1);
+		  value = int_const_binop (BIT_AND_EXPR, value, mask);
+		}
+	      gnu_result = build_call_expr (t, 3, to_ptr, value, size);
+	    }
+
+	  /* Otherwise build a regular assignment.  */
 	  else
 	    gnu_result
 	      = build_binary_op (MODIFY_EXPR, NULL_TREE, gnu_lhs, gnu_rhs);
 
-	  /* If the type being assigned is an array type and the two sides are
+	  /* If the assignment type is a regular array and the two sides are
 	     not completely disjoint, play safe and use memmove.  But don't do
 	     it for a bit-packed array as it might not be byte-aligned.  */
 	  if (TREE_CODE (gnu_result) == MODIFY_EXPR
-	      && Is_Array_Type (Etype (Name (gnat_node)))
-	      && !Is_Bit_Packed_Array (Etype (Name (gnat_node)))
+	      && regular_array_type_p
 	      && !(Forwards_OK (gnat_node) && Backwards_OK (gnat_node)))
 	    {
-	      tree to, from, size, to_ptr, from_ptr, t;
-
-	      to = TREE_OPERAND (gnu_result, 0);
-	      from = TREE_OPERAND (gnu_result, 1);
-
-	      size = TYPE_SIZE_UNIT (TREE_TYPE (from));
-	      size = SUBSTITUTE_PLACEHOLDER_IN_EXPR (size, from);
-
-	      to_ptr = build_fold_addr_expr (to);
-	      from_ptr = build_fold_addr_expr (from);
-
-	      t = builtin_decl_implicit (BUILT_IN_MEMMOVE);
+	      tree to = TREE_OPERAND (gnu_result, 0);
+	      tree from = TREE_OPERAND (gnu_result, 1);
+	      tree type = TREE_TYPE (from);
+	      tree size
+	        = SUBSTITUTE_PLACEHOLDER_IN_EXPR (TYPE_SIZE_UNIT (type), from);
+	      tree to_ptr = build_fold_addr_expr (to);
+	      tree from_ptr = build_fold_addr_expr (from);
+	      tree t = builtin_decl_implicit (BUILT_IN_MEMMOVE);
 	      gnu_result = build_call_expr (t, 3, to_ptr, from_ptr, size);
 	   }
 	}
@@ -7457,7 +7502,10 @@ 
 void
 add_stmt_with_node (tree gnu_stmt, Node_Id gnat_node)
 {
-  if (Present (gnat_node))
+  /* Do not emit a location for renamings that come from generic instantiation,
+     they are likely to disturb debugging.  */
+  if (Present (gnat_node)
+      && !renaming_from_generic_instantiation_p (gnat_node))
     set_expr_location_from_node (gnu_stmt, gnat_node);
   add_stmt (gnu_stmt);
 }