Add "fast" conversions from arrays to bitmaps
diff mbox series

Message ID mpth85l77n7.fsf@arm.com
State New
Headers show
Series
  • Add "fast" conversions from arrays to bitmaps
Related show

Commit Message

Richard Sandiford Sept. 9, 2019, 4:32 p.m. UTC
This patch adds a bitmap_view<X> class that creates a read-only,
on-stack bitmap representation of an array-like object X.  The main
use case is to allow HARD_REG_SETs to be used in REG_SET (i.e. bitmap)
operations.

For now it only handles constant-sized arrays, but I've tried to
define the types in a way that could handle variable-sized arrays
in future (although less efficiently).  E.g. this might be useful
for combining bitmaps and sbitmaps.

For the read-only view to work as intended, I needed to make
bitmap_bit_p take a const_bitmap instead of a bitmap.  Logically
the bitmap really is read-only, but we update the "current" and
"indx" fields of the bitmap_head after doing a search.

The patch makes this change using a const_cast.  Another option
was to make the fields mutable and push the constness down to
bitmap_list_find_element and bitmap_tree_find_element.
However, the constness of const_bitmap should apply to the bitmap
as a whole rather than just the head structure, so if the input
to those functions is a const_bitmap, the result should be a
"const bitmap_element *".  We'd therefore need overloaded versions of
bitmap_*_find_element that take a constant bitmap and return a constant
element (as for standard container accessors).  These would be wrappers
around the non-constant versions and themselves need a const_cast.
All that seemed a bit over-the-top for an internal interface.

I experimented with a few ways of writing the constructors, but the
attached gave the best code.  E.g. (for x86_64 users):

void
foo (bitmap a, const_hard_reg_set b)
{
  bitmap_ior_into (a, bitmap_view<HARD_REG_SET> (b));
}

becomes:

        .cfi_startproc
        subq    $88, %rsp
        .cfi_def_cfa_offset 96
        movq    (%rsi), %rdx
        movq    8(%rsi), %rax
        movq    $0, (%rsp)
        movq    $0, 8(%rsp)
        movq    %rdx, %rcx
        movq    $0, 16(%rsp)
        orq     %rax, %rcx
        movq    $0, 24(%rsp)
        je      .L645
        leaq    32(%rsp), %rcx
        movl    $0, 48(%rsp)
        movq    %rcx, 8(%rsp)
        movq    $0, 40(%rsp)
        movq    $0, 32(%rsp)
        movq    %rcx, 16(%rsp)
        movq    %rdx, 56(%rsp)
        movq    %rax, 64(%rsp)
.L645:
        movq    %rsp, %rsi
        call    _Z15bitmap_ior_intoP11bitmap_headPKS_
        addq    $88, %rsp
        .cfi_def_cfa_offset 8
        ret

which ought to be more efficient than the loops that the patch
is replacing.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2019-09-09  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* array-traits.h: New file.
	* coretypes.h (array_traits, bitmap_view): New types.
	* bitmap.h: Include "array-traits.h"
	(bitmap_bit_p): Take a const_bitmap instead of a bitmap.
	(base_bitmap_view, bitmap_view): New classes.
	* bitmap.c (bitmap_bit_p): Take a const_bitmap instead of a bitmap.
	* hard-reg-set.h: Include array-traits.h.
	(array_traits<HARD_REG_SET>): New struct.
	* regset.h (IOR_REG_SET_HRS): New macro.
	* loop-iv.c (simplify_using_initial_values): Use IOR_REG_SET_HRS
	rather than iterating over each hard register.
	* sched-deps.c (sched_analyze_insn): Likewise.
	* sel-sched-ir.c (setup_id_implicit_regs): Likewise.

Comments

Jeff Law Sept. 9, 2019, 5:46 p.m. UTC | #1
On 9/9/19 10:32 AM, Richard Sandiford wrote:
> This patch adds a bitmap_view<X> class that creates a read-only,
> on-stack bitmap representation of an array-like object X.  The main
> use case is to allow HARD_REG_SETs to be used in REG_SET (i.e. bitmap)
> operations.
> 
> For now it only handles constant-sized arrays, but I've tried to
> define the types in a way that could handle variable-sized arrays
> in future (although less efficiently).  E.g. this might be useful
> for combining bitmaps and sbitmaps.
> 
> For the read-only view to work as intended, I needed to make
> bitmap_bit_p take a const_bitmap instead of a bitmap.  Logically
> the bitmap really is read-only, but we update the "current" and
> "indx" fields of the bitmap_head after doing a search.
> 
> The patch makes this change using a const_cast.  Another option
> was to make the fields mutable and push the constness down to
> bitmap_list_find_element and bitmap_tree_find_element.
> However, the constness of const_bitmap should apply to the bitmap
> as a whole rather than just the head structure, so if the input
> to those functions is a const_bitmap, the result should be a
> "const bitmap_element *".  We'd therefore need overloaded versions of
> bitmap_*_find_element that take a constant bitmap and return a constant
> element (as for standard container accessors).  These would be wrappers
> around the non-constant versions and themselves need a const_cast.
> All that seemed a bit over-the-top for an internal interface.
> 
> I experimented with a few ways of writing the constructors, but the
> attached gave the best code.  E.g. (for x86_64 users):
> 
> void
> foo (bitmap a, const_hard_reg_set b)
> {
>   bitmap_ior_into (a, bitmap_view<HARD_REG_SET> (b));
> }
> 
> becomes:
> 
>         .cfi_startproc
>         subq    $88, %rsp
>         .cfi_def_cfa_offset 96
>         movq    (%rsi), %rdx
>         movq    8(%rsi), %rax
>         movq    $0, (%rsp)
>         movq    $0, 8(%rsp)
>         movq    %rdx, %rcx
>         movq    $0, 16(%rsp)
>         orq     %rax, %rcx
>         movq    $0, 24(%rsp)
>         je      .L645
>         leaq    32(%rsp), %rcx
>         movl    $0, 48(%rsp)
>         movq    %rcx, 8(%rsp)
>         movq    $0, 40(%rsp)
>         movq    $0, 32(%rsp)
>         movq    %rcx, 16(%rsp)
>         movq    %rdx, 56(%rsp)
>         movq    %rax, 64(%rsp)
> .L645:
>         movq    %rsp, %rsi
>         call    _Z15bitmap_ior_intoP11bitmap_headPKS_
>         addq    $88, %rsp
>         .cfi_def_cfa_offset 8
>         ret
> 
> which ought to be more efficient than the loops that the patch
> is replacing.
> 
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
> 
> Richard
> 
> 
> 2019-09-09  Richard Sandiford  <richard.sandiford@arm.com>
> 
> gcc/
> 	* array-traits.h: New file.
> 	* coretypes.h (array_traits, bitmap_view): New types.
> 	* bitmap.h: Include "array-traits.h"
> 	(bitmap_bit_p): Take a const_bitmap instead of a bitmap.
> 	(base_bitmap_view, bitmap_view): New classes.
> 	* bitmap.c (bitmap_bit_p): Take a const_bitmap instead of a bitmap.
> 	* hard-reg-set.h: Include array-traits.h.
> 	(array_traits<HARD_REG_SET>): New struct.
> 	* regset.h (IOR_REG_SET_HRS): New macro.
> 	* loop-iv.c (simplify_using_initial_values): Use IOR_REG_SET_HRS
> 	rather than iterating over each hard register.
> 	* sched-deps.c (sched_analyze_insn): Likewise.
> 	* sel-sched-ir.c (setup_id_implicit_regs): Likewise.
OK
jeff

Patch
diff mbox series

Index: gcc/array-traits.h
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/array-traits.h	2019-09-09 17:23:00.736796759 +0100
@@ -0,0 +1,48 @@ 
+/* Descriptions of array-like objects.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_ARRAY_TRAITS_H
+#define GCC_ARRAY_TRAITS_H
+
+/* Implementation for single integers (and similar types).  */
+template<typename T, T zero = T (0)>
+struct scalar_array_traits
+{
+  typedef T element_type;
+  static const bool has_constant_size = true;
+  static const size_t constant_size = 1;
+  static const T *base (const T &x) { return &x; }
+  static size_t size (const T &) { return 1; }
+};
+
+template<typename T>
+struct array_traits : scalar_array_traits<T> {};
+
+/* Implementation for arrays with a static size.  */
+template<typename T, size_t N>
+struct array_traits<T[N]>
+{
+  typedef T element_type;
+  static const bool has_constant_size = true;
+  static const size_t constant_size = N;
+  static const T *base (const T (&x)[N]) { return x; }
+  static size_t size (const T (&x)[N]) { return N; }
+};
+
+#endif
Index: gcc/coretypes.h
===================================================================
--- gcc/coretypes.h	2019-07-10 19:41:26.387898094 +0100
+++ gcc/coretypes.h	2019-09-09 17:23:00.736796759 +0100
@@ -153,6 +153,14 @@  struct cl_option_handlers;
 struct diagnostic_context;
 class pretty_printer;
 
+template<typename T> struct array_traits;
+
+/* Provides a read-only bitmap view of a single integer bitmask or an
+   array of integer bitmasks, or of a wrapper around such bitmasks.  */
+template<typename T, typename Traits = array_traits<T>,
+	 bool has_constant_size = Traits::has_constant_size>
+class bitmap_view;
+
 /* Address space number for named address space support.  */
 typedef unsigned char addr_space_t;
 
Index: gcc/bitmap.h
===================================================================
--- gcc/bitmap.h	2019-07-31 08:32:45.444538679 +0100
+++ gcc/bitmap.h	2019-09-09 17:23:00.736796759 +0100
@@ -210,6 +210,7 @@  #define GCC_BITMAP_H
    on which many random-access membership tests will happen.  */
 
 #include "obstack.h"
+#include "array-traits.h"
 
 /* Bitmap memory usage.  */
 class bitmap_usage: public mem_usage
@@ -435,7 +436,7 @@  extern bool bitmap_clear_bit (bitmap, in
 extern bool bitmap_set_bit (bitmap, int);
 
 /* Return true if a bit is set in a bitmap.  */
-extern int bitmap_bit_p (bitmap, int);
+extern int bitmap_bit_p (const_bitmap, int);
 
 /* Debug functions to print a bitmap.  */
 extern void debug_bitmap (const_bitmap);
@@ -956,4 +957,123 @@  #define EXECUTE_IF_AND_COMPL_IN_BITMAP(B
   bitmap_head m_bits;
 };
 
+/* Base class for bitmap_view; see there for details.  */
+template<typename T, typename Traits = array_traits<T> >
+class base_bitmap_view
+{
+public:
+  typedef typename Traits::element_type array_element_type;
+
+  base_bitmap_view (const T &, bitmap_element *);
+  operator const_bitmap () const { return &m_head; }
+
+private:
+  base_bitmap_view (const base_bitmap_view &);
+
+  bitmap_head m_head;
+};
+
+/* Provides a read-only bitmap view of a single integer bitmask or a
+   constant-sized array of integer bitmasks, or of a wrapper around such
+   bitmasks.  */
+template<typename T, typename Traits>
+class bitmap_view<T, Traits, true> : public base_bitmap_view<T, Traits>
+{
+public:
+  bitmap_view (const T &array)
+    : base_bitmap_view<T, Traits> (array, m_bitmap_elements) {}
+
+private:
+  /* How many bitmap_elements we need to hold a full T.  */
+  static const size_t num_bitmap_elements
+    = CEIL (CHAR_BIT
+	    * sizeof (typename Traits::element_type)
+	    * Traits::constant_size,
+	    BITMAP_ELEMENT_ALL_BITS);
+  bitmap_element m_bitmap_elements[num_bitmap_elements];
+};
+
+/* Initialize the view for array ARRAY, using the array of bitmap
+   elements in BITMAP_ELEMENTS (which is known to contain enough
+   entries).  */
+template<typename T, typename Traits>
+base_bitmap_view<T, Traits>::base_bitmap_view (const T &array,
+					       bitmap_element *bitmap_elements)
+{
+  m_head.obstack = NULL;
+
+  /* The code currently assumes that each element of ARRAY corresponds
+     to exactly one bitmap_element.  */
+  const size_t array_element_bits = CHAR_BIT * sizeof (array_element_type);
+  STATIC_ASSERT (BITMAP_ELEMENT_ALL_BITS % array_element_bits == 0);
+  size_t array_step = BITMAP_ELEMENT_ALL_BITS / array_element_bits;
+  size_t array_size = Traits::size (array);
+
+  /* Process each potential bitmap_element in turn.  The loop is written
+     this way rather than per array element because usually there are
+     only a small number of array elements per bitmap element (typically
+     two or four).  The inner loops should therefore unroll completely.  */
+  const array_element_type *array_elements = Traits::base (array);
+  unsigned int indx = 0;
+  for (size_t array_base = 0;
+       array_base < array_size;
+       array_base += array_step, indx += 1)
+    {
+      /* How many array elements are in this particular bitmap_element.  */
+      unsigned int array_count
+	= (STATIC_CONSTANT_P (array_size % array_step == 0)
+	   ? array_step : MIN (array_step, array_size - array_base));
+
+      /* See whether we need this bitmap element.  */
+      array_element_type ior = array_elements[array_base];
+      for (size_t i = 1; i < array_count; ++i)
+	ior |= array_elements[array_base + i];
+      if (ior == 0)
+	continue;
+
+      /* Grab the next bitmap element and chain it.  */
+      bitmap_element *bitmap_element = bitmap_elements++;
+      if (m_head.current)
+	m_head.current->next = bitmap_element;
+      else
+	m_head.first = bitmap_element;
+      bitmap_element->prev = m_head.current;
+      bitmap_element->next = NULL;
+      bitmap_element->indx = indx;
+      m_head.current = bitmap_element;
+      m_head.indx = indx;
+
+      /* Fill in the bits of the bitmap element.  */
+      if (array_element_bits < BITMAP_WORD_BITS)
+	{
+	  /* Multiple array elements fit in one element of
+	     bitmap_element->bits.  */
+	  size_t array_i = array_base;
+	  for (unsigned int word_i = 0; word_i < BITMAP_ELEMENT_WORDS;
+	       ++word_i)
+	    {
+	      BITMAP_WORD word = 0;
+	      for (unsigned int shift = 0;
+		   shift < BITMAP_WORD_BITS && array_i < array_size;
+		   shift += array_element_bits)
+		word |= array_elements[array_i++] << shift;
+	      bitmap_element->bits[word_i] = word;
+	    }
+	}
+      else
+	{
+	  /* Array elements are the same size as elements of
+	     bitmap_element->bits, or are an exact multiple of that size.  */
+	  unsigned int word_i = 0;
+	  for (unsigned int i = 0; i < array_count; ++i)
+	    for (unsigned int shift = 0; shift < array_element_bits;
+		 shift += BITMAP_WORD_BITS)
+	      bitmap_element->bits[word_i++]
+		= array_elements[array_base + i] >> shift;
+	  while (word_i < BITMAP_ELEMENT_WORDS)
+	    bitmap_element->bits[word_i++] = 0;
+	}
+    }
+}
+
 #endif /* GCC_BITMAP_H */
Index: gcc/bitmap.c
===================================================================
--- gcc/bitmap.c	2019-07-31 08:32:45.440538708 +0100
+++ gcc/bitmap.c	2019-09-09 17:23:00.736796759 +0100
@@ -979,17 +979,17 @@  bitmap_set_bit (bitmap head, int bit)
 /* Return whether a bit is set within a bitmap.  */
 
 int
-bitmap_bit_p (bitmap head, int bit)
+bitmap_bit_p (const_bitmap head, int bit)
 {
   unsigned int indx = bit / BITMAP_ELEMENT_ALL_BITS;
-  bitmap_element *ptr;
+  const bitmap_element *ptr;
   unsigned bit_num;
   unsigned word_num;
 
   if (!head->tree_form)
-    ptr = bitmap_list_find_element (head, indx);
+    ptr = bitmap_list_find_element (const_cast<bitmap> (head), indx);
   else
-    ptr = bitmap_tree_find_element (head, indx);
+    ptr = bitmap_tree_find_element (const_cast<bitmap> (head), indx);
   if (ptr == 0)
     return 0;
 
Index: gcc/hard-reg-set.h
===================================================================
--- gcc/hard-reg-set.h	2019-09-09 17:02:41.633376161 +0100
+++ gcc/hard-reg-set.h	2019-09-09 17:23:00.736796759 +0100
@@ -20,6 +20,8 @@  Software Foundation; either version 3, o
 #ifndef GCC_HARD_REG_SET_H
 #define GCC_HARD_REG_SET_H
 
+#include "array-traits.h"
+
 /* Define the type of a set of hard registers.  */
 
 /* HARD_REG_ELT_TYPE is a typedef of the unsigned integral type which
@@ -115,6 +117,16 @@  struct HARD_REG_SET
 };
 typedef const HARD_REG_SET &const_hard_reg_set;
 
+template<>
+struct array_traits<HARD_REG_SET>
+{
+  typedef HARD_REG_ELT_TYPE element_type;
+  static const bool has_constant_size = true;
+  static const size_t constant_size = HARD_REG_SET_LONGS;
+  static const element_type *base (const HARD_REG_SET &x) { return x.elts; }
+  static size_t size (const HARD_REG_SET &) { return HARD_REG_SET_LONGS; }
+};
+
 #endif
 
 /* HARD_REG_SET wrapped into a structure, to make it possible to
Index: gcc/regset.h
===================================================================
--- gcc/regset.h	2019-03-08 18:14:27.285004225 +0000
+++ gcc/regset.h	2019-09-09 17:23:00.740796731 +0100
@@ -64,6 +64,10 @@  #define AND_COMPL_REG_SET(TO, FROM) bitm
 /* Inclusive or a register set with a second register set.  */
 #define IOR_REG_SET(TO, FROM) bitmap_ior_into (TO, FROM)
 
+/* Same, but with FROM being a HARD_REG_SET.  */
+#define IOR_REG_SET_HRS(TO, FROM) \
+  bitmap_ior_into (TO, bitmap_view<HARD_REG_SET> (FROM))
+
 /* Exclusive or a register set with a second register set.  */
 #define XOR_REG_SET(TO, FROM) bitmap_xor_into (TO, FROM)
 
Index: gcc/loop-iv.c
===================================================================
--- gcc/loop-iv.c	2019-09-09 16:53:26.705274310 +0100
+++ gcc/loop-iv.c	2019-09-09 17:23:00.740796731 +0100
@@ -1972,14 +1972,8 @@  simplify_using_initial_values (class loo
 	  CLEAR_REG_SET (this_altered);
 	  note_stores (insn, mark_altered, this_altered);
 	  if (CALL_P (insn))
-	    {
-	      /* Kill all call clobbered registers.  */
-	      unsigned int i;
-	      hard_reg_set_iterator hrsi;
-	      EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call,
-					      0, i, hrsi)
-		SET_REGNO_REG_SET (this_altered, i);
-	    }
+	    /* Kill all call clobbered registers.  */
+	    IOR_REG_SET_HRS (this_altered, regs_invalidated_by_call);
 
 	  if (suitable_set_for_replacement (insn, &dest, &src))
 	    {
Index: gcc/sched-deps.c
===================================================================
--- gcc/sched-deps.c	2019-09-09 17:02:41.557376696 +0100
+++ gcc/sched-deps.c	2019-09-09 17:23:00.740796731 +0100
@@ -3332,10 +3332,9 @@  sched_analyze_insn (class deps_desc *dep
       IOR_REG_SET (&deps->reg_last_in_use, reg_pending_uses);
       IOR_REG_SET (&deps->reg_last_in_use, reg_pending_clobbers);
       IOR_REG_SET (&deps->reg_last_in_use, reg_pending_sets);
-      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
-	if (TEST_HARD_REG_BIT (implicit_reg_pending_uses, i)
-	    || TEST_HARD_REG_BIT (implicit_reg_pending_clobbers, i))
-	  SET_REGNO_REG_SET (&deps->reg_last_in_use, i);
+      IOR_REG_SET_HRS (&deps->reg_last_in_use,
+		       implicit_reg_pending_uses
+		       | implicit_reg_pending_clobbers);
 
       /* Set up the pending barrier found.  */
       deps->last_reg_pending_barrier = reg_pending_barrier;
Index: gcc/sel-sched-ir.c
===================================================================
--- gcc/sel-sched-ir.c	2019-07-10 19:41:26.347898412 +0100
+++ gcc/sel-sched-ir.c	2019-09-09 17:23:00.740796731 +0100
@@ -2661,12 +2661,9 @@  setup_id_implicit_regs (idata_t id, insn
     return;
 
   HARD_REG_SET temp;
-  unsigned regno;
-  hard_reg_set_iterator hrsi;
 
   get_implicit_reg_pending_clobbers (&temp, insn);
-  EXECUTE_IF_SET_IN_HARD_REG_SET (temp, 0, regno, hrsi)
-    SET_REGNO_REG_SET (IDATA_REG_SETS (id), regno);
+  IOR_REG_SET_HRS (IDATA_REG_SETS (id), temp);
 }
 
 /* Setup register sets describing INSN in ID.  */