[libstdc++-v3,parallel,mode] Improve find algorithm

Submitted by Johannes Singler on June 8, 2010, 2:07 p.m.

Details

Message ID 4C0E4EA4.8080904@kit.edu
State New
Headers show

Commit Message

Johannes Singler June 8, 2010, 2:07 p.m.
This patch improves the parallel find algorithm by making the block size
proportional to the current start position, which gives nice theoretical
performance guarantees and better performance in practice.

Tested x86_64-unknown-linux-gnu: No regressions

Please approve for mainline.

2010-06-08  Johannes Singler  <singler@kit.edu>

        * include/parallel/find.h
        (__find_template(.., growing_blocks_tag)): Make block size
        proportional to current position.
        * include/parallel/settings.h (_Settings): Introduce new tuning
        parameter find_scale_factor to the end of the struct, default to
        0.01.

Johannes

Comments

Paolo Carlini June 8, 2010, 2:35 p.m.
On 06/08/2010 04:07 PM, Johannes Singler wrote:
> Please approve for mainline.
>   
Patch is OK, thanks.

Paolo.

Patch hide | download patch | download mbox

Index: include/parallel/settings.h
===================================================================
--- include/parallel/settings.h	(revision 160424)
+++ include/parallel/settings.h	(working copy)
@@ -272,6 +272,9 @@ 
     /// Minimal input size for search and search_n.
     _SequenceIndex              search_minimal_n;
 
+    /// Block size scale-down factor with respect to current position.
+    float                       find_scale_factor;
+
     /// Get the global settings.
     _GLIBCXX_CONST static const _Settings&
     get() throw();
@@ -331,7 +334,8 @@ 
             TLB_size(128),
             cache_line_size(64),
             qsb_steals(0),
-            search_minimal_n(1000)
+            search_minimal_n(1000),
+            find_scale_factor(0.01f)
     { }
   };
 }
Index: include/parallel/find.h
===================================================================
--- include/parallel/find.h	(revision 160424)
+++ include/parallel/find.h	(working copy)
@@ -168,9 +168,7 @@ 
    *  @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
    *  @return Place of finding in both sequences.
    *  @see __gnu_parallel::_Settings::find_sequential_search_size
-   *  @see __gnu_parallel::_Settings::find_initial_block_size
-   *  @see __gnu_parallel::_Settings::find_maximum_block_size
-   *  @see __gnu_parallel::_Settings::find_increasing_factor
+   *  @see __gnu_parallel::_Settings::find_scale_factor
    *
    *  There are two main differences between the growing blocks and
    *  the constant-size blocks variants.
@@ -218,6 +216,8 @@ 
       omp_lock_t __result_lock;
       omp_init_lock(&__result_lock);
 
+      const float __scale_factor = __s.find_scale_factor;
+
       _ThreadIndex __num_threads = __get_max_threads();
 #     pragma omp parallel shared(__result) num_threads(__num_threads)
       {
@@ -227,7 +227,8 @@ 
 	// Not within first __k elements -> start parallel.
 	_ThreadIndex __iam = omp_get_thread_num();
 
-	_DifferenceType __block_size = __s.find_initial_block_size;
+	_DifferenceType __block_size =
+	  std::max<_DifferenceType>(1, __scale_factor * __next_block_start);
 	_DifferenceType __start = __fetch_and_add<_DifferenceType>
 	  (&__next_block_start, __block_size);
 
@@ -265,15 +266,14 @@ 
 		omp_unset_lock(&__result_lock);
 	      }
 
-	    __block_size = std::min<_DifferenceType>
-	      (__block_size * __s.find_increasing_factor,
-	       __s.find_maximum_block_size);
+	    _DifferenceType __block_size =
+	     std::max<_DifferenceType>(1, __scale_factor * __next_block_start);
 
 	    // Get new block, update pointer to next block.
 	    __start = __fetch_and_add<_DifferenceType>(&__next_block_start,
 						       __block_size);
-	    __stop = (__length < (__start + __block_size)
-		      ? __length : (__start + __block_size));
+	    __stop =
+	      std::min<_DifferenceType>(__length, __start + __block_size);
 	  }
       } //parallel