Patchwork [libstdc++-v3,parallel,mode] Improve find algorithm

login
register
mail settings
Submitter Johannes Singler
Date June 8, 2010, 2:07 p.m.
Message ID <4C0E4EA4.8080904@kit.edu>
Download mbox | patch
Permalink /patch/54979/
State New
Headers show

Comments

Johannes Singler - June 8, 2010, 2:07 p.m.
This patch improves the parallel find algorithm by making the block size
proportional to the current start position, which gives nice theoretical
performance guarantees and better performance in practice.

Tested x86_64-unknown-linux-gnu: No regressions

Please approve for mainline.

2010-06-08  Johannes Singler  <singler@kit.edu>

        * include/parallel/find.h
        (__find_template(.., growing_blocks_tag)): Make block size
        proportional to current position.
        * include/parallel/settings.h (_Settings): Introduce new tuning
        parameter find_scale_factor to the end of the struct, default to
        0.01.

Johannes
Paolo Carlini - June 8, 2010, 2:35 p.m.
On 06/08/2010 04:07 PM, Johannes Singler wrote:
> Please approve for mainline.
>   
Patch is OK, thanks.

Paolo.

Patch

Index: include/parallel/settings.h
===================================================================
--- include/parallel/settings.h	(revision 160424)
+++ include/parallel/settings.h	(working copy)
@@ -272,6 +272,9 @@ 
     /// Minimal input size for search and search_n.
     _SequenceIndex              search_minimal_n;
 
+    /// Block size scale-down factor with respect to current position.
+    float                       find_scale_factor;
+
     /// Get the global settings.
     _GLIBCXX_CONST static const _Settings&
     get() throw();
@@ -331,7 +334,8 @@ 
             TLB_size(128),
             cache_line_size(64),
             qsb_steals(0),
-            search_minimal_n(1000)
+            search_minimal_n(1000),
+            find_scale_factor(0.01f)
     { }
   };
 }
Index: include/parallel/find.h
===================================================================
--- include/parallel/find.h	(revision 160424)
+++ include/parallel/find.h	(working copy)
@@ -168,9 +168,7 @@ 
    *  @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
    *  @return Place of finding in both sequences.
    *  @see __gnu_parallel::_Settings::find_sequential_search_size
-   *  @see __gnu_parallel::_Settings::find_initial_block_size
-   *  @see __gnu_parallel::_Settings::find_maximum_block_size
-   *  @see __gnu_parallel::_Settings::find_increasing_factor
+   *  @see __gnu_parallel::_Settings::find_scale_factor
    *
    *  There are two main differences between the growing blocks and
    *  the constant-size blocks variants.
@@ -218,6 +216,8 @@ 
       omp_lock_t __result_lock;
       omp_init_lock(&__result_lock);
 
+      const float __scale_factor = __s.find_scale_factor;
+
       _ThreadIndex __num_threads = __get_max_threads();
 #     pragma omp parallel shared(__result) num_threads(__num_threads)
       {
@@ -227,7 +227,8 @@ 
 	// Not within first __k elements -> start parallel.
 	_ThreadIndex __iam = omp_get_thread_num();
 
-	_DifferenceType __block_size = __s.find_initial_block_size;
+	_DifferenceType __block_size =
+	  std::max<_DifferenceType>(1, __scale_factor * __next_block_start);
 	_DifferenceType __start = __fetch_and_add<_DifferenceType>
 	  (&__next_block_start, __block_size);
 
@@ -265,15 +266,14 @@ 
 		omp_unset_lock(&__result_lock);
 	      }
 
-	    __block_size = std::min<_DifferenceType>
-	      (__block_size * __s.find_increasing_factor,
-	       __s.find_maximum_block_size);
+	    _DifferenceType __block_size =
+	     std::max<_DifferenceType>(1, __scale_factor * __next_block_start);
 
 	    // Get new block, update pointer to next block.
 	    __start = __fetch_and_add<_DifferenceType>(&__next_block_start,
 						       __block_size);
-	    __stop = (__length < (__start + __block_size)
-		      ? __length : (__start + __block_size));
+	    __stop =
+	      std::min<_DifferenceType>(__length, __start + __block_size);
 	  }
       } //parallel