<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> <title>libstdc++: workstealing.h Source File</title> <link href="tabs.css" rel="stylesheet" type="text/css"/> <link href="navtree.css" rel="stylesheet" type="text/css"/> <script type="text/javascript" src="jquery.js"></script> <script type="text/javascript" src="navtree.js"></script> <script type="text/javascript" src="resize.js"></script> <script type="text/javascript"> $(document).ready(initResizable); </script> <link href="doxygen.css" rel="stylesheet" type="text/css"/> </head> <body> <!-- Generated by Doxygen 1.7.4 --> <div id="top"> <div id="titlearea"> <table cellspacing="0" cellpadding="0"> <tbody> <tr style="height: 56px;"> <td style="padding-left: 0.5em;"> <div id="projectname">libstdc++</div> </td> </tr> </tbody> </table> </div> </div> <div id="side-nav" class="ui-resizable side-nav-resizable"> <div id="nav-tree"> <div id="nav-tree-contents"> </div> </div> <div id="splitbar" style="-moz-user-select:none;" class="ui-resizable-handle"> </div> </div> <script type="text/javascript"> initNavTree('a01122.html',''); </script> <div id="doc-content"> <div class="header"> <div class="headertitle"> <div class="title">workstealing.h</div> </div> </div> <div class="contents"> <a href="a01122.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">// -*- C++ -*-</span> <a name="l00002"></a>00002 <a name="l00003"></a>00003 <span class="comment">// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.</span> <a name="l00004"></a>00004 <span class="comment">//</span> <a name="l00005"></a>00005 <span class="comment">// This file is part of the GNU ISO C++ Library. This library is free</span> <a name="l00006"></a>00006 <span class="comment">// software; you can redistribute it and/or modify it under the terms</span> <a name="l00007"></a>00007 <span class="comment">// of the GNU General Public License as published by the Free Software</span> <a name="l00008"></a>00008 <span class="comment">// Foundation; either version 3, or (at your option) any later</span> <a name="l00009"></a>00009 <span class="comment">// version.</span> <a name="l00010"></a>00010 <a name="l00011"></a>00011 <span class="comment">// This library is distributed in the hope that it will be useful, but</span> <a name="l00012"></a>00012 <span class="comment">// WITHOUT ANY WARRANTY; without even the implied warranty of</span> <a name="l00013"></a>00013 <span class="comment">// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU</span> <a name="l00014"></a>00014 <span class="comment">// General Public License for more details.</span> <a name="l00015"></a>00015 <a name="l00016"></a>00016 <span class="comment">// Under Section 7 of GPL version 3, you are granted additional</span> <a name="l00017"></a>00017 <span class="comment">// permissions described in the GCC Runtime Library Exception, version</span> <a name="l00018"></a>00018 <span class="comment">// 3.1, as published by the Free Software Foundation.</span> <a name="l00019"></a>00019 <a name="l00020"></a>00020 <span class="comment">// You should have received a copy of the GNU General Public License and</span> <a name="l00021"></a>00021 <span class="comment">// a copy of the GCC Runtime Library Exception along with this program;</span> <a name="l00022"></a>00022 <span class="comment">// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see</span> <a name="l00023"></a>00023 <span class="comment">// <http://www.gnu.org/licenses/>.</span> <a name="l00024"></a>00024 <span class="comment"></span> <a name="l00025"></a>00025 <span class="comment">/** @file parallel/workstealing.h</span> <a name="l00026"></a>00026 <span class="comment"> * @brief Parallelization of embarrassingly parallel execution by</span> <a name="l00027"></a>00027 <span class="comment"> * means of work-stealing.</span> <a name="l00028"></a>00028 <span class="comment"> *</span> <a name="l00029"></a>00029 <span class="comment"> * Work stealing is described in</span> <a name="l00030"></a>00030 <span class="comment"> *</span> <a name="l00031"></a>00031 <span class="comment"> * R. D. Blumofe and C. E. Leiserson.</span> <a name="l00032"></a>00032 <span class="comment"> * Scheduling multithreaded computations by work stealing.</span> <a name="l00033"></a>00033 <span class="comment"> * Journal of the ACM, 46(5):720–748, 1999.</span> <a name="l00034"></a>00034 <span class="comment"> *</span> <a name="l00035"></a>00035 <span class="comment"> * This file is a GNU parallel extension to the Standard C++ Library.</span> <a name="l00036"></a>00036 <span class="comment"> */</span> <a name="l00037"></a>00037 <a name="l00038"></a>00038 <span class="comment">// Written by Felix Putze.</span> <a name="l00039"></a>00039 <a name="l00040"></a>00040 <span class="preprocessor">#ifndef _GLIBCXX_PARALLEL_WORKSTEALING_H</span> <a name="l00041"></a>00041 <span class="preprocessor"></span><span class="preprocessor">#define _GLIBCXX_PARALLEL_WORKSTEALING_H 1</span> <a name="l00042"></a>00042 <span class="preprocessor"></span> <a name="l00043"></a>00043 <span class="preprocessor">#include <<a class="code" href="a00971.html" title="End-user include file. Provides advanced settings and tuning options. This file is a GNU parallel ext...">parallel/parallel.h</a>></span> <a name="l00044"></a>00044 <span class="preprocessor">#include <<a class="code" href="a00999.html" title="Random number generator based on the Mersenne twister. This file is a GNU parallel extension to the S...">parallel/random_number.h</a>></span> <a name="l00045"></a>00045 <span class="preprocessor">#include <<a class="code" href="a00810.html" title="Compatibility layer, mostly concerned with atomic operations. This file is a GNU parallel extension t...">parallel/compatibility.h</a>></span> <a name="l00046"></a>00046 <a name="l00047"></a>00047 <span class="keyword">namespace </span>__gnu_parallel <a name="l00048"></a>00048 { <a name="l00049"></a>00049 <a name="l00050"></a>00050 <span class="preprocessor">#define _GLIBCXX_JOB_VOLATILE volatile</span> <a name="l00051"></a>00051 <span class="preprocessor"></span><span class="comment"></span> <a name="l00052"></a>00052 <span class="comment"> /** @brief One __job for a certain thread. */</span> <a name="l00053"></a>00053 <span class="keyword">template</span><<span class="keyword">typename</span> _DifferenceTp> <a name="l00054"></a><a class="code" href="a00127.html">00054</a> <span class="keyword">struct </span><a class="code" href="a00127.html" title="One __job for a certain thread.">_Job</a> <a name="l00055"></a>00055 { <a name="l00056"></a>00056 <span class="keyword">typedef</span> _DifferenceTp _DifferenceType; <a name="l00057"></a>00057 <span class="comment"></span> <a name="l00058"></a>00058 <span class="comment"> /** @brief First element.</span> <a name="l00059"></a>00059 <span class="comment"> *</span> <a name="l00060"></a>00060 <span class="comment"> * Changed by owning and stealing thread. By stealing thread,</span> <a name="l00061"></a>00061 <span class="comment"> * always incremented. */</span> <a name="l00062"></a><a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67">00062</a> _GLIBCXX_JOB_VOLATILE _DifferenceType <a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a>; <a name="l00063"></a>00063 <span class="comment"></span> <a name="l00064"></a>00064 <span class="comment"> /** @brief Last element.</span> <a name="l00065"></a>00065 <span class="comment"> *</span> <a name="l00066"></a>00066 <span class="comment"> * Changed by owning thread only. */</span> <a name="l00067"></a><a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129">00067</a> _GLIBCXX_JOB_VOLATILE _DifferenceType <a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a>; <a name="l00068"></a>00068 <span class="comment"></span> <a name="l00069"></a>00069 <span class="comment"> /** @brief Number of elements, i.e. @c _M_last-_M_first+1.</span> <a name="l00070"></a>00070 <span class="comment"> *</span> <a name="l00071"></a>00071 <span class="comment"> * Changed by owning thread only. */</span> <a name="l00072"></a><a class="code" href="a00127.html#a46f2881dc8a59f16b0cb6761d7f632c4">00072</a> _GLIBCXX_JOB_VOLATILE _DifferenceType <a class="code" href="a00127.html#a46f2881dc8a59f16b0cb6761d7f632c4" title="Number of elements, i.e. _M_last-_M_first+1.">_M_load</a>; <a name="l00073"></a>00073 }; <a name="l00074"></a>00074 <span class="comment"></span> <a name="l00075"></a>00075 <span class="comment"> /** @brief Work stealing algorithm for random access iterators.</span> <a name="l00076"></a>00076 <span class="comment"> *</span> <a name="l00077"></a>00077 <span class="comment"> * Uses O(1) additional memory. Synchronization at job lists is</span> <a name="l00078"></a>00078 <span class="comment"> * done with atomic operations.</span> <a name="l00079"></a>00079 <span class="comment"> * @param __begin Begin iterator of element sequence.</span> <a name="l00080"></a>00080 <span class="comment"> * @param __end End iterator of element sequence.</span> <a name="l00081"></a>00081 <span class="comment"> * @param __op User-supplied functor (comparator, predicate, adding</span> <a name="l00082"></a>00082 <span class="comment"> * functor, ...).</span> <a name="l00083"></a>00083 <span class="comment"> * @param __f Functor to @a process an element with __op (depends on</span> <a name="l00084"></a>00084 <span class="comment"> * desired functionality, e. g. for std::for_each(), ...).</span> <a name="l00085"></a>00085 <span class="comment"> * @param __r Functor to @a add a single __result to the already</span> <a name="l00086"></a>00086 <span class="comment"> * processed elements (depends on functionality).</span> <a name="l00087"></a>00087 <span class="comment"> * @param __base Base value for reduction.</span> <a name="l00088"></a>00088 <span class="comment"> * @param __output Pointer to position where final result is written to</span> <a name="l00089"></a>00089 <span class="comment"> * @param __bound Maximum number of elements processed (e. g. for</span> <a name="l00090"></a>00090 <span class="comment"> * std::count_n()).</span> <a name="l00091"></a>00091 <span class="comment"> * @return User-supplied functor (that may contain a part of the result).</span> <a name="l00092"></a>00092 <span class="comment"> */</span> <a name="l00093"></a>00093 <span class="keyword">template</span><<span class="keyword">typename</span> _RAIter, <a name="l00094"></a>00094 <span class="keyword">typename</span> _Op, <a name="l00095"></a>00095 <span class="keyword">typename</span> _Fu, <a name="l00096"></a>00096 <span class="keyword">typename</span> _Red, <a name="l00097"></a>00097 <span class="keyword">typename</span> _Result> <a name="l00098"></a>00098 _Op <a name="l00099"></a><a class="code" href="a01131.html#a0723c5ff76bd65efb01e11ba74f780c7">00099</a> <a class="code" href="a01131.html#a0723c5ff76bd65efb01e11ba74f780c7" title="Work stealing algorithm for random access iterators.">__for_each_template_random_access_workstealing</a>(_RAIter __begin, <a name="l00100"></a>00100 _RAIter __end, _Op __op, <a name="l00101"></a>00101 _Fu& __f, _Red __r, <a name="l00102"></a>00102 _Result <a class="code" href="a01129.html#a1eb258935ccc1f18d8b8423cf079c353">__base</a>, <a name="l00103"></a>00103 _Result& __output, <a name="l00104"></a>00104 <span class="keyword">typename</span> std::iterator_traits<_RAIter>::difference_type __bound) <a name="l00105"></a>00105 { <a name="l00106"></a>00106 <a class="code" href="a00811.html#a77fb93c9cecec331ccee755972695128" title="Macro to produce log message when entering a function.">_GLIBCXX_CALL</a>(__end - __begin) <a name="l00107"></a>00107 <a name="l00108"></a>00108 <span class="keyword">typedef</span> std::iterator_traits<_RAIter> _TraitsType; <a name="l00109"></a>00109 <span class="keyword">typedef</span> <span class="keyword">typename</span> _TraitsType::difference_type _DifferenceType; <a name="l00110"></a>00110 <a name="l00111"></a>00111 <span class="keyword">const</span> <a class="code" href="a00158.html" title="class _Settings Run-time settings for the parallel mode including all tunable parameters.">_Settings</a>& __s = <a class="code" href="a00158.html#abc4965eacae0b49945ebc887cb11adc1" title="Get the global settings.">_Settings::get</a>(); <a name="l00112"></a>00112 <a name="l00113"></a>00113 _DifferenceType __chunk_size = <a name="l00114"></a>00114 <span class="keyword">static_cast<</span>_DifferenceType<span class="keyword">></span>(__s.workstealing_chunk_size); <a name="l00115"></a>00115 <a name="l00116"></a>00116 <span class="comment">// How many jobs?</span> <a name="l00117"></a>00117 _DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound; <a name="l00118"></a>00118 <a name="l00119"></a>00119 <span class="comment">// To avoid false sharing in a cache line.</span> <a name="l00120"></a>00120 <span class="keyword">const</span> <span class="keywordtype">int</span> __stride = (__s.<a class="code" href="a00158.html#a2918b2f3f97a4fbbcfe990e73ace805b" title="Overestimation of cache line size. Used to avoid false sharing, i.e. elements of different threads ar...">cache_line_size</a> * 10 <a name="l00121"></a>00121 / <span class="keyword">sizeof</span>(<a class="code" href="a00127.html" title="One __job for a certain thread.">_Job<_DifferenceType></a>) + 1); <a name="l00122"></a>00122 <a name="l00123"></a>00123 <span class="comment">// Total number of threads currently working.</span> <a name="l00124"></a>00124 <a class="code" href="a01131.html#a05e502e51bfc3233671730f74a44dc6a" title="Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...">_ThreadIndex</a> __busy = 0; <a name="l00125"></a>00125 <a name="l00126"></a>00126 <a class="code" href="a00127.html" title="One __job for a certain thread.">_Job<_DifferenceType></a> *__job; <a name="l00127"></a>00127 <a name="l00128"></a>00128 omp_lock_t __output_lock; <a name="l00129"></a>00129 omp_init_lock(&__output_lock); <a name="l00130"></a>00130 <a name="l00131"></a>00131 <span class="comment">// Write base value to output.</span> <a name="l00132"></a>00132 __output = <a class="code" href="a01129.html#a1eb258935ccc1f18d8b8423cf079c353">__base</a>; <a name="l00133"></a>00133 <a name="l00134"></a>00134 <span class="comment">// No more threads than jobs, at least one thread.</span> <a name="l00135"></a>00135 <a class="code" href="a01131.html#a05e502e51bfc3233671730f74a44dc6a" title="Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...">_ThreadIndex</a> __num_threads = __gnu_parallel::max<_ThreadIndex> <a name="l00136"></a>00136 (1, __gnu_parallel::min<_DifferenceType>(__length, <a name="l00137"></a>00137 __get_max_threads())); <a name="l00138"></a>00138 <a name="l00139"></a>00139 <span class="preprocessor"># pragma omp parallel shared(__busy) num_threads(__num_threads)</span> <a name="l00140"></a>00140 <span class="preprocessor"></span> { <a name="l00141"></a>00141 <span class="preprocessor"># pragma omp single</span> <a name="l00142"></a>00142 <span class="preprocessor"></span> { <a name="l00143"></a>00143 __num_threads = omp_get_num_threads(); <a name="l00144"></a>00144 <a name="l00145"></a>00145 <span class="comment">// Create job description array.</span> <a name="l00146"></a>00146 __job = <span class="keyword">new</span> <a class="code" href="a00127.html" title="One __job for a certain thread.">_Job<_DifferenceType></a>[__num_threads * __stride]; <a name="l00147"></a>00147 } <a name="l00148"></a>00148 <a name="l00149"></a>00149 <span class="comment">// Initialization phase.</span> <a name="l00150"></a>00150 <a name="l00151"></a>00151 <span class="comment">// Flags for every thread if it is doing productive work.</span> <a name="l00152"></a>00152 <span class="keywordtype">bool</span> __iam_working = <span class="keyword">false</span>; <a name="l00153"></a>00153 <a name="l00154"></a>00154 <span class="comment">// Thread id.</span> <a name="l00155"></a>00155 <a class="code" href="a01131.html#a05e502e51bfc3233671730f74a44dc6a" title="Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...">_ThreadIndex</a> __iam = omp_get_thread_num(); <a name="l00156"></a>00156 <a name="l00157"></a>00157 <span class="comment">// This job.</span> <a name="l00158"></a>00158 <a class="code" href="a00127.html" title="One __job for a certain thread.">_Job<_DifferenceType></a>& __my_job = __job[__iam * __stride]; <a name="l00159"></a>00159 <a name="l00160"></a>00160 <span class="comment">// Random number (for work stealing).</span> <a name="l00161"></a>00161 <a class="code" href="a01131.html#a05e502e51bfc3233671730f74a44dc6a" title="Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...">_ThreadIndex</a> __victim; <a name="l00162"></a>00162 <a name="l00163"></a>00163 <span class="comment">// Local value for reduction.</span> <a name="l00164"></a>00164 _Result __result = _Result(); <a name="l00165"></a>00165 <a name="l00166"></a>00166 <span class="comment">// Number of elements to steal in one attempt.</span> <a name="l00167"></a>00167 _DifferenceType __steal; <a name="l00168"></a>00168 <a name="l00169"></a>00169 <span class="comment">// Every thread has its own random number generator</span> <a name="l00170"></a>00170 <span class="comment">// (modulo __num_threads).</span> <a name="l00171"></a>00171 <a class="code" href="a00154.html" title="Random number generator, based on the Mersenne twister.">_RandomNumber</a> __rand_gen(__iam, __num_threads); <a name="l00172"></a>00172 <a name="l00173"></a>00173 <span class="comment">// This thread is currently working.</span> <a name="l00174"></a>00174 <span class="preprocessor"># pragma omp atomic</span> <a name="l00175"></a>00175 <span class="preprocessor"></span> ++__busy; <a name="l00176"></a>00176 <a name="l00177"></a>00177 __iam_working = <span class="keyword">true</span>; <a name="l00178"></a>00178 <a name="l00179"></a>00179 <span class="comment">// How many jobs per thread? last thread gets the rest.</span> <a name="l00180"></a>00180 __my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a> = <span class="keyword">static_cast<</span>_DifferenceType<span class="keyword">></span> <a name="l00181"></a>00181 (__iam * (__length / __num_threads)); <a name="l00182"></a>00182 <a name="l00183"></a>00183 __my_job.<a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a> = (__iam == (__num_threads - 1) <a name="l00184"></a>00184 ? (__length - 1) <a name="l00185"></a>00185 : ((__iam + 1) * (__length / __num_threads) - 1)); <a name="l00186"></a>00186 __my_job.<a class="code" href="a00127.html#a46f2881dc8a59f16b0cb6761d7f632c4" title="Number of elements, i.e. _M_last-_M_first+1.">_M_load</a> = __my_job.<a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a> - __my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a> + 1; <a name="l00187"></a>00187 <a name="l00188"></a>00188 <span class="comment">// Init result with _M_first value (to have a base value for reduction)</span> <a name="l00189"></a>00189 <span class="keywordflow">if</span> (__my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a> <= __my_job.<a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a>) <a name="l00190"></a>00190 { <a name="l00191"></a>00191 <span class="comment">// Cannot use volatile variable directly.</span> <a name="l00192"></a>00192 _DifferenceType __my_first = __my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a>; <a name="l00193"></a>00193 __result = __f(__op, __begin + __my_first); <a name="l00194"></a>00194 ++__my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a>; <a name="l00195"></a>00195 --__my_job.<a class="code" href="a00127.html#a46f2881dc8a59f16b0cb6761d7f632c4" title="Number of elements, i.e. _M_last-_M_first+1.">_M_load</a>; <a name="l00196"></a>00196 } <a name="l00197"></a>00197 <a name="l00198"></a>00198 _RAIter __current; <a name="l00199"></a>00199 <a name="l00200"></a>00200 <span class="preprocessor"># pragma omp barrier</span> <a name="l00201"></a>00201 <span class="preprocessor"></span> <a name="l00202"></a>00202 <span class="comment">// Actual work phase</span> <a name="l00203"></a>00203 <span class="comment">// Work on own or stolen current start</span> <a name="l00204"></a>00204 <span class="keywordflow">while</span> (__busy > 0) <a name="l00205"></a>00205 { <a name="l00206"></a>00206 <span class="comment">// Work until no productive thread left.</span> <a name="l00207"></a>00207 <span class="preprocessor"># pragma omp flush(__busy)</span> <a name="l00208"></a>00208 <span class="preprocessor"></span> <a name="l00209"></a>00209 <span class="comment">// Thread has own work to do</span> <a name="l00210"></a>00210 <span class="keywordflow">while</span> (__my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a> <= __my_job.<a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a>) <a name="l00211"></a>00211 { <a name="l00212"></a>00212 <span class="comment">// fetch-and-add call</span> <a name="l00213"></a>00213 <span class="comment">// Reserve current job block (size __chunk_size) in my queue.</span> <a name="l00214"></a>00214 _DifferenceType __current_job = <a name="l00215"></a>00215 __fetch_and_add<_DifferenceType>(&(__my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a>), <a name="l00216"></a>00216 __chunk_size); <a name="l00217"></a>00217 <a name="l00218"></a>00218 <span class="comment">// Update _M_load, to make the three values consistent,</span> <a name="l00219"></a>00219 <span class="comment">// _M_first might have been changed in the meantime</span> <a name="l00220"></a>00220 __my_job.<a class="code" href="a00127.html#a46f2881dc8a59f16b0cb6761d7f632c4" title="Number of elements, i.e. _M_last-_M_first+1.">_M_load</a> = __my_job.<a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a> - __my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a> + 1; <a name="l00221"></a>00221 <span class="keywordflow">for</span> (_DifferenceType __job_counter = 0; <a name="l00222"></a>00222 __job_counter < __chunk_size <a name="l00223"></a>00223 && __current_job <= __my_job.<a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a>; <a name="l00224"></a>00224 ++__job_counter) <a name="l00225"></a>00225 { <a name="l00226"></a>00226 <span class="comment">// Yes: process it!</span> <a name="l00227"></a>00227 __current = __begin + __current_job; <a name="l00228"></a>00228 ++__current_job; <a name="l00229"></a>00229 <a name="l00230"></a>00230 <span class="comment">// Do actual work.</span> <a name="l00231"></a>00231 __result = __r(__result, __f(__op, __current)); <a name="l00232"></a>00232 } <a name="l00233"></a>00233 <a name="l00234"></a>00234 <span class="preprocessor"># pragma omp flush(__busy)</span> <a name="l00235"></a>00235 <span class="preprocessor"></span> } <a name="l00236"></a>00236 <a name="l00237"></a>00237 <span class="comment">// After reaching this point, a thread's __job list is empty.</span> <a name="l00238"></a>00238 <span class="keywordflow">if</span> (__iam_working) <a name="l00239"></a>00239 { <a name="l00240"></a>00240 <span class="comment">// This thread no longer has work.</span> <a name="l00241"></a>00241 <span class="preprocessor"># pragma omp atomic</span> <a name="l00242"></a>00242 <span class="preprocessor"></span> --__busy; <a name="l00243"></a>00243 <a name="l00244"></a>00244 __iam_working = <span class="keyword">false</span>; <a name="l00245"></a>00245 } <a name="l00246"></a>00246 <a name="l00247"></a>00247 _DifferenceType __supposed_first, __supposed_last, <a name="l00248"></a>00248 __supposed_load; <a name="l00249"></a>00249 <span class="keywordflow">do</span> <a name="l00250"></a>00250 { <a name="l00251"></a>00251 <span class="comment">// Find random nonempty deque (not own), do consistency check.</span> <a name="l00252"></a>00252 <a class="code" href="a01131.html#aaa76236af73146ae89f726921bc3f2cb" title="Yield the control to another thread, without waiting for the end to the time slice.">__yield</a>(); <a name="l00253"></a>00253 <span class="preprocessor"># pragma omp flush(__busy)</span> <a name="l00254"></a>00254 <span class="preprocessor"></span> __victim = __rand_gen(); <a name="l00255"></a>00255 __supposed_first = __job[__victim * __stride].<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a>; <a name="l00256"></a>00256 __supposed_last = __job[__victim * __stride].<a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a>; <a name="l00257"></a>00257 __supposed_load = __job[__victim * __stride].<a class="code" href="a00127.html#a46f2881dc8a59f16b0cb6761d7f632c4" title="Number of elements, i.e. _M_last-_M_first+1.">_M_load</a>; <a name="l00258"></a>00258 } <a name="l00259"></a>00259 <span class="keywordflow">while</span> (__busy > 0 <a name="l00260"></a>00260 && ((__supposed_load <= 0) <a name="l00261"></a>00261 || ((__supposed_first + __supposed_load - 1) <a name="l00262"></a>00262 != __supposed_last))); <a name="l00263"></a>00263 <a name="l00264"></a>00264 <span class="keywordflow">if</span> (__busy == 0) <a name="l00265"></a>00265 <span class="keywordflow">break</span>; <a name="l00266"></a>00266 <a name="l00267"></a>00267 <span class="keywordflow">if</span> (__supposed_load > 0) <a name="l00268"></a>00268 { <a name="l00269"></a>00269 <span class="comment">// Has work and work to do.</span> <a name="l00270"></a>00270 <span class="comment">// Number of elements to steal (at least one).</span> <a name="l00271"></a>00271 __steal = (__supposed_load < 2) ? 1 : __supposed_load / 2; <a name="l00272"></a>00272 <a name="l00273"></a>00273 <span class="comment">// Push __victim's current start forward.</span> <a name="l00274"></a>00274 _DifferenceType __stolen_first = <a name="l00275"></a>00275 __fetch_and_add<_DifferenceType> <a name="l00276"></a>00276 (&(__job[__victim * __stride].<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a>), __steal); <a name="l00277"></a>00277 _DifferenceType __stolen_try = (__stolen_first + __steal <a name="l00278"></a>00278 - _DifferenceType(1)); <a name="l00279"></a>00279 <a name="l00280"></a>00280 __my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a> = __stolen_first; <a name="l00281"></a>00281 __my_job.<a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a> = <a class="code" href="a01131.html#aac79a6cb5fee139a90fafba0778a3d98" title="Equivalent to std::min.">__gnu_parallel::min</a>(__stolen_try, <a name="l00282"></a>00282 __supposed_last); <a name="l00283"></a>00283 __my_job.<a class="code" href="a00127.html#a46f2881dc8a59f16b0cb6761d7f632c4" title="Number of elements, i.e. _M_last-_M_first+1.">_M_load</a> = __my_job.<a class="code" href="a00127.html#a6893a607875d35bea0a2c15b6a448129" title="Last element.">_M_last</a> - __my_job.<a class="code" href="a00127.html#a815b2e740292adbcc40185ebae5b1c67" title="First element.">_M_first</a> + 1; <a name="l00284"></a>00284 <a name="l00285"></a>00285 <span class="comment">// Has potential work again.</span> <a name="l00286"></a>00286 <span class="preprocessor"># pragma omp atomic</span> <a name="l00287"></a>00287 <span class="preprocessor"></span> ++__busy; <a name="l00288"></a>00288 __iam_working = <span class="keyword">true</span>; <a name="l00289"></a>00289 <a name="l00290"></a>00290 <span class="preprocessor"># pragma omp flush(__busy)</span> <a name="l00291"></a>00291 <span class="preprocessor"></span> } <a name="l00292"></a>00292 <span class="preprocessor"># pragma omp flush(__busy)</span> <a name="l00293"></a>00293 <span class="preprocessor"></span> } <span class="comment">// end while __busy > 0</span> <a name="l00294"></a>00294 <span class="comment">// Add accumulated result to output.</span> <a name="l00295"></a>00295 omp_set_lock(&__output_lock); <a name="l00296"></a>00296 __output = __r(__output, __result); <a name="l00297"></a>00297 omp_unset_lock(&__output_lock); <a name="l00298"></a>00298 } <a name="l00299"></a>00299 <a name="l00300"></a>00300 <span class="keyword">delete</span>[] __job; <a name="l00301"></a>00301 <a name="l00302"></a>00302 <span class="comment">// Points to last element processed (needed as return value for</span> <a name="l00303"></a>00303 <span class="comment">// some algorithms like transform)</span> <a name="l00304"></a>00304 __f._M_finish_iterator = __begin + __length; <a name="l00305"></a>00305 <a name="l00306"></a>00306 omp_destroy_lock(&__output_lock); <a name="l00307"></a>00307 <a name="l00308"></a>00308 <span class="keywordflow">return</span> __op; <a name="l00309"></a>00309 } <a name="l00310"></a>00310 } <span class="comment">// end namespace</span> <a name="l00311"></a>00311 <a name="l00312"></a>00312 <span class="preprocessor">#endif </span><span class="comment">/* _GLIBCXX_PARALLEL_WORKSTEALING_H */</span> </pre></div></div> </div> <div id="nav-path" class="navpath"> <ul> <li class="navelem"><a class="el" href="a01122.html">workstealing.h</a> </li> <li class="footer">Generated by  <a href="http://www.doxygen.org/index.html"> <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.4 </li> </ul> </div> </body> </html>