From: singler Date: Thu, 22 Apr 2010 10:14:07 +0000 (+0000) Subject: 2010-04-22 Johannes Singler X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=commitdiff_plain;h=60cc67651dd7b14ff931b856fe791193168844e0 2010-04-22 Johannes Singler * include/parallel/partition.h (__parallel_partition): Improve scalability by: -introducing new variables __leftold, __rightold, __dist, thus -getting rid of omp lock by using atomic operations -getting rid of two omp barriers git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@158636 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 841dd99d7f6..96d166fa225 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,11 @@ +2010-04-22 Johannes Singler + + * include/parallel/partition.h (__parallel_partition): + Improve scalability by: + -introducing new variables __leftold, __rightold, __dist, thus + -getting rid of omp lock by using atomic operations + -getting rid of two omp barriers + 2010-04-22 Jonathan Wakely * doc/xml/faq.xml: Link to manual. diff --git a/libstdc++-v3/include/parallel/partition.h b/libstdc++-v3/include/parallel/partition.h index 6a9c4aceaa6..0d5a139968c 100644 --- a/libstdc++-v3/include/parallel/partition.h +++ b/libstdc++-v3/include/parallel/partition.h @@ -66,27 +66,26 @@ namespace __gnu_parallel const _Settings& __s = _Settings::get(); - // Shared. - _GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1; - _GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right; - _GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew; + // shared + _GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1, + __dist = __n, + __leftover_left, __leftover_right, + __leftnew, __rightnew; - bool* __reserved_left = NULL, * __reserved_right = NULL; + // just 0 or 1, but int to allow atomic operations + int* __reserved_left = NULL, * __reserved_right = NULL; _DifferenceType __chunk_size = __s.partition_chunk_size; - omp_lock_t __result_lock; - omp_init_lock(&__result_lock); - //at least two chunks per thread - if (__right - __left + 1 >= 2 * __num_threads * __chunk_size) + if (__dist >= 2 * __num_threads * __chunk_size) # pragma omp parallel num_threads(__num_threads) { # pragma omp single { __num_threads = omp_get_num_threads(); - __reserved_left = new bool[__num_threads]; - __reserved_right = new bool[__num_threads]; + __reserved_left = new int[__num_threads]; + __reserved_right = new int[__num_threads]; if (__s.partition_chunk_share > 0.0) __chunk_size = std::max<_DifferenceType> @@ -96,17 +95,16 @@ namespace __gnu_parallel __chunk_size = __s.partition_chunk_size; } - while (__right - __left + 1 >= 2 * __num_threads * __chunk_size) + while (__dist >= 2 * __num_threads * __chunk_size) { # pragma omp single { - _DifferenceType __num_chunks = ((__right - __left + 1) - / __chunk_size); + _DifferenceType __num_chunks = __dist / __chunk_size; for (_ThreadIndex __r = 0; __r < __num_threads; ++__r) { - __reserved_left[__r] = false; - __reserved_right[__r] = false; + __reserved_left [__r] = 0; // false + __reserved_right[__r] = 0; // false } __leftover_left = 0; __leftover_right = 0; @@ -115,11 +113,13 @@ namespace __gnu_parallel // Private. _DifferenceType __thread_left, __thread_left_border, __thread_right, __thread_right_border; - __thread_left = __left + 1; + __thread_left = __left + 1; // Just to satisfy the condition below. __thread_left_border = __thread_left - 1; + __thread_right = __n - 1; + // Just to satisfy the condition below. __thread_right_border = __thread_right + 1; bool __iam_finished = false; @@ -127,35 +127,42 @@ namespace __gnu_parallel { if (__thread_left > __thread_left_border) { - omp_set_lock(&__result_lock); - if (__left + (__chunk_size - 1) > __right) - __iam_finished = true; - else - { - __thread_left = __left; - __thread_left_border = __left + (__chunk_size - 1); - __left += __chunk_size; - } - omp_unset_lock(&__result_lock); + _DifferenceType __former_dist = + __fetch_and_add(&__dist, -__chunk_size); + if (__former_dist < __chunk_size) + { + __fetch_and_add(&__dist, __chunk_size); + __iam_finished = true; + break; + } + else + { + __thread_left = + __fetch_and_add(&__left, __chunk_size); + __thread_left_border = + __thread_left + (__chunk_size - 1); + } } if (__thread_right < __thread_right_border) { - omp_set_lock(&__result_lock); - if (__left > __right - (__chunk_size - 1)) - __iam_finished = true; - else - { - __thread_right = __right; - __thread_right_border = __right - (__chunk_size - 1); - __right -= __chunk_size; - } - omp_unset_lock(&__result_lock); + _DifferenceType __former_dist = + __fetch_and_add(&__dist, -__chunk_size); + if (__former_dist < __chunk_size) + { + __fetch_and_add(&__dist, __chunk_size); + __iam_finished = true; + break; + } + else + { + __thread_right = + __fetch_and_add(&__right, -__chunk_size); + __thread_right_border = + __thread_right - (__chunk_size - 1); + } } - if (__iam_finished) - break; - // Swap as usual. while (__thread_left < __thread_right) { @@ -188,13 +195,11 @@ namespace __gnu_parallel # pragma omp barrier -# pragma omp single - { - __leftnew = __left - __leftover_left * __chunk_size; - __rightnew = __right + __leftover_right * __chunk_size; - } - -# pragma omp barrier + _DifferenceType + __leftold = __left, + __leftnew = __left - __leftover_left * __chunk_size, + __rightold = __right, + __rightnew = __right + __leftover_right * __chunk_size; // <=> __thread_left_border + (__chunk_size - 1) >= __leftnew if (__thread_left <= __thread_left_border @@ -202,7 +207,7 @@ namespace __gnu_parallel { // Chunk already in place, reserve spot. __reserved_left[(__left - (__thread_left_border + 1)) - / __chunk_size] = true; + / __chunk_size] = 1; } // <=> __thread_right_border - (__chunk_size - 1) <= __rightnew @@ -211,7 +216,7 @@ namespace __gnu_parallel { // Chunk already in place, reserve spot. __reserved_right[((__thread_right_border - 1) - __right) - / __chunk_size] = true; + / __chunk_size] = 1; } # pragma omp barrier @@ -221,15 +226,13 @@ namespace __gnu_parallel { // Find spot and swap. _DifferenceType __swapstart = -1; - omp_set_lock(&__result_lock); - for (_DifferenceType __r = 0; __r < __leftover_left; ++__r) - if (!__reserved_left[__r]) - { - __reserved_left[__r] = true; - __swapstart = __left - (__r + 1) * __chunk_size; - break; - } - omp_unset_lock(&__result_lock); + for (int __r = 0; __r < __leftover_left; ++__r) + if (__reserved_left[__r] == 0 + && __compare_and_swap(&(__reserved_left[__r]), 0, 1)) + { + __swapstart = __leftold - (__r + 1) * __chunk_size; + break; + } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); @@ -246,15 +249,13 @@ namespace __gnu_parallel { // Find spot and swap _DifferenceType __swapstart = -1; - omp_set_lock(&__result_lock); - for (_DifferenceType __r = 0; __r < __leftover_right; ++__r) - if (!__reserved_right[__r]) - { - __reserved_right[__r] = true; - __swapstart = __right + __r * __chunk_size + 1; - break; - } - omp_unset_lock(&__result_lock); + for (int __r = 0; __r < __leftover_right; ++__r) + if (__reserved_right[__r] == 0 + && __compare_and_swap(&(__reserved_right[__r]), 0, 1)) + { + __swapstart = __rightold + __r * __chunk_size + 1; + break; + } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); @@ -270,18 +271,15 @@ namespace __gnu_parallel # pragma omp single { for (_DifferenceType __r = 0; __r < __leftover_left; ++__r) - _GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]); + _GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r] == 1); for (_DifferenceType __r = 0; __r < __leftover_right; ++__r) - _GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]); + _GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r] == 1); } - -# pragma omp barrier #endif -# pragma omp barrier - __left = __leftnew; __right = __rightnew; + __dist = __right - __left + 1; } # pragma omp flush(__left, __right) @@ -313,8 +311,6 @@ namespace __gnu_parallel delete[] __reserved_left; delete[] __reserved_right; - omp_destroy_lock(&__result_lock); - // Element "between" __final_left and __final_right might not have // been regarded yet if (__final_left < __n && !__pred(__begin[__final_left]))