util/X86LINUX64/cuda-6.5/include/thrust/system/cuda/detail/block/reduce.h

   1 /*
   2  *  Copyright 2008-2013 NVIDIA Corporation
   3  *
   4  *  Licensed under the Apache License, Version 2.0 (the "License");
   5  *  you may not use this file except in compliance with the License.
   6  *  You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  *  Unless required by applicable law or agreed to in writing, software
  11  *  distributed under the License is distributed on an "AS IS" BASIS,
  12  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  *  See the License for the specific language governing permissions and
  14  *  limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <thrust/detail/config.h>
  20 #include <thrust/iterator/iterator_traits.h>
  21
  22 namespace thrust
  23 {
  24 namespace system
  25 {
  26 namespace cuda
  27 {
  28 namespace detail
  29 {
  30 namespace block
  31 {
  32
  33 /* Reduces [data, data + n) using binary_op and stores the result in data[0]
  34  *
  35  * Upon return the elements in [data + 1, data + n) have unspecified values.
  36  */
  37 template <typename Context, typename ValueIterator, typename BinaryFunction>
  38 __device__ __thrust_forceinline__
  39 void reduce_n(Context context, ValueIterator data, unsigned int n, BinaryFunction binary_op)
  40 {
  41   if (context.block_dimension() < n)
  42   {
  43     for (unsigned int i = context.block_dimension() + context.thread_index(); i < n; i += context.block_dimension())
  44       data[context.thread_index()] = binary_op(data[context.thread_index()], data[i]);
  45
  46     context.barrier();
  47   }
  48
  49   while (n > 1)
  50   {
  51     unsigned int half = n / 2;
  52
  53     if (context.thread_index() < half)
  54       data[context.thread_index()] = binary_op(data[context.thread_index()], data[n - context.thread_index() - 1]);
  55
  56     context.barrier();
  57
  58     n = n - half;
  59   }
  60 }
  61
  62 } // end namespace block
  63 } // end namespace detail
  64 } // end namespace cuda
  65 } // end namespace system
  66 } // end namespace thrust
  67