1 /******************************************************************************
3 * Copyright 2010 Duane Merrill
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
22 * If you use|reference|benchmark this code, please cite our Technical
23 * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf):
25 * @TechReport{ Merrill:Sorting:2010,
26 * author = "Duane Merrill and Andrew Grimshaw",
27 * title = "Revisiting Sorting for GPGPU Stream Architectures",
29 * institution = "University of Virginia, Department of Computer Science",
30 * address = "Charlottesville, VA, USA",
31 * number = "CS2010-03"
34 * For more information, see our Google Code project site:
35 * http://code.google.com/p/back40computing/
39 ******************************************************************************/
42 /******************************************************************************
43 * Functors for converting signed and floating point types to unsigned types
44 * suitable for radix sorting
45 ******************************************************************************/
54 namespace b40c_thrust {
58 // Do-nothing functors
63 template <typename ConvertedKeyType>
64 __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {}
65 __device__ __host__ __forceinline__ static bool MustApply(){ return false;}
69 // Do-nothing functors that indicate a mandatory pass
73 struct MandatoryPassNopFunctor{
74 template <typename ConvertedKeyType>
75 __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {}
76 __device__ __host__ __forceinline__ static bool MustApply(){ return false;}
81 // Conversion for generic unsigned types
84 template <typename T> struct KeyConversion {
85 typedef T UnsignedBits;
89 struct PreprocessKeyFunctor{
90 template <typename ConvertedKeyType>
91 __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {}
92 __device__ __host__ __forceinline__ static bool MustApply(){ return false;}
96 struct PostprocessKeyFunctor {
97 template <typename ConvertedKeyType>
98 __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {}
99 __device__ __host__ __forceinline__ static bool MustApply(){ return false;}
105 // Conversion for floats
108 template <> struct KeyConversion<float> {
109 typedef unsigned int UnsignedBits;
113 struct PreprocessKeyFunctor<float> {
114 __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) {
116 unsigned int mask = (converted_key & 0x80000000) ? 0xffffffff : 0x80000000;
117 converted_key ^= mask;
119 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
123 struct PostprocessKeyFunctor<float> {
124 __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) {
126 unsigned int mask = (converted_key & 0x80000000) ? 0x80000000 : 0xffffffff;
127 converted_key ^= mask;
129 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
135 // Conversion for doubles
138 template <> struct KeyConversion<double> {
139 typedef unsigned long long UnsignedBits;
143 struct PreprocessKeyFunctor<double> {
144 __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) {
146 unsigned long long mask = (converted_key & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000;
147 converted_key ^= mask;
149 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
153 struct PostprocessKeyFunctor<double> {
154 __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) {
155 unsigned long long mask = (converted_key & 0x8000000000000000) ? 0x8000000000000000 : 0xffffffffffffffff;
156 converted_key ^= mask;
158 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
163 // Conversion for signed chars
166 template <> struct KeyConversion<char> {
167 typedef unsigned char UnsignedBits;
171 struct PreprocessKeyFunctor<char> {
172 __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) {
173 // char is unsigned on some platforms, so we have to check
174 if(std::numeric_limits<char>::is_signed)
176 const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1);
177 converted_key ^= SIGN_MASK;
180 __device__ __host__ __forceinline__ static bool MustApply(){ return std::numeric_limits<char>::is_signed;}
184 struct PostprocessKeyFunctor<char> {
185 __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) {
186 // char is unsigned on some platforms, so we have to check
187 if(std::numeric_limits<char>::is_signed)
189 const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1);
190 converted_key ^= SIGN_MASK;
193 __device__ __host__ __forceinline__ static bool MustApply(){ return std::numeric_limits<char>::is_signed;}
197 // TODO handle this more gracefully
198 template <> struct KeyConversion<signed char> {
199 typedef unsigned char UnsignedBits;
203 struct PreprocessKeyFunctor<signed char> {
204 __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) {
205 const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1);
206 converted_key ^= SIGN_MASK;
208 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
212 struct PostprocessKeyFunctor<signed char> {
213 __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) {
214 const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1);
215 converted_key ^= SIGN_MASK;
217 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
222 // Conversion for signed shorts
225 template <> struct KeyConversion<short> {
226 typedef unsigned short UnsignedBits;
230 struct PreprocessKeyFunctor<short> {
231 __device__ __host__ __forceinline__ void operator()(unsigned short &converted_key) {
232 const unsigned int SIGN_MASK = 1u << ((sizeof(short) * 8) - 1);
233 converted_key ^= SIGN_MASK;
235 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
239 struct PostprocessKeyFunctor<short> {
240 __device__ __host__ __forceinline__ void operator()(unsigned short &converted_key) {
241 const unsigned int SIGN_MASK = 1u << ((sizeof(short) * 8) - 1);
242 converted_key ^= SIGN_MASK;
244 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
250 // Conversion for signed ints
253 template <> struct KeyConversion<int> {
254 typedef unsigned int UnsignedBits;
258 struct PreprocessKeyFunctor<int> {
259 __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) {
260 const unsigned int SIGN_MASK = 1u << ((sizeof(int) * 8) - 1);
261 converted_key ^= SIGN_MASK;
263 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
267 struct PostprocessKeyFunctor<int> {
268 __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) {
269 const unsigned int SIGN_MASK = 1u << ((sizeof(int) * 8) - 1);
270 converted_key ^= SIGN_MASK;
272 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
278 // Conversion for signed longs
281 // TODO rework this with metaprogramming
282 template <> struct KeyConversion<unsigned long> {
283 #if ULONG_MAX == UINT_MAX
284 typedef unsigned int UnsignedBits;
286 typedef unsigned long long UnsignedBits;
290 // TODO rework this with metaprogramming
291 template <> struct KeyConversion<long> {
292 #if ULONG_MAX == UINT_MAX
293 typedef unsigned int UnsignedBits;
295 typedef unsigned long long UnsignedBits;
300 struct PreprocessKeyFunctor<long> {
301 __device__ __host__ __forceinline__ void operator()(typename KeyConversion<long>::UnsignedBits& converted_key) {
302 const typename KeyConversion<long>::UnsignedBits SIGN_MASK = 1ul << ((sizeof(long) * 8) - 1);
303 converted_key ^= SIGN_MASK;
305 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
309 struct PostprocessKeyFunctor<long> {
310 __device__ __host__ __forceinline__ void operator()(typename KeyConversion<long>::UnsignedBits& converted_key) {
311 const typename KeyConversion<long>::UnsignedBits SIGN_MASK = 1ul << ((sizeof(long) * 8) - 1);
312 converted_key ^= SIGN_MASK;
314 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
320 // Conversion for signed long longs
323 template <> struct KeyConversion<long long> {
324 typedef unsigned long long UnsignedBits;
328 struct PreprocessKeyFunctor<long long> {
329 __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) {
330 const unsigned long long SIGN_MASK = 1ull << ((sizeof(long long) * 8) - 1);
331 converted_key ^= SIGN_MASK;
333 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
337 struct PostprocessKeyFunctor<long long> {
338 __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) {
339 const unsigned long long SIGN_MASK = 1ull << ((sizeof(long long) * 8) - 1);
340 converted_key ^= SIGN_MASK;
342 __device__ __host__ __forceinline__ static bool MustApply(){ return true;}
346 } // end namespace b40c_thrust
347 } // end namespace detail
348 } // end namespace detail
349 } // end namespace cuda
350 } // end namespace system
351 } // end namespace thrust