OSDN Git Service

Fixed Issue #104: Doubleclicking changed submodule dir in Check For Modifications...
[tortoisegit/TortoiseGitJp.git] / src / Utils / QuickHash.h
1 // TortoiseSVN - a Windows shell extension for easy version control\r
2 \r
3 // Copyright (C) 2007-2007 - TortoiseSVN\r
4 \r
5 // This program is free software; you can redistribute it and/or\r
6 // modify it under the terms of the GNU General Public License\r
7 // as published by the Free Software Foundation; either version 2\r
8 // of the License, or (at your option) any later version.\r
9 \r
10 // This program is distributed in the hope that it will be useful,\r
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of\r
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
13 // GNU General Public License for more details.\r
14 \r
15 // You should have received a copy of the GNU General Public License\r
16 // along with this program; if not, write to the Free Software Foundation,\r
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\r
18 //\r
19 #pragma once\r
20 \r
21 \r
22 /**\r
23  * A quick linear (array) hash index class. It requires HF to\r
24  * provide the following interface:\r
25  *\r
26  * value_type           data type of the hash\r
27  * index_type           type of the index to store the hash\r
28  * NO_INDEX                     an index type to mark empty buckets\r
29  *\r
30  * operator()           value_type -> size_t hash function\r
31  * value()                      index_type -> value_type\r
32  * equal()                      (value_type, index_type) -> bool\r
33  *\r
34  * The capacity approximately doubles by each rehash().\r
35  * Only insertion and lookup are provided. Collisions are\r
36  * resolved using linear probing.\r
37  *\r
38  * Use statistics() to monitor the cache performance.\r
39  */\r
40 template<class HF>\r
41 class quick_hash\r
42 {\r
43 public:\r
44 \r
45         typedef typename HF::value_type value_type;\r
46         typedef typename HF::index_type index_type;\r
47         \r
48         enum {NO_INDEX = (index_type)(HF::NO_INDEX)};\r
49 \r
50         struct statistics_t\r
51         {\r
52                 size_t capacity;\r
53                 size_t used;\r
54                 size_t collisions;\r
55                 size_t max_path;\r
56                 size_t collision_path_sum;\r
57 \r
58                 statistics_t()\r
59                         : capacity (1)\r
60                         , used (0)\r
61                         , collisions (0)\r
62                         , max_path (1)\r
63                         , collision_path_sum (0)\r
64                 {\r
65                 }\r
66         };\r
67 \r
68 private:\r
69 \r
70         class prime_grower\r
71         {\r
72         private:\r
73 \r
74                 static const size_t primes[31];\r
75                 statistics_t statistics;\r
76                 size_t index;\r
77 \r
78         public:\r
79 \r
80                 prime_grower()\r
81                         : index (0)\r
82                         , statistics()\r
83                 {\r
84                         statistics.capacity = primes[index];\r
85                 }\r
86 \r
87                 size_t capacity() const \r
88                 {\r
89                         return statistics.capacity;\r
90                 }\r
91 \r
92                 size_t size() const \r
93                 {\r
94                         return statistics.used;\r
95                 }\r
96                 \r
97                 size_t collisions() const \r
98                 {\r
99                         return statistics.collisions;\r
100                 }\r
101                 \r
102                 void inserted_cleanly() \r
103                 {\r
104                         ++statistics.used;\r
105                 }\r
106 \r
107                 void inserted_collision (size_t path_size) \r
108                 {\r
109                         ++statistics.used;\r
110                         ++statistics.collisions;\r
111                         statistics.collision_path_sum += path_size;\r
112                         if (statistics.max_path <= path_size)\r
113                                 statistics.max_path = path_size + 1;\r
114                 }\r
115 \r
116                 void grow() \r
117                 {\r
118                         statistics.capacity = primes[++index];\r
119                         statistics.collisions = 0;\r
120                         statistics.used = 0;\r
121                         statistics.collision_path_sum = 0;\r
122                         statistics.max_path = 1;\r
123                 }\r
124                 \r
125                 size_t map (size_t hash_value) const \r
126                 {\r
127                         return hash_value % capacity();\r
128                 }\r
129                 \r
130                 size_t next (size_t index) const \r
131                 {\r
132                         return (index + 1381000000) % capacity();\r
133                 }\r
134 \r
135                 const statistics_t& get_statistics() const\r
136                 {\r
137                         return statistics;\r
138                 }\r
139         };\r
140 \r
141         index_type* data;\r
142         prime_grower grower;\r
143         HF hf;\r
144         \r
145 private:\r
146 \r
147         /// check if we're allowed to add new entries to the hash\r
148         /// without re-hashing.\r
149         bool should_grow() const\r
150         {\r
151                 // grow, if there are many collisions \r
152                 // or capacity is almost exceeded\r
153                 // There must also be at least one empty entry\r
154 \r
155                 return grower.size() + grower.collisions() + 1 >= grower.capacity();\r
156         }\r
157 \r
158         /// initialize the new array before re-hashing\r
159         void create_data()\r
160         {\r
161                 size_t new_capacity = grower.capacity();\r
162         \r
163                 data = new index_type[new_capacity];\r
164                 stdext::unchecked_fill_n (data, new_capacity, NO_INDEX);\r
165         }\r
166         \r
167         /// add a value to the hash \r
168         /// (must not be in it already; hash must not be full)\r
169         void internal_insert (const value_type& value, index_type index)\r
170         {\r
171                 // first try: un-collisioned insertion\r
172 \r
173                 size_t bucket = grower.map (hf (value));\r
174                 index_type* target = data + bucket;\r
175 \r
176                 if (*target == NO_INDEX)\r
177                 {\r
178                         *target = index;\r
179                         grower.inserted_cleanly();\r
180 \r
181                         return;\r
182                 }\r
183 \r
184                 // collision -> look for an empty bucket\r
185 \r
186                 size_t collision_path_size = 0;\r
187                 do\r
188                 {\r
189                         bucket = grower.next (bucket);\r
190                         target = data + bucket;\r
191                         ++collision_path_size;\r
192                 }\r
193                 while (*target != NO_INDEX);\r
194                 \r
195                 // insert collisioned item\r
196 \r
197                 *target = index;\r
198                 grower.inserted_collision (collision_path_size);\r
199         }\r
200                 \r
201         void rehash (index_type* old_data, size_t old_data_size)\r
202         {\r
203                 create_data();\r
204 \r
205                 for (size_t i = 0; i < old_data_size; ++i)\r
206                 {\r
207                         index_type index = old_data[i];\r
208                         if (index != NO_INDEX)\r
209                                 internal_insert (hf.value (index), index);\r
210                 }\r
211 \r
212                 delete[] old_data;\r
213         }\r
214         \r
215 public:\r
216 \r
217         /// construction / destruction\r
218         quick_hash (const HF& hash_function) \r
219                 : data(NULL)\r
220                 , hf (hash_function)\r
221                 , grower()\r
222         {\r
223                 create_data();\r
224         }\r
225         \r
226         quick_hash (const quick_hash& rhs) \r
227                 : data (NULL)\r
228                 , hf (rhs.hf)\r
229                 , grower (rhs.grower)\r
230         {\r
231                 create_data();\r
232                 operator= (rhs);\r
233         }\r
234         \r
235         ~quick_hash() \r
236         {\r
237                 delete[] data;\r
238         }\r
239 \r
240         /// find the bucket containing the desired value;\r
241         /// return NO_INDEX if not contained in hash\r
242         index_type find (const value_type& value) const\r
243         {\r
244                 size_t bucket = grower.map (hf (value));\r
245                 index_type index = data[bucket];\r
246 \r
247                 while (index != NO_INDEX)\r
248                 {\r
249                         // found?\r
250 \r
251                         if (hf.equal (value, index))\r
252                                 break;\r
253 \r
254                         // collision -> look at next bucket position\r
255 \r
256                         bucket = grower.next (bucket);\r
257                         index = data[bucket];\r
258                 }\r
259 \r
260                 // either found or not in hash\r
261                 \r
262                 return index;\r
263         }\r
264         \r
265         void insert (const value_type& value, index_type index)\r
266         {\r
267                 assert (find (value) == NO_INDEX);\r
268                 \r
269                 if (should_grow())\r
270                         reserve (grower.capacity()+1);\r
271         \r
272                 internal_insert (value, index);\r
273         }\r
274 \r
275         void reserve (size_t min_bucket_count)\r
276         {\r
277                 if (size_t(-1) / sizeof (index_type[4]) > min_bucket_count)\r
278                         min_bucket_count *= 2;\r
279 \r
280                 index_type* old_data = data;\r
281                 size_t old_data_size = grower.capacity();\r
282                 \r
283                 while (grower.capacity() < min_bucket_count)\r
284                         grower.grow();\r
285                         \r
286                 if (grower.capacity() != old_data_size)\r
287                         rehash (old_data, old_data_size);\r
288         }\r
289 \r
290         /// assignment\r
291 \r
292         quick_hash& operator=(const quick_hash& rhs)\r
293         {\r
294                 if (grower.capacity() != rhs.grower.capacity())\r
295                 {\r
296                         delete[] data;\r
297                         data = new index_type [rhs.grower.capacity()];\r
298                 }\r
299 \r
300                 grower = rhs.grower;\r
301 \r
302                 stdext::unchecked_copy (rhs.data, rhs.data + rhs.grower.capacity(), data);\r
303 \r
304                 return *this;\r
305         }\r
306 \r
307         /// get rid of all entries\r
308 \r
309         void clear()\r
310         {\r
311                 if (grower.size() > 0)\r
312                 {\r
313                         delete[] data;\r
314                         grower = prime_grower();\r
315                         create_data();\r
316                 }\r
317         }\r
318 \r
319         /// efficiently exchange two containers\r
320 \r
321         void swap (quick_hash& rhs)\r
322         {\r
323                 std::swap (data, rhs.data);\r
324 \r
325                 prime_grower temp = grower;\r
326                 grower = rhs.grower;\r
327                 rhs.grower = temp;\r
328         }\r
329 \r
330         /// read cache performance statistics\r
331         const statistics_t& statistics() const\r
332         {\r
333                 return grower.get_statistics();\r
334         }\r
335 };\r
336 \r
337 template<class HF>\r
338 const size_t quick_hash<HF>::prime_grower::primes[31] = \r
339         {1, 3, 7, 17, \r
340          31, 67, 127, 257, \r
341          509, 1021, 2053, 4099,\r
342          8191, 16381, 32771, 65537,\r
343          131071, 262147, 524287, 1048573,\r
344          2097143, 4194301, 8388617, 16777213,\r
345          33554467, 67108859, 134217757, 268435459,\r
346          536870909, 1073741827};\r
347 \r