1 // TortoiseSVN - a Windows shell extension for easy version control
\r
3 // Copyright (C) 2007-2007 - TortoiseSVN
\r
5 // This program is free software; you can redistribute it and/or
\r
6 // modify it under the terms of the GNU General Public License
\r
7 // as published by the Free Software Foundation; either version 2
\r
8 // of the License, or (at your option) any later version.
\r
10 // This program is distributed in the hope that it will be useful,
\r
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 // GNU General Public License for more details.
\r
15 // You should have received a copy of the GNU General Public License
\r
16 // along with this program; if not, write to the Free Software Foundation,
\r
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\r
23 * A quick linear (array) hash index class. It requires HF to
\r
24 * provide the following interface:
\r
26 * value_type data type of the hash
\r
27 * index_type type of the index to store the hash
\r
28 * NO_INDEX an index type to mark empty buckets
\r
30 * operator() value_type -> size_t hash function
\r
31 * value() index_type -> value_type
\r
32 * equal() (value_type, index_type) -> bool
\r
34 * The capacity approximately doubles by each rehash().
\r
35 * Only insertion and lookup are provided. Collisions are
\r
36 * resolved using linear probing.
\r
38 * Use statistics() to monitor the cache performance.
\r
45 typedef typename HF::value_type value_type;
\r
46 typedef typename HF::index_type index_type;
\r
48 enum {NO_INDEX = (index_type)(HF::NO_INDEX)};
\r
56 size_t collision_path_sum;
\r
63 , collision_path_sum (0)
\r
74 static const size_t primes[31];
\r
75 statistics_t statistics;
\r
84 statistics.capacity = primes[index];
\r
87 size_t capacity() const
\r
89 return statistics.capacity;
\r
92 size_t size() const
\r
94 return statistics.used;
\r
97 size_t collisions() const
\r
99 return statistics.collisions;
\r
102 void inserted_cleanly()
\r
107 void inserted_collision (size_t path_size)
\r
110 ++statistics.collisions;
\r
111 statistics.collision_path_sum += path_size;
\r
112 if (statistics.max_path <= path_size)
\r
113 statistics.max_path = path_size + 1;
\r
118 statistics.capacity = primes[++index];
\r
119 statistics.collisions = 0;
\r
120 statistics.used = 0;
\r
121 statistics.collision_path_sum = 0;
\r
122 statistics.max_path = 1;
\r
125 size_t map (size_t hash_value) const
\r
127 return hash_value % capacity();
\r
130 size_t next (size_t index) const
\r
132 return (index + 1381000000) % capacity();
\r
135 const statistics_t& get_statistics() const
\r
142 prime_grower grower;
\r
147 /// check if we're allowed to add new entries to the hash
\r
148 /// without re-hashing.
\r
149 bool should_grow() const
\r
151 // grow, if there are many collisions
\r
152 // or capacity is almost exceeded
\r
153 // There must also be at least one empty entry
\r
155 return grower.size() + grower.collisions() + 1 >= grower.capacity();
\r
158 /// initialize the new array before re-hashing
\r
161 size_t new_capacity = grower.capacity();
\r
163 data = new index_type[new_capacity];
\r
164 stdext::unchecked_fill_n (data, new_capacity, NO_INDEX);
\r
167 /// add a value to the hash
\r
168 /// (must not be in it already; hash must not be full)
\r
169 void internal_insert (const value_type& value, index_type index)
\r
171 // first try: un-collisioned insertion
\r
173 size_t bucket = grower.map (hf (value));
\r
174 index_type* target = data + bucket;
\r
176 if (*target == NO_INDEX)
\r
179 grower.inserted_cleanly();
\r
184 // collision -> look for an empty bucket
\r
186 size_t collision_path_size = 0;
\r
189 bucket = grower.next (bucket);
\r
190 target = data + bucket;
\r
191 ++collision_path_size;
\r
193 while (*target != NO_INDEX);
\r
195 // insert collisioned item
\r
198 grower.inserted_collision (collision_path_size);
\r
201 void rehash (index_type* old_data, size_t old_data_size)
\r
205 for (size_t i = 0; i < old_data_size; ++i)
\r
207 index_type index = old_data[i];
\r
208 if (index != NO_INDEX)
\r
209 internal_insert (hf.value (index), index);
\r
217 /// construction / destruction
\r
218 quick_hash (const HF& hash_function)
\r
220 , hf (hash_function)
\r
226 quick_hash (const quick_hash& rhs)
\r
229 , grower (rhs.grower)
\r
240 /// find the bucket containing the desired value;
\r
241 /// return NO_INDEX if not contained in hash
\r
242 index_type find (const value_type& value) const
\r
244 size_t bucket = grower.map (hf (value));
\r
245 index_type index = data[bucket];
\r
247 while (index != NO_INDEX)
\r
251 if (hf.equal (value, index))
\r
254 // collision -> look at next bucket position
\r
256 bucket = grower.next (bucket);
\r
257 index = data[bucket];
\r
260 // either found or not in hash
\r
265 void insert (const value_type& value, index_type index)
\r
267 assert (find (value) == NO_INDEX);
\r
270 reserve (grower.capacity()+1);
\r
272 internal_insert (value, index);
\r
275 void reserve (size_t min_bucket_count)
\r
277 if (size_t(-1) / sizeof (index_type[4]) > min_bucket_count)
\r
278 min_bucket_count *= 2;
\r
280 index_type* old_data = data;
\r
281 size_t old_data_size = grower.capacity();
\r
283 while (grower.capacity() < min_bucket_count)
\r
286 if (grower.capacity() != old_data_size)
\r
287 rehash (old_data, old_data_size);
\r
292 quick_hash& operator=(const quick_hash& rhs)
\r
294 if (grower.capacity() != rhs.grower.capacity())
\r
297 data = new index_type [rhs.grower.capacity()];
\r
300 grower = rhs.grower;
\r
302 stdext::unchecked_copy (rhs.data, rhs.data + rhs.grower.capacity(), data);
\r
307 /// get rid of all entries
\r
311 if (grower.size() > 0)
\r
314 grower = prime_grower();
\r
319 /// efficiently exchange two containers
\r
321 void swap (quick_hash& rhs)
\r
323 std::swap (data, rhs.data);
\r
325 prime_grower temp = grower;
\r
326 grower = rhs.grower;
\r
330 /// read cache performance statistics
\r
331 const statistics_t& statistics() const
\r
333 return grower.get_statistics();
\r
338 const size_t quick_hash<HF>::prime_grower::primes[31] =
\r
341 509, 1021, 2053, 4099,
\r
342 8191, 16381, 32771, 65537,
\r
343 131071, 262147, 524287, 1048573,
\r
344 2097143, 4194301, 8388617, 16777213,
\r
345 33554467, 67108859, 134217757, 268435459,
\r
346 536870909, 1073741827};
\r