Merge lp:~zorba-coders/zorba/hashmap into lp:zorba
- hashmap
- Merge into trunk
Proposed by
Markos Zaharioudakis
Status: | Merged |
---|---|
Approved by: | Markos Zaharioudakis |
Approved revision: | 10817 |
Merged at revision: | 11091 |
Proposed branch: | lp:~zorba-coders/zorba/hashmap |
Merge into: | lp:zorba |
Diff against target: |
1469 lines (+600/-192) 10 files modified
src/context/static_context.h (+1/-0) src/store/naive/qname_pool.cpp (+8/-8) src/store/naive/string_pool.cpp (+42/-38) src/unit_tests/CMakeLists.txt (+1/-0) src/unit_tests/test_hashmaps.cpp (+257/-0) src/unit_tests/unit_test_list.h (+16/-1) src/unit_tests/unit_tests.cpp (+13/-1) src/zorbaserialization/bin_archiver.cpp (+1/-1) src/zorbautils/hashmap.h (+251/-122) src/zorbautils/hashset.h (+10/-21) |
To merge this branch: | bzr merge lp:~zorba-coders/zorba/hashmap |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Markos Zaharioudakis | Approve | ||
Review via email: mp+125187@code.launchpad.net |
Commit message
HashMap optimizations
Description of the change
HashMap optimizations
To post a comment you must log in.
Revision history for this message
Markos Zaharioudakis (markos-za) : | # |
review:
Approve
lp:~zorba-coders/zorba/hashmap
updated
- 10815. By Markos Zaharioudakis
-
merge from trunk
- 10816. By Markos Zaharioudakis
-
removed debugging print out
- 10817. By Markos Zaharioudakis
-
merge from trunk
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : | # |
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote : | # |
Validation queue job hashmap-
All tests succeeded!
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'src/context/static_context.h' |
2 | --- src/context/static_context.h 2012-10-03 12:32:55 +0000 |
3 | +++ src/context/static_context.h 2012-10-08 12:24:30 +0000 |
4 | @@ -42,6 +42,7 @@ |
5 | |
6 | #include "zorbautils/hashmap_zstring.h" |
7 | #include "zorbautils/hashmap_itemp.h" |
8 | +#include "zorbautils/checked_vector.h" |
9 | |
10 | #include "common/shared_types.h" |
11 | |
12 | |
13 | === modified file 'src/store/naive/qname_pool.cpp' |
14 | --- src/store/naive/qname_pool.cpp 2012-09-17 00:36:37 +0000 |
15 | +++ src/store/naive/qname_pool.cpp 2012-10-08 12:24:30 +0000 |
16 | @@ -63,12 +63,12 @@ |
17 | ********************************************************************************/ |
18 | QNamePool::~QNamePool() |
19 | { |
20 | - csize n = theHashSet.theHashTab.size(); |
21 | + csize n = theHashSet.capacity(); |
22 | for (csize i = 0; i < n; ++i) |
23 | { |
24 | if (!theHashSet.theHashTab[i].isFree() && |
25 | - theHashSet.theHashTab[i].theItem->isOverflow()) |
26 | - delete theHashSet.theHashTab[i].theItem; |
27 | + theHashSet.theHashTab[i].key()->isOverflow()) |
28 | + delete theHashSet.theHashTab[i].key(); |
29 | } |
30 | |
31 | if (theCache != NULL) |
32 | @@ -297,12 +297,12 @@ |
33 | |
34 | bool found; |
35 | entry = theHashSet.hashInsert(qn, hval, found); |
36 | - entry->theItem = qn; |
37 | + entry->key() = qn; |
38 | ZORBA_FATAL(!found, ""); |
39 | } |
40 | else |
41 | { |
42 | - qn = entry->theItem; |
43 | + qn = entry->key(); |
44 | cachePin(qn); |
45 | } |
46 | |
47 | @@ -389,12 +389,12 @@ |
48 | |
49 | bool found; |
50 | entry = theHashSet.hashInsert(qn, hval, found); |
51 | - entry->theItem = qn; |
52 | + entry->key() = qn; |
53 | ZORBA_FATAL(!found, ""); |
54 | } |
55 | else |
56 | { |
57 | - qn = entry->theItem; |
58 | + qn = entry->key(); |
59 | cachePin(qn); |
60 | } |
61 | |
62 | @@ -478,7 +478,7 @@ |
63 | |
64 | while (entry != NULL) |
65 | { |
66 | - QNameItem* qn = entry->theItem; |
67 | + QNameItem* qn = entry->key(); |
68 | |
69 | if (ztd::equals(qn->getLocalName(), ln, lnlen) && |
70 | ztd::equals(qn->getNamespace(), ns, nslen) && |
71 | |
72 | === modified file 'src/store/naive/string_pool.cpp' |
73 | --- src/store/naive/string_pool.cpp 2012-09-17 00:36:37 +0000 |
74 | +++ src/store/naive/string_pool.cpp 2012-10-08 12:24:30 +0000 |
75 | @@ -28,13 +28,14 @@ |
76 | StringPool::~StringPool() |
77 | { |
78 | csize count = 0; |
79 | - csize n = theHashTab.size(); |
80 | + csize n = capacity(); |
81 | |
82 | for (csize i = 0; i < n; ++i) |
83 | { |
84 | - if (theHashTab[i].theItem.is_shared()) |
85 | + if (!theHashTab[i].isFree() && theHashTab[i].key().is_shared()) |
86 | { |
87 | - std::cerr << "ID: " << i << " Referenced URI: " << theHashTab[i].theItem << std::endl; |
88 | + std::cerr << "ID: " << i << " Referenced URI: " |
89 | + << theHashTab[i].key() << std::endl; |
90 | //delete theHashTab[i].theString.getp(); |
91 | count++; |
92 | } |
93 | @@ -57,7 +58,8 @@ |
94 | bool found = false; |
95 | |
96 | zstring::size_type len = strlen(str); |
97 | - ulong hval = hashfun::h32(str, len, FNV_32_INIT) % theHashTabSize; |
98 | + |
99 | + ulong hval = hashfun::h32(str, len, FNV_32_INIT) % bucket_count(); |
100 | |
101 | { |
102 | SYNC_CODE(AutoMutex lock(&theMutex);) |
103 | @@ -68,7 +70,7 @@ |
104 | { |
105 | while (entry != NULL) |
106 | { |
107 | - if (ztd::equals(entry->theItem, str, len)) |
108 | + if (ztd::equals(entry->key(), str, len)) |
109 | { |
110 | found = true; |
111 | break; |
112 | @@ -79,7 +81,7 @@ |
113 | |
114 | if (found) |
115 | { |
116 | - outStr = entry->theItem; |
117 | + outStr = entry->key(); |
118 | return false; |
119 | } |
120 | } |
121 | @@ -97,76 +99,78 @@ |
122 | ********************************************************************************/ |
123 | void StringPool::garbageCollect() |
124 | { |
125 | - HashEntry<zstring, DummyHashValue>* entry; |
126 | + HashEntry<zstring, DummyHashValue>* currEntry; |
127 | |
128 | HashEntry<zstring, DummyHashValue>* freeList = NULL; |
129 | |
130 | - zstring::size_type size = theHashTabSize; |
131 | + csize size = bucket_count(); |
132 | |
133 | - for (ulong i = 0; i < size; ++i) |
134 | + for (csize i = 0; i < size; ++i) |
135 | { |
136 | - entry = &theHashTab[i]; |
137 | + currEntry = &theHashTab[i]; |
138 | |
139 | // If the current hash bucket is empty, move to the next one |
140 | - if (entry->isFree()) |
141 | + if (currEntry->isFree()) |
142 | { |
143 | - ZORBA_FATAL(entry->theNext == 0, ""); |
144 | + ZORBA_FATAL(currEntry->theNext == 0, ""); |
145 | continue; |
146 | } |
147 | |
148 | // Handle the 1st hash entry of the current hash bucket |
149 | - while (!entry->theItem.is_shared()) |
150 | + while (!currEntry->key().is_shared()) |
151 | { |
152 | - if (entry->theNext == 0) |
153 | + if (currEntry->theNext == 0) |
154 | { |
155 | - entry->setFree(); |
156 | - theNumEntries--; |
157 | + currEntry->setFree(); |
158 | + --theNumEntries; |
159 | break; |
160 | } |
161 | else |
162 | { |
163 | - HashEntry<zstring, DummyHashValue>* nextEntry = entry->getNext(); |
164 | - *entry = *nextEntry; |
165 | - entry->setNext(nextEntry->getNext()); |
166 | + HashEntry<zstring, DummyHashValue>* nextEntry = currEntry->getNext(); |
167 | + assert(!nextEntry->isFree()); |
168 | + *currEntry = *nextEntry; |
169 | + currEntry->setNext(nextEntry->getNext()); |
170 | nextEntry->setFree(); |
171 | nextEntry->setNext(freeList); |
172 | freeList = nextEntry; |
173 | - theNumEntries--; |
174 | + --theNumEntries; |
175 | } |
176 | } |
177 | |
178 | // Handle the overflow entries of the current hash bucket. |
179 | - HashEntry<zstring, DummyHashValue>* prevEntry = entry; |
180 | - entry = entry->getNext(); |
181 | + HashEntry<zstring, DummyHashValue>* prevEntry = currEntry; |
182 | + currEntry = currEntry->getNext(); |
183 | |
184 | - while (entry != NULL) |
185 | + while (currEntry != NULL) |
186 | { |
187 | - if (!entry->theItem.is_shared()) |
188 | + if (!currEntry->key().is_shared()) |
189 | { |
190 | - prevEntry->setNext(entry->getNext()); |
191 | - entry->setFree(); |
192 | - entry->setNext(freeList); |
193 | - freeList = entry; |
194 | - theNumEntries--; |
195 | + prevEntry->setNext(currEntry->getNext()); |
196 | + currEntry->setFree(); |
197 | + currEntry->setNext(freeList); |
198 | + freeList = currEntry; |
199 | + --theNumEntries; |
200 | |
201 | - entry = prevEntry->getNext(); |
202 | + currEntry = prevEntry->getNext(); |
203 | } |
204 | else |
205 | { |
206 | - prevEntry = entry; |
207 | - entry = entry->getNext(); |
208 | + prevEntry = currEntry; |
209 | + currEntry = currEntry->getNext(); |
210 | } |
211 | } |
212 | } |
213 | |
214 | if (freeList != NULL) |
215 | { |
216 | - entry = freeList; |
217 | - while (entry->theNext != 0) |
218 | - entry = entry->getNext(); |
219 | - |
220 | - entry->setNext(theHashTab[theHashTabSize].getNext()); |
221 | - theHashTab[theHashTabSize].setNext(freeList); |
222 | + currEntry = freeList; |
223 | + while (currEntry->theNext != 0) |
224 | + currEntry = currEntry->getNext(); |
225 | + |
226 | + currEntry->setNext(theHashTab[bucket_count()].getNext()); |
227 | + |
228 | + theHashTab[bucket_count()].setNext(freeList); |
229 | } |
230 | } |
231 | |
232 | |
233 | === modified file 'src/unit_tests/CMakeLists.txt' |
234 | --- src/unit_tests/CMakeLists.txt 2012-09-17 00:36:37 +0000 |
235 | +++ src/unit_tests/CMakeLists.txt 2012-10-08 12:24:30 +0000 |
236 | @@ -25,6 +25,7 @@ |
237 | test_uri.cpp |
238 | test_json_parser.cpp |
239 | test_fs_iterator.cpp |
240 | + test_hashmaps.cpp |
241 | # memory_manager.cpp |
242 | ) |
243 | |
244 | |
245 | === added file 'src/unit_tests/test_hashmaps.cpp' |
246 | --- src/unit_tests/test_hashmaps.cpp 1970-01-01 00:00:00 +0000 |
247 | +++ src/unit_tests/test_hashmaps.cpp 2012-10-08 12:24:30 +0000 |
248 | @@ -0,0 +1,257 @@ |
249 | + |
250 | +#include <cstdlib> |
251 | +#include <stdio.h> |
252 | +#include <iostream> |
253 | + |
254 | +#include <zorba/util/time.h> |
255 | + |
256 | +#include "zorbautils/hashmap.h" |
257 | +#include "util/hashmap32.h" |
258 | +#include "util/hashmap.h" |
259 | +#include "util/unordered_map.h" |
260 | + |
261 | + |
262 | +namespace zorba { |
263 | + |
264 | +namespace UnitTests { |
265 | + |
266 | + |
267 | +class IntCompFunc |
268 | +{ |
269 | +public: |
270 | + static bool equal(uint64_t k1, uint64_t k2) |
271 | + { |
272 | + return k1 == k2; |
273 | + } |
274 | + |
275 | + static uint32_t hash(uint64_t key) |
276 | + { |
277 | +#if 1 |
278 | + return key; |
279 | +#else |
280 | + char buf[9]; |
281 | + buf[0] = (unsigned char)(key>>56); |
282 | + buf[1] = (unsigned char)(key>>48); |
283 | + buf[2] = (unsigned char)(key>>40); |
284 | + buf[3] = (unsigned char)(key>>32); |
285 | + buf[4] = (unsigned char)(key>>24); |
286 | + buf[5] = (unsigned char)(key>>16); |
287 | + buf[6] = (unsigned char)(key>>8 ); |
288 | + buf[7] = (unsigned char)(key ); |
289 | + buf[8] = 0; |
290 | + return (uint32_t)hashfun::h64((void*)buf,8,FNV_64_INIT); |
291 | +#endif |
292 | + } |
293 | +}; |
294 | + |
295 | + |
296 | +class StrCompFunc |
297 | +{ |
298 | +public: |
299 | + static bool equal(const zstring& k1, const zstring& k2) |
300 | + { |
301 | + return k1 == k2; |
302 | + } |
303 | + |
304 | + static uint32_t hash(const zstring& key) |
305 | + { |
306 | + return hashfun::h32(key.c_str(), FNV_32_INIT); |
307 | + } |
308 | +}; |
309 | + |
310 | + |
311 | +int test_hashmaps(int argc, char* argv[]) |
312 | +{ |
313 | + if (argc < 4) |
314 | + return 1; |
315 | + |
316 | + int test_id = atoi(argv[1]); |
317 | + |
318 | + double load_factor = atof(argv[2]); |
319 | + |
320 | + int num = atoi(argv[3]); |
321 | + |
322 | + uint64_t* int_buf = new uint64_t[num]; |
323 | + |
324 | + for (int i = 0; i < num; ++i) |
325 | + { |
326 | + int_buf[i] = rand() % num; |
327 | + } |
328 | + |
329 | + zstring* str_buf = new zstring[num]; |
330 | + |
331 | + for (int i = 0; i < num; ++i) |
332 | + { |
333 | + char tmp[20]; |
334 | + sprintf(tmp, "%d", rand() % num); |
335 | + str_buf[i] = tmp; |
336 | + |
337 | + //std::cout << str_buf[i] << std::endl; |
338 | + } |
339 | + |
340 | + |
341 | + HashMap<uint64_t, int, IntCompFunc> map1(1024, false); |
342 | + HashMap<zstring, int, StrCompFunc> map2(1024, false); |
343 | + |
344 | + std::unordered_map<uint64_t, int> map3(1024); |
345 | + std::unordered_map<zstring, int> map4(1024); |
346 | + |
347 | + hash64map<int> map5(1024, load_factor); |
348 | + hashmap<zstring, int> map6(1024, load_factor); |
349 | + |
350 | + map1.set_load_factor(load_factor); |
351 | + map2.set_load_factor(load_factor); |
352 | + |
353 | + if (test_id == 1) |
354 | + { |
355 | + zorba::time::walltime startTime; |
356 | + zorba::time::get_current_walltime(startTime); |
357 | + |
358 | + for (int i = 0; i < num; ++i) |
359 | + { |
360 | + uint64_t key = int_buf[i]; |
361 | + int value = 1; |
362 | + (void)map1.insert(key, value); |
363 | + } |
364 | + |
365 | + zorba::time::walltime stopTime; |
366 | + time::get_current_walltime(stopTime); |
367 | + |
368 | + double time = time::get_walltime_elapsed(startTime, stopTime); |
369 | + |
370 | + std::cout << "Load factor = " << load_factor << std::endl |
371 | + << "Num numeric insertions = " << num << std::endl |
372 | + << "HashMap entries = " << map1.size() << std::endl |
373 | + << "HashMap capacity = " << map1.capacity() << std::endl |
374 | + << "HashMap colisions = " << map1.collisions() << std::endl |
375 | + << "Time = " << time << std::endl; |
376 | + } |
377 | + else if (test_id == 2) |
378 | + { |
379 | + zorba::time::walltime startTime; |
380 | + zorba::time::get_current_walltime(startTime); |
381 | + |
382 | + for (int i = 0; i < num; ++i) |
383 | + { |
384 | + zstring key = str_buf[i]; |
385 | + int value = 1; |
386 | + (void)map2.insert(key, value); |
387 | + } |
388 | + |
389 | + zorba::time::walltime stopTime; |
390 | + time::get_current_walltime(stopTime); |
391 | + |
392 | + double time = time::get_walltime_elapsed(startTime, stopTime); |
393 | + |
394 | + std::cout << "Load factor = " << load_factor << std::endl |
395 | + << "Num zstring insertions = " << num << std::endl |
396 | + << "HashMap entries = " << map2.size() << std::endl |
397 | + << "HashMap buckets = " << map2.bucket_count() << std::endl |
398 | + << "HashMap capacity = " << map2.capacity() << std::endl |
399 | + << "Time = " << time << std::endl; |
400 | + } |
401 | + else if (test_id == 3) |
402 | + { |
403 | + zorba::time::walltime startTime; |
404 | + zorba::time::get_current_walltime(startTime); |
405 | + |
406 | + for (int i = 0; i < num; ++i) |
407 | + { |
408 | + uint64_t key = int_buf[i]; |
409 | + int value = 1; |
410 | + (void)map3.insert(std::pair<uint64_t, int>(key, value)); |
411 | + } |
412 | + |
413 | + zorba::time::walltime stopTime; |
414 | + time::get_current_walltime(stopTime); |
415 | + |
416 | + double time = time::get_walltime_elapsed(startTime, stopTime); |
417 | + |
418 | + std::cout << "Load factor = " << load_factor << std::endl |
419 | + << "Num numeric insertions = " << num << std::endl |
420 | + << "UnorderedMap entries = " << map3.size() << std::endl |
421 | + << "UnorderedMap buckets = " << map3.bucket_count() << std::endl |
422 | + << "Time = " << time << std::endl; |
423 | + } |
424 | + else if (test_id == 4) |
425 | + { |
426 | + zorba::time::walltime startTime; |
427 | + zorba::time::get_current_walltime(startTime); |
428 | + |
429 | + for (int i = 0; i < num; ++i) |
430 | + { |
431 | + zstring key = str_buf[i]; |
432 | + int value = 1; |
433 | + (void)map4.insert(std::pair<zstring, int>(key, value)); |
434 | + } |
435 | + |
436 | + zorba::time::walltime stopTime; |
437 | + time::get_current_walltime(stopTime); |
438 | + |
439 | + double time = time::get_walltime_elapsed(startTime, stopTime); |
440 | + |
441 | + std::cout << "Load factor = " << load_factor << std::endl |
442 | + << "Num zstring insertions = " << num << std::endl |
443 | + << "UnorderedMap entries = " << map4.size() << std::endl |
444 | + << "UnorderedMap buckets = " << map4.bucket_count() << std::endl |
445 | + << "Time = " << time << std::endl; |
446 | + } |
447 | + else if (test_id == 5) |
448 | + { |
449 | + zorba::time::walltime startTime; |
450 | + zorba::time::get_current_walltime(startTime); |
451 | + |
452 | + for (int i = 0; i < num; ++i) |
453 | + { |
454 | + uint64_t key = int_buf[i]; |
455 | + int value = 1; |
456 | + (void)map5.put_unsync(key, value); |
457 | + } |
458 | + |
459 | + zorba::time::walltime stopTime; |
460 | + time::get_current_walltime(stopTime); |
461 | + |
462 | + double time = time::get_walltime_elapsed(startTime, stopTime); |
463 | + |
464 | + std::cout << "Load factor = " << load_factor << std::endl |
465 | + << "Num numeric insertions = " << num << std::endl |
466 | + << "hashmap entries = " << map5.size() << std::endl |
467 | + << "Time = " << time << std::endl; |
468 | + } |
469 | + else if (test_id == 6) |
470 | + { |
471 | + zorba::time::walltime startTime; |
472 | + zorba::time::get_current_walltime(startTime); |
473 | + |
474 | + for (int i = 0; i < num; ++i) |
475 | + { |
476 | + zstring key = str_buf[i]; |
477 | + int value = 1; |
478 | + (void)map6.put(key, value, false); |
479 | + } |
480 | + |
481 | + zorba::time::walltime stopTime; |
482 | + time::get_current_walltime(stopTime); |
483 | + |
484 | + double time = time::get_walltime_elapsed(startTime, stopTime); |
485 | + |
486 | + std::cout << "Load factor = " << load_factor << std::endl |
487 | + << "Num zstring insertions = " << num << std::endl |
488 | + << "hashmap entries = " << map6.size() << std::endl |
489 | + << "Time = " << time << std::endl; |
490 | + } |
491 | + else |
492 | + { |
493 | + std::cout << "Invalid test id" << std::endl; |
494 | + return 2; |
495 | + } |
496 | + |
497 | + delete [] int_buf; |
498 | + delete [] str_buf; |
499 | + |
500 | + return 0; |
501 | +} |
502 | + |
503 | + |
504 | +} |
505 | +} |
506 | |
507 | === modified file 'src/unit_tests/unit_test_list.h' |
508 | --- src/unit_tests/unit_test_list.h 2012-09-17 00:36:37 +0000 |
509 | +++ src/unit_tests/unit_test_list.h 2012-10-08 12:24:30 +0000 |
510 | @@ -25,34 +25,49 @@ |
511 | namespace UnitTests { |
512 | |
513 | int runUriTest(int argc, char* argv[]); |
514 | + |
515 | int runDebuggerProtocolTest(int argc, char* argv[]); |
516 | + |
517 | int test_base64( int, char*[] ); |
518 | + |
519 | int test_base64_streambuf( int, char*[] ); |
520 | + |
521 | int test_fs_iterator( int, char*[] ); |
522 | + |
523 | #ifndef ZORBA_NO_ICU |
524 | int test_icu_streambuf( int, char*[] ); |
525 | #endif /* ZORBA_NO_ICU */ |
526 | + |
527 | int test_json_parser( int, char*[] ); |
528 | + |
529 | int test_string( int, char*[] ); |
530 | + |
531 | int test_unique_ptr( int, char*[] ); |
532 | + |
533 | int test_fs_iterator( int, char*[] ); |
534 | - //int test_mem_manager( int, char*[] ); |
535 | + |
536 | #ifndef ZORBA_NO_FULL_TEXT |
537 | int test_stemmer( int, char*[] ); |
538 | int test_thesaurus( int, char*[] ); |
539 | int test_tokenizer( int, char*[] ); |
540 | #endif /* ZORBA_NO_FULL_TEXT */ |
541 | + |
542 | #ifndef ZORBA_HAVE_UNIQUE_PTR |
543 | int test_unique_ptr( int, char*[] ); |
544 | #endif /* ZORBA_HAVE_UNIQUE_PTR */ |
545 | + |
546 | int test_uuid( int, char*[] ); |
547 | + |
548 | #ifndef ZORBA_HAVE_UNORDERED_MAP |
549 | int test_unordered_map( int, char*[] ); |
550 | #endif /* ZORBA_HAVE_UNORDERED_MAP */ |
551 | + |
552 | #ifndef ZORBA_HAVE_UNORDERED_SET |
553 | int test_unordered_set( int, char*[] ); |
554 | #endif /* ZORBA_HAVE_UNORDERED_SET */ |
555 | |
556 | + int test_hashmaps(int argc, char* argv[]); |
557 | + |
558 | void initializeTestList(); |
559 | |
560 | } // namespace UnitTests |
561 | |
562 | === modified file 'src/unit_tests/unit_tests.cpp' |
563 | --- src/unit_tests/unit_tests.cpp 2012-09-17 00:36:37 +0000 |
564 | +++ src/unit_tests/unit_tests.cpp 2012-10-08 12:24:30 +0000 |
565 | @@ -38,29 +38,41 @@ |
566 | void initializeTestList() |
567 | { |
568 | libunittests["base64"] = test_base64; |
569 | + |
570 | libunittests["base64_streambuf"] = test_base64_streambuf; |
571 | + |
572 | libunittests["fs_iterator"] = test_fs_iterator; |
573 | - //libunittests["memory_manager"] = test_mem_manager; |
574 | + |
575 | #ifndef ZORBA_NO_ICU |
576 | libunittests["icu_streambuf"] = test_icu_streambuf; |
577 | #endif /* ZORBA_NO_ICU */ |
578 | + |
579 | libunittests["json_parser"] = test_json_parser; |
580 | + |
581 | libunittests["string"] = test_string; |
582 | + |
583 | #ifndef ZORBA_NO_FULL_TEXT |
584 | libunittests["stemmer"] = test_stemmer; |
585 | libunittests["thesaurus"] = test_thesaurus; |
586 | libunittests["tokenizer"] = test_tokenizer; |
587 | #endif /* ZORBA_NO_FULL_TEXT */ |
588 | + |
589 | #ifndef ZORBA_HAVE_UNIQUE_PTR |
590 | libunittests["unique_ptr"] = test_unique_ptr; |
591 | #endif /* ZORBA_HAVE_UNIQUE_PTR */ |
592 | + |
593 | libunittests["uuid"] = test_uuid; |
594 | + |
595 | #ifndef ZORBA_HAVE_UNORDERED_MAP |
596 | libunittests["unordered_map"] = test_unordered_map; |
597 | #endif /* ZORBA_HAVE_UNORDERED_MAP */ |
598 | + |
599 | #ifndef ZORBA_HAVE_UNORDERED_SET |
600 | libunittests["unordered_set"] = test_unordered_set; |
601 | #endif /* ZORBA_HAVE_UNORDERED_SET */ |
602 | + |
603 | + libunittests["hashmaps"] = test_hashmaps; |
604 | + |
605 | libunittests["uri"] = runUriTest; |
606 | |
607 | #ifdef ZORBA_WITH_DEBUGGER |
608 | |
609 | === modified file 'src/zorbaserialization/bin_archiver.cpp' |
610 | --- src/zorbaserialization/bin_archiver.cpp 2012-09-17 00:36:37 +0000 |
611 | +++ src/zorbaserialization/bin_archiver.cpp 2012-10-08 12:24:30 +0000 |
612 | @@ -137,7 +137,7 @@ |
613 | BinArchiver::BinArchiver(std::ostream* os) |
614 | : |
615 | Archiver(true), |
616 | - theStringPool(1024, false, false), |
617 | + theStringPool(1024, false), |
618 | theFirstBinaryString(0) |
619 | { |
620 | this->is = NULL; |
621 | |
622 | === modified file 'src/zorbautils/hashmap.h' |
623 | --- src/zorbautils/hashmap.h 2012-09-17 00:36:37 +0000 |
624 | +++ src/zorbautils/hashmap.h 2012-10-08 12:24:30 +0000 |
625 | @@ -16,6 +16,7 @@ |
626 | #ifndef ZORBA_UTILS_HASHMAP_H |
627 | #define ZORBA_UTILS_HASHMAP_H |
628 | |
629 | +#include <vector> |
630 | |
631 | #include <cstddef> |
632 | #include <zorba/config.h> |
633 | @@ -23,9 +24,10 @@ |
634 | #include "common/common.h" |
635 | |
636 | #include "zorbautils/fatal.h" |
637 | -#include "zorbautils/checked_vector.h" |
638 | #include "zorbautils/mutex.h" |
639 | |
640 | +#include "store/api/shared_types.h" |
641 | + |
642 | |
643 | namespace zorba |
644 | { |
645 | @@ -42,18 +44,81 @@ |
646 | template <class T, class V> |
647 | class HashEntry |
648 | { |
649 | + struct KeyHolder |
650 | + { |
651 | + char theKey[sizeof(T)]; |
652 | + }; |
653 | + |
654 | + struct ValueHolder |
655 | + { |
656 | + char theValue[sizeof(V)]; |
657 | + }; |
658 | + |
659 | public: |
660 | bool theIsFree; |
661 | - T theItem; |
662 | - V theValue; |
663 | + KeyHolder theKey; |
664 | + ValueHolder theValue; |
665 | ptrdiff_t theNext; // offset from "this" to the next entry. |
666 | |
667 | - HashEntry() : theIsFree(true), theNext(0) { } |
668 | + HashEntry() |
669 | + : |
670 | + theIsFree(true), |
671 | + theNext(0) |
672 | + { |
673 | + } |
674 | + |
675 | + HashEntry(const HashEntry<T, V>& other) |
676 | + { |
677 | + theIsFree = other.theIsFree; |
678 | + theNext = other.theNext; |
679 | + if (!theIsFree) |
680 | + { |
681 | + new (&theKey) T(other.key()); |
682 | + new (&theValue) V(other.value()); |
683 | + } |
684 | + } |
685 | |
686 | ~HashEntry() |
687 | { |
688 | - theIsFree = true; |
689 | - theNext = 0; |
690 | + if (!theIsFree) |
691 | + { |
692 | + key().~T(); |
693 | + value().~V(); |
694 | + } |
695 | + } |
696 | + |
697 | + HashEntry<T, V>& operator = (const HashEntry<T, V>& other) |
698 | + { |
699 | + if (theIsFree) |
700 | + { |
701 | + assert(false); |
702 | + |
703 | + if (!other.theIsFree) |
704 | + { |
705 | + new (&theKey) T(other.key()); |
706 | + new (&theValue) V(other.value()); |
707 | + } |
708 | + } |
709 | + else |
710 | + { |
711 | + if (!other.theIsFree) |
712 | + { |
713 | + key() = other.key(); |
714 | + value() = other.value(); |
715 | + } |
716 | + else |
717 | + { |
718 | + assert(false); |
719 | + |
720 | + key().~T(); |
721 | + value().~V(); |
722 | + } |
723 | + } |
724 | + |
725 | + theIsFree = other.theIsFree; |
726 | + theNext = other.theNext; |
727 | + |
728 | + return *this; |
729 | } |
730 | |
731 | bool isFree() const |
732 | @@ -63,15 +128,39 @@ |
733 | |
734 | void setFree() |
735 | { |
736 | - theItem.~T(); |
737 | + key().~T(); |
738 | + value().~V(); |
739 | theIsFree = true; |
740 | + theNext = 0; |
741 | } |
742 | |
743 | void unsetFree() |
744 | { |
745 | + new (&theKey) T; |
746 | + new (&theValue) V; |
747 | theIsFree = false; |
748 | } |
749 | |
750 | + T& key() |
751 | + { |
752 | + return *reinterpret_cast<T*>(&theKey); |
753 | + } |
754 | + |
755 | + const T& key() const |
756 | + { |
757 | + return *reinterpret_cast<const T*>(&theKey); |
758 | + } |
759 | + |
760 | + const V& value() const |
761 | + { |
762 | + return *reinterpret_cast<const V*>(&theValue); |
763 | + } |
764 | + |
765 | + V& value() |
766 | + { |
767 | + return *reinterpret_cast<V*>(&theValue); |
768 | + } |
769 | + |
770 | void setNext(HashEntry* nextEntry) |
771 | { |
772 | theNext = (nextEntry == NULL ? 0 : nextEntry - this); |
773 | @@ -105,17 +194,17 @@ |
774 | |
775 | theHashTab : The hash table. The table is implemented as a vector of hash |
776 | entries and is devided in 2 areas: Each entry between 0 and |
777 | - theHashTabSize - 1 is the head of a hash bucket. Each entry |
778 | - between theHashTabSize+1 and theHashTab.size()-1 is either |
779 | + theNumBuckets - 1 is the head of a hash bucket. Each entry |
780 | + between theNumBuckets+1 and theHashTab.size()-1 is either |
781 | a "collision" entry (i.e., it belongs to a hash bucket with |
782 | more than one entries) or a "free" entry (i.e. it does not |
783 | currently belong to any bucket, but is available for |
784 | allocation as a collision entry when needed). Free entries |
785 | in the collision area are linked in a free list. Entry |
786 | - theHashTab[theHashTabSize] is reserved as the head of this |
787 | + theHashTab[theNumBuckets] is reserved as the head of this |
788 | free list. |
789 | - theHashTabSize : The current number of hash buckets in theHashTab. |
790 | - theInitialSize : The initial number of hash buckets. |
791 | + theNumBuckets : The current number of hash buckets in theHashTab. |
792 | + |
793 | theLoadFactor : The max fraction of non-empty hash buckets after which the |
794 | hash table is doubled in size. |
795 | |
796 | @@ -130,11 +219,11 @@ |
797 | friend class HashMap; |
798 | |
799 | protected: |
800 | - checked_vector<HashEntry<T, V> >* theHashTab; |
801 | - size_t thePos; |
802 | + std::vector<HashEntry<T, V> >* theHashTab; |
803 | + csize thePos; |
804 | |
805 | protected: |
806 | - iterator(checked_vector<HashEntry<T, V> >* ht, size_t pos) |
807 | + iterator(std::vector<HashEntry<T, V> >* ht, csize pos) |
808 | : |
809 | theHashTab(ht), |
810 | thePos(pos) |
811 | @@ -150,7 +239,7 @@ |
812 | |
813 | HashEntry<T, V>& entry = (*theHashTab)[thePos]; |
814 | |
815 | - return entry.theItem; |
816 | + return entry.key(); |
817 | } |
818 | |
819 | public: |
820 | @@ -194,7 +283,7 @@ |
821 | |
822 | const HashEntry<T, V>& entry = (*theHashTab)[thePos]; |
823 | |
824 | - return std::pair<T, V>(entry.theItem, entry.theValue); |
825 | + return std::pair<T, V>(entry.key(), entry.value()); |
826 | } |
827 | |
828 | const T& getKey() const |
829 | @@ -203,16 +292,25 @@ |
830 | |
831 | const HashEntry<T, V>& entry = (*theHashTab)[thePos]; |
832 | |
833 | - return entry.theItem; |
834 | - } |
835 | - |
836 | - V& getValue() const |
837 | - { |
838 | - ZORBA_FATAL(thePos < theHashTab->size(), ""); |
839 | - |
840 | - HashEntry<T, V>& entry = (*theHashTab)[thePos]; |
841 | - |
842 | - return entry.theValue; |
843 | + return entry.key(); |
844 | + } |
845 | + |
846 | + const V& getValue() const |
847 | + { |
848 | + ZORBA_FATAL(thePos < theHashTab->size(), ""); |
849 | + |
850 | + HashEntry<T, V>& entry = (*theHashTab)[thePos]; |
851 | + |
852 | + return entry.value(); |
853 | + } |
854 | + |
855 | + V& getValue() |
856 | + { |
857 | + ZORBA_FATAL(thePos < theHashTab->size(), ""); |
858 | + |
859 | + HashEntry<T, V>& entry = (*theHashTab)[thePos]; |
860 | + |
861 | + return entry.value(); |
862 | } |
863 | |
864 | void setValue(const V& val) |
865 | @@ -221,7 +319,7 @@ |
866 | |
867 | HashEntry<T, V>& entry = (*theHashTab)[thePos]; |
868 | |
869 | - entry.theValue = val; |
870 | + entry.value() = val; |
871 | } |
872 | }; |
873 | |
874 | @@ -230,20 +328,22 @@ |
875 | static const double DEFAULT_LOAD_FACTOR; |
876 | |
877 | protected: |
878 | - ulong theNumEntries; |
879 | - |
880 | - size_t theHashTabSize; |
881 | - size_t theInitialSize; |
882 | - checked_vector<HashEntry<T, V> > theHashTab; |
883 | - double theLoadFactor; |
884 | - C theCompareFunction; |
885 | - |
886 | - bool theUseTransfer; |
887 | - |
888 | - SYNC_CODE(mutable Mutex theMutex;) |
889 | - SYNC_CODE(Mutex * theMutexp;) |
890 | - |
891 | - int numCollisions; |
892 | + std::vector<HashEntry<T, V> > theHashTab; |
893 | + |
894 | + csize theNumBuckets; |
895 | + |
896 | + csize theNumEntries; |
897 | + |
898 | + double theLoadFactor; |
899 | + |
900 | + double theMaxLoad; |
901 | + |
902 | + C theCompareFunction; |
903 | + |
904 | + SYNC_CODE(mutable Mutex theMutex;) |
905 | + SYNC_CODE(Mutex * theMutexp;) |
906 | + |
907 | + csize theNumCollisions; |
908 | |
909 | public: |
910 | |
911 | @@ -257,19 +357,20 @@ |
912 | depends on some parametrs (e.g. the collation or timezone). These parameters |
913 | are provided as data members of the given comparison-function obj. |
914 | ********************************************************************************/ |
915 | -HashMap(const C& compFunction, size_t size, bool sync, bool useTransfer = false) |
916 | +HashMap(const C& compFunction, csize size, bool sync) |
917 | : |
918 | + theNumBuckets(size), |
919 | theNumEntries(0), |
920 | - theHashTabSize(size), |
921 | - theInitialSize(size), |
922 | - theHashTab(computeTabSize(size)), |
923 | theLoadFactor(DEFAULT_LOAD_FACTOR), |
924 | theCompareFunction(compFunction), |
925 | - theUseTransfer(useTransfer), |
926 | - numCollisions(0) |
927 | + theNumCollisions(0) |
928 | { |
929 | + theHashTab.resize(computeCapacity(size)); |
930 | + |
931 | formatCollisionArea(); |
932 | |
933 | + theMaxLoad = theNumBuckets * theLoadFactor; |
934 | + |
935 | SYNC_CODE(theMutexp = (sync ? &theMutex : NULL);) |
936 | } |
937 | |
938 | @@ -284,18 +385,19 @@ |
939 | theCompareFunction data member is initialized with the default constructor |
940 | of the C class. |
941 | ********************************************************************************/ |
942 | -HashMap(size_t size, bool sync, bool useTransfer = false) |
943 | +HashMap(csize size, bool sync) |
944 | : |
945 | + theNumBuckets(size), |
946 | theNumEntries(0), |
947 | - theHashTabSize(size), |
948 | - theInitialSize(size), |
949 | - theHashTab(computeTabSize(size)), |
950 | theLoadFactor(DEFAULT_LOAD_FACTOR), |
951 | - theUseTransfer(useTransfer), |
952 | - numCollisions(0) |
953 | + theNumCollisions(0) |
954 | { |
955 | + theHashTab.resize(computeCapacity(size)); |
956 | + |
957 | formatCollisionArea(); |
958 | |
959 | + theMaxLoad = theNumBuckets * theLoadFactor; |
960 | + |
961 | SYNC_CODE(theMutexp = (sync ? &theMutex : NULL);) |
962 | } |
963 | |
964 | @@ -340,7 +442,7 @@ |
965 | /******************************************************************************* |
966 | |
967 | ********************************************************************************/ |
968 | -ulong size() const |
969 | +csize size() const |
970 | { |
971 | return theNumEntries; |
972 | } |
973 | @@ -349,7 +451,7 @@ |
974 | /******************************************************************************* |
975 | |
976 | ********************************************************************************/ |
977 | -size_t capacity() const |
978 | +csize capacity() const |
979 | { |
980 | return theHashTab.size(); |
981 | } |
982 | @@ -358,6 +460,24 @@ |
983 | /******************************************************************************* |
984 | |
985 | ********************************************************************************/ |
986 | +csize bucket_count() const |
987 | +{ |
988 | + return theNumBuckets; |
989 | +} |
990 | + |
991 | + |
992 | +/******************************************************************************* |
993 | + |
994 | +********************************************************************************/ |
995 | +csize collisions() const |
996 | +{ |
997 | + return theNumCollisions; |
998 | +} |
999 | + |
1000 | + |
1001 | +/******************************************************************************* |
1002 | + |
1003 | +********************************************************************************/ |
1004 | C get_compare_function() |
1005 | { |
1006 | return theCompareFunction; |
1007 | @@ -388,13 +508,17 @@ |
1008 | void clearNoSync() |
1009 | { |
1010 | theNumEntries = 0; |
1011 | - numCollisions = 0; |
1012 | - |
1013 | - size_t n = theHashTab.size(); |
1014 | - |
1015 | - for (size_t i = 0; i < n; ++i) |
1016 | + theNumCollisions = 0; |
1017 | + |
1018 | + csize n = theHashTab.size(); |
1019 | + |
1020 | + HashEntry<T, V>* entry = &theHashTab[0]; |
1021 | + HashEntry<T, V>* lastentry = &theHashTab[n]; |
1022 | + |
1023 | + for (; entry < lastentry; ++entry) |
1024 | { |
1025 | - theHashTab[i].~HashEntry<T, V>(); |
1026 | + if (!entry->isFree()) |
1027 | + entry->setFree(); |
1028 | } |
1029 | |
1030 | formatCollisionArea(); |
1031 | @@ -406,13 +530,13 @@ |
1032 | ********************************************************************************/ |
1033 | iterator begin() const |
1034 | { |
1035 | - return iterator(const_cast<checked_vector<HashEntry<T, V> >*>(&theHashTab), 0); |
1036 | + return iterator(const_cast<std::vector<HashEntry<T, V> >*>(&theHashTab), 0); |
1037 | } |
1038 | |
1039 | |
1040 | iterator end() const |
1041 | { |
1042 | - return iterator(const_cast<checked_vector<HashEntry<T, V> >*>(&theHashTab), |
1043 | + return iterator(const_cast<std::vector<HashEntry<T, V> >*>(&theHashTab), |
1044 | theHashTab.size()); |
1045 | } |
1046 | |
1047 | @@ -437,7 +561,7 @@ |
1048 | |
1049 | while (entry != NULL) |
1050 | { |
1051 | - if (equal(entry->theItem, item)) |
1052 | + if (equal(entry->key(), item)) |
1053 | return true; |
1054 | |
1055 | entry = entry->getNext(); |
1056 | @@ -468,7 +592,7 @@ |
1057 | |
1058 | while (entry != NULL) |
1059 | { |
1060 | - if (equal(entry->theItem, item)) |
1061 | + if (equal(entry->key(), item)) |
1062 | return iterator(&theHashTab, entry - &theHashTab[0]); |
1063 | |
1064 | entry = entry->getNext(); |
1065 | @@ -498,9 +622,9 @@ |
1066 | |
1067 | while (entry != NULL) |
1068 | { |
1069 | - if (equal(entry->theItem, item)) |
1070 | + if (equal(entry->key(), item)) |
1071 | { |
1072 | - value = entry->theValue; |
1073 | + value = entry->value(); |
1074 | return true; |
1075 | } |
1076 | |
1077 | @@ -527,8 +651,8 @@ |
1078 | |
1079 | if (!found) |
1080 | { |
1081 | - entry->theItem = pair.first; |
1082 | - entry->theValue = pair.second; |
1083 | + entry->key() = pair.first; |
1084 | + entry->value() = pair.second; |
1085 | } |
1086 | |
1087 | return !found; |
1088 | @@ -552,12 +676,12 @@ |
1089 | |
1090 | if (!found) |
1091 | { |
1092 | - entry->theItem = item; |
1093 | - entry->theValue = value; |
1094 | + entry->key() = item; |
1095 | + entry->value() = value; |
1096 | } |
1097 | else |
1098 | { |
1099 | - value = entry->theValue; |
1100 | + value = entry->value(); |
1101 | } |
1102 | |
1103 | return !found; |
1104 | @@ -586,7 +710,7 @@ |
1105 | |
1106 | while (entry != NULL) |
1107 | { |
1108 | - if (equal(entry->theItem, item)) |
1109 | + if (equal(entry->key(), item)) |
1110 | { |
1111 | found = true; |
1112 | break; |
1113 | @@ -602,7 +726,7 @@ |
1114 | } |
1115 | else |
1116 | { |
1117 | - entry->theValue = value; |
1118 | + entry->value() = value; |
1119 | return true; |
1120 | } |
1121 | } |
1122 | @@ -616,13 +740,13 @@ |
1123 | { |
1124 | SYNC_CODE(AutoMutex lock(theMutexp);) |
1125 | |
1126 | - if (ite.thePos < theHashTabSize) |
1127 | + if (ite.thePos < theNumBuckets) |
1128 | { |
1129 | eraseEntry(&theHashTab[ite.thePos], NULL); |
1130 | } |
1131 | else |
1132 | { |
1133 | - const T& item = theHashTab[ite.thePos].theItem; |
1134 | + const T& item = theHashTab[ite.thePos].key(); |
1135 | |
1136 | ulong hval = hash(item); |
1137 | |
1138 | @@ -665,7 +789,7 @@ |
1139 | // If the item to remove is in the 1st entry of a bucket, then if the |
1140 | // bucket has no other entries, just call the destructor on that entry, |
1141 | // else copy the 2nd entry to the 1st entry and freeup the 2nd entry. |
1142 | - if (equal(entry->theItem, item)) |
1143 | + if (equal(entry->key(), item)) |
1144 | { |
1145 | eraseEntry(entry, NULL); |
1146 | return true; |
1147 | @@ -678,7 +802,7 @@ |
1148 | |
1149 | while (entry != NULL) |
1150 | { |
1151 | - if (equal(entry->theItem, item)) |
1152 | + if (equal(entry->key(), item)) |
1153 | { |
1154 | eraseEntry(entry, preventry); |
1155 | return true; |
1156 | @@ -698,9 +822,9 @@ |
1157 | /******************************************************************************* |
1158 | |
1159 | ********************************************************************************/ |
1160 | -size_t computeTabSize(size_t size) const |
1161 | +csize computeCapacity(csize size) const |
1162 | { |
1163 | - return size + 32 + size/5; |
1164 | + return size + 32 + size / (5 - 10 * (theLoadFactor - 0.7)) ; |
1165 | } |
1166 | |
1167 | |
1168 | @@ -724,7 +848,7 @@ |
1169 | ********************************************************************************/ |
1170 | HashEntry<T, V>* bucket(ulong hvalue) |
1171 | { |
1172 | - return &theHashTab[hvalue % theHashTabSize]; |
1173 | + return &theHashTab[hvalue % theNumBuckets]; |
1174 | } |
1175 | |
1176 | |
1177 | @@ -733,7 +857,7 @@ |
1178 | ********************************************************************************/ |
1179 | const HashEntry<T, V>* bucket(ulong hvalue) const |
1180 | { |
1181 | - return &theHashTab[hvalue % theHashTabSize]; |
1182 | + return &theHashTab[hvalue % theNumBuckets]; |
1183 | } |
1184 | |
1185 | |
1186 | @@ -742,7 +866,7 @@ |
1187 | ********************************************************************************/ |
1188 | HashEntry<T, V>* freelist() |
1189 | { |
1190 | - return &theHashTab[theHashTabSize]; |
1191 | + return &theHashTab[theNumBuckets]; |
1192 | } |
1193 | |
1194 | |
1195 | @@ -755,41 +879,38 @@ |
1196 | { |
1197 | if (entry->theNext == 0) |
1198 | { |
1199 | - entry->~HashEntry<T, V>(); |
1200 | + entry->setFree(); |
1201 | } |
1202 | else |
1203 | { |
1204 | HashEntry<T, V>* nextEntry = entry->getNext(); |
1205 | *entry = *nextEntry; |
1206 | entry->setNext(nextEntry->getNext()); |
1207 | - nextEntry->~HashEntry<T, V>(); |
1208 | + nextEntry->setFree(); |
1209 | nextEntry->setNext(freelist()->getNext()); |
1210 | freelist()->setNext(nextEntry); |
1211 | } |
1212 | |
1213 | - theNumEntries--; |
1214 | + --theNumEntries; |
1215 | |
1216 | - if (theHashTabSize > theInitialSize && |
1217 | - theNumEntries < (theHashTabSize / 2) * theLoadFactor) |
1218 | + if (theNumEntries < theMaxLoad / 2) |
1219 | { |
1220 | - resizeHashTab(theHashTabSize / 2); |
1221 | + resizeHashTab(theNumBuckets / 2); |
1222 | } |
1223 | |
1224 | } |
1225 | else |
1226 | { |
1227 | preventry->setNext(entry->getNext()); |
1228 | - entry->~HashEntry<T, V>(); |
1229 | + entry->setFree(); |
1230 | entry->setNext(freelist()->getNext()); |
1231 | freelist()->setNext(entry); |
1232 | |
1233 | - theNumEntries--; |
1234 | - numCollisions--; |
1235 | + --theNumEntries; |
1236 | |
1237 | - if (theHashTabSize > theInitialSize && |
1238 | - theNumEntries < (theHashTabSize / 2) * theLoadFactor) |
1239 | + if (theNumEntries < theMaxLoad / 2) |
1240 | { |
1241 | - resizeHashTab(theHashTabSize / 2); |
1242 | + resizeHashTab(theNumBuckets / 2); |
1243 | } |
1244 | } |
1245 | } |
1246 | @@ -812,7 +933,7 @@ |
1247 | // If the hash bucket is empty, its 1st entry is used to store the new string. |
1248 | if (headEntry->isFree()) |
1249 | { |
1250 | - theNumEntries++; |
1251 | + ++theNumEntries; |
1252 | headEntry->unsetFree(); |
1253 | return headEntry; |
1254 | } |
1255 | @@ -822,7 +943,7 @@ |
1256 | |
1257 | while (currEntry != NULL) |
1258 | { |
1259 | - if (equal(currEntry->theItem, item)) |
1260 | + if (equal(currEntry->key(), item)) |
1261 | { |
1262 | found = true; |
1263 | return currEntry; |
1264 | @@ -836,22 +957,22 @@ |
1265 | // Do garbage collection if the hash table is more than 60% full. Note that |
1266 | // gc does NOT resize theHashTab, so after gc, the item still belongs to the |
1267 | // same bucket as before gc. |
1268 | - if (theNumEntries > theHashTabSize * theLoadFactor) |
1269 | + if (theNumEntries > theMaxLoad) |
1270 | { |
1271 | garbageCollect(); |
1272 | |
1273 | if (headEntry->isFree()) |
1274 | { |
1275 | - theNumEntries++; |
1276 | + ++theNumEntries; |
1277 | headEntry->unsetFree(); |
1278 | return headEntry; |
1279 | } |
1280 | } |
1281 | |
1282 | // Double the size of the hash table if it is more than 60% full. |
1283 | - if (theNumEntries > theHashTabSize * theLoadFactor) |
1284 | + if (theNumEntries > theMaxLoad) |
1285 | { |
1286 | - resizeHashTab(theHashTabSize * 2); |
1287 | + resizeHashTab(theNumBuckets * 2); |
1288 | goto retry; |
1289 | } |
1290 | |
1291 | @@ -859,8 +980,8 @@ |
1292 | // collision list for its bucket. We place the new item right after the |
1293 | // headEntry for the bucket. |
1294 | |
1295 | - theNumEntries++; |
1296 | - numCollisions++; |
1297 | + ++theNumEntries; |
1298 | + ++theNumCollisions; |
1299 | |
1300 | // If no free entry exists, we extend the collision area of the hash table. |
1301 | if (freelist()->getNext() == 0) |
1302 | @@ -886,10 +1007,15 @@ |
1303 | ********************************************************************************/ |
1304 | void extendCollisionArea() |
1305 | { |
1306 | - size_t oldSize = theHashTab.size(); |
1307 | - size_t numCollisionEntries = oldSize - theHashTabSize; |
1308 | - size_t newSize = theHashTabSize + 2 * numCollisionEntries; |
1309 | + csize oldSize = theHashTab.size(); |
1310 | + csize numCollisionEntries = oldSize - theNumBuckets; |
1311 | + csize newSize = theNumBuckets + 2 * numCollisionEntries; |
1312 | |
1313 | + /* |
1314 | + std::cout << "Extending collision area" << std::endl |
1315 | + << "numBuckets = " << theNumBuckets << " numCollisionEntries = " |
1316 | + << numCollisionEntries << std::endl << std::endl; |
1317 | + */ |
1318 | //foo(); for setting a breakpoint |
1319 | |
1320 | theHashTab.resize(newSize); |
1321 | @@ -909,7 +1035,8 @@ |
1322 | firstentry = freelist(); |
1323 | |
1324 | HashEntry<T, V>* lastentry = &theHashTab[theHashTab.size() - 1]; |
1325 | - for (HashEntry<T, V>* entry = firstentry; entry < lastentry; entry++) |
1326 | + |
1327 | + for (HashEntry<T, V>* entry = firstentry; entry < lastentry; ++entry) |
1328 | entry->theNext = 1; |
1329 | |
1330 | lastentry->theNext = 0; |
1331 | @@ -919,31 +1046,34 @@ |
1332 | /******************************************************************************* |
1333 | |
1334 | ********************************************************************************/ |
1335 | -void resizeHashTab(size_t newSize) |
1336 | +void resizeHashTab(csize newSize) |
1337 | { |
1338 | - HashEntry<T, V>* entry; |
1339 | - HashEntry<T, V>* oldentry; |
1340 | + if (newSize == 0) |
1341 | + newSize = 3; |
1342 | + |
1343 | + csize oldcap = theHashTab.size(); |
1344 | + csize newcap = computeCapacity(newSize); |
1345 | |
1346 | // Create a new vector of new size and swap theHashTab with this new vector |
1347 | - checked_vector<HashEntry<T, V> > oldTab(computeTabSize(newSize)); |
1348 | + std::vector<HashEntry<T, V> > oldTab(newcap); |
1349 | theHashTab.swap(oldTab); |
1350 | |
1351 | - size_t oldsize = oldTab.size(); |
1352 | - theHashTabSize = newSize; |
1353 | + theNumBuckets = newSize; |
1354 | + theMaxLoad = theNumBuckets * theLoadFactor; |
1355 | |
1356 | formatCollisionArea(); |
1357 | |
1358 | - numCollisions = 0; |
1359 | + HashEntry<T, V>* entry; |
1360 | + HashEntry<T, V>* oldentry = &oldTab[0]; |
1361 | + HashEntry<T, V>* lastentry = &oldTab[oldcap]; |
1362 | |
1363 | // Now rehash every entry |
1364 | - for (size_t i = 0; i < oldsize; i++) |
1365 | + for (; oldentry < lastentry; ++oldentry) |
1366 | { |
1367 | - oldentry = &oldTab[i]; |
1368 | - |
1369 | if (oldentry->isFree()) |
1370 | continue; |
1371 | |
1372 | - entry = bucket (hash(oldentry->theItem)); |
1373 | + entry = bucket(hash(oldentry->key())); |
1374 | |
1375 | if (!entry->isFree()) |
1376 | { |
1377 | @@ -962,12 +1092,11 @@ |
1378 | freelist()->setNext(entry->getNext()); |
1379 | entry->setNext(headEntry->getNext()); |
1380 | headEntry->setNext(entry); |
1381 | - numCollisions++; |
1382 | } |
1383 | |
1384 | - entry->theItem = oldentry->theItem; |
1385 | - entry->theValue = oldentry->theValue; |
1386 | entry->unsetFree(); |
1387 | + entry->key() = oldentry->key(); |
1388 | + entry->value() = oldentry->value(); |
1389 | } |
1390 | } |
1391 | |
1392 | |
1393 | === modified file 'src/zorbautils/hashset.h' |
1394 | --- src/zorbautils/hashset.h 2012-09-17 00:36:37 +0000 |
1395 | +++ src/zorbautils/hashset.h 2012-10-08 12:24:30 +0000 |
1396 | @@ -45,16 +45,16 @@ |
1397 | typedef typename HashMap<T, DummyHashValue, C>::iterator iterator; |
1398 | |
1399 | |
1400 | -HashSet(const C& compFunction, ulong size, bool sync, bool useTransfer = false) |
1401 | +HashSet(const C& compFunction, ulong size, bool sync) |
1402 | : |
1403 | - HashMap<T, DummyHashValue, C>(compFunction, size, sync, useTransfer) |
1404 | + HashMap<T, DummyHashValue, C>(compFunction, size, sync) |
1405 | { |
1406 | } |
1407 | |
1408 | |
1409 | HashSet(ulong size, bool sync) |
1410 | : |
1411 | - HashMap<T, DummyHashValue, C>(size, sync, false) |
1412 | + HashMap<T, DummyHashValue, C>(size, sync) |
1413 | { |
1414 | } |
1415 | |
1416 | @@ -100,12 +100,7 @@ |
1417 | |
1418 | if (!found) |
1419 | { |
1420 | - /* |
1421 | - if (this->theUseTransfer) |
1422 | - entry->theItem.transfer(item); |
1423 | - else |
1424 | - */ |
1425 | - entry->theItem = item; |
1426 | + entry->key() = item; |
1427 | } |
1428 | |
1429 | return !found; |
1430 | @@ -129,19 +124,13 @@ |
1431 | entry = this->hashInsert(item, this->hash(item), found); |
1432 | if (!found) |
1433 | { |
1434 | - /* |
1435 | - if (this->theUseTransfer) |
1436 | - entry->theItem.transfer(item); |
1437 | - else |
1438 | - */ |
1439 | - entry->theItem = item; |
1440 | - |
1441 | - outItem = entry->theItem; |
1442 | + entry->key() = item; |
1443 | + outItem = entry->key(); |
1444 | return true; |
1445 | } |
1446 | else |
1447 | { |
1448 | - outItem = entry->theItem; |
1449 | + outItem = entry->key(); |
1450 | return false; |
1451 | } |
1452 | } |
1453 | @@ -164,13 +153,13 @@ |
1454 | entry = this->hashInsert(item, this->hash(item), found); |
1455 | if (!found) |
1456 | { |
1457 | - entry->theItem = item; |
1458 | - outItem = entry->theItem; |
1459 | + entry->key() = item; |
1460 | + outItem = entry->key(); |
1461 | return true; |
1462 | } |
1463 | else |
1464 | { |
1465 | - outItem = entry->theItem; |
1466 | + outItem = entry->key(); |
1467 | return false; |
1468 | } |
1469 | } |
Validation queue starting for merge proposal. zorbatest. lambda. nu:8080/ remotequeue/ hashmap- 2012-10- 08T12-28- 47.769Z/ log.html
Log at: http://