Merge lp:~zorba-coders/zorba/hashmap into lp:zorba

Proposed by Markos Zaharioudakis
Status: Merged
Approved by: Markos Zaharioudakis
Approved revision: 10817
Merged at revision: 11091
Proposed branch: lp:~zorba-coders/zorba/hashmap
Merge into: lp:zorba
Diff against target: 1469 lines (+600/-192)
10 files modified
src/context/static_context.h (+1/-0)
src/store/naive/qname_pool.cpp (+8/-8)
src/store/naive/string_pool.cpp (+42/-38)
src/unit_tests/CMakeLists.txt (+1/-0)
src/unit_tests/test_hashmaps.cpp (+257/-0)
src/unit_tests/unit_test_list.h (+16/-1)
src/unit_tests/unit_tests.cpp (+13/-1)
src/zorbaserialization/bin_archiver.cpp (+1/-1)
src/zorbautils/hashmap.h (+251/-122)
src/zorbautils/hashset.h (+10/-21)
To merge this branch: bzr merge lp:~zorba-coders/zorba/hashmap
Reviewer Review Type Date Requested Status
Markos Zaharioudakis Approve
Review via email: mp+125187@code.launchpad.net

Commit message

HashMap optimizations

Description of the change

HashMap optimizations

To post a comment you must log in.
Revision history for this message
Markos Zaharioudakis (markos-za) :
review: Approve
lp:~zorba-coders/zorba/hashmap updated
10815. By Markos Zaharioudakis

merge from trunk

10816. By Markos Zaharioudakis

removed debugging print out

10817. By Markos Zaharioudakis

merge from trunk

Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :
Revision history for this message
Zorba Build Bot (zorba-buildbot) wrote :

Validation queue job hashmap-2012-10-08T12-28-47.769Z is finished. The final status was:

All tests succeeded!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/context/static_context.h'
2--- src/context/static_context.h 2012-10-03 12:32:55 +0000
3+++ src/context/static_context.h 2012-10-08 12:24:30 +0000
4@@ -42,6 +42,7 @@
5
6 #include "zorbautils/hashmap_zstring.h"
7 #include "zorbautils/hashmap_itemp.h"
8+#include "zorbautils/checked_vector.h"
9
10 #include "common/shared_types.h"
11
12
13=== modified file 'src/store/naive/qname_pool.cpp'
14--- src/store/naive/qname_pool.cpp 2012-09-17 00:36:37 +0000
15+++ src/store/naive/qname_pool.cpp 2012-10-08 12:24:30 +0000
16@@ -63,12 +63,12 @@
17 ********************************************************************************/
18 QNamePool::~QNamePool()
19 {
20- csize n = theHashSet.theHashTab.size();
21+ csize n = theHashSet.capacity();
22 for (csize i = 0; i < n; ++i)
23 {
24 if (!theHashSet.theHashTab[i].isFree() &&
25- theHashSet.theHashTab[i].theItem->isOverflow())
26- delete theHashSet.theHashTab[i].theItem;
27+ theHashSet.theHashTab[i].key()->isOverflow())
28+ delete theHashSet.theHashTab[i].key();
29 }
30
31 if (theCache != NULL)
32@@ -297,12 +297,12 @@
33
34 bool found;
35 entry = theHashSet.hashInsert(qn, hval, found);
36- entry->theItem = qn;
37+ entry->key() = qn;
38 ZORBA_FATAL(!found, "");
39 }
40 else
41 {
42- qn = entry->theItem;
43+ qn = entry->key();
44 cachePin(qn);
45 }
46
47@@ -389,12 +389,12 @@
48
49 bool found;
50 entry = theHashSet.hashInsert(qn, hval, found);
51- entry->theItem = qn;
52+ entry->key() = qn;
53 ZORBA_FATAL(!found, "");
54 }
55 else
56 {
57- qn = entry->theItem;
58+ qn = entry->key();
59 cachePin(qn);
60 }
61
62@@ -478,7 +478,7 @@
63
64 while (entry != NULL)
65 {
66- QNameItem* qn = entry->theItem;
67+ QNameItem* qn = entry->key();
68
69 if (ztd::equals(qn->getLocalName(), ln, lnlen) &&
70 ztd::equals(qn->getNamespace(), ns, nslen) &&
71
72=== modified file 'src/store/naive/string_pool.cpp'
73--- src/store/naive/string_pool.cpp 2012-09-17 00:36:37 +0000
74+++ src/store/naive/string_pool.cpp 2012-10-08 12:24:30 +0000
75@@ -28,13 +28,14 @@
76 StringPool::~StringPool()
77 {
78 csize count = 0;
79- csize n = theHashTab.size();
80+ csize n = capacity();
81
82 for (csize i = 0; i < n; ++i)
83 {
84- if (theHashTab[i].theItem.is_shared())
85+ if (!theHashTab[i].isFree() && theHashTab[i].key().is_shared())
86 {
87- std::cerr << "ID: " << i << " Referenced URI: " << theHashTab[i].theItem << std::endl;
88+ std::cerr << "ID: " << i << " Referenced URI: "
89+ << theHashTab[i].key() << std::endl;
90 //delete theHashTab[i].theString.getp();
91 count++;
92 }
93@@ -57,7 +58,8 @@
94 bool found = false;
95
96 zstring::size_type len = strlen(str);
97- ulong hval = hashfun::h32(str, len, FNV_32_INIT) % theHashTabSize;
98+
99+ ulong hval = hashfun::h32(str, len, FNV_32_INIT) % bucket_count();
100
101 {
102 SYNC_CODE(AutoMutex lock(&theMutex);)
103@@ -68,7 +70,7 @@
104 {
105 while (entry != NULL)
106 {
107- if (ztd::equals(entry->theItem, str, len))
108+ if (ztd::equals(entry->key(), str, len))
109 {
110 found = true;
111 break;
112@@ -79,7 +81,7 @@
113
114 if (found)
115 {
116- outStr = entry->theItem;
117+ outStr = entry->key();
118 return false;
119 }
120 }
121@@ -97,76 +99,78 @@
122 ********************************************************************************/
123 void StringPool::garbageCollect()
124 {
125- HashEntry<zstring, DummyHashValue>* entry;
126+ HashEntry<zstring, DummyHashValue>* currEntry;
127
128 HashEntry<zstring, DummyHashValue>* freeList = NULL;
129
130- zstring::size_type size = theHashTabSize;
131+ csize size = bucket_count();
132
133- for (ulong i = 0; i < size; ++i)
134+ for (csize i = 0; i < size; ++i)
135 {
136- entry = &theHashTab[i];
137+ currEntry = &theHashTab[i];
138
139 // If the current hash bucket is empty, move to the next one
140- if (entry->isFree())
141+ if (currEntry->isFree())
142 {
143- ZORBA_FATAL(entry->theNext == 0, "");
144+ ZORBA_FATAL(currEntry->theNext == 0, "");
145 continue;
146 }
147
148 // Handle the 1st hash entry of the current hash bucket
149- while (!entry->theItem.is_shared())
150+ while (!currEntry->key().is_shared())
151 {
152- if (entry->theNext == 0)
153+ if (currEntry->theNext == 0)
154 {
155- entry->setFree();
156- theNumEntries--;
157+ currEntry->setFree();
158+ --theNumEntries;
159 break;
160 }
161 else
162 {
163- HashEntry<zstring, DummyHashValue>* nextEntry = entry->getNext();
164- *entry = *nextEntry;
165- entry->setNext(nextEntry->getNext());
166+ HashEntry<zstring, DummyHashValue>* nextEntry = currEntry->getNext();
167+ assert(!nextEntry->isFree());
168+ *currEntry = *nextEntry;
169+ currEntry->setNext(nextEntry->getNext());
170 nextEntry->setFree();
171 nextEntry->setNext(freeList);
172 freeList = nextEntry;
173- theNumEntries--;
174+ --theNumEntries;
175 }
176 }
177
178 // Handle the overflow entries of the current hash bucket.
179- HashEntry<zstring, DummyHashValue>* prevEntry = entry;
180- entry = entry->getNext();
181+ HashEntry<zstring, DummyHashValue>* prevEntry = currEntry;
182+ currEntry = currEntry->getNext();
183
184- while (entry != NULL)
185+ while (currEntry != NULL)
186 {
187- if (!entry->theItem.is_shared())
188+ if (!currEntry->key().is_shared())
189 {
190- prevEntry->setNext(entry->getNext());
191- entry->setFree();
192- entry->setNext(freeList);
193- freeList = entry;
194- theNumEntries--;
195+ prevEntry->setNext(currEntry->getNext());
196+ currEntry->setFree();
197+ currEntry->setNext(freeList);
198+ freeList = currEntry;
199+ --theNumEntries;
200
201- entry = prevEntry->getNext();
202+ currEntry = prevEntry->getNext();
203 }
204 else
205 {
206- prevEntry = entry;
207- entry = entry->getNext();
208+ prevEntry = currEntry;
209+ currEntry = currEntry->getNext();
210 }
211 }
212 }
213
214 if (freeList != NULL)
215 {
216- entry = freeList;
217- while (entry->theNext != 0)
218- entry = entry->getNext();
219-
220- entry->setNext(theHashTab[theHashTabSize].getNext());
221- theHashTab[theHashTabSize].setNext(freeList);
222+ currEntry = freeList;
223+ while (currEntry->theNext != 0)
224+ currEntry = currEntry->getNext();
225+
226+ currEntry->setNext(theHashTab[bucket_count()].getNext());
227+
228+ theHashTab[bucket_count()].setNext(freeList);
229 }
230 }
231
232
233=== modified file 'src/unit_tests/CMakeLists.txt'
234--- src/unit_tests/CMakeLists.txt 2012-09-17 00:36:37 +0000
235+++ src/unit_tests/CMakeLists.txt 2012-10-08 12:24:30 +0000
236@@ -25,6 +25,7 @@
237 test_uri.cpp
238 test_json_parser.cpp
239 test_fs_iterator.cpp
240+ test_hashmaps.cpp
241 # memory_manager.cpp
242 )
243
244
245=== added file 'src/unit_tests/test_hashmaps.cpp'
246--- src/unit_tests/test_hashmaps.cpp 1970-01-01 00:00:00 +0000
247+++ src/unit_tests/test_hashmaps.cpp 2012-10-08 12:24:30 +0000
248@@ -0,0 +1,257 @@
249+
250+#include <cstdlib>
251+#include <stdio.h>
252+#include <iostream>
253+
254+#include <zorba/util/time.h>
255+
256+#include "zorbautils/hashmap.h"
257+#include "util/hashmap32.h"
258+#include "util/hashmap.h"
259+#include "util/unordered_map.h"
260+
261+
262+namespace zorba {
263+
264+namespace UnitTests {
265+
266+
267+class IntCompFunc
268+{
269+public:
270+ static bool equal(uint64_t k1, uint64_t k2)
271+ {
272+ return k1 == k2;
273+ }
274+
275+ static uint32_t hash(uint64_t key)
276+ {
277+#if 1
278+ return key;
279+#else
280+ char buf[9];
281+ buf[0] = (unsigned char)(key>>56);
282+ buf[1] = (unsigned char)(key>>48);
283+ buf[2] = (unsigned char)(key>>40);
284+ buf[3] = (unsigned char)(key>>32);
285+ buf[4] = (unsigned char)(key>>24);
286+ buf[5] = (unsigned char)(key>>16);
287+ buf[6] = (unsigned char)(key>>8 );
288+ buf[7] = (unsigned char)(key );
289+ buf[8] = 0;
290+ return (uint32_t)hashfun::h64((void*)buf,8,FNV_64_INIT);
291+#endif
292+ }
293+};
294+
295+
296+class StrCompFunc
297+{
298+public:
299+ static bool equal(const zstring& k1, const zstring& k2)
300+ {
301+ return k1 == k2;
302+ }
303+
304+ static uint32_t hash(const zstring& key)
305+ {
306+ return hashfun::h32(key.c_str(), FNV_32_INIT);
307+ }
308+};
309+
310+
311+int test_hashmaps(int argc, char* argv[])
312+{
313+ if (argc < 4)
314+ return 1;
315+
316+ int test_id = atoi(argv[1]);
317+
318+ double load_factor = atof(argv[2]);
319+
320+ int num = atoi(argv[3]);
321+
322+ uint64_t* int_buf = new uint64_t[num];
323+
324+ for (int i = 0; i < num; ++i)
325+ {
326+ int_buf[i] = rand() % num;
327+ }
328+
329+ zstring* str_buf = new zstring[num];
330+
331+ for (int i = 0; i < num; ++i)
332+ {
333+ char tmp[20];
334+ sprintf(tmp, "%d", rand() % num);
335+ str_buf[i] = tmp;
336+
337+ //std::cout << str_buf[i] << std::endl;
338+ }
339+
340+
341+ HashMap<uint64_t, int, IntCompFunc> map1(1024, false);
342+ HashMap<zstring, int, StrCompFunc> map2(1024, false);
343+
344+ std::unordered_map<uint64_t, int> map3(1024);
345+ std::unordered_map<zstring, int> map4(1024);
346+
347+ hash64map<int> map5(1024, load_factor);
348+ hashmap<zstring, int> map6(1024, load_factor);
349+
350+ map1.set_load_factor(load_factor);
351+ map2.set_load_factor(load_factor);
352+
353+ if (test_id == 1)
354+ {
355+ zorba::time::walltime startTime;
356+ zorba::time::get_current_walltime(startTime);
357+
358+ for (int i = 0; i < num; ++i)
359+ {
360+ uint64_t key = int_buf[i];
361+ int value = 1;
362+ (void)map1.insert(key, value);
363+ }
364+
365+ zorba::time::walltime stopTime;
366+ time::get_current_walltime(stopTime);
367+
368+ double time = time::get_walltime_elapsed(startTime, stopTime);
369+
370+ std::cout << "Load factor = " << load_factor << std::endl
371+ << "Num numeric insertions = " << num << std::endl
372+ << "HashMap entries = " << map1.size() << std::endl
373+ << "HashMap capacity = " << map1.capacity() << std::endl
374+ << "HashMap colisions = " << map1.collisions() << std::endl
375+ << "Time = " << time << std::endl;
376+ }
377+ else if (test_id == 2)
378+ {
379+ zorba::time::walltime startTime;
380+ zorba::time::get_current_walltime(startTime);
381+
382+ for (int i = 0; i < num; ++i)
383+ {
384+ zstring key = str_buf[i];
385+ int value = 1;
386+ (void)map2.insert(key, value);
387+ }
388+
389+ zorba::time::walltime stopTime;
390+ time::get_current_walltime(stopTime);
391+
392+ double time = time::get_walltime_elapsed(startTime, stopTime);
393+
394+ std::cout << "Load factor = " << load_factor << std::endl
395+ << "Num zstring insertions = " << num << std::endl
396+ << "HashMap entries = " << map2.size() << std::endl
397+ << "HashMap buckets = " << map2.bucket_count() << std::endl
398+ << "HashMap capacity = " << map2.capacity() << std::endl
399+ << "Time = " << time << std::endl;
400+ }
401+ else if (test_id == 3)
402+ {
403+ zorba::time::walltime startTime;
404+ zorba::time::get_current_walltime(startTime);
405+
406+ for (int i = 0; i < num; ++i)
407+ {
408+ uint64_t key = int_buf[i];
409+ int value = 1;
410+ (void)map3.insert(std::pair<uint64_t, int>(key, value));
411+ }
412+
413+ zorba::time::walltime stopTime;
414+ time::get_current_walltime(stopTime);
415+
416+ double time = time::get_walltime_elapsed(startTime, stopTime);
417+
418+ std::cout << "Load factor = " << load_factor << std::endl
419+ << "Num numeric insertions = " << num << std::endl
420+ << "UnorderedMap entries = " << map3.size() << std::endl
421+ << "UnorderedMap buckets = " << map3.bucket_count() << std::endl
422+ << "Time = " << time << std::endl;
423+ }
424+ else if (test_id == 4)
425+ {
426+ zorba::time::walltime startTime;
427+ zorba::time::get_current_walltime(startTime);
428+
429+ for (int i = 0; i < num; ++i)
430+ {
431+ zstring key = str_buf[i];
432+ int value = 1;
433+ (void)map4.insert(std::pair<zstring, int>(key, value));
434+ }
435+
436+ zorba::time::walltime stopTime;
437+ time::get_current_walltime(stopTime);
438+
439+ double time = time::get_walltime_elapsed(startTime, stopTime);
440+
441+ std::cout << "Load factor = " << load_factor << std::endl
442+ << "Num zstring insertions = " << num << std::endl
443+ << "UnorderedMap entries = " << map4.size() << std::endl
444+ << "UnorderedMap buckets = " << map4.bucket_count() << std::endl
445+ << "Time = " << time << std::endl;
446+ }
447+ else if (test_id == 5)
448+ {
449+ zorba::time::walltime startTime;
450+ zorba::time::get_current_walltime(startTime);
451+
452+ for (int i = 0; i < num; ++i)
453+ {
454+ uint64_t key = int_buf[i];
455+ int value = 1;
456+ (void)map5.put_unsync(key, value);
457+ }
458+
459+ zorba::time::walltime stopTime;
460+ time::get_current_walltime(stopTime);
461+
462+ double time = time::get_walltime_elapsed(startTime, stopTime);
463+
464+ std::cout << "Load factor = " << load_factor << std::endl
465+ << "Num numeric insertions = " << num << std::endl
466+ << "hashmap entries = " << map5.size() << std::endl
467+ << "Time = " << time << std::endl;
468+ }
469+ else if (test_id == 6)
470+ {
471+ zorba::time::walltime startTime;
472+ zorba::time::get_current_walltime(startTime);
473+
474+ for (int i = 0; i < num; ++i)
475+ {
476+ zstring key = str_buf[i];
477+ int value = 1;
478+ (void)map6.put(key, value, false);
479+ }
480+
481+ zorba::time::walltime stopTime;
482+ time::get_current_walltime(stopTime);
483+
484+ double time = time::get_walltime_elapsed(startTime, stopTime);
485+
486+ std::cout << "Load factor = " << load_factor << std::endl
487+ << "Num zstring insertions = " << num << std::endl
488+ << "hashmap entries = " << map6.size() << std::endl
489+ << "Time = " << time << std::endl;
490+ }
491+ else
492+ {
493+ std::cout << "Invalid test id" << std::endl;
494+ return 2;
495+ }
496+
497+ delete [] int_buf;
498+ delete [] str_buf;
499+
500+ return 0;
501+}
502+
503+
504+}
505+}
506
507=== modified file 'src/unit_tests/unit_test_list.h'
508--- src/unit_tests/unit_test_list.h 2012-09-17 00:36:37 +0000
509+++ src/unit_tests/unit_test_list.h 2012-10-08 12:24:30 +0000
510@@ -25,34 +25,49 @@
511 namespace UnitTests {
512
513 int runUriTest(int argc, char* argv[]);
514+
515 int runDebuggerProtocolTest(int argc, char* argv[]);
516+
517 int test_base64( int, char*[] );
518+
519 int test_base64_streambuf( int, char*[] );
520+
521 int test_fs_iterator( int, char*[] );
522+
523 #ifndef ZORBA_NO_ICU
524 int test_icu_streambuf( int, char*[] );
525 #endif /* ZORBA_NO_ICU */
526+
527 int test_json_parser( int, char*[] );
528+
529 int test_string( int, char*[] );
530+
531 int test_unique_ptr( int, char*[] );
532+
533 int test_fs_iterator( int, char*[] );
534- //int test_mem_manager( int, char*[] );
535+
536 #ifndef ZORBA_NO_FULL_TEXT
537 int test_stemmer( int, char*[] );
538 int test_thesaurus( int, char*[] );
539 int test_tokenizer( int, char*[] );
540 #endif /* ZORBA_NO_FULL_TEXT */
541+
542 #ifndef ZORBA_HAVE_UNIQUE_PTR
543 int test_unique_ptr( int, char*[] );
544 #endif /* ZORBA_HAVE_UNIQUE_PTR */
545+
546 int test_uuid( int, char*[] );
547+
548 #ifndef ZORBA_HAVE_UNORDERED_MAP
549 int test_unordered_map( int, char*[] );
550 #endif /* ZORBA_HAVE_UNORDERED_MAP */
551+
552 #ifndef ZORBA_HAVE_UNORDERED_SET
553 int test_unordered_set( int, char*[] );
554 #endif /* ZORBA_HAVE_UNORDERED_SET */
555
556+ int test_hashmaps(int argc, char* argv[]);
557+
558 void initializeTestList();
559
560 } // namespace UnitTests
561
562=== modified file 'src/unit_tests/unit_tests.cpp'
563--- src/unit_tests/unit_tests.cpp 2012-09-17 00:36:37 +0000
564+++ src/unit_tests/unit_tests.cpp 2012-10-08 12:24:30 +0000
565@@ -38,29 +38,41 @@
566 void initializeTestList()
567 {
568 libunittests["base64"] = test_base64;
569+
570 libunittests["base64_streambuf"] = test_base64_streambuf;
571+
572 libunittests["fs_iterator"] = test_fs_iterator;
573- //libunittests["memory_manager"] = test_mem_manager;
574+
575 #ifndef ZORBA_NO_ICU
576 libunittests["icu_streambuf"] = test_icu_streambuf;
577 #endif /* ZORBA_NO_ICU */
578+
579 libunittests["json_parser"] = test_json_parser;
580+
581 libunittests["string"] = test_string;
582+
583 #ifndef ZORBA_NO_FULL_TEXT
584 libunittests["stemmer"] = test_stemmer;
585 libunittests["thesaurus"] = test_thesaurus;
586 libunittests["tokenizer"] = test_tokenizer;
587 #endif /* ZORBA_NO_FULL_TEXT */
588+
589 #ifndef ZORBA_HAVE_UNIQUE_PTR
590 libunittests["unique_ptr"] = test_unique_ptr;
591 #endif /* ZORBA_HAVE_UNIQUE_PTR */
592+
593 libunittests["uuid"] = test_uuid;
594+
595 #ifndef ZORBA_HAVE_UNORDERED_MAP
596 libunittests["unordered_map"] = test_unordered_map;
597 #endif /* ZORBA_HAVE_UNORDERED_MAP */
598+
599 #ifndef ZORBA_HAVE_UNORDERED_SET
600 libunittests["unordered_set"] = test_unordered_set;
601 #endif /* ZORBA_HAVE_UNORDERED_SET */
602+
603+ libunittests["hashmaps"] = test_hashmaps;
604+
605 libunittests["uri"] = runUriTest;
606
607 #ifdef ZORBA_WITH_DEBUGGER
608
609=== modified file 'src/zorbaserialization/bin_archiver.cpp'
610--- src/zorbaserialization/bin_archiver.cpp 2012-09-17 00:36:37 +0000
611+++ src/zorbaserialization/bin_archiver.cpp 2012-10-08 12:24:30 +0000
612@@ -137,7 +137,7 @@
613 BinArchiver::BinArchiver(std::ostream* os)
614 :
615 Archiver(true),
616- theStringPool(1024, false, false),
617+ theStringPool(1024, false),
618 theFirstBinaryString(0)
619 {
620 this->is = NULL;
621
622=== modified file 'src/zorbautils/hashmap.h'
623--- src/zorbautils/hashmap.h 2012-09-17 00:36:37 +0000
624+++ src/zorbautils/hashmap.h 2012-10-08 12:24:30 +0000
625@@ -16,6 +16,7 @@
626 #ifndef ZORBA_UTILS_HASHMAP_H
627 #define ZORBA_UTILS_HASHMAP_H
628
629+#include <vector>
630
631 #include <cstddef>
632 #include <zorba/config.h>
633@@ -23,9 +24,10 @@
634 #include "common/common.h"
635
636 #include "zorbautils/fatal.h"
637-#include "zorbautils/checked_vector.h"
638 #include "zorbautils/mutex.h"
639
640+#include "store/api/shared_types.h"
641+
642
643 namespace zorba
644 {
645@@ -42,18 +44,81 @@
646 template <class T, class V>
647 class HashEntry
648 {
649+ struct KeyHolder
650+ {
651+ char theKey[sizeof(T)];
652+ };
653+
654+ struct ValueHolder
655+ {
656+ char theValue[sizeof(V)];
657+ };
658+
659 public:
660 bool theIsFree;
661- T theItem;
662- V theValue;
663+ KeyHolder theKey;
664+ ValueHolder theValue;
665 ptrdiff_t theNext; // offset from "this" to the next entry.
666
667- HashEntry() : theIsFree(true), theNext(0) { }
668+ HashEntry()
669+ :
670+ theIsFree(true),
671+ theNext(0)
672+ {
673+ }
674+
675+ HashEntry(const HashEntry<T, V>& other)
676+ {
677+ theIsFree = other.theIsFree;
678+ theNext = other.theNext;
679+ if (!theIsFree)
680+ {
681+ new (&theKey) T(other.key());
682+ new (&theValue) V(other.value());
683+ }
684+ }
685
686 ~HashEntry()
687 {
688- theIsFree = true;
689- theNext = 0;
690+ if (!theIsFree)
691+ {
692+ key().~T();
693+ value().~V();
694+ }
695+ }
696+
697+ HashEntry<T, V>& operator = (const HashEntry<T, V>& other)
698+ {
699+ if (theIsFree)
700+ {
701+ assert(false);
702+
703+ if (!other.theIsFree)
704+ {
705+ new (&theKey) T(other.key());
706+ new (&theValue) V(other.value());
707+ }
708+ }
709+ else
710+ {
711+ if (!other.theIsFree)
712+ {
713+ key() = other.key();
714+ value() = other.value();
715+ }
716+ else
717+ {
718+ assert(false);
719+
720+ key().~T();
721+ value().~V();
722+ }
723+ }
724+
725+ theIsFree = other.theIsFree;
726+ theNext = other.theNext;
727+
728+ return *this;
729 }
730
731 bool isFree() const
732@@ -63,15 +128,39 @@
733
734 void setFree()
735 {
736- theItem.~T();
737+ key().~T();
738+ value().~V();
739 theIsFree = true;
740+ theNext = 0;
741 }
742
743 void unsetFree()
744 {
745+ new (&theKey) T;
746+ new (&theValue) V;
747 theIsFree = false;
748 }
749
750+ T& key()
751+ {
752+ return *reinterpret_cast<T*>(&theKey);
753+ }
754+
755+ const T& key() const
756+ {
757+ return *reinterpret_cast<const T*>(&theKey);
758+ }
759+
760+ const V& value() const
761+ {
762+ return *reinterpret_cast<const V*>(&theValue);
763+ }
764+
765+ V& value()
766+ {
767+ return *reinterpret_cast<V*>(&theValue);
768+ }
769+
770 void setNext(HashEntry* nextEntry)
771 {
772 theNext = (nextEntry == NULL ? 0 : nextEntry - this);
773@@ -105,17 +194,17 @@
774
775 theHashTab : The hash table. The table is implemented as a vector of hash
776 entries and is devided in 2 areas: Each entry between 0 and
777- theHashTabSize - 1 is the head of a hash bucket. Each entry
778- between theHashTabSize+1 and theHashTab.size()-1 is either
779+ theNumBuckets - 1 is the head of a hash bucket. Each entry
780+ between theNumBuckets+1 and theHashTab.size()-1 is either
781 a "collision" entry (i.e., it belongs to a hash bucket with
782 more than one entries) or a "free" entry (i.e. it does not
783 currently belong to any bucket, but is available for
784 allocation as a collision entry when needed). Free entries
785 in the collision area are linked in a free list. Entry
786- theHashTab[theHashTabSize] is reserved as the head of this
787+ theHashTab[theNumBuckets] is reserved as the head of this
788 free list.
789- theHashTabSize : The current number of hash buckets in theHashTab.
790- theInitialSize : The initial number of hash buckets.
791+ theNumBuckets : The current number of hash buckets in theHashTab.
792+
793 theLoadFactor : The max fraction of non-empty hash buckets after which the
794 hash table is doubled in size.
795
796@@ -130,11 +219,11 @@
797 friend class HashMap;
798
799 protected:
800- checked_vector<HashEntry<T, V> >* theHashTab;
801- size_t thePos;
802+ std::vector<HashEntry<T, V> >* theHashTab;
803+ csize thePos;
804
805 protected:
806- iterator(checked_vector<HashEntry<T, V> >* ht, size_t pos)
807+ iterator(std::vector<HashEntry<T, V> >* ht, csize pos)
808 :
809 theHashTab(ht),
810 thePos(pos)
811@@ -150,7 +239,7 @@
812
813 HashEntry<T, V>& entry = (*theHashTab)[thePos];
814
815- return entry.theItem;
816+ return entry.key();
817 }
818
819 public:
820@@ -194,7 +283,7 @@
821
822 const HashEntry<T, V>& entry = (*theHashTab)[thePos];
823
824- return std::pair<T, V>(entry.theItem, entry.theValue);
825+ return std::pair<T, V>(entry.key(), entry.value());
826 }
827
828 const T& getKey() const
829@@ -203,16 +292,25 @@
830
831 const HashEntry<T, V>& entry = (*theHashTab)[thePos];
832
833- return entry.theItem;
834- }
835-
836- V& getValue() const
837- {
838- ZORBA_FATAL(thePos < theHashTab->size(), "");
839-
840- HashEntry<T, V>& entry = (*theHashTab)[thePos];
841-
842- return entry.theValue;
843+ return entry.key();
844+ }
845+
846+ const V& getValue() const
847+ {
848+ ZORBA_FATAL(thePos < theHashTab->size(), "");
849+
850+ HashEntry<T, V>& entry = (*theHashTab)[thePos];
851+
852+ return entry.value();
853+ }
854+
855+ V& getValue()
856+ {
857+ ZORBA_FATAL(thePos < theHashTab->size(), "");
858+
859+ HashEntry<T, V>& entry = (*theHashTab)[thePos];
860+
861+ return entry.value();
862 }
863
864 void setValue(const V& val)
865@@ -221,7 +319,7 @@
866
867 HashEntry<T, V>& entry = (*theHashTab)[thePos];
868
869- entry.theValue = val;
870+ entry.value() = val;
871 }
872 };
873
874@@ -230,20 +328,22 @@
875 static const double DEFAULT_LOAD_FACTOR;
876
877 protected:
878- ulong theNumEntries;
879-
880- size_t theHashTabSize;
881- size_t theInitialSize;
882- checked_vector<HashEntry<T, V> > theHashTab;
883- double theLoadFactor;
884- C theCompareFunction;
885-
886- bool theUseTransfer;
887-
888- SYNC_CODE(mutable Mutex theMutex;)
889- SYNC_CODE(Mutex * theMutexp;)
890-
891- int numCollisions;
892+ std::vector<HashEntry<T, V> > theHashTab;
893+
894+ csize theNumBuckets;
895+
896+ csize theNumEntries;
897+
898+ double theLoadFactor;
899+
900+ double theMaxLoad;
901+
902+ C theCompareFunction;
903+
904+ SYNC_CODE(mutable Mutex theMutex;)
905+ SYNC_CODE(Mutex * theMutexp;)
906+
907+ csize theNumCollisions;
908
909 public:
910
911@@ -257,19 +357,20 @@
912 depends on some parametrs (e.g. the collation or timezone). These parameters
913 are provided as data members of the given comparison-function obj.
914 ********************************************************************************/
915-HashMap(const C& compFunction, size_t size, bool sync, bool useTransfer = false)
916+HashMap(const C& compFunction, csize size, bool sync)
917 :
918+ theNumBuckets(size),
919 theNumEntries(0),
920- theHashTabSize(size),
921- theInitialSize(size),
922- theHashTab(computeTabSize(size)),
923 theLoadFactor(DEFAULT_LOAD_FACTOR),
924 theCompareFunction(compFunction),
925- theUseTransfer(useTransfer),
926- numCollisions(0)
927+ theNumCollisions(0)
928 {
929+ theHashTab.resize(computeCapacity(size));
930+
931 formatCollisionArea();
932
933+ theMaxLoad = theNumBuckets * theLoadFactor;
934+
935 SYNC_CODE(theMutexp = (sync ? &theMutex : NULL);)
936 }
937
938@@ -284,18 +385,19 @@
939 theCompareFunction data member is initialized with the default constructor
940 of the C class.
941 ********************************************************************************/
942-HashMap(size_t size, bool sync, bool useTransfer = false)
943+HashMap(csize size, bool sync)
944 :
945+ theNumBuckets(size),
946 theNumEntries(0),
947- theHashTabSize(size),
948- theInitialSize(size),
949- theHashTab(computeTabSize(size)),
950 theLoadFactor(DEFAULT_LOAD_FACTOR),
951- theUseTransfer(useTransfer),
952- numCollisions(0)
953+ theNumCollisions(0)
954 {
955+ theHashTab.resize(computeCapacity(size));
956+
957 formatCollisionArea();
958
959+ theMaxLoad = theNumBuckets * theLoadFactor;
960+
961 SYNC_CODE(theMutexp = (sync ? &theMutex : NULL);)
962 }
963
964@@ -340,7 +442,7 @@
965 /*******************************************************************************
966
967 ********************************************************************************/
968-ulong size() const
969+csize size() const
970 {
971 return theNumEntries;
972 }
973@@ -349,7 +451,7 @@
974 /*******************************************************************************
975
976 ********************************************************************************/
977-size_t capacity() const
978+csize capacity() const
979 {
980 return theHashTab.size();
981 }
982@@ -358,6 +460,24 @@
983 /*******************************************************************************
984
985 ********************************************************************************/
986+csize bucket_count() const
987+{
988+ return theNumBuckets;
989+}
990+
991+
992+/*******************************************************************************
993+
994+********************************************************************************/
995+csize collisions() const
996+{
997+ return theNumCollisions;
998+}
999+
1000+
1001+/*******************************************************************************
1002+
1003+********************************************************************************/
1004 C get_compare_function()
1005 {
1006 return theCompareFunction;
1007@@ -388,13 +508,17 @@
1008 void clearNoSync()
1009 {
1010 theNumEntries = 0;
1011- numCollisions = 0;
1012-
1013- size_t n = theHashTab.size();
1014-
1015- for (size_t i = 0; i < n; ++i)
1016+ theNumCollisions = 0;
1017+
1018+ csize n = theHashTab.size();
1019+
1020+ HashEntry<T, V>* entry = &theHashTab[0];
1021+ HashEntry<T, V>* lastentry = &theHashTab[n];
1022+
1023+ for (; entry < lastentry; ++entry)
1024 {
1025- theHashTab[i].~HashEntry<T, V>();
1026+ if (!entry->isFree())
1027+ entry->setFree();
1028 }
1029
1030 formatCollisionArea();
1031@@ -406,13 +530,13 @@
1032 ********************************************************************************/
1033 iterator begin() const
1034 {
1035- return iterator(const_cast<checked_vector<HashEntry<T, V> >*>(&theHashTab), 0);
1036+ return iterator(const_cast<std::vector<HashEntry<T, V> >*>(&theHashTab), 0);
1037 }
1038
1039
1040 iterator end() const
1041 {
1042- return iterator(const_cast<checked_vector<HashEntry<T, V> >*>(&theHashTab),
1043+ return iterator(const_cast<std::vector<HashEntry<T, V> >*>(&theHashTab),
1044 theHashTab.size());
1045 }
1046
1047@@ -437,7 +561,7 @@
1048
1049 while (entry != NULL)
1050 {
1051- if (equal(entry->theItem, item))
1052+ if (equal(entry->key(), item))
1053 return true;
1054
1055 entry = entry->getNext();
1056@@ -468,7 +592,7 @@
1057
1058 while (entry != NULL)
1059 {
1060- if (equal(entry->theItem, item))
1061+ if (equal(entry->key(), item))
1062 return iterator(&theHashTab, entry - &theHashTab[0]);
1063
1064 entry = entry->getNext();
1065@@ -498,9 +622,9 @@
1066
1067 while (entry != NULL)
1068 {
1069- if (equal(entry->theItem, item))
1070+ if (equal(entry->key(), item))
1071 {
1072- value = entry->theValue;
1073+ value = entry->value();
1074 return true;
1075 }
1076
1077@@ -527,8 +651,8 @@
1078
1079 if (!found)
1080 {
1081- entry->theItem = pair.first;
1082- entry->theValue = pair.second;
1083+ entry->key() = pair.first;
1084+ entry->value() = pair.second;
1085 }
1086
1087 return !found;
1088@@ -552,12 +676,12 @@
1089
1090 if (!found)
1091 {
1092- entry->theItem = item;
1093- entry->theValue = value;
1094+ entry->key() = item;
1095+ entry->value() = value;
1096 }
1097 else
1098 {
1099- value = entry->theValue;
1100+ value = entry->value();
1101 }
1102
1103 return !found;
1104@@ -586,7 +710,7 @@
1105
1106 while (entry != NULL)
1107 {
1108- if (equal(entry->theItem, item))
1109+ if (equal(entry->key(), item))
1110 {
1111 found = true;
1112 break;
1113@@ -602,7 +726,7 @@
1114 }
1115 else
1116 {
1117- entry->theValue = value;
1118+ entry->value() = value;
1119 return true;
1120 }
1121 }
1122@@ -616,13 +740,13 @@
1123 {
1124 SYNC_CODE(AutoMutex lock(theMutexp);)
1125
1126- if (ite.thePos < theHashTabSize)
1127+ if (ite.thePos < theNumBuckets)
1128 {
1129 eraseEntry(&theHashTab[ite.thePos], NULL);
1130 }
1131 else
1132 {
1133- const T& item = theHashTab[ite.thePos].theItem;
1134+ const T& item = theHashTab[ite.thePos].key();
1135
1136 ulong hval = hash(item);
1137
1138@@ -665,7 +789,7 @@
1139 // If the item to remove is in the 1st entry of a bucket, then if the
1140 // bucket has no other entries, just call the destructor on that entry,
1141 // else copy the 2nd entry to the 1st entry and freeup the 2nd entry.
1142- if (equal(entry->theItem, item))
1143+ if (equal(entry->key(), item))
1144 {
1145 eraseEntry(entry, NULL);
1146 return true;
1147@@ -678,7 +802,7 @@
1148
1149 while (entry != NULL)
1150 {
1151- if (equal(entry->theItem, item))
1152+ if (equal(entry->key(), item))
1153 {
1154 eraseEntry(entry, preventry);
1155 return true;
1156@@ -698,9 +822,9 @@
1157 /*******************************************************************************
1158
1159 ********************************************************************************/
1160-size_t computeTabSize(size_t size) const
1161+csize computeCapacity(csize size) const
1162 {
1163- return size + 32 + size/5;
1164+ return size + 32 + size / (5 - 10 * (theLoadFactor - 0.7)) ;
1165 }
1166
1167
1168@@ -724,7 +848,7 @@
1169 ********************************************************************************/
1170 HashEntry<T, V>* bucket(ulong hvalue)
1171 {
1172- return &theHashTab[hvalue % theHashTabSize];
1173+ return &theHashTab[hvalue % theNumBuckets];
1174 }
1175
1176
1177@@ -733,7 +857,7 @@
1178 ********************************************************************************/
1179 const HashEntry<T, V>* bucket(ulong hvalue) const
1180 {
1181- return &theHashTab[hvalue % theHashTabSize];
1182+ return &theHashTab[hvalue % theNumBuckets];
1183 }
1184
1185
1186@@ -742,7 +866,7 @@
1187 ********************************************************************************/
1188 HashEntry<T, V>* freelist()
1189 {
1190- return &theHashTab[theHashTabSize];
1191+ return &theHashTab[theNumBuckets];
1192 }
1193
1194
1195@@ -755,41 +879,38 @@
1196 {
1197 if (entry->theNext == 0)
1198 {
1199- entry->~HashEntry<T, V>();
1200+ entry->setFree();
1201 }
1202 else
1203 {
1204 HashEntry<T, V>* nextEntry = entry->getNext();
1205 *entry = *nextEntry;
1206 entry->setNext(nextEntry->getNext());
1207- nextEntry->~HashEntry<T, V>();
1208+ nextEntry->setFree();
1209 nextEntry->setNext(freelist()->getNext());
1210 freelist()->setNext(nextEntry);
1211 }
1212
1213- theNumEntries--;
1214+ --theNumEntries;
1215
1216- if (theHashTabSize > theInitialSize &&
1217- theNumEntries < (theHashTabSize / 2) * theLoadFactor)
1218+ if (theNumEntries < theMaxLoad / 2)
1219 {
1220- resizeHashTab(theHashTabSize / 2);
1221+ resizeHashTab(theNumBuckets / 2);
1222 }
1223
1224 }
1225 else
1226 {
1227 preventry->setNext(entry->getNext());
1228- entry->~HashEntry<T, V>();
1229+ entry->setFree();
1230 entry->setNext(freelist()->getNext());
1231 freelist()->setNext(entry);
1232
1233- theNumEntries--;
1234- numCollisions--;
1235+ --theNumEntries;
1236
1237- if (theHashTabSize > theInitialSize &&
1238- theNumEntries < (theHashTabSize / 2) * theLoadFactor)
1239+ if (theNumEntries < theMaxLoad / 2)
1240 {
1241- resizeHashTab(theHashTabSize / 2);
1242+ resizeHashTab(theNumBuckets / 2);
1243 }
1244 }
1245 }
1246@@ -812,7 +933,7 @@
1247 // If the hash bucket is empty, its 1st entry is used to store the new string.
1248 if (headEntry->isFree())
1249 {
1250- theNumEntries++;
1251+ ++theNumEntries;
1252 headEntry->unsetFree();
1253 return headEntry;
1254 }
1255@@ -822,7 +943,7 @@
1256
1257 while (currEntry != NULL)
1258 {
1259- if (equal(currEntry->theItem, item))
1260+ if (equal(currEntry->key(), item))
1261 {
1262 found = true;
1263 return currEntry;
1264@@ -836,22 +957,22 @@
1265 // Do garbage collection if the hash table is more than 60% full. Note that
1266 // gc does NOT resize theHashTab, so after gc, the item still belongs to the
1267 // same bucket as before gc.
1268- if (theNumEntries > theHashTabSize * theLoadFactor)
1269+ if (theNumEntries > theMaxLoad)
1270 {
1271 garbageCollect();
1272
1273 if (headEntry->isFree())
1274 {
1275- theNumEntries++;
1276+ ++theNumEntries;
1277 headEntry->unsetFree();
1278 return headEntry;
1279 }
1280 }
1281
1282 // Double the size of the hash table if it is more than 60% full.
1283- if (theNumEntries > theHashTabSize * theLoadFactor)
1284+ if (theNumEntries > theMaxLoad)
1285 {
1286- resizeHashTab(theHashTabSize * 2);
1287+ resizeHashTab(theNumBuckets * 2);
1288 goto retry;
1289 }
1290
1291@@ -859,8 +980,8 @@
1292 // collision list for its bucket. We place the new item right after the
1293 // headEntry for the bucket.
1294
1295- theNumEntries++;
1296- numCollisions++;
1297+ ++theNumEntries;
1298+ ++theNumCollisions;
1299
1300 // If no free entry exists, we extend the collision area of the hash table.
1301 if (freelist()->getNext() == 0)
1302@@ -886,10 +1007,15 @@
1303 ********************************************************************************/
1304 void extendCollisionArea()
1305 {
1306- size_t oldSize = theHashTab.size();
1307- size_t numCollisionEntries = oldSize - theHashTabSize;
1308- size_t newSize = theHashTabSize + 2 * numCollisionEntries;
1309+ csize oldSize = theHashTab.size();
1310+ csize numCollisionEntries = oldSize - theNumBuckets;
1311+ csize newSize = theNumBuckets + 2 * numCollisionEntries;
1312
1313+ /*
1314+ std::cout << "Extending collision area" << std::endl
1315+ << "numBuckets = " << theNumBuckets << " numCollisionEntries = "
1316+ << numCollisionEntries << std::endl << std::endl;
1317+ */
1318 //foo(); for setting a breakpoint
1319
1320 theHashTab.resize(newSize);
1321@@ -909,7 +1035,8 @@
1322 firstentry = freelist();
1323
1324 HashEntry<T, V>* lastentry = &theHashTab[theHashTab.size() - 1];
1325- for (HashEntry<T, V>* entry = firstentry; entry < lastentry; entry++)
1326+
1327+ for (HashEntry<T, V>* entry = firstentry; entry < lastentry; ++entry)
1328 entry->theNext = 1;
1329
1330 lastentry->theNext = 0;
1331@@ -919,31 +1046,34 @@
1332 /*******************************************************************************
1333
1334 ********************************************************************************/
1335-void resizeHashTab(size_t newSize)
1336+void resizeHashTab(csize newSize)
1337 {
1338- HashEntry<T, V>* entry;
1339- HashEntry<T, V>* oldentry;
1340+ if (newSize == 0)
1341+ newSize = 3;
1342+
1343+ csize oldcap = theHashTab.size();
1344+ csize newcap = computeCapacity(newSize);
1345
1346 // Create a new vector of new size and swap theHashTab with this new vector
1347- checked_vector<HashEntry<T, V> > oldTab(computeTabSize(newSize));
1348+ std::vector<HashEntry<T, V> > oldTab(newcap);
1349 theHashTab.swap(oldTab);
1350
1351- size_t oldsize = oldTab.size();
1352- theHashTabSize = newSize;
1353+ theNumBuckets = newSize;
1354+ theMaxLoad = theNumBuckets * theLoadFactor;
1355
1356 formatCollisionArea();
1357
1358- numCollisions = 0;
1359+ HashEntry<T, V>* entry;
1360+ HashEntry<T, V>* oldentry = &oldTab[0];
1361+ HashEntry<T, V>* lastentry = &oldTab[oldcap];
1362
1363 // Now rehash every entry
1364- for (size_t i = 0; i < oldsize; i++)
1365+ for (; oldentry < lastentry; ++oldentry)
1366 {
1367- oldentry = &oldTab[i];
1368-
1369 if (oldentry->isFree())
1370 continue;
1371
1372- entry = bucket (hash(oldentry->theItem));
1373+ entry = bucket(hash(oldentry->key()));
1374
1375 if (!entry->isFree())
1376 {
1377@@ -962,12 +1092,11 @@
1378 freelist()->setNext(entry->getNext());
1379 entry->setNext(headEntry->getNext());
1380 headEntry->setNext(entry);
1381- numCollisions++;
1382 }
1383
1384- entry->theItem = oldentry->theItem;
1385- entry->theValue = oldentry->theValue;
1386 entry->unsetFree();
1387+ entry->key() = oldentry->key();
1388+ entry->value() = oldentry->value();
1389 }
1390 }
1391
1392
1393=== modified file 'src/zorbautils/hashset.h'
1394--- src/zorbautils/hashset.h 2012-09-17 00:36:37 +0000
1395+++ src/zorbautils/hashset.h 2012-10-08 12:24:30 +0000
1396@@ -45,16 +45,16 @@
1397 typedef typename HashMap<T, DummyHashValue, C>::iterator iterator;
1398
1399
1400-HashSet(const C& compFunction, ulong size, bool sync, bool useTransfer = false)
1401+HashSet(const C& compFunction, ulong size, bool sync)
1402 :
1403- HashMap<T, DummyHashValue, C>(compFunction, size, sync, useTransfer)
1404+ HashMap<T, DummyHashValue, C>(compFunction, size, sync)
1405 {
1406 }
1407
1408
1409 HashSet(ulong size, bool sync)
1410 :
1411- HashMap<T, DummyHashValue, C>(size, sync, false)
1412+ HashMap<T, DummyHashValue, C>(size, sync)
1413 {
1414 }
1415
1416@@ -100,12 +100,7 @@
1417
1418 if (!found)
1419 {
1420- /*
1421- if (this->theUseTransfer)
1422- entry->theItem.transfer(item);
1423- else
1424- */
1425- entry->theItem = item;
1426+ entry->key() = item;
1427 }
1428
1429 return !found;
1430@@ -129,19 +124,13 @@
1431 entry = this->hashInsert(item, this->hash(item), found);
1432 if (!found)
1433 {
1434- /*
1435- if (this->theUseTransfer)
1436- entry->theItem.transfer(item);
1437- else
1438- */
1439- entry->theItem = item;
1440-
1441- outItem = entry->theItem;
1442+ entry->key() = item;
1443+ outItem = entry->key();
1444 return true;
1445 }
1446 else
1447 {
1448- outItem = entry->theItem;
1449+ outItem = entry->key();
1450 return false;
1451 }
1452 }
1453@@ -164,13 +153,13 @@
1454 entry = this->hashInsert(item, this->hash(item), found);
1455 if (!found)
1456 {
1457- entry->theItem = item;
1458- outItem = entry->theItem;
1459+ entry->key() = item;
1460+ outItem = entry->key();
1461 return true;
1462 }
1463 else
1464 {
1465- outItem = entry->theItem;
1466+ outItem = entry->key();
1467 return false;
1468 }
1469 }

Subscribers

People subscribed via source and target branches