Zorba

Merge lp:~zorba-coders/zorba/hashmap into lp:zorba

hashmap
Merge into trunk

Proposed by Markos Zaharioudakis on 2012-09-19

Status:	Merged
Approved by:	Markos Zaharioudakis on 2012-10-08
Approved revision:	10817
Merged at revision:	11091
Proposed branch:	lp:~zorba-coders/zorba/hashmap
Merge into:	lp:zorba
Diff against target:	1469 lines (+600/-192) 10 files modified src/context/static_context.h (+1/-0) src/store/naive/qname_pool.cpp (+8/-8) src/store/naive/string_pool.cpp (+42/-38) src/unit_tests/CMakeLists.txt (+1/-0) src/unit_tests/test_hashmaps.cpp (+257/-0) src/unit_tests/unit_test_list.h (+16/-1) src/unit_tests/unit_tests.cpp (+13/-1) src/zorbaserialization/bin_archiver.cpp (+1/-1) src/zorbautils/hashmap.h (+251/-122) src/zorbautils/hashset.h (+10/-21)
To merge this branch:	bzr merge lp:~zorba-coders/zorba/hashmap
Related bugs:	Link a bug report

Reviewer	Review Type	Date Requested	Status
Markos Zaharioudakis			Approve on 2012-09-19
Review via email: mp+125187@code.launchpad.net

Commit message

HashMap optimizations

Description of the change

HashMap optimizations

Revision history for this message

Markos Zaharioudakis (markos-za) on 2012-09-19:

review: Approve

lp:~zorba-coders/zorba/hashmap updated on 2012-10-08

10815. By Markos Zaharioudakis on 2012-09-19: merge from trunk
10816. By Markos Zaharioudakis on 2012-09-19: removed debugging print out
10817. By Markos Zaharioudakis on 2012-10-08: merge from trunk

Revision history for this message

Zorba Build Bot (zorba-buildbot) wrote on 2012-10-08:

Validation queue starting for merge proposal.
Log at: http://zorbatest.lambda.nu:8080/remotequeue/hashmap-2012-10-08T12-28-47.769Z/log.html

Revision history for this message

Zorba Build Bot (zorba-buildbot) wrote on 2012-10-08:

Validation queue job hashmap-2012-10-08T12-28-47.769Z is finished. The final status was:

All tests succeeded!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk

Subscribers

People subscribed via source and target branches

to all changes:

Dana Florescu

Matthias Brantner

Zorba Coders

 === modified file 'src/context/static_context.h'
 --- src/context/static_context.h	2012-10-03 12:32:55 +0000
 +++ src/context/static_context.h	2012-10-08 12:24:30 +0000
@@ -42,6 +42,7 @@
  #include "zorbautils/hashmap_zstring.h"
  #include "zorbautils/hashmap_itemp.h"
++#include "zorbautils/checked_vector.h"
  #include "common/shared_types.h"
 === modified file 'src/store/naive/qname_pool.cpp'
 --- src/store/naive/qname_pool.cpp	2012-09-17 00:36:37 +0000
 +++ src/store/naive/qname_pool.cpp	2012-10-08 12:24:30 +0000
@@ -63,12 +63,12 @@
  ********************************************************************************/
  QNamePool::~QNamePool()
+ {
--  csize n = theHashSet.theHashTab.size();
++  csize n = theHashSet.capacity();
    for (csize i = 0; i < n; ++i)
+   {
      if (!theHashSet.theHashTab[i].isFree() &&
--        theHashSet.theHashTab[i].theItem->isOverflow())
--      delete theHashSet.theHashTab[i].theItem;
++        theHashSet.theHashTab[i].key()->isOverflow())
++      delete theHashSet.theHashTab[i].key();
+   }
    if (theCache != NULL)
@@ -297,12 +297,12 @@
        bool found;
        entry = theHashSet.hashInsert(qn, hval, found);
--      entry->theItem = qn;
++      entry->key() = qn;
        ZORBA_FATAL(!found, "");
+     }
      else
+     {
--      qn = entry->theItem;
++      qn = entry->key();
        cachePin(qn);
+     }
@@ -389,12 +389,12 @@
        bool found;
        entry = theHashSet.hashInsert(qn, hval, found);
--      entry->theItem = qn;
++      entry->key() = qn;
        ZORBA_FATAL(!found, "");
+     }
      else
+     {
--      qn = entry->theItem;
++      qn = entry->key();
        cachePin(qn);
+     }
@@ -478,7 +478,7 @@
    while (entry != NULL)
+   {
--    QNameItem* qn = entry->theItem;
++    QNameItem* qn = entry->key();
      if (ztd::equals(qn->getLocalName(), ln, lnlen) &&
          ztd::equals(qn->getNamespace(), ns, nslen) &&
 === modified file 'src/store/naive/string_pool.cpp'
 --- src/store/naive/string_pool.cpp	2012-09-17 00:36:37 +0000
 +++ src/store/naive/string_pool.cpp	2012-10-08 12:24:30 +0000
@@ -28,13 +28,14 @@
  StringPool::~StringPool()
+ {
    csize count = 0;
--  csize n = theHashTab.size();
++  csize n = capacity();
    for (csize i = 0; i < n; ++i)
+   {
--    if (theHashTab[i].theItem.is_shared())
++    if (!theHashTab[i].isFree() && theHashTab[i].key().is_shared())
+     {
--      std::cerr << "ID: " << i << " Referenced URI: " << theHashTab[i].theItem << std::endl;
++      std::cerr << "ID: " << i << " Referenced URI: "
++                << theHashTab[i].key() << std::endl;
        //delete theHashTab[i].theString.getp();
        count++;
+     }
@@ -57,7 +58,8 @@
    bool found = false;
    zstring::size_type len = strlen(str);
--  ulong hval = hashfun::h32(str, len, FNV_32_INIT) % theHashTabSize;
++
++  ulong hval = hashfun::h32(str, len, FNV_32_INIT) % bucket_count();
+   {
      SYNC_CODE(AutoMutex lock(&theMutex);)
@@ -68,7 +70,7 @@
+     {
        while (entry != NULL)
+       {
--        if (ztd::equals(entry->theItem, str, len))
++        if (ztd::equals(entry->key(), str, len))
+         {
            found = true;
            break;
@@ -79,7 +81,7 @@
      if (found)
+     {
--      outStr = entry->theItem;
++      outStr = entry->key();
        return false;
+     }
+   }
@@ -97,76 +99,78 @@
  ********************************************************************************/
  void StringPool::garbageCollect()
+ {
--  HashEntry<zstring, DummyHashValue>* entry;
++  HashEntry<zstring, DummyHashValue>* currEntry;
    HashEntry<zstring, DummyHashValue>* freeList = NULL;
--  zstring::size_type size = theHashTabSize;
++  csize size = bucket_count();
--  for (ulong i = 0; i < size; ++i)
++  for (csize i = 0; i < size; ++i)
+   {
--    entry = &theHashTab[i];
++    currEntry = &theHashTab[i];
      // If the current hash bucket is empty, move to the next one
--    if (entry->isFree())
++    if (currEntry->isFree())
+     {
--      ZORBA_FATAL(entry->theNext == 0, "");
++      ZORBA_FATAL(currEntry->theNext == 0, "");
        continue;
+     }
      // Handle the 1st hash entry of the current hash bucket
--    while (!entry->theItem.is_shared())
++    while (!currEntry->key().is_shared())
+     {
--      if (entry->theNext == 0)
++      if (currEntry->theNext == 0)
+       {
--        entry->setFree();
--        theNumEntries--;
++        currEntry->setFree();
++        --theNumEntries;
          break;
+       }
        else
+       {
--        HashEntry<zstring, DummyHashValue>* nextEntry = entry->getNext();
--        *entry = *nextEntry;
--        entry->setNext(nextEntry->getNext());
++        HashEntry<zstring, DummyHashValue>* nextEntry = currEntry->getNext();
++        assert(!nextEntry->isFree());
++        *currEntry = *nextEntry;
++        currEntry->setNext(nextEntry->getNext());
          nextEntry->setFree();
          nextEntry->setNext(freeList);
          freeList = nextEntry;
--        theNumEntries--;
++        --theNumEntries;
+       }
+     }
      // Handle the overflow entries of the current hash bucket.
--    HashEntry<zstring, DummyHashValue>* prevEntry = entry;
--    entry = entry->getNext();
++    HashEntry<zstring, DummyHashValue>* prevEntry = currEntry;
++    currEntry = currEntry->getNext();
--    while (entry != NULL)
++    while (currEntry != NULL)
+     {
--      if (!entry->theItem.is_shared())
++      if (!currEntry->key().is_shared())
+       {
--        prevEntry->setNext(entry->getNext());
--        entry->setFree();
--        entry->setNext(freeList);
--        freeList = entry;
--        theNumEntries--;
++        prevEntry->setNext(currEntry->getNext());
++        currEntry->setFree();
++        currEntry->setNext(freeList);
++        freeList = currEntry;
++        --theNumEntries;
--        entry = prevEntry->getNext();
++        currEntry = prevEntry->getNext();
+       }
        else
+       {
--        prevEntry = entry;
--        entry = entry->getNext();
++        prevEntry = currEntry;
++        currEntry = currEntry->getNext();
+       }
+     }
+   }
    if (freeList != NULL)
+   {
--    entry = freeList;
--    while (entry->theNext != 0)
--      entry = entry->getNext();
--
--    entry->setNext(theHashTab[theHashTabSize].getNext());
--    theHashTab[theHashTabSize].setNext(freeList);
++    currEntry = freeList;
++    while (currEntry->theNext != 0)
++      currEntry = currEntry->getNext();
++
++    currEntry->setNext(theHashTab[bucket_count()].getNext());
++
++    theHashTab[bucket_count()].setNext(freeList);
+   }
+ }
 === modified file 'src/unit_tests/CMakeLists.txt'
 --- src/unit_tests/CMakeLists.txt	2012-09-17 00:36:37 +0000
 +++ src/unit_tests/CMakeLists.txt	2012-10-08 12:24:30 +0000
@@ -25,6 +25,7 @@
    test_uri.cpp
    test_json_parser.cpp
    test_fs_iterator.cpp
++  test_hashmaps.cpp
  #  memory_manager.cpp
+ )
 === added file 'src/unit_tests/test_hashmaps.cpp'
 --- src/unit_tests/test_hashmaps.cpp	1970-01-01 00:00:00 +0000
 +++ src/unit_tests/test_hashmaps.cpp	2012-10-08 12:24:30 +0000
@@ -0,0 +1,257 @@
++
++#include <cstdlib>
++#include <stdio.h>
++#include <iostream>
++
++#include <zorba/util/time.h>
++
++#include "zorbautils/hashmap.h"
++#include "util/hashmap32.h"
++#include "util/hashmap.h"
++#include "util/unordered_map.h"
++
++
++namespace zorba {
++
++namespace UnitTests {
++
++
++class IntCompFunc
++{
++public:
++  static bool equal(uint64_t k1, uint64_t k2)
++  {
++    return k1 == k2;
++  }
++
++  static uint32_t hash(uint64_t key)
++  {
++#if 1
++    return key;
++#else
++    char buf[9];
++    buf[0] = (unsigned char)(key>>56);
++    buf[1] = (unsigned char)(key>>48);
++    buf[2] = (unsigned char)(key>>40);
++    buf[3] = (unsigned char)(key>>32);
++    buf[4] = (unsigned char)(key>>24);
++    buf[5] = (unsigned char)(key>>16);
++    buf[6] = (unsigned char)(key>>8 );
++    buf[7] = (unsigned char)(key    );
++    buf[8] = 0;
++    return (uint32_t)hashfun::h64((void*)buf,8,FNV_64_INIT);
++#endif
++  }
++};
++
++
++class StrCompFunc
++{
++public:
++  static bool equal(const zstring& k1, const zstring& k2)
++  {
++    return k1 == k2;
++  }
++
++  static uint32_t hash(const zstring& key)
++  {
++    return hashfun::h32(key.c_str(), FNV_32_INIT);
++  }
++};
++
++
++int test_hashmaps(int argc, char* argv[])
++{
++  if (argc < 4)
++    return 1;
++
++  int test_id = atoi(argv[1]);
++
++  double load_factor = atof(argv[2]);
++
++  int num = atoi(argv[3]);
++
++  uint64_t* int_buf = new uint64_t[num];
++
++  for (int i = 0; i < num; ++i)
++  {
++    int_buf[i] = rand() % num;
++  }
++
++  zstring* str_buf = new zstring[num];
++
++  for (int i = 0; i < num; ++i)
++  {
++    char tmp[20];
++    sprintf(tmp, "%d", rand() % num);
++    str_buf[i] = tmp;
++
++    //std::cout << str_buf[i] << std::endl;
++  }
++
++
++  HashMap<uint64_t, int, IntCompFunc> map1(1024, false);
++  HashMap<zstring, int, StrCompFunc> map2(1024, false);
++
++  std::unordered_map<uint64_t, int> map3(1024);
++  std::unordered_map<zstring, int> map4(1024);
++
++  hash64map<int> map5(1024, load_factor);
++  hashmap<zstring, int> map6(1024, load_factor);
++
++  map1.set_load_factor(load_factor);
++  map2.set_load_factor(load_factor);
++
++  if (test_id == 1)
++  {
++    zorba::time::walltime startTime;
++    zorba::time::get_current_walltime(startTime);
++
++    for (int i = 0; i < num; ++i)
++    {
++      uint64_t key = int_buf[i];
++      int value = 1;
++      (void)map1.insert(key, value);
++    }
++
++    zorba::time::walltime stopTime;
++    time::get_current_walltime(stopTime);
++
++    double time = time::get_walltime_elapsed(startTime, stopTime);
++
++    std::cout << "Load factor = " << load_factor << std::endl
++              << "Num numeric insertions = " << num << std::endl
++              << "HashMap entries = " << map1.size() << std::endl
++              << "HashMap capacity = " << map1.capacity() << std::endl
++              << "HashMap colisions = " << map1.collisions() << std::endl
++              << "Time = " << time << std::endl;
++  }
++  else if (test_id == 2)
++  {
++    zorba::time::walltime startTime;
++    zorba::time::get_current_walltime(startTime);
++
++    for (int i = 0; i < num; ++i)
++    {
++      zstring key = str_buf[i];
++      int value = 1;
++      (void)map2.insert(key, value);
++    }
++
++    zorba::time::walltime stopTime;
++    time::get_current_walltime(stopTime);
++
++    double time = time::get_walltime_elapsed(startTime, stopTime);
++
++    std::cout << "Load factor = " << load_factor << std::endl
++              << "Num zstring insertions = " << num << std::endl
++              << "HashMap entries = " << map2.size() << std::endl
++              << "HashMap buckets = " << map2.bucket_count() << std::endl
++              << "HashMap capacity = " << map2.capacity() << std::endl
++              << "Time = " << time << std::endl;
++  }
++  else if (test_id == 3)
++  {
++    zorba::time::walltime startTime;
++    zorba::time::get_current_walltime(startTime);
++
++    for (int i = 0; i < num; ++i)
++    {
++      uint64_t key = int_buf[i];
++      int value = 1;
++      (void)map3.insert(std::pair<uint64_t, int>(key, value));
++    }
++
++    zorba::time::walltime stopTime;
++    time::get_current_walltime(stopTime);
++
++    double time = time::get_walltime_elapsed(startTime, stopTime);
++
++    std::cout << "Load factor = " << load_factor << std::endl
++              << "Num numeric insertions = " << num << std::endl
++              << "UnorderedMap entries = " << map3.size() << std::endl
++              << "UnorderedMap buckets = " << map3.bucket_count() << std::endl
++              << "Time = " << time << std::endl;
++  }
++  else if (test_id == 4)
++  {
++    zorba::time::walltime startTime;
++    zorba::time::get_current_walltime(startTime);
++
++    for (int i = 0; i < num; ++i)
++    {
++      zstring key = str_buf[i];
++      int value = 1;
++      (void)map4.insert(std::pair<zstring, int>(key, value));
++    }
++
++    zorba::time::walltime stopTime;
++    time::get_current_walltime(stopTime);
++
++    double time = time::get_walltime_elapsed(startTime, stopTime);
++
++    std::cout << "Load factor = " << load_factor << std::endl
++              << "Num zstring insertions = " << num << std::endl
++              << "UnorderedMap entries = " << map4.size() << std::endl
++              << "UnorderedMap buckets = " << map4.bucket_count() << std::endl
++              << "Time = " << time << std::endl;
++  }
++  else if (test_id == 5)
++  {
++    zorba::time::walltime startTime;
++    zorba::time::get_current_walltime(startTime);
++
++    for (int i = 0; i < num; ++i)
++    {
++      uint64_t key = int_buf[i];
++      int value = 1;
++      (void)map5.put_unsync(key, value);
++    }
++
++    zorba::time::walltime stopTime;
++    time::get_current_walltime(stopTime);
++
++    double time = time::get_walltime_elapsed(startTime, stopTime);
++
++    std::cout << "Load factor = " << load_factor << std::endl
++              << "Num numeric insertions = " << num << std::endl
++              << "hashmap entries = " << map5.size() << std::endl
++              << "Time = " << time << std::endl;
++  }
++  else if (test_id == 6)
++  {
++    zorba::time::walltime startTime;
++    zorba::time::get_current_walltime(startTime);
++
++    for (int i = 0; i < num; ++i)
++    {
++      zstring key = str_buf[i];
++      int value = 1;
++      (void)map6.put(key, value, false);
++    }
++
++    zorba::time::walltime stopTime;
++    time::get_current_walltime(stopTime);
++
++    double time = time::get_walltime_elapsed(startTime, stopTime);
++
++    std::cout << "Load factor = " << load_factor << std::endl
++              << "Num zstring insertions = " << num << std::endl
++              << "hashmap entries = " << map6.size() << std::endl
++              << "Time = " << time << std::endl;
++  }
++  else
++  {
++    std::cout << "Invalid test id" << std::endl;
++    return 2;
++  }
++
++  delete [] int_buf;
++  delete [] str_buf;
++
++  return 0;
++}
++
++
++}
++}
 === modified file 'src/unit_tests/unit_test_list.h'
 --- src/unit_tests/unit_test_list.h	2012-09-17 00:36:37 +0000
 +++ src/unit_tests/unit_test_list.h	2012-10-08 12:24:30 +0000
@@ -25,34 +25,49 @@
  namespace UnitTests {
    int runUriTest(int argc, char* argv[]);
++
    int runDebuggerProtocolTest(int argc, char* argv[]);
++
    int test_base64( int, char*[] );
++
    int test_base64_streambuf( int, char*[] );
++
    int test_fs_iterator( int, char*[] );
++
  #ifndef ZORBA_NO_ICU
    int test_icu_streambuf( int, char*[] );
  #endif /* ZORBA_NO_ICU */
++
    int test_json_parser( int, char*[] );
++
    int test_string( int, char*[] );
++
    int test_unique_ptr( int, char*[] );
++
    int test_fs_iterator( int, char*[] );
--  //int test_mem_manager( int, char*[] );
++
  #ifndef ZORBA_NO_FULL_TEXT
    int test_stemmer( int, char*[] );
    int test_thesaurus( int, char*[] );
    int test_tokenizer( int, char*[] );
  #endif /* ZORBA_NO_FULL_TEXT */
++
  #ifndef ZORBA_HAVE_UNIQUE_PTR
    int test_unique_ptr( int, char*[] );
  #endif /* ZORBA_HAVE_UNIQUE_PTR */
++
    int test_uuid( int, char*[] );
++
  #ifndef ZORBA_HAVE_UNORDERED_MAP
    int test_unordered_map( int, char*[] );
  #endif /* ZORBA_HAVE_UNORDERED_MAP */
++
  #ifndef ZORBA_HAVE_UNORDERED_SET
    int test_unordered_set( int, char*[] );
  #endif /* ZORBA_HAVE_UNORDERED_SET */
++  int test_hashmaps(int argc, char* argv[]);
++
    void initializeTestList();
  } // namespace UnitTests
 === modified file 'src/unit_tests/unit_tests.cpp'
 --- src/unit_tests/unit_tests.cpp	2012-09-17 00:36:37 +0000
 +++ src/unit_tests/unit_tests.cpp	2012-10-08 12:24:30 +0000
@@ -38,29 +38,41 @@
  void initializeTestList()
+ {
    libunittests["base64"] = test_base64;
++
    libunittests["base64_streambuf"] = test_base64_streambuf;
++
    libunittests["fs_iterator"] = test_fs_iterator;
--  //libunittests["memory_manager"] = test_mem_manager;
++
  #ifndef ZORBA_NO_ICU
    libunittests["icu_streambuf"] = test_icu_streambuf;
  #endif /* ZORBA_NO_ICU */
++
    libunittests["json_parser"] = test_json_parser;
++
    libunittests["string"] = test_string;
++
  #ifndef ZORBA_NO_FULL_TEXT
    libunittests["stemmer"] = test_stemmer;
    libunittests["thesaurus"] = test_thesaurus;
    libunittests["tokenizer"] = test_tokenizer;
  #endif /* ZORBA_NO_FULL_TEXT */
++
  #ifndef ZORBA_HAVE_UNIQUE_PTR
    libunittests["unique_ptr"] = test_unique_ptr;
  #endif /* ZORBA_HAVE_UNIQUE_PTR */
++
    libunittests["uuid"] = test_uuid;
++
  #ifndef ZORBA_HAVE_UNORDERED_MAP
    libunittests["unordered_map"] = test_unordered_map;
  #endif /* ZORBA_HAVE_UNORDERED_MAP */
++
  #ifndef ZORBA_HAVE_UNORDERED_SET
    libunittests["unordered_set"] = test_unordered_set;
  #endif /* ZORBA_HAVE_UNORDERED_SET */
++
++  libunittests["hashmaps"] = test_hashmaps;
++
    libunittests["uri"] = runUriTest;
  #ifdef ZORBA_WITH_DEBUGGER
 === modified file 'src/zorbaserialization/bin_archiver.cpp'
 --- src/zorbaserialization/bin_archiver.cpp	2012-09-17 00:36:37 +0000
 +++ src/zorbaserialization/bin_archiver.cpp	2012-10-08 12:24:30 +0000
@@ -137,7 +137,7 @@
  BinArchiver::BinArchiver(std::ostream* os)
+   :
    Archiver(true),
--  theStringPool(1024, false, false),
++  theStringPool(1024, false),
    theFirstBinaryString(0)
+ {
    this->is = NULL;
 === modified file 'src/zorbautils/hashmap.h'
 --- src/zorbautils/hashmap.h	2012-09-17 00:36:37 +0000
 +++ src/zorbautils/hashmap.h	2012-10-08 12:24:30 +0000
@@ -16,6 +16,7 @@
  #ifndef ZORBA_UTILS_HASHMAP_H
  #define ZORBA_UTILS_HASHMAP_H
++#include <vector>
  #include <cstddef>
  #include <zorba/config.h>
@@ -23,9 +24,10 @@
  #include "common/common.h"
  #include "zorbautils/fatal.h"
--#include "zorbautils/checked_vector.h"
  #include "zorbautils/mutex.h"
++#include "store/api/shared_types.h"
++
  namespace zorba
+ {
@@ -42,18 +44,81 @@
  template <class T, class V>
  class HashEntry
+ {
++  struct KeyHolder
++  {
++    char theKey[sizeof(T)];
++  };
++
++  struct ValueHolder
++  {
++    char theValue[sizeof(V)];
++  };
++
  public:
    bool         theIsFree;
--  T            theItem;
--  V            theValue;
++  KeyHolder    theKey;
++  ValueHolder  theValue;
    ptrdiff_t    theNext;  // offset from "this" to the next entry.
--  HashEntry() : theIsFree(true), theNext(0) { }
++  HashEntry()
++    :
++    theIsFree(true),
++    theNext(0)
++  {
++  }
++
++  HashEntry(const HashEntry<T, V>& other)
++  {
++    theIsFree = other.theIsFree;
++    theNext = other.theNext;
++    if (!theIsFree)
++    {
++      new (&theKey) T(other.key());
++      new (&theValue) V(other.value());
++    }
++  }
    ~HashEntry()
+   {
--    theIsFree = true;
--    theNext = 0;
++    if (!theIsFree)
++    {
++      key().~T();
++      value().~V();
++    }
++  }
++
++  HashEntry<T, V>& operator = (const HashEntry<T, V>& other)
++  {
++    if (theIsFree)
++    {
++      assert(false);
++
++      if (!other.theIsFree)
++      {
++        new (&theKey) T(other.key());
++        new (&theValue) V(other.value());
++      }
++    }
++    else
++    {
++      if (!other.theIsFree)
++      {
++        key() = other.key();
++        value() = other.value();
++      }
++      else
++      {
++        assert(false);
++
++        key().~T();
++        value().~V();
++      }
++    }
++
++    theIsFree = other.theIsFree;
++    theNext = other.theNext;
++
++    return *this;
+   }
    bool isFree() const
@@ -63,15 +128,39 @@
    void setFree()
+   {
--    theItem.~T();
++    key().~T();
++    value().~V();
      theIsFree = true;
++    theNext = 0;
+   }
    void unsetFree()
+   {
++    new (&theKey) T;
++    new (&theValue) V;
      theIsFree = false;
+   }
++  T& key()
++  {
++    return *reinterpret_cast<T*>(&theKey);
++  }
++
++  const T& key() const
++  {
++    return *reinterpret_cast<const T*>(&theKey);
++  }
++
++  const V& value() const
++  {
++    return *reinterpret_cast<const V*>(&theValue);
++  }
++
++  V& value()
++  {
++    return *reinterpret_cast<V*>(&theValue);
++  }
++
    void setNext(HashEntry* nextEntry)
+   {
      theNext = (nextEntry == NULL ? 0 : nextEntry - this);
@@ -105,17 +194,17 @@
    theHashTab     : The hash table. The table is implemented as a vector of hash
                     entries and is devided in 2 areas: Each entry between 0 and
--                   theHashTabSize - 1 is the head of a hash bucket. Each entry
--                   between theHashTabSize+1 and theHashTab.size()-1 is either
++                   theNumBuckets - 1 is the head of a hash bucket. Each entry
++                   between theNumBuckets+1 and theHashTab.size()-1 is either
                     a "collision" entry (i.e., it belongs to a hash bucket with
                     more than one entries) or a "free" entry (i.e. it does not
                     currently belong to any bucket, but is available for
                     allocation as a collision entry when needed). Free entries
                     in the collision area are linked in a free list. Entry
--                   theHashTab[theHashTabSize] is reserved as the head of this
++                   theHashTab[theNumBuckets] is reserved as the head of this
                     free list.
--  theHashTabSize : The current number of hash buckets in theHashTab.
--  theInitialSize : The initial number of hash buckets.
++  theNumBuckets : The current number of hash buckets in theHashTab.
++
    theLoadFactor  : The max fraction of non-empty hash buckets after which the
                     hash table is doubled in size.
@@ -130,11 +219,11 @@
      friend class HashMap;
    protected:
--    checked_vector<HashEntry<T, V> >*  theHashTab;
--    size_t                             thePos;
++    std::vector<HashEntry<T, V> >*  theHashTab;
++    csize                           thePos;
    protected:
--    iterator(checked_vector<HashEntry<T, V> >* ht, size_t pos)
++    iterator(std::vector<HashEntry<T, V> >* ht, csize pos)
+       :
        theHashTab(ht),
        thePos(pos)
@@ -150,7 +239,7 @@
        HashEntry<T, V>& entry = (*theHashTab)[thePos];
--      return entry.theItem;
++      return entry.key();
+     }
    public:
@@ -194,7 +283,7 @@
        const HashEntry<T, V>& entry = (*theHashTab)[thePos];
--      return std::pair<T, V>(entry.theItem, entry.theValue);
++      return std::pair<T, V>(entry.key(), entry.value());
+     }
      const T& getKey() const
@@ -203,16 +292,25 @@
        const HashEntry<T, V>& entry = (*theHashTab)[thePos];
--      return entry.theItem;
--    }
--
--    V& getValue() const
--    {
--      ZORBA_FATAL(thePos < theHashTab->size(), "");
--
--      HashEntry<T, V>& entry = (*theHashTab)[thePos];
--
--      return entry.theValue;
++      return entry.key();
++    }
++
++    const V& getValue() const
++    {
++      ZORBA_FATAL(thePos < theHashTab->size(), "");
++
++      HashEntry<T, V>& entry = (*theHashTab)[thePos];
++
++      return entry.value();
++    }
++
++    V& getValue()
++    {
++      ZORBA_FATAL(thePos < theHashTab->size(), "");
++
++      HashEntry<T, V>& entry = (*theHashTab)[thePos];
++
++      return entry.value();
+     }
      void setValue(const V& val)
@@ -221,7 +319,7 @@
        HashEntry<T, V>& entry = (*theHashTab)[thePos];
--      entry.theValue = val;
++      entry.value() = val;
+     }
    };
@@ -230,20 +328,22 @@
    static const double DEFAULT_LOAD_FACTOR;
  protected:
--  ulong                             theNumEntries;
--
--  size_t                            theHashTabSize;
--  size_t                            theInitialSize;
--  checked_vector<HashEntry<T, V> >  theHashTab;
--  double                            theLoadFactor;
--  C                                 theCompareFunction;
--
--  bool                              theUseTransfer;
--
--  SYNC_CODE(mutable Mutex           theMutex;)
--  SYNC_CODE(Mutex                 * theMutexp;)
--
--  int                               numCollisions;
++  std::vector<HashEntry<T, V> >  theHashTab;
++
++  csize                          theNumBuckets;
++
++  csize                          theNumEntries;
++
++  double                         theLoadFactor;
++
++  double                         theMaxLoad;
++
++  C                              theCompareFunction;
++
++  SYNC_CODE(mutable Mutex        theMutex;)
++  SYNC_CODE(Mutex              * theMutexp;)
++
++  csize                          theNumCollisions;
  public:
@@ -257,19 +357,20 @@
    depends on some parametrs (e.g. the collation or timezone). These parameters
    are provided as data members of the given comparison-function obj.
  ********************************************************************************/
--HashMap(const C& compFunction, size_t size, bool sync, bool useTransfer = false)
++HashMap(const C& compFunction, csize size, bool sync)
+   :
++  theNumBuckets(size),
    theNumEntries(0),
--  theHashTabSize(size),
--  theInitialSize(size),
--  theHashTab(computeTabSize(size)),
    theLoadFactor(DEFAULT_LOAD_FACTOR),
    theCompareFunction(compFunction),
--  theUseTransfer(useTransfer),
--  numCollisions(0)
++  theNumCollisions(0)
+ {
++  theHashTab.resize(computeCapacity(size));
++
    formatCollisionArea();
++  theMaxLoad = theNumBuckets * theLoadFactor;
++
    SYNC_CODE(theMutexp = (sync ? &theMutex : NULL);)
+ }
@@ -284,18 +385,19 @@
    theCompareFunction data member is initialized with the default constructor
    of the C class.
  ********************************************************************************/
--HashMap(size_t size, bool sync, bool useTransfer = false)
++HashMap(csize size, bool sync)
+   :
++  theNumBuckets(size),
    theNumEntries(0),
--  theHashTabSize(size),
--  theInitialSize(size),
--  theHashTab(computeTabSize(size)),
    theLoadFactor(DEFAULT_LOAD_FACTOR),
--  theUseTransfer(useTransfer),
--  numCollisions(0)
++  theNumCollisions(0)
+ {
++  theHashTab.resize(computeCapacity(size));
++
    formatCollisionArea();
++  theMaxLoad = theNumBuckets * theLoadFactor;
++
    SYNC_CODE(theMutexp = (sync ? &theMutex : NULL);)
+ }
@@ -340,7 +442,7 @@
  /*******************************************************************************
  ********************************************************************************/
--ulong size() const
++csize size() const
+ {
    return theNumEntries;
+ }
@@ -349,7 +451,7 @@
  /*******************************************************************************
  ********************************************************************************/
--size_t capacity() const
++csize capacity() const
+ {
    return theHashTab.size();
+ }
@@ -358,6 +460,24 @@
  /*******************************************************************************
  ********************************************************************************/
++csize bucket_count() const
++{
++  return theNumBuckets;
++}
++
++
++/*******************************************************************************
++
++********************************************************************************/
++csize collisions() const
++{
++  return theNumCollisions;
++}
++
++
++/*******************************************************************************
++
++********************************************************************************/
  C get_compare_function()
+ {
    return theCompareFunction;
@@ -388,13 +508,17 @@
  void clearNoSync()
+ {
    theNumEntries = 0;
--  numCollisions = 0;
--
--  size_t n = theHashTab.size();
--
--  for (size_t i = 0; i < n; ++i)
++  theNumCollisions = 0;
++
++  csize n = theHashTab.size();
++
++  HashEntry<T, V>* entry = &theHashTab[0];
++  HashEntry<T, V>* lastentry = &theHashTab[n];
++
++  for (; entry < lastentry; ++entry)
+   {
--    theHashTab[i].~HashEntry<T, V>();
++    if (!entry->isFree())
++      entry->setFree();
+   }
    formatCollisionArea();
@@ -406,13 +530,13 @@
  ********************************************************************************/
  iterator begin() const
+ {
--  return iterator(const_cast<checked_vector<HashEntry<T, V> >*>(&theHashTab), 0);
++  return iterator(const_cast<std::vector<HashEntry<T, V> >*>(&theHashTab), 0);
+ }
  iterator end() const
+ {
--  return iterator(const_cast<checked_vector<HashEntry<T, V> >*>(&theHashTab),
++  return iterator(const_cast<std::vector<HashEntry<T, V> >*>(&theHashTab),
                    theHashTab.size());
+ }
@@ -437,7 +561,7 @@
    while (entry != NULL)
+   {
--    if (equal(entry->theItem, item))
++    if (equal(entry->key(), item))
        return true;
      entry = entry->getNext();
@@ -468,7 +592,7 @@
    while (entry != NULL)
+   {
--    if (equal(entry->theItem, item))
++    if (equal(entry->key(), item))
        return iterator(&theHashTab, entry - &theHashTab[0]);
      entry = entry->getNext();
@@ -498,9 +622,9 @@
    while (entry != NULL)
+   {
--    if (equal(entry->theItem, item))
++    if (equal(entry->key(), item))
+     {
--      value = entry->theValue;
++      value = entry->value();
        return true;
+     }
@@ -527,8 +651,8 @@
    if (!found)
+   {
--    entry->theItem = pair.first;
--    entry->theValue = pair.second;
++    entry->key() = pair.first;
++    entry->value() = pair.second;
+   }
    return !found;
@@ -552,12 +676,12 @@
    if (!found)
+   {
--    entry->theItem = item;
--    entry->theValue = value;
++    entry->key() = item;
++    entry->value() = value;
+   }
    else
+   {
--    value = entry->theValue;
++    value = entry->value();
+   }
    return !found;
@@ -586,7 +710,7 @@
      while (entry != NULL)
+     {
--      if (equal(entry->theItem, item))
++      if (equal(entry->key(), item))
+       {
          found = true;
          break;
@@ -602,7 +726,7 @@
+   }
    else
+   {
--    entry->theValue = value;
++    entry->value() = value;
      return true;
+   }
+ }
@@ -616,13 +740,13 @@
+ {
    SYNC_CODE(AutoMutex lock(theMutexp);)
--  if (ite.thePos < theHashTabSize)
++  if (ite.thePos < theNumBuckets)
+   {
      eraseEntry(&theHashTab[ite.thePos], NULL);
+   }
    else
+   {
--    const T& item = theHashTab[ite.thePos].theItem;
++    const T& item = theHashTab[ite.thePos].key();
      ulong hval = hash(item);
@@ -665,7 +789,7 @@
    // If the item to remove is in the 1st entry of a bucket, then if the
    // bucket has no other entries, just call the destructor on that entry,
    // else copy the 2nd entry to the 1st entry and freeup the 2nd entry.
--  if (equal(entry->theItem, item))
++  if (equal(entry->key(), item))
+   {
      eraseEntry(entry, NULL);
      return true;
@@ -678,7 +802,7 @@
    while (entry != NULL)
+   {
--    if (equal(entry->theItem, item))
++    if (equal(entry->key(), item))
+     {
        eraseEntry(entry, preventry);
        return true;
@@ -698,9 +822,9 @@
  /*******************************************************************************
  ********************************************************************************/
--size_t computeTabSize(size_t size) const
++csize computeCapacity(csize size) const
+ {
--  return size + 32 + size/5;
++  return size + 32 + size / (5 - 10 * (theLoadFactor - 0.7)) ;
+ }
@@ -724,7 +848,7 @@
  ********************************************************************************/
  HashEntry<T, V>* bucket(ulong hvalue)
+ {
--  return &theHashTab[hvalue % theHashTabSize];
++  return &theHashTab[hvalue % theNumBuckets];
+ }
@@ -733,7 +857,7 @@
  ********************************************************************************/
  const HashEntry<T, V>* bucket(ulong hvalue) const
+ {
--  return &theHashTab[hvalue % theHashTabSize];
++  return &theHashTab[hvalue % theNumBuckets];
+ }
@@ -742,7 +866,7 @@
  ********************************************************************************/
  HashEntry<T, V>* freelist()
+ {
--  return &theHashTab[theHashTabSize];
++  return &theHashTab[theNumBuckets];
+ }
@@ -755,41 +879,38 @@
+   {
      if (entry->theNext == 0)
+     {
--      entry->~HashEntry<T, V>();
++      entry->setFree();
+     }
      else
+     {
        HashEntry<T, V>* nextEntry = entry->getNext();
        *entry = *nextEntry;
        entry->setNext(nextEntry->getNext());
--      nextEntry->~HashEntry<T, V>();
++      nextEntry->setFree();
        nextEntry->setNext(freelist()->getNext());
        freelist()->setNext(nextEntry);
+     }
--    theNumEntries--;
++    --theNumEntries;
--    if (theHashTabSize > theInitialSize &&
--        theNumEntries < (theHashTabSize / 2) * theLoadFactor)
++    if (theNumEntries < theMaxLoad / 2)
+     {
--      resizeHashTab(theHashTabSize / 2);
++      resizeHashTab(theNumBuckets / 2);
+     }
+   }
    else
+   {
      preventry->setNext(entry->getNext());
--    entry->~HashEntry<T, V>();
++    entry->setFree();
      entry->setNext(freelist()->getNext());
      freelist()->setNext(entry);
--    theNumEntries--;
--    numCollisions--;
++    --theNumEntries;
--    if (theHashTabSize > theInitialSize &&
--        theNumEntries < (theHashTabSize / 2) * theLoadFactor)
++    if (theNumEntries < theMaxLoad / 2)
+     {
--      resizeHashTab(theHashTabSize / 2);
++      resizeHashTab(theNumBuckets / 2);
+     }
+   }
+ }
@@ -812,7 +933,7 @@
    // If the hash bucket is empty, its 1st entry is used to store the new string.
    if (headEntry->isFree())
+   {
--    theNumEntries++;
++    ++theNumEntries;
      headEntry->unsetFree();
      return headEntry;
+   }
@@ -822,7 +943,7 @@
    while (currEntry != NULL)
+   {
--    if (equal(currEntry->theItem, item))
++    if (equal(currEntry->key(), item))
+     {
        found = true;
        return currEntry;
@@ -836,22 +957,22 @@
    // Do garbage collection if the hash table is more than 60% full. Note that
    // gc does NOT resize theHashTab, so after gc, the item still belongs to the
    // same bucket as before gc.
--  if (theNumEntries > theHashTabSize * theLoadFactor)
++  if (theNumEntries > theMaxLoad)
+   {
      garbageCollect();
      if (headEntry->isFree())
+     {
--      theNumEntries++;
++      ++theNumEntries;
        headEntry->unsetFree();
        return headEntry;
+     }
+   }
    // Double the size of the hash table if it is more than 60% full.
--  if (theNumEntries > theHashTabSize * theLoadFactor)
++  if (theNumEntries > theMaxLoad)
+   {
--    resizeHashTab(theHashTabSize * 2);
++    resizeHashTab(theNumBuckets * 2);
      goto retry;
+   }
@@ -859,8 +980,8 @@
    // collision list for its bucket. We place the new item right after the
    // headEntry for the bucket.
--  theNumEntries++;
--  numCollisions++;
++  ++theNumEntries;
++  ++theNumCollisions;
    // If no free entry exists, we extend the collision area of the hash table.
    if (freelist()->getNext() == 0)
@@ -886,10 +1007,15 @@
  ********************************************************************************/
  void extendCollisionArea()
+ {
--  size_t oldSize = theHashTab.size();
--  size_t numCollisionEntries = oldSize - theHashTabSize;
--  size_t newSize = theHashTabSize + 2 * numCollisionEntries;
++  csize oldSize = theHashTab.size();
++  csize numCollisionEntries = oldSize - theNumBuckets;
++  csize newSize = theNumBuckets + 2 * numCollisionEntries;
++  /*
++  std::cout << "Extending collision area" << std::endl
++            << "numBuckets = " << theNumBuckets << " numCollisionEntries = "
++            << numCollisionEntries << std::endl << std::endl;
++  */
    //foo();  for setting a breakpoint
    theHashTab.resize(newSize);
@@ -909,7 +1035,8 @@
      firstentry = freelist();
    HashEntry<T, V>* lastentry = &theHashTab[theHashTab.size() - 1];
--  for (HashEntry<T, V>* entry = firstentry; entry < lastentry; entry++)
++
++  for (HashEntry<T, V>* entry = firstentry; entry < lastentry; ++entry)
      entry->theNext = 1;
    lastentry->theNext = 0;
@@ -919,31 +1046,34 @@
  /*******************************************************************************
  ********************************************************************************/
--void resizeHashTab(size_t newSize)
++void resizeHashTab(csize newSize)
+ {
--  HashEntry<T, V>* entry;
--  HashEntry<T, V>* oldentry;
++  if (newSize == 0)
++    newSize = 3;
++
++  csize oldcap = theHashTab.size();
++  csize newcap = computeCapacity(newSize);
    // Create a new vector of new size and swap theHashTab with this new vector
--  checked_vector<HashEntry<T, V> > oldTab(computeTabSize(newSize));
++  std::vector<HashEntry<T, V> > oldTab(newcap);
    theHashTab.swap(oldTab);
--  size_t oldsize = oldTab.size();
--  theHashTabSize = newSize;
++  theNumBuckets = newSize;
++  theMaxLoad = theNumBuckets * theLoadFactor;
    formatCollisionArea();
--  numCollisions = 0;
++  HashEntry<T, V>* entry;
++  HashEntry<T, V>* oldentry = &oldTab[0];
++  HashEntry<T, V>* lastentry = &oldTab[oldcap];
    // Now rehash every entry
--  for (size_t i = 0; i < oldsize; i++)
++  for (; oldentry < lastentry; ++oldentry)
+   {
--    oldentry = &oldTab[i];
--
      if (oldentry->isFree())
        continue;
--    entry = bucket (hash(oldentry->theItem));
++    entry = bucket(hash(oldentry->key()));
      if (!entry->isFree())
+     {
@@ -962,12 +1092,11 @@
        freelist()->setNext(entry->getNext());
        entry->setNext(headEntry->getNext());
        headEntry->setNext(entry);
--      numCollisions++;
+     }
--    entry->theItem = oldentry->theItem;
--    entry->theValue = oldentry->theValue;
      entry->unsetFree();
++    entry->key() = oldentry->key();
++    entry->value() = oldentry->value();
+   }
+ }
 === modified file 'src/zorbautils/hashset.h'
 --- src/zorbautils/hashset.h	2012-09-17 00:36:37 +0000
 +++ src/zorbautils/hashset.h	2012-10-08 12:24:30 +0000
@@ -45,16 +45,16 @@
  typedef typename HashMap<T, DummyHashValue, C>::iterator iterator;
--HashSet(const C& compFunction, ulong size, bool sync, bool useTransfer = false)
++HashSet(const C& compFunction, ulong size, bool sync)
+   :
--  HashMap<T, DummyHashValue, C>(compFunction, size, sync, useTransfer)
++  HashMap<T, DummyHashValue, C>(compFunction, size, sync)
+ {
+ }
  HashSet(ulong size, bool sync)
+   :
--  HashMap<T, DummyHashValue, C>(size, sync, false)
++  HashMap<T, DummyHashValue, C>(size, sync)
+ {
+ }
@@ -100,12 +100,7 @@
    if (!found)
+   {
--    /*
--    if (this->theUseTransfer)
--      entry->theItem.transfer(item);
--    else
--    */
--      entry->theItem = item;
++    entry->key() = item;
+   }
    return !found;
@@ -129,19 +124,13 @@
    entry = this->hashInsert(item, this->hash(item), found);
    if (!found)
+   {
--    /*
--    if (this->theUseTransfer)
--      entry->theItem.transfer(item);
--    else
--    */
--      entry->theItem = item;
--
--    outItem = entry->theItem;
++    entry->key() = item;
++    outItem = entry->key();
      return true;
+   }
    else
+   {
--    outItem = entry->theItem;
++    outItem = entry->key();
      return false;
+   }
+ }
@@ -164,13 +153,13 @@
    entry = this->hashInsert(item, this->hash(item), found);
    if (!found)
+   {
--    entry->theItem = item;
--    outItem = entry->theItem;
++    entry->key() = item;
++    outItem = entry->key();
      return true;
+   }
    else
+   {
--    outItem = entry->theItem;
++    outItem = entry->key();
      return false;
+   }
+ }