migration C#, Java, C++ (day 13), HashTable

The one who has not used a Dictionary in C# is definitely not a programmer. You need Dictionaries to lookup values quickly. But how do they work?
Dictionaries use HashTables. These are, to simplify it here, large arrays of linked lists.

HashTable

To demonstrate the technique I chose small arrays of 5 elements in my example source codes. Otherwise it would be difficult to produce duplicate entries, because hash values are supposed to have as few duplicates as possible.

Let’s say you have 6 objects and an algorithm that produces equally distributed values between 0 and 7 (see picture) for each object. Now you do not have to walk through a list anymore to find the objects. Simply use that hash value (0 to 7) to check, which of the 8 lists you have to look at. The number of search steps would ideally be only 1/8 compared to a list. Unfortunately hash values are not unique. Therefore you can have many entries for one hash value. In my examples I use a simple linked list to store them.

The C# Dictionary class is obviously more complex and uses larger arrays and nested search algorithms. Well, the following simplified examples are easier to grasp.

HashTables

using System;

namespace Day13b {
  class Program {

    public class HashTable<K, V> {
      public class Entry {
        public K key;
        public V value;
        public Entry next = null;

        public Entry(K xKey, V xValue) { key = xKey; value = xValue; }
      }  // class

      private readonly uint _HashTableSize;
      private readonly Entry[] _HashTable;

      public HashTable(uint xHashTableSize) {
        _HashTableSize = xHashTableSize;
        _HashTable = new Entry[xHashTableSize];
        for (int i = 0; i < xHashTableSize; i++) _HashTable[i] = null;
      } // constructor

      public void Print() {
        for (int i = 0; i < _HashTableSize; i++) {
          if (_HashTable[i] == null) continue;
          Entry lEntry = _HashTable[i];
          while (lEntry != null) {
            Entry lNext = lEntry.next;
            Console.WriteLine("HashTable[" + i + "] has key " + lEntry.key);
            lEntry = lNext;
          }
        }
      } //

      public uint GetHashCode(string xKey) {
        uint lHash = 2166136261;  // initial FNV www.isthe.com/chongo/tech/comp/fnv
        for (int i = 0; i < xKey.Length; i++) {
          lHash = lHash ^ (xKey[i]);
          lHash = lHash * 16777619;  // FNVMultiple
        }
        return lHash % _HashTableSize;
      } //

      public uint GetHashCode(uint xKey) {
        return xKey % _HashTableSize;
      } //

      public uint GetHashCode(K xKey) {
        if (xKey is string) return GetHashCode(xKey as string);
        if (xKey is uint) return GetHashCode(Convert.ToUInt32(xKey));
        return (uint)xKey.GetHashCode() % _HashTableSize; // general approach         
      } //



      public V AddOrReplace(K xKey, V xValue) {
        uint lHash = GetHashCode(xKey);
        Entry lEntry = _HashTable[lHash];

        // new entry
        if (lEntry == null) {
          _HashTable[lHash] = new Entry(xKey, xValue);
          return default(V);
        }

        while (lEntry != null) {
          if (lEntry.key.Equals(xKey)) {
            // replace existing entry  
            V lRemove = lEntry.value;
            lEntry.value = xValue;
            Console.WriteLine("Replacing " + lRemove);
            return lRemove;
          }
          if (lEntry.next == null) {
            // end of linked list
            lEntry.next = new Entry(xKey, xValue);
            return default(V);
          }
          lEntry = lEntry.next;
        }

        // non reachable code:
        return default(V);
      } //

      public V TryGetValue(K xKey) {
        uint lHash = GetHashCode(xKey);
        if (_HashTable[lHash] == null) return default(V);

        Entry lEntry = _HashTable[lHash];
        while ((lEntry != null) && (!lEntry.key.Equals(xKey))) lEntry = lEntry.next;

        if (lEntry == null) return default(V);
        else return lEntry.value;
      } //

      public V Remove(K xKey) {
        uint lHash = GetHashCode(xKey);
        Entry lEntry = _HashTable[lHash];
        if (lEntry == null) return default(V);

        Entry lPrevious = null;
        while (true) {
          if (lEntry == null) return default(V); // end of linked list
          if (lEntry.key.Equals(xKey)) {
            // remove existing entry  
            V lRemove = lEntry.value;
            if (lPrevious != null) lPrevious.next = lEntry.next;
            else _HashTable[lHash] = null; // no previous element
            return lRemove;
          }
          lPrevious = lEntry;
          lEntry = lEntry.next;
        }

        // non reachable code
        //return default(V);  // the compiler complains properly 🙂
      } //

    }; // class


    public class Data {
      public int id;
      public string text;

      public override string ToString() {
        return "id: " + id + ", text: " + text;
      } //

      ~Data() {
        Console.WriteLine("good bye Data struct with id " + id + " and text " + text);
      } // destructor

    } // class


    static void Main(string[] args) {
      // test with KEY string and VALUE struct

      HashTable<string, Data> lHashTable = new HashTable<string, Data>(5);
      string[] lTestStrings = { "hello", "world", "hola", "mundo", "hallo", "Welt", "hej", "verden", "hello", "wereld" };
      int i = 0;
      foreach (string s in lTestStrings) {
        Console.WriteLine("HashCode example: " + s + " = " + lHashTable.GetHashCode(s));
        Data lData = new Data();
        lData.text = s;
        lData.id = i++;
        lHashTable.AddOrReplace(s, lData);
      }
      lHashTable.Print();
      Data lRemoved = lHashTable.Remove("hej");
      lRemoved = null;  // de-reference for garbage collector (just for demo purposes)
      lHashTable.Print();
      Data a = lHashTable.TryGetValue("hej");
      Data b = lHashTable.TryGetValue("hallo");
      if (a == null) Console.WriteLine("a is null");
      if (b != null) Console.WriteLine("b has been found");

      Console.WriteLine();
      Console.WriteLine();

      // test with KEY integer and VALUE integer

      HashTable<uint, uint> lHashTable2 = new HashTable<uint, uint>(5);
      uint[] lTestIntegers = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
      foreach (uint x in lTestIntegers) {
        Console.WriteLine("HashCode example: " + x + " = " + lHashTable2.GetHashCode(x));
        lHashTable2.AddOrReplace(x, x + 100);
      }
      lHashTable2.Print();

      GC.Collect();   // calls destructors of de-referenced objects
      Console.ReadKey();
    } //

  } // class
} // namespace

example output:
HashCode example: hello = 3
HashCode example: world = 2
HashCode example: hola = 3
HashCode example: mundo = 0
HashCode example: hallo = 1
HashCode example: Welt = 2
HashCode example: hej = 0
HashCode example: verden = 0
HashCode example: hello = 3
Replacing id: 0, text: hello
HashCode example: wereld = 1
HashTable[0] has key mundo
HashTable[0] has key hej
HashTable[0] has key verden
HashTable[1] has key hallo
HashTable[1] has key wereld
HashTable[2] has key world
HashTable[2] has key Welt
HashTable[3] has key hello
HashTable[3] has key hola
HashTable[0] has key mundo
HashTable[0] has key verden
HashTable[1] has key hallo
HashTable[1] has key wereld
HashTable[2] has key world
HashTable[2] has key Welt
HashTable[3] has key hello
HashTable[3] has key hola
a is null
b has been found

HashCode example: 1 = 1
HashCode example: 2 = 2
HashCode example: 3 = 3
HashCode example: 4 = 4
HashCode example: 5 = 0
HashCode example: 6 = 1
HashCode example: 7 = 2
HashCode example: 8 = 3
HashCode example: 9 = 4
HashCode example: 10 = 0
HashCode example: 11 = 1
HashCode example: 12 = 2
HashTable[0] has key 5
HashTable[0] has key 10
HashTable[1] has key 1
HashTable[1] has key 6
HashTable[1] has key 11
HashTable[2] has key 2
HashTable[2] has key 7
HashTable[2] has key 12
HashTable[3] has key 3
HashTable[3] has key 8
HashTable[4] has key 4
HashTable[4] has key 9
good bye Data struct with id 0 and text hello
good bye Data struct with id 6 and text hej

One of the main differences between C# and C++ is the memory management. I call the garbage collector at the end with GC.Collect() to show that some objects were de-referenced during the runtime.

Let me also highlight one part of the C# code: GetHashCode(K xKey) {}.
Although there is a definition for string, the above method is called. In fact we have a duplicate overload as soon as we define K as string or unsigned integer. The reason is that the generic class is not specific enough. The type K is closer to K than string. It therefore has a higher priority.

public V AddOrReplace(K xKey, V xValue) {
  uint lHash = GetHashCode(xKey);
  ....

And now compare it with a clear type declaration.

foreach (uint x in lTestIntegers) {
  ....
  lHashTable2.AddOrReplace(x, x + 100);
}

C++ frees memory as soon as you call delete. Compare the output. The destructors are called immediately. You could also use a kind of smart memory management in C++ as well. But why would you then use C++ at all? Such memory management indirectly questions the need for pure C++. The garbage collector for C# is highly optimized. If you need memory management, then don’t program in C++.

#include <string>
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <vector>
#include <cstdlib>

using namespace std;

template <typename K, typename V>
class HashTable {
public:
  class Entry {
  public:
    K key;
    V value;
    Entry *next;

    Entry(K xKey, V xValue)  { key = xKey; value = xValue;    next = nullptr; }
  }; // class

private:
  int _HashTableSize;
  Entry **_HashTable;

public:

  HashTable(int xHashTableSize) : _HashTableSize(xHashTableSize) {
    _HashTable = new Entry*[xHashTableSize];
    for (int i = 0; i < xHashTableSize; i++)  _HashTable[i] = nullptr;
  } // constructor

  ~HashTable() {
    for (int i = 0; i < _HashTableSize; i++) {
      if (_HashTable[i] == nullptr) continue;
      Entry *lThis = _HashTable[i];
      while (lThis != nullptr) {
        if (lThis->next == nullptr) break;
        Entry *lNext = lThis->next;
        delete lThis;
        lThis = lNext;
      }
    }
    delete[] _HashTable;
  } // destructor

  void Print() {
    for (int i = 0; i < _HashTableSize; i++) {
      if (_HashTable[i] == nullptr) continue;
      Entry *lEntry = _HashTable[i];
      while (lEntry != nullptr) {
        Entry *lNext = lEntry->next;
        cout << "HashTable[" << i << "] has key " << lEntry->key << endl;
        lEntry = lNext;
      }
    }
  } //

  int GetHashCode(const string &xKey) {
    size_t lHash = 2166136261U; // initial FNV www.isthe.com/chongo/tech/comp/fnv
    for (size_t i = 0; i < xKey.length(); i++) {
      lHash = lHash ^ (xKey[i]);
      lHash = lHash * 16777619;  // FNVMultiple
    }
    return lHash % _HashTableSize;
  } //

  int GetHashCode(int xKey) {
    return xKey % _HashTableSize;
  } //



  V AddOrReplace(K xKey, V xValue)  {
    int lHash = GetHashCode(xKey);
    Entry *lEntry = _HashTable[lHash];

    // new entry
    if (lEntry == nullptr)  {
      _HashTable[lHash] = new Entry(xKey, xValue);
      return V();
    }

    while (lEntry != nullptr) {
      if (lEntry->key == xKey) {
        // replace existing entry  
        V lRemove = lEntry->value;
        lEntry->value = xValue;
        return lRemove;
      }
      if (lEntry->next == nullptr) {
        // end of linked list
        lEntry->next = new Entry(xKey, xValue);
        return V();
      }
      lEntry = lEntry->next;
    }

    // non reachable code:
    return V();
  } //

  V TryGetValue(K xKey) {
    int lHash = GetHashCode(xKey);
    if (_HashTable[lHash] == nullptr) return V();

    Entry *lEntry = _HashTable[lHash];
    while ((lEntry != nullptr) && (lEntry->key != xKey)) lEntry = lEntry->next;

    if (lEntry == nullptr) return V();
    else return lEntry->value;
  } //

  V Remove(K xKey) {
    int lHash = GetHashCode(xKey);
    Entry *lEntry = _HashTable[lHash];
    if (lEntry == nullptr) return V();

    Entry *lPrevious = nullptr;
    while (true) {
      if (lEntry == nullptr) return V(); // end of linked list
      if (lEntry->key == xKey) {
        // remove existing entry  
        V lRemove = lEntry->value;
        if (lPrevious != nullptr) lPrevious->next = lEntry->next;
        else _HashTable[lHash] = nullptr; // no previous element
        delete lEntry;
        return lRemove;
      }
      lPrevious = lEntry;
      lEntry = lEntry->next;
    }

    // non reachable code
    return V();
  } //

}; // class

struct Data {
  int id;
  string text;
  ~Data() {
    cout << "good bye Data struct with id " << id << " and text " << text << endl;
  }
}; // struct


void test() {

  // test with KEY string and VALUE struct

  HashTable<string, Data*> *lHashTable = new HashTable<string, Data*>(5);
  string lTestStrings[] = { "hello", "world", "hola", "mundo", "hallo", "Welt", "hej", "verden", "hello", "wereld" };
  int i = 0;
  for each (string s in lTestStrings) {
    cout << "HashCode example: " << s << " = " << lHashTable->GetHashCode(s) << endl;
    Data *lData = new Data();
    lData->text = s;
    lData->id = i++;
    Data *lReplaced = lHashTable->AddOrReplace(s, lData);
    if (lReplaced != nullptr) delete lReplaced;
  }
  lHashTable->Print();
  Data *lRemoved = lHashTable->Remove("hej");
  if (lRemoved != nullptr) delete lRemoved;
  lHashTable->Print();
  Data *a = lHashTable->TryGetValue("hej");
  Data *b = lHashTable->TryGetValue("hallo");
  if (a == nullptr) cout << "a is null" << endl;
  if (b != nullptr) cout << "b has been found" << endl;

  delete lHashTable;

  cout << endl << endl;

  // test with KEY integer and VALUE integer

  HashTable<int, int> *lHashTable2 = new HashTable<int, int>(5);
  int lTestIntegers[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
  for each (int x in lTestIntegers) {
    cout << "HashCode example: " << x << " = " << lHashTable2->GetHashCode(x) << endl;
    lHashTable2->AddOrReplace(x, x + 100);
  }
  lHashTable2->Print();
  delete lHashTable2;

  cin.get();
} //

example output:
HashCode example: hello = 3
HashCode example: world = 2
HashCode example: hola = 3
HashCode example: mundo = 0
HashCode example: hallo = 1
HashCode example: Welt = 2
HashCode example: hej = 0
HashCode example: verden = 0
HashCode example: hello = 3
good bye Data struct with id 0 and text hello
HashCode example: wereld = 1
HashTable[0] has key mundo
HashTable[0] has key hej
HashTable[0] has key verden
HashTable[1] has key hallo
HashTable[1] has key wereld
HashTable[2] has key world
HashTable[2] has key Welt
HashTable[3] has key hello
HashTable[3] has key hola
good bye Data struct with id 6 and text hej
HashTable[0] has key mundo
HashTable[0] has key verden
HashTable[1] has key hallo
HashTable[1] has key wereld
HashTable[2] has key world
HashTable[2] has key Welt
HashTable[3] has key hello
HashTable[3] has key hola
a is null
b has been found

HashCode example: 1 = 1
HashCode example: 2 = 2
HashCode example: 3 = 3
HashCode example: 4 = 4
HashCode example: 5 = 0
HashCode example: 6 = 1
HashCode example: 7 = 2
HashCode example: 8 = 3
HashCode example: 9 = 4
HashCode example: 10 = 0
HashCode example: 11 = 1
HashCode example: 12 = 2
HashTable[0] has key 5
HashTable[0] has key 10
HashTable[1] has key 1
HashTable[1] has key 6
HashTable[1] has key 11
HashTable[2] has key 2
HashTable[2] has key 7
HashTable[2] has key 12
HashTable[3] has key 3
HashTable[3] has key 8
HashTable[4] has key 4
HashTable[4] has key 9

  public class Data {

    public int id;
    public String text;

    @Override
    public String toString() {
      return "id: " + id + ", text: " + text;
    }

    @Override
    protected void finalize() throws Throwable {
      try {
        System.out.println("good bye Data struct with id " + id + " and text " + text);
      } finally {
        super.finalize();
      }
    } // destructor

  } // class
//--------------------------------------------------------------------------------------------
import java.util.ArrayList;

public class HashTable<K, V> {

  public class Entry {

    public K key;
    public V value;
    public Entry next = null;

    public Entry(K xKey, V xValue) {
      key = xKey;
      value = xValue;
    } // constructor
  } // class

  private final long _HashTableSize;
  private final ArrayList<Entry> _HashTable;

  public HashTable(int xHashTableSize) {
    _HashTableSize = xHashTableSize;
    _HashTable = new ArrayList<Entry>();
    //_HashTable = new Entry[xHashTableSize]; // Java generic array creation error
    for (int i = 0; i < xHashTableSize; i++) _HashTable.add(null);
  } // constructor

  public final void Print() {
    for (int i = 0; i < _HashTableSize; i++) {
      if (_HashTable.get(i) == null) continue;
      Entry lEntry = _HashTable.get(i);
      while (lEntry != null) {
        Entry lNext = lEntry.next;
        System.out.println("HashTable[" + i + "] has key " + lEntry.key);
        lEntry = lNext;
      }
    }
  } //

  public final long hashCode(long xKey) {
    return xKey % _HashTableSize;
  } //

  public final long hashCode(K xKey) { 
    if (xKey instanceof Long) return hashCode((Long) xKey);
    return Math.abs(xKey.hashCode() % _HashTableSize); // general approach
  } //

  public final V AddOrReplace(K xKey, V xValue) {
    int lHash = (int) hashCode(xKey);
    Entry lEntry = _HashTable.get(lHash);

    // new entry
    if (lEntry == null) {
      _HashTable.set(lHash, new Entry(xKey, xValue));
      return null;
    }

    while (lEntry != null) {
      if (lEntry.key.equals(xKey)) {
        // replace existing entry 
        V lRemove = lEntry.value;
        lEntry.value = xValue;
        System.out.println("Replacing " + lRemove);
        return lRemove;
      }
      if (lEntry.next == null) {
        // end of linked list
        lEntry.next = new Entry(xKey, xValue);
        return null;
      }
      lEntry = lEntry.next;
    }

    // non reachable code:
    return null;
  } //

  public final V TryGetValue(K xKey) {
    int lHash = (int) hashCode(xKey);
    if (_HashTable.get(lHash) == null) return null;

    Entry lEntry = _HashTable.get(lHash);
    while ((lEntry != null) && (!lEntry.key.equals(xKey)))
      lEntry = lEntry.next;

    if (lEntry == null) return null;
    else return lEntry.value;
  } //

  public final V Remove(K xKey) {
    int lHash = (int) hashCode(xKey);
    Entry lEntry = _HashTable.get(lHash);
    if (lEntry == null) return null;

    Entry lPrevious = null;
    while (true) {
      if (lEntry == null) return null; // end of linked list          
      if (lEntry.key.equals(xKey)) {
        // remove existing entry 
        V lRemove = lEntry.value;
        if (lPrevious != null)
          lPrevious.next = lEntry.next;
        else // no previous element
          _HashTable.set(lHash, null);
        return lRemove;
      }
      lPrevious = lEntry;
      lEntry = lEntry.next;
    }

      // non reachable code
    //return null;
  } //
} // class
//--------------------------------------------------------------------------------------------

import java.io.IOException;

public class Program {

  public static void main(String[] args) {
    try {
      HashTable<String, Data> lHashTable = new HashTable<>(5);
      //= new HashTable<>(5);
      String[] lTestStrings = {"hello", "world", "hola", "mundo", "hallo", "Welt", "hej", "verden", "hello", "wereld"};
      int i = 0;
      for (String s : lTestStrings) {
        System.out.println("HashCode example: " + s + " = " + lHashTable.hashCode(s));
        Data lData = new Data();
        lData.text = s;
        lData.id = i++;
        lHashTable.AddOrReplace(s, lData);
      }
      lHashTable.Print();
      Data lRemoved = lHashTable.Remove("hej");
      lRemoved = null; // de-reference for garbage collector (just for demo purposes)
      lHashTable.Print();
      Data a = lHashTable.TryGetValue("hej");
      Data b = lHashTable.TryGetValue("hallo");
      if (a == null) System.out.println("a is null");
      if (b != null) System.out.println("b has been found");

      System.out.println();
      System.out.println();

      // test with KEY integer and VALUE integer
      HashTable<Integer, Integer> lHashTable2 = new HashTable<>(5);
      int[] lTestIntegers = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
      for (int x : lTestIntegers) {
        System.out.println("HashCode example: " + x + " = " + lHashTable2.hashCode(x));
        lHashTable2.AddOrReplace(x, x + 100);
      }
      lHashTable2.Print();

      System.gc(); // calls destructors of de-referenced objects
      System.in.read();
    } catch (IOException ex) {
      System.out.println("IOException: " + ex.getMessage());
    }
  } //
  
} // class

example output:
HashCode example: hello = 2
HashCode example: world = 2
HashCode example: hola = 0
HashCode example: mundo = 2
HashCode example: hallo = 3
HashCode example: Welt = 2
HashCode example: hej = 1
HashCode example: verden = 2
HashCode example: hello = 2
Replacing id: 0, text: hello
HashCode example: wereld = 3
HashTable[0] has key hola
HashTable[1] has key hej
HashTable[2] has key hello
HashTable[2] has key world
HashTable[2] has key mundo
HashTable[2] has key Welt
HashTable[2] has key verden
HashTable[3] has key hallo
HashTable[3] has key wereld
HashTable[0] has key hola
HashTable[2] has key hello
HashTable[2] has key world
HashTable[2] has key mundo
HashTable[2] has key Welt
HashTable[2] has key verden
HashTable[3] has key hallo
HashTable[3] has key wereld
a is null
b has been found

HashCode example: 1 = 1
HashCode example: 2 = 2
HashCode example: 3 = 3
HashCode example: 4 = 4
HashCode example: 5 = 0
HashCode example: 6 = 1
HashCode example: 7 = 2
HashCode example: 8 = 3
HashCode example: 9 = 4
HashCode example: 10 = 0
HashCode example: 11 = 1
HashCode example: 12 = 2
HashTable[0] has key 5
HashTable[0] has key 10
HashTable[1] has key 1
HashTable[1] has key 6
HashTable[1] has key 11
HashTable[2] has key 2
HashTable[2] has key 7
HashTable[2] has key 12
HashTable[3] has key 3
HashTable[3] has key 8
HashTable[4] has key 4
HashTable[4] has key 9
good bye Data struct with id 0 and text hello
good bye Data struct with id 6 and text hej

About Bastian M.K. Ohta

Happiness only real when shared.

Posted on March 19, 2014, in C#, C++, Java and tagged , , , , , , , . Bookmark the permalink. Leave a comment.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: