migration C#, Java, C++ (day 13), HashTable
The one who has not used a Dictionary in C# is definitely not a programmer. You need Dictionaries to lookup values quickly. But how do they work?
Dictionaries use HashTables. These are, to simplify it here, large arrays of linked lists.
To demonstrate the technique I chose small arrays of 5 elements in my example source codes. Otherwise it would be difficult to produce duplicate entries, because hash values are supposed to have as few duplicates as possible.
Let’s say you have 6 objects and an algorithm that produces equally distributed values between 0 and 7 (see picture) for each object. Now you do not have to walk through a list anymore to find the objects. Simply use that hash value (0 to 7) to check, which of the 8 lists you have to look at. The number of search steps would ideally be only 1/8 compared to a list. Unfortunately hash values are not unique. Therefore you can have many entries for one hash value. In my examples I use a simple linked list to store them.
The C# Dictionary class is obviously more complex and uses larger arrays and nested search algorithms. Well, the following simplified examples are easier to grasp.
HashTables
using System; namespace Day13b { class Program { public class HashTable<K, V> { public class Entry { public K key; public V value; public Entry next = null; public Entry(K xKey, V xValue) { key = xKey; value = xValue; } } // class private readonly uint _HashTableSize; private readonly Entry[] _HashTable; public HashTable(uint xHashTableSize) { _HashTableSize = xHashTableSize; _HashTable = new Entry[xHashTableSize]; for (int i = 0; i < xHashTableSize; i++) _HashTable[i] = null; } // constructor public void Print() { for (int i = 0; i < _HashTableSize; i++) { if (_HashTable[i] == null) continue; Entry lEntry = _HashTable[i]; while (lEntry != null) { Entry lNext = lEntry.next; Console.WriteLine("HashTable[" + i + "] has key " + lEntry.key); lEntry = lNext; } } } // public uint GetHashCode(string xKey) { uint lHash = 2166136261; // initial FNV www.isthe.com/chongo/tech/comp/fnv for (int i = 0; i < xKey.Length; i++) { lHash = lHash ^ (xKey[i]); lHash = lHash * 16777619; // FNVMultiple } return lHash % _HashTableSize; } // public uint GetHashCode(uint xKey) { return xKey % _HashTableSize; } // public uint GetHashCode(K xKey) { if (xKey is string) return GetHashCode(xKey as string); if (xKey is uint) return GetHashCode(Convert.ToUInt32(xKey)); return (uint)xKey.GetHashCode() % _HashTableSize; // general approach } // public V AddOrReplace(K xKey, V xValue) { uint lHash = GetHashCode(xKey); Entry lEntry = _HashTable[lHash]; // new entry if (lEntry == null) { _HashTable[lHash] = new Entry(xKey, xValue); return default(V); } while (lEntry != null) { if (lEntry.key.Equals(xKey)) { // replace existing entry V lRemove = lEntry.value; lEntry.value = xValue; Console.WriteLine("Replacing " + lRemove); return lRemove; } if (lEntry.next == null) { // end of linked list lEntry.next = new Entry(xKey, xValue); return default(V); } lEntry = lEntry.next; } // non reachable code: return default(V); } // public V TryGetValue(K xKey) { uint lHash = GetHashCode(xKey); if (_HashTable[lHash] == null) return default(V); Entry lEntry = _HashTable[lHash]; while ((lEntry != null) && (!lEntry.key.Equals(xKey))) lEntry = lEntry.next; if (lEntry == null) return default(V); else return lEntry.value; } // public V Remove(K xKey) { uint lHash = GetHashCode(xKey); Entry lEntry = _HashTable[lHash]; if (lEntry == null) return default(V); Entry lPrevious = null; while (true) { if (lEntry == null) return default(V); // end of linked list if (lEntry.key.Equals(xKey)) { // remove existing entry V lRemove = lEntry.value; if (lPrevious != null) lPrevious.next = lEntry.next; else _HashTable[lHash] = null; // no previous element return lRemove; } lPrevious = lEntry; lEntry = lEntry.next; } // non reachable code //return default(V); // the compiler complains properly 🙂 } // }; // class public class Data { public int id; public string text; public override string ToString() { return "id: " + id + ", text: " + text; } // ~Data() { Console.WriteLine("good bye Data struct with id " + id + " and text " + text); } // destructor } // class static void Main(string[] args) { // test with KEY string and VALUE struct HashTable<string, Data> lHashTable = new HashTable<string, Data>(5); string[] lTestStrings = { "hello", "world", "hola", "mundo", "hallo", "Welt", "hej", "verden", "hello", "wereld" }; int i = 0; foreach (string s in lTestStrings) { Console.WriteLine("HashCode example: " + s + " = " + lHashTable.GetHashCode(s)); Data lData = new Data(); lData.text = s; lData.id = i++; lHashTable.AddOrReplace(s, lData); } lHashTable.Print(); Data lRemoved = lHashTable.Remove("hej"); lRemoved = null; // de-reference for garbage collector (just for demo purposes) lHashTable.Print(); Data a = lHashTable.TryGetValue("hej"); Data b = lHashTable.TryGetValue("hallo"); if (a == null) Console.WriteLine("a is null"); if (b != null) Console.WriteLine("b has been found"); Console.WriteLine(); Console.WriteLine(); // test with KEY integer and VALUE integer HashTable<uint, uint> lHashTable2 = new HashTable<uint, uint>(5); uint[] lTestIntegers = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }; foreach (uint x in lTestIntegers) { Console.WriteLine("HashCode example: " + x + " = " + lHashTable2.GetHashCode(x)); lHashTable2.AddOrReplace(x, x + 100); } lHashTable2.Print(); GC.Collect(); // calls destructors of de-referenced objects Console.ReadKey(); } // } // class } // namespace
example output:
HashCode example: hello = 3
HashCode example: world = 2
HashCode example: hola = 3
HashCode example: mundo = 0
HashCode example: hallo = 1
HashCode example: Welt = 2
HashCode example: hej = 0
HashCode example: verden = 0
HashCode example: hello = 3
Replacing id: 0, text: hello
HashCode example: wereld = 1
HashTable[0] has key mundo
HashTable[0] has key hej
HashTable[0] has key verden
HashTable[1] has key hallo
HashTable[1] has key wereld
HashTable[2] has key world
HashTable[2] has key Welt
HashTable[3] has key hello
HashTable[3] has key hola
HashTable[0] has key mundo
HashTable[0] has key verden
HashTable[1] has key hallo
HashTable[1] has key wereld
HashTable[2] has key world
HashTable[2] has key Welt
HashTable[3] has key hello
HashTable[3] has key hola
a is null
b has been foundHashCode example: 1 = 1
HashCode example: 2 = 2
HashCode example: 3 = 3
HashCode example: 4 = 4
HashCode example: 5 = 0
HashCode example: 6 = 1
HashCode example: 7 = 2
HashCode example: 8 = 3
HashCode example: 9 = 4
HashCode example: 10 = 0
HashCode example: 11 = 1
HashCode example: 12 = 2
HashTable[0] has key 5
HashTable[0] has key 10
HashTable[1] has key 1
HashTable[1] has key 6
HashTable[1] has key 11
HashTable[2] has key 2
HashTable[2] has key 7
HashTable[2] has key 12
HashTable[3] has key 3
HashTable[3] has key 8
HashTable[4] has key 4
HashTable[4] has key 9
good bye Data struct with id 0 and text hello
good bye Data struct with id 6 and text hej
One of the main differences between C# and C++ is the memory management. I call the garbage collector at the end with GC.Collect() to show that some objects were de-referenced during the runtime.
Let me also highlight one part of the C# code: GetHashCode(K xKey) {}.
Although there is a definition for string, the above method is called. In fact we have a duplicate overload as soon as we define K as string or unsigned integer. The reason is that the generic class is not specific enough. The type K is closer to K than string. It therefore has a higher priority.
public V AddOrReplace(K xKey, V xValue) { uint lHash = GetHashCode(xKey); ....
And now compare it with a clear type declaration.
foreach (uint x in lTestIntegers) { .... lHashTable2.AddOrReplace(x, x + 100); }
C++ frees memory as soon as you call delete. Compare the output. The destructors are called immediately. You could also use a kind of smart memory management in C++ as well. But why would you then use C++ at all? Such memory management indirectly questions the need for pure C++. The garbage collector for C# is highly optimized. If you need memory management, then don’t program in C++.
#include <string> #include <iostream> #include <cstdio> #include <algorithm> #include <vector> #include <cstdlib> using namespace std; template <typename K, typename V> class HashTable { public: class Entry { public: K key; V value; Entry *next; Entry(K xKey, V xValue) { key = xKey; value = xValue; next = nullptr; } }; // class private: int _HashTableSize; Entry **_HashTable; public: HashTable(int xHashTableSize) : _HashTableSize(xHashTableSize) { _HashTable = new Entry*[xHashTableSize]; for (int i = 0; i < xHashTableSize; i++) _HashTable[i] = nullptr; } // constructor ~HashTable() { for (int i = 0; i < _HashTableSize; i++) { if (_HashTable[i] == nullptr) continue; Entry *lThis = _HashTable[i]; while (lThis != nullptr) { if (lThis->next == nullptr) break; Entry *lNext = lThis->next; delete lThis; lThis = lNext; } } delete[] _HashTable; } // destructor void Print() { for (int i = 0; i < _HashTableSize; i++) { if (_HashTable[i] == nullptr) continue; Entry *lEntry = _HashTable[i]; while (lEntry != nullptr) { Entry *lNext = lEntry->next; cout << "HashTable[" << i << "] has key " << lEntry->key << endl; lEntry = lNext; } } } // int GetHashCode(const string &xKey) { size_t lHash = 2166136261U; // initial FNV www.isthe.com/chongo/tech/comp/fnv for (size_t i = 0; i < xKey.length(); i++) { lHash = lHash ^ (xKey[i]); lHash = lHash * 16777619; // FNVMultiple } return lHash % _HashTableSize; } // int GetHashCode(int xKey) { return xKey % _HashTableSize; } // V AddOrReplace(K xKey, V xValue) { int lHash = GetHashCode(xKey); Entry *lEntry = _HashTable[lHash]; // new entry if (lEntry == nullptr) { _HashTable[lHash] = new Entry(xKey, xValue); return V(); } while (lEntry != nullptr) { if (lEntry->key == xKey) { // replace existing entry V lRemove = lEntry->value; lEntry->value = xValue; return lRemove; } if (lEntry->next == nullptr) { // end of linked list lEntry->next = new Entry(xKey, xValue); return V(); } lEntry = lEntry->next; } // non reachable code: return V(); } // V TryGetValue(K xKey) { int lHash = GetHashCode(xKey); if (_HashTable[lHash] == nullptr) return V(); Entry *lEntry = _HashTable[lHash]; while ((lEntry != nullptr) && (lEntry->key != xKey)) lEntry = lEntry->next; if (lEntry == nullptr) return V(); else return lEntry->value; } // V Remove(K xKey) { int lHash = GetHashCode(xKey); Entry *lEntry = _HashTable[lHash]; if (lEntry == nullptr) return V(); Entry *lPrevious = nullptr; while (true) { if (lEntry == nullptr) return V(); // end of linked list if (lEntry->key == xKey) { // remove existing entry V lRemove = lEntry->value; if (lPrevious != nullptr) lPrevious->next = lEntry->next; else _HashTable[lHash] = nullptr; // no previous element delete lEntry; return lRemove; } lPrevious = lEntry; lEntry = lEntry->next; } // non reachable code return V(); } // }; // class struct Data { int id; string text; ~Data() { cout << "good bye Data struct with id " << id << " and text " << text << endl; } }; // struct void test() { // test with KEY string and VALUE struct HashTable<string, Data*> *lHashTable = new HashTable<string, Data*>(5); string lTestStrings[] = { "hello", "world", "hola", "mundo", "hallo", "Welt", "hej", "verden", "hello", "wereld" }; int i = 0; for each (string s in lTestStrings) { cout << "HashCode example: " << s << " = " << lHashTable->GetHashCode(s) << endl; Data *lData = new Data(); lData->text = s; lData->id = i++; Data *lReplaced = lHashTable->AddOrReplace(s, lData); if (lReplaced != nullptr) delete lReplaced; } lHashTable->Print(); Data *lRemoved = lHashTable->Remove("hej"); if (lRemoved != nullptr) delete lRemoved; lHashTable->Print(); Data *a = lHashTable->TryGetValue("hej"); Data *b = lHashTable->TryGetValue("hallo"); if (a == nullptr) cout << "a is null" << endl; if (b != nullptr) cout << "b has been found" << endl; delete lHashTable; cout << endl << endl; // test with KEY integer and VALUE integer HashTable<int, int> *lHashTable2 = new HashTable<int, int>(5); int lTestIntegers[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }; for each (int x in lTestIntegers) { cout << "HashCode example: " << x << " = " << lHashTable2->GetHashCode(x) << endl; lHashTable2->AddOrReplace(x, x + 100); } lHashTable2->Print(); delete lHashTable2; cin.get(); } //
example output:
HashCode example: hello = 3
HashCode example: world = 2
HashCode example: hola = 3
HashCode example: mundo = 0
HashCode example: hallo = 1
HashCode example: Welt = 2
HashCode example: hej = 0
HashCode example: verden = 0
HashCode example: hello = 3
good bye Data struct with id 0 and text hello
HashCode example: wereld = 1
HashTable[0] has key mundo
HashTable[0] has key hej
HashTable[0] has key verden
HashTable[1] has key hallo
HashTable[1] has key wereld
HashTable[2] has key world
HashTable[2] has key Welt
HashTable[3] has key hello
HashTable[3] has key hola
good bye Data struct with id 6 and text hej
HashTable[0] has key mundo
HashTable[0] has key verden
HashTable[1] has key hallo
HashTable[1] has key wereld
HashTable[2] has key world
HashTable[2] has key Welt
HashTable[3] has key hello
HashTable[3] has key hola
a is null
b has been foundHashCode example: 1 = 1
HashCode example: 2 = 2
HashCode example: 3 = 3
HashCode example: 4 = 4
HashCode example: 5 = 0
HashCode example: 6 = 1
HashCode example: 7 = 2
HashCode example: 8 = 3
HashCode example: 9 = 4
HashCode example: 10 = 0
HashCode example: 11 = 1
HashCode example: 12 = 2
HashTable[0] has key 5
HashTable[0] has key 10
HashTable[1] has key 1
HashTable[1] has key 6
HashTable[1] has key 11
HashTable[2] has key 2
HashTable[2] has key 7
HashTable[2] has key 12
HashTable[3] has key 3
HashTable[3] has key 8
HashTable[4] has key 4
HashTable[4] has key 9
public class Data { public int id; public String text; @Override public String toString() { return "id: " + id + ", text: " + text; } @Override protected void finalize() throws Throwable { try { System.out.println("good bye Data struct with id " + id + " and text " + text); } finally { super.finalize(); } } // destructor } // class //-------------------------------------------------------------------------------------------- import java.util.ArrayList; public class HashTable<K, V> { public class Entry { public K key; public V value; public Entry next = null; public Entry(K xKey, V xValue) { key = xKey; value = xValue; } // constructor } // class private final long _HashTableSize; private final ArrayList<Entry> _HashTable; public HashTable(int xHashTableSize) { _HashTableSize = xHashTableSize; _HashTable = new ArrayList<Entry>(); //_HashTable = new Entry[xHashTableSize]; // Java generic array creation error for (int i = 0; i < xHashTableSize; i++) _HashTable.add(null); } // constructor public final void Print() { for (int i = 0; i < _HashTableSize; i++) { if (_HashTable.get(i) == null) continue; Entry lEntry = _HashTable.get(i); while (lEntry != null) { Entry lNext = lEntry.next; System.out.println("HashTable[" + i + "] has key " + lEntry.key); lEntry = lNext; } } } // public final long hashCode(long xKey) { return xKey % _HashTableSize; } // public final long hashCode(K xKey) { if (xKey instanceof Long) return hashCode((Long) xKey); return Math.abs(xKey.hashCode() % _HashTableSize); // general approach } // public final V AddOrReplace(K xKey, V xValue) { int lHash = (int) hashCode(xKey); Entry lEntry = _HashTable.get(lHash); // new entry if (lEntry == null) { _HashTable.set(lHash, new Entry(xKey, xValue)); return null; } while (lEntry != null) { if (lEntry.key.equals(xKey)) { // replace existing entry V lRemove = lEntry.value; lEntry.value = xValue; System.out.println("Replacing " + lRemove); return lRemove; } if (lEntry.next == null) { // end of linked list lEntry.next = new Entry(xKey, xValue); return null; } lEntry = lEntry.next; } // non reachable code: return null; } // public final V TryGetValue(K xKey) { int lHash = (int) hashCode(xKey); if (_HashTable.get(lHash) == null) return null; Entry lEntry = _HashTable.get(lHash); while ((lEntry != null) && (!lEntry.key.equals(xKey))) lEntry = lEntry.next; if (lEntry == null) return null; else return lEntry.value; } // public final V Remove(K xKey) { int lHash = (int) hashCode(xKey); Entry lEntry = _HashTable.get(lHash); if (lEntry == null) return null; Entry lPrevious = null; while (true) { if (lEntry == null) return null; // end of linked list if (lEntry.key.equals(xKey)) { // remove existing entry V lRemove = lEntry.value; if (lPrevious != null) lPrevious.next = lEntry.next; else // no previous element _HashTable.set(lHash, null); return lRemove; } lPrevious = lEntry; lEntry = lEntry.next; } // non reachable code //return null; } // } // class //-------------------------------------------------------------------------------------------- import java.io.IOException; public class Program { public static void main(String[] args) { try { HashTable<String, Data> lHashTable = new HashTable<>(5); //= new HashTable<>(5); String[] lTestStrings = {"hello", "world", "hola", "mundo", "hallo", "Welt", "hej", "verden", "hello", "wereld"}; int i = 0; for (String s : lTestStrings) { System.out.println("HashCode example: " + s + " = " + lHashTable.hashCode(s)); Data lData = new Data(); lData.text = s; lData.id = i++; lHashTable.AddOrReplace(s, lData); } lHashTable.Print(); Data lRemoved = lHashTable.Remove("hej"); lRemoved = null; // de-reference for garbage collector (just for demo purposes) lHashTable.Print(); Data a = lHashTable.TryGetValue("hej"); Data b = lHashTable.TryGetValue("hallo"); if (a == null) System.out.println("a is null"); if (b != null) System.out.println("b has been found"); System.out.println(); System.out.println(); // test with KEY integer and VALUE integer HashTable<Integer, Integer> lHashTable2 = new HashTable<>(5); int[] lTestIntegers = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; for (int x : lTestIntegers) { System.out.println("HashCode example: " + x + " = " + lHashTable2.hashCode(x)); lHashTable2.AddOrReplace(x, x + 100); } lHashTable2.Print(); System.gc(); // calls destructors of de-referenced objects System.in.read(); } catch (IOException ex) { System.out.println("IOException: " + ex.getMessage()); } } // } // class
example output:
HashCode example: hello = 2
HashCode example: world = 2
HashCode example: hola = 0
HashCode example: mundo = 2
HashCode example: hallo = 3
HashCode example: Welt = 2
HashCode example: hej = 1
HashCode example: verden = 2
HashCode example: hello = 2
Replacing id: 0, text: hello
HashCode example: wereld = 3
HashTable[0] has key hola
HashTable[1] has key hej
HashTable[2] has key hello
HashTable[2] has key world
HashTable[2] has key mundo
HashTable[2] has key Welt
HashTable[2] has key verden
HashTable[3] has key hallo
HashTable[3] has key wereld
HashTable[0] has key hola
HashTable[2] has key hello
HashTable[2] has key world
HashTable[2] has key mundo
HashTable[2] has key Welt
HashTable[2] has key verden
HashTable[3] has key hallo
HashTable[3] has key wereld
a is null
b has been foundHashCode example: 1 = 1
HashCode example: 2 = 2
HashCode example: 3 = 3
HashCode example: 4 = 4
HashCode example: 5 = 0
HashCode example: 6 = 1
HashCode example: 7 = 2
HashCode example: 8 = 3
HashCode example: 9 = 4
HashCode example: 10 = 0
HashCode example: 11 = 1
HashCode example: 12 = 2
HashTable[0] has key 5
HashTable[0] has key 10
HashTable[1] has key 1
HashTable[1] has key 6
HashTable[1] has key 11
HashTable[2] has key 2
HashTable[2] has key 7
HashTable[2] has key 12
HashTable[3] has key 3
HashTable[3] has key 8
HashTable[4] has key 4
HashTable[4] has key 9
good bye Data struct with id 0 and text hello
good bye Data struct with id 6 and text hej
Posted on March 19, 2014, in C#, C++, Java and tagged C#, C# to C++, C++ to Java, conversion, direct code comparison, Java, Java to C++, migration. Bookmark the permalink. Leave a comment.
Leave a comment
Comments 0