-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhashtable.py
218 lines (193 loc) · 8.83 KB
/
hashtable.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!python
from linkedlist import LinkedList
class HashTable(object):
def __init__(self, init_size=8):
"""Initialize this hash table with the given initial size."""
self.buckets = [LinkedList() for i in range(init_size)]
# self.buckets = [LinkedList()] * init_size
self.size = 0 # Number of key-value entries
def __str__(self):
"""Return a formatted string representation of this hash table."""
items = ['{!r}: {!r}'.format(key, val) for key, val in self.items()]
return '{' + ', '.join(items) + '}'
def __repr__(self):
"""Return a string representation of this hash table."""
return 'HashTable({!r})'.format(self.items())
def _bucket_index(self, key):
"""Return the bucket index where the given key would be stored."""
return hash(key) % len(self.buckets)
def load_factor(self):
"""Return the load factor, the ratio of number of entries to buckets.
Best and worst case running time: ??? under what conditions? [TODO]"""
# TODO: Calculate load factor
# return ...
return self.size / len(self.buckets)
def keys(self):
"""Return a list of all keys in this hash table.
Best and worst case running time: ??? under what conditions? [TODO]"""
# Collect all keys in each of the buckets
all_keys = []
for bucket in self.buckets:
for key, value in bucket.items():
all_keys.append(key)
return all_keys
def values(self):
"""Return a list of all values in this hash table.
Best and worst case running time: ??? under what conditions? [TODO]"""
# Collect all values in each of the buckets
all_values = []
for bucket in self.buckets:
for key, value in bucket.items():
all_values.append(value)
return all_values
def items(self):
"""Return a list of all entries (key-value pairs) in this hash table.
Best and worst case running time: ??? under what conditions? [TODO]"""
# Collect all pairs of key-value entries in each of the buckets
all_items = []
for bucket in self.buckets:
all_items.extend(bucket.items())
return all_items
def length(self):
"""Return the number of key-value entries by traversing its buckets.
Best and worst case running time: ??? under what conditions? [TODO]"""
# Count number of key-value entries in each of the buckets
item_count = 0
for bucket in self.buckets:
item_count += bucket.length()
return item_count
# Equivalent to this list comprehension:
return sum(bucket.length() for bucket in self.buckets)
def contains(self, key):
"""Return True if this hash table contains the given key, or False.
Best case running time: ??? under what conditions? [TODO]
Worst case running time: ??? under what conditions? [TODO]"""
# Find the bucket the given key belongs in
index = self._bucket_index(key)
bucket = self.buckets[index]
# Check if an entry with the given key exists in that bucket
entry = bucket.find(lambda key_value: key_value[0] == key)
return entry is not None # True or False
def get(self, key):
"""Return the value associated with the given key, or raise KeyError.
Best case running time: ??? under what conditions? [TODO]
Worst case running time: ??? under what conditions? [TODO]"""
# Find the bucket the given key belongs in
index = self._bucket_index(key)
bucket = self.buckets[index]
# Find the entry with the given key in that bucket, if one exists
entry = bucket.find(lambda key_value: key_value[0] == key)
if entry is not None: # Found
# Return the given key's associated value
assert isinstance(entry, tuple)
assert len(entry) == 2
return entry[1]
else: # Not found
raise KeyError('Key not found: {}'.format(key))
def set(self, key, value):
"""Insert or update the given key with its associated value.
Best case running time: ??? under what conditions? [TODO]
Worst case running time: ??? under what conditions? [TODO]"""
# Find the bucket the given key belongs in
index = self._bucket_index(key)
bucket = self.buckets[index]
# Find the entry with the given key in that bucket, if one exists
# Check if an entry with the given key exists in that bucket
entry = bucket.find(lambda key_value: key_value[0] == key)
if entry is not None: # Found
# In this case, the given key's value is being updated
# Remove the old key-value entry from the bucket first
bucket.delete(entry)
self.size -= 1
# Insert the new key-value entry into the bucket in either case
bucket.append((key, value))
self.size += 1
# TODO: Check if the load factor exceeds a threshold such as 0.75
if self.load_factor() > 0.75:
# TODO: If so, automatically resize to reduce the load factor
self._resize()
def delete(self, key):
"""Delete the given key and its associated value, or raise KeyError.
Best case running time: ??? under what conditions? [TODO]
Worst case running time: ??? under what conditions? [TODO]"""
# Find the bucket the given key belongs in
index = self._bucket_index(key)
bucket = self.buckets[index]
# Find the entry with the given key in that bucket, if one exists
entry = bucket.find(lambda key_value: key_value[0] == key)
if entry is not None: # Found
# Remove the key-value entry from the bucket
bucket.delete(entry)
self.size -= 1
else: # Not found
raise KeyError('Key not found: {}'.format(key))
def _resize(self, new_size=None):
"""Resize this hash table's buckets and rehash all key-value entries.
Should be called automatically when load factor exceeds a threshold
such as 0.75 after an insertion (when set is called with a new key).
Best and worst case running time: ??? under what conditions? [TODO]
Best and worst case space usage: ??? what uses this memory? [TODO]"""
# If unspecified, choose new size dynamically based on current size
if new_size is None:
new_size = len(self.buckets) * 2 # Double size
# Option to reduce size if buckets are sparsely filled (low load factor)
elif new_size is 0:
new_size = len(self.buckets) / 2 # Half size
# TODO: Get a list to temporarily hold all current key-value entries
# TODO: Create a new list of new_size total empty linked list buckets
# TODO: Insert each key-value entry into the new list of buckets,
# which will rehash them into a new bucket index based on the new size
#Create a temp list
temp_list = self.items()
# Re-initialize
# self.__init__(new_size)
self.buckets = [LinkedList() for _ in range(new_size)]
# self.buckets = [LinkedList()] * new_size
#Set the size of the new list to 0 AKA empty
self.size = 0
#Inserting the key value to the new list
for key,value in temp_list:
self.set(key, value)
def test_hash_table():
ht = HashTable(4)
print('HashTable: ' + str(ht))
print('Setting entries:')
ht.set('I', 1)
print('set(I, 1): ' + str(ht))
ht.set('V', 5)
print('set(V, 5): ' + str(ht))
print('size: ' + str(ht.size))
print('length: ' + str(ht.length()))
print('buckets: ' + str(len(ht.buckets)))
print('load_factor: ' + str(ht.load_factor()))
ht.set('X', 10)
print('set(X, 10): ' + str(ht))
ht.set('L', 50) # Should trigger resize
print('set(L, 50): ' + str(ht))
print('size: ' + str(ht.size))
print('length: ' + str(ht.length()))
print('buckets: ' + str(len(ht.buckets)))
print('load_factor: ' + str(ht.load_factor()))
print('Getting entries:')
print('get(I): ' + str(ht.get('I')))
print('get(V): ' + str(ht.get('V')))
print('get(X): ' + str(ht.get('X')))
print('get(L): ' + str(ht.get('L')))
print('contains(X): ' + str(ht.contains('X')))
print('contains(Z): ' + str(ht.contains('Z')))
print('Deleting entries:')
ht.delete('I')
print('delete(I): ' + str(ht))
ht.delete('V')
print('delete(V): ' + str(ht))
ht.delete('X')
print('delete(X): ' + str(ht))
ht.delete('L')
print('delete(L): ' + str(ht))
print('contains(X): ' + str(ht.contains('X')))
print('size: ' + str(ht.size))
print('length: ' + str(ht.length()))
print('buckets: ' + str(len(ht.buckets)))
print('load_factor: ' + str(ht.load_factor()))
if __name__ == '__main__':
test_hash_table()