-
Notifications
You must be signed in to change notification settings - Fork 1
/
51_dict_implementation.py
362 lines (319 loc) · 9.14 KB
/
51_dict_implementation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# %% [markdown]
# # dicts in python
"""
also see c1_09 file
creating a dict + basic methods
eq and hash
crazy dict : override a key
OrderedDict + __reversed__
defaultdict + __missing__
chainMap
readonly dict with MappingProxyTypes
"""
# %%
my_dict = {'key1': "value1", "key2": 9, 45: "ff", (45, 78): 39}
print(my_dict)
# %%
print(dict(a=1, b=2))
# %% methods
my_dict = {"a":1, "b":2}
my_dict.clear()
print(my_dict)
# %%
print(dict.fromkeys(['a', 'b']))
print(dict.fromkeys('ab'))
#%%
aa = dict.fromkeys('abc')
print(aa.pop("b")) # pop returns none
print(aa) # but modified dict
#%%
aa = dict.fromkeys('abc')
print(aa.popitem()) # popitem pops last inserted element
print(aa) # but modified dict
#%% set default for a value
aa = dict.fromkeys('abc')
aa.setdefault("e","def")
print(aa["e"])
#%%
aa = dict.fromkeys('abcde')
aa.setdefault("e","def")
print(aa["e"])
# %%
aa["r"]
#%%
aa = {"a":1}
aa.update({"b":2})
print(aa)
aa.update({"b":3})
print(aa)
#%%
print(aa.get('a'))
print(aa.get('w'))
print(aa.get('w',[]))
#%% iterate through items
my_dict = {"key1":"val1","key2":"val2"}
for key, val in my_dict.items():
print(key,val)
for key in my_dict.keys():
print(key)
for val in my_dict.values():
print(val)
# %%
print({x: x**2 for x in range(3)})
# %%
my_not_working_dict = {[45, 45]: 45}
# %% Python dicts are indexed by keys that can be of any hashable type. A hashable object has a hash value that never changes during its lifetime and it can be compared to other objects. In addition two hashable objects that compare as equal must have the same hash value
print(1 == 1, 1 is 1)
#%%
print("aa" == "aa", "aa" is "aa")
#%%
print((1, 2) == (1, 2), (1, 2) is (1, 2))
#%%
print([1, 2] == [1, 2], [1, 2] is [1, 2])
print({1: 2} == {1: 2}, {1: 2} is {1: 2})
#%%
print((1, [2]) == (1, [2]), (1, [2]) is (1, [2]))
# %%
print(hash(1), hash("aa"), hash(((1, 2))), hash(((1, 2))))
# %%
print(hash([1, 2]))
# %%
# list has an __eq__ method but not a hash method
[1,2] in [[1,2],3] # not that it is the __eq__ that is used with for in
# %%
# let's build a class where the equality comparison always return true and the hash is constant
class HashAndEqClass:
def __hash__(self):
return 45
def __eq__(self, other):
return True
a = HashAndEqClass()
b = HashAndEqClass()
print(a == b, hash(a), hash(b))
print(a in [b])
print(a is b)
# Just because two objects have the same hash does not mean they are identical !
# notice a in [b] returns True. With in, we only look at __eq__ : see EqNoHashClass
# %% Note remember how (1,2) is (1,2) returns true ?
print((1,[2]) is (1,[2]))
# %% Note that there is only one None object, which is why we do comparison with if a is None and not if a == None
print(a == None)
print(a is None)
c = None
print(c is None)
# %%
class HashNoEqClass:
def __hash__(self):
return 45
a = HashNoEqClass()
b = HashNoEqClass()
print(a == b, a is b, a in [b])
# having the same hash is not enough to be equal !
# %%
class EqNoHashClass:
def __eq__(self, other):
return True
a = EqNoHashClass()
b = EqNoHashClass()
print(a == b, a in [b], a is b,) # as expected, only __eq__ is used to test if a in [b]
# %%
print(hash(a))
# %%
# A good practice when you want to define an __eq__ is to define a __hash__ too.
a = HashAndEqClass()
print({a: 5})
# %%
a = HashNoEqClass()
print({a: 5})
# %%
a = EqNoHashClass()
print({a: 5})
# %%
# Conclusion 1 : if you want to use an object as a key in a dict, it must be hashable
# %% bonus hash of strings are salted
hash("rrrr")
# %% [markdown] What about dicts ? Time to focus !
crazy_dict = {True: "yes", 1: "no", 1.0: "maybe"}
# %%
crazy_dict
# ? What happened here ?
#%% Let's decompose
decomposed_dict = dict()
decomposed_dict[True] = "yes"
print(decomposed_dict)
#%%
decomposed_dict[1] = "no"
print(decomposed_dict)
#%%
decomposed_dict[1.0] = "maybe"
print(decomposed_dict)
# ? So, can you guess ?
#%%
# It seems we override the keys.
print(1 == 1.0)
print(True == 1)
# %%
print(isinstance(True, int)) # bools are subtypes of int !
# which means you can do that
print(["value0", "value1"][True]) # ! but seriously, don't
# %% Ok, I have to confess : in fact, to override a key in a dict, the __eq__ is not enough : in fact, python dicts are backed by a hash table data structure which allows for fast lookup. If two values have the same hash (which is a lot faster to calculate than to check for equality), then we look at the eq method to be sure that the key are identical. Indeed, sometimes, different keys have the same hash (it is called a hash collision)
# * ROMAN, I think we should rethink the way we define the hash method. It would allow for faster lookups in the prefill and would probably prevent the overriding of textboxes in our dict : we could do that witout losing all the functionalities given by our custom __eq__
#%%
a = HashAndEqClass()
b = HashAndEqClass()
my_dict = {a:1, b:2}
my_dict
#%%
class Test:
def __init__(self, val, myhash,name):
self.val = val
self.hash = myhash
self.name = name
def __eq__(self, other):
return self.val == other.val
def __hash__(self):
return self.hash
def __repr__(self):
return f"{self.name}<v={self.val},h={self.hash}>"
a = Test(0,0,"a")
b = Test(1,1,"b")
c = Test(0,0,"c")
d = Test(0,1,"d")
e = Test(1,0,"e")
print(a,b,c,d,e)
# %%
dct = {a:1, b:2, c:3, d:4, e:5}
print(dct)
# to overwrite a value : we need the same eq and same hash. Note that the key name is not changed (which makes sense because a and c are supposed to be identical as far as the dict is concerned)
# %% and what about our crazy dict ?
print(True == 1 == 1.0)
print(hash(True),hash(1),hash(1.0))
print(1 is 1, 1.0 is 1.0, True is True)
# %%
print(1 is True, 1 is 1.0, 1.0 is True) # so a dict dont look at identity !
# %%
from collections import OrderedDict
# %% [markdown]
# # Other types of dict
# if key order is important for your algorithm to work, use OrderedDict
print(OrderedDict(key1='value1', key2=2, key3="rrr"))
# %%
print(OrderedDict({"key1": 'value1', "key2": 2, "key3": "rrr"}))
# %%
orddict = OrderedDict({ f"key_{x}":f"value_{x}" for x in range(10)})
print(orddict)
# %%
orddict.popitem(last=True) # last defaults to True
# %%
orddict.popitem(last=False)
# %%
print(orddict)
# %%
orddict.move_to_end("key_3", last=True)
print(orddict)
# %%
orddict.move_to_end("key_3", last=False)
print(orddict)
# %%
# Note : a way to pretty print
import json
print(json.dumps(orddict,indent=4))
# %%
aa = [1,2,3,4]
[a for a in reversed(aa)]
# %%
for key, val in reversed(orddict.items()):
print(key,val)
# %%
# Note that when creating a class, you can define a __reversed__ method
class OrderedLetters:
def __init__(self, list_of_letters):
self.ordered_list = sorted(list_of_letters)
def __repr__(self):
return str(self.ordered_list)
def __reversed__(self):
return iter(sorted(self.ordered_list, reverse=True))
OrderedLetters(['a', 'c', 'b'])
# %%
for l in reversed(OrderedLetters(['a', 'c', 'b'])):
print(l)
# %%
# %%
from collections import defaultdict
dd = defaultdict(int, {"a":1})
print(dd["a"])
print(dd.get("b")) # ! can be a problem
print(dd["b"])
print(dd.get("b")) # ! works as expected
print(dd.get("b", 5)) # does not work
# so decide on default dict or using get but dont combine both as it can lead to unexpected behavior
# %%
class MyDict(dict):
def __missing__(self, key): # methods for subclasses of dicts
print("calling __missing__ method")
rv = ["The value is missing"]
return rv
m = MyDict()
m["a"]=1
print(m)
# %%
print(m["b"])
print(m)
# %%
df = defaultdict(lambda : "<miss>", {"a":1})
df["fa"]
# %%
df = defaultdict(set, {"a":1})
# dont use get !
print(df.get("fa",5))
print(df["fa"])
print(df.get("fa",5))
# %%
# * Chain maps
from collections import ChainMap
dict1 = {'a':1, 'b':2, 'c':3}
dict2 = {'c':4, 'd':5, 'e':6}
dict3 = {'a':7, 'c':9, 'w':15}
# %%
chain = ChainMap(dict1, dict2, dict3)
print(chain)
# %%
print(chain["a"], chain["d"], chain["w"])
# %%
print(chain["ffw"])
# %%
chain.maps # returns a list of dictionnary in order
# %%
new_chain = chain.new_child({"AA":11})
print(new_chain)
# %%
ChainMap({"BB":4545},*chain.maps)
# %%
chain.parents
# %%
# same as
ChainMap(*chain.maps[1:])
# %%
print(chain)
print(list(chain.items())) # for iteration maps are scanned last to first but the values are correct
# %% why use chainmaps ?
# You can keep defaults with no need to overwrite
default_values= {"prefill_tsi": ["AmountTextBox"], "prefill_country": ["CountryNameTextBox"],}
special_use_case = {"prefill_tsi": ["AmountTextBox", "PercentageTextBox"]}
chain = ChainMap(special_use_case, default_values)
chain["prefill_tsi"], chain["prefill_country"]
# %%
# * wrapper to make read only dicts: MappingProxyType : useful to protect state for ex : no need to create a full copy of the dict
from types import MappingProxyType
writable = {"a":1,"b":2}
# %%
read_only = MappingProxyType(writable)
writable, read_only
# %%
read_only["d"] = 5
# %%
# updates to the original are reflected in the proxy
writable["c"] = 5
writable, read_only
# ! In conclusion, dict are the central data structure in python. Most of the time the standard implementation is enough. Use orderedDict to communicate the intent that order is important. Use defaultdict when you find yourself trying to access keys that may or may not be defined.
# %%