-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdiff.py
56 lines (44 loc) · 1.33 KB
/
diff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import os.path as osp
import hashlib
import sys
def iter_files(root: str):
    """Yield the path of every file below ``root``, relative to ``root``.

    The tree is traversed with ``os.walk``; only the entries it reports as
    file names are yielded, never the directories themselves.
    """
    for current_dir, _subdirs, file_names in os.walk(root):
        yield from (
            osp.relpath(osp.join(current_dir, file_name), root)
            for file_name in file_names
        )
def check_file(path: str):
    """Return ``(md5_hexdigest, size_in_bytes)`` for the file at ``path``.

    The file is opened in binary mode and consumed in 4096-byte chunks, so
    its full content is never held in memory at once.
    """
    digest = hashlib.md5()
    total_bytes = 0
    with open(path, "rb") as stream:
        # The two-argument form of iter() stops at the first empty read (EOF).
        for chunk in iter(lambda: stream.read(4096), b""):
            digest.update(chunk)
            total_bytes += len(chunk)
    return digest.hexdigest(), total_bytes
def analyze(root: str):
    """Map each file below ``root`` to its ``check_file`` result.

    Keys are the root-relative paths produced by ``iter_files``; each value
    is what ``check_file`` returns for the corresponding path joined back
    onto ``root``.
    """
    return {
        rel_path: check_file(osp.join(root, rel_path))
        for rel_path in iter_files(root)
    }
def main(argv=None):
    """Compare two directory trees file by file and print a report.

    Both trees are scanned with ``analyze``. For every file found under the
    source directory one line is printed: ``Ok`` when the destination has the
    same entry, ``Error`` when the entries differ, or ``Not found in dest``
    when the destination lacks the file. Files that exist only under the
    destination are then listed as ``Not found in source``. Finally the file
    counts and the summed sizes of both trees are printed.

    Args:
        argv: Argument vector shaped like ``sys.argv`` (program name, source
            directory, destination directory). Defaults to ``sys.argv``.
            Arguments after the first two directories are ignored.

    Returns:
        ``0`` when the comparison ran, ``2`` when a directory argument is
        missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        # Explain what is missing rather than dying with a bare IndexError.
        program = argv[0] if argv else "diff.py"
        print(f"usage: {program} SOURCE_DIR DEST_DIR", file=sys.stderr)
        return 2

    source = analyze(argv[1])
    dest = analyze(argv[2])

    for name, data in source.items():
        # Only the lookup can raise KeyError, so it is the only guarded line.
        try:
            dest_data = dest[name]
        except KeyError:
            print(f"Not found in dest: {name}")
            continue
        if data != dest_data:
            print(f"Error: {name!r} {data!r} != {dest_data!r}")
        else:
            print(f"Ok: {name!r} {data!r} == {dest_data!r}")

    # Files present only in dest would otherwise go unreported even though
    # they contribute to the dest totals printed below.
    for name in dest:
        if name not in source:
            print(f"Not found in source: {name}")

    print(f"Source: {len(source)}")
    print(f"Dest: {len(dest)}")
    # Total size in bytes of each tree (second element of every entry).
    print(sum(entry[1] for entry in source.values()))
    print(sum(entry[1] for entry in dest.values()))
    return 0


if __name__ == "__main__":
    sys.exit(main())
# short_names = [(osp.basename(k), v) for k, v in source.items()]
# short_names.sort(key=lambda p: p[0])
# for x in short_names:
# print(x)