-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhuffman.py
139 lines (115 loc) · 3.66 KB
/
huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from ordered_list import *
from huffman_bit_writer import *
class HuffmanNode:
def __init__(self, char, freq, left=None, right=None):
self.char = char
self.freq = freq
self.left = left
self.right = right
def __repr__(self):
return f"Node({self.char!r})"
def __eq__(self, other):
if type(other) is not HuffmanNode:
return False
return self.freq == other.freq and self.char == other.char
def __lt__(self, other):
if self.freq == other.freq and self.char < other.char:
return True
if self.freq < other.freq:
return True
if self.freq > other.freq:
return False
return False
def cnt_freq(filename):
f = open(filename, "r")
chars = f.readline()
freql = [0] * 256
for c in chars:
freql[ord(c)] += 1
f.close()
return freql
def create_huff_tree(list_of_freqs):
sortedList = OrderedList()
for i in range(len(list_of_freqs)):
if list_of_freqs[i] != 0:
sortedList.add(HuffmanNode(i, list_of_freqs[i]))
if sortedList.size() == 0:
return
if sortedList.size() == 1:
return sortedList.pop(0)
while sortedList.size() > 1:
first = sortedList.pop(0)
second = sortedList.pop(0)
if first.char < second.char:
new_ascii = first.char
else:
new_ascii = second.char
new_node = HuffmanNode(new_ascii, first.freq + second.freq)
new_node.left = first
new_node.right = second
sortedList.add(new_node)
# print(sortedList.python_list())
return sortedList.head.next.item
def create_code(root_node):
if root_node is None:
return None
l = [""] * 256
huff_code = ""
create_code_helper(root_node, l, huff_code)
return l
def create_code_helper(node, l, huff_code):
# print(node.left)
if node.left is None and node.right is None:
l[node.char] = huff_code
if node.left:
create_code_helper(node.left, l, huff_code + "0")
if node.right:
create_code_helper(node.right, l, huff_code + "1")
def huffman_encode(in_file, out_file):
try:
with open(in_file) as f:
f.close()
except FileNotFoundError:
raise FileNotFoundError
open_file = open(in_file, "r")
read_file = open_file.read()
open_file.close()
empty = False
if read_file == "":
empty = True
print("Input is empty, producing empty file")
freq_list = cnt_freq(in_file)
out_header = create_header(freq_list)
huff_tree = create_huff_tree(freq_list)
huff_code_list = create_code(huff_tree)
huff_code = ""
with open(in_file) as f:
f_str = f.readlines()
for line in f_str:
for c in line:
huff_code += huff_code_list[ord(c)]
with open(out_file, "w") as f:
if not empty:
f.write(out_header + "\n")
if len(out_header.split()) != 2:
f.write(huff_code)
compressed = out_file[0:len(out_file) - 4] + "_compressed.txt"
comp = HuffmanBitWriter(compressed)
if not empty:
comp.write_str(out_header + "\n")
comp.write_code(huff_code)
comp.close()
def create_header(list_of_freqs):
output = ""
single_letter = 0
for i in list_of_freqs:
if i != 0:
single_letter += 1
for i in range(len(list_of_freqs)):
if single_letter == 1 and list_of_freqs[i] != 0:
output += str(i) + " "
output += str(list_of_freqs[i]) + "\n"
elif list_of_freqs[i] != 0:
output += str(i) + " "
output += str(list_of_freqs[i]) + " "
return output