-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordlistgen.py
108 lines (94 loc) · 2.92 KB
/
wordlistgen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# usage: wordlistgen.py [words.txt] [answers.txt] [wordlist.asm]
# words.txt should be sorted by word frequency
# since later entries can be removed if there's not enough space
import sys

ATOZ = "abcdefghijklmnopqrstuvwxyz"
# Per-bank capacity. It is halved before being compared with an entry count,
# and every entry is emitted as one .word, so these are presumably byte
# counts -- TODO confirm against the linker/bank layout.
BANK_FREES = [0xa3c, 0xfe4, 0xfe4]
NUM_BANKS = len(BANK_FREES)
# Letters 0-1 select the bin, letters 2-4 are packed into the table entry.
WORD_LEN = 5

asm_header = """
; generated by wordlistgen.py
"""

section_format = """
.section wordlist_data_{0}
{1}
.send
"""

section_1_format = """
_ptrs := ({0})
wordlist_1_ptrs_lo .byte <(_ptrs)
wordlist_1_ptrs_hi .byte >(_ptrs)
wordlist_2_ofs
"""


def _read_lines(path):
    """Return the stripped, lower-cased, non-blank lines of the file *path*."""
    with open(path, "r") as src:
        cleaned = [line.strip().lower() for line in src]
    return [entry for entry in cleaned if entry]


def _is_encodable(word):
    """Return True if *word* is exactly WORD_LEN letters, all in a-z."""
    return len(word) == WORD_LEN and all(ch in ATOZ for ch in word)


def _load_wordlist(path):
    """Read the word list at *path*, dropping entries that cannot be encoded.

    Dropped entries are reported on stderr so that stdout keeps carrying only
    the gap-filled words and the final total.
    """
    words = []
    for entry in _read_lines(path):
        if _is_encodable(entry):
            words.append(entry)
        else:
            print("skipping unusable entry: {!r}".format(entry),
                  file=sys.stderr)
    return words


def _fit_banks(group_sizes, bank_frees):
    """Assign first-letter groups to banks, in alphabetical order.

    A group (all words sharing a first letter) is never split: when it does
    not fit in the current bank the assignment moves on to the next bank and
    never goes back.

    group_sizes -- dict mapping each letter of ATOZ to its word count
    bank_frees  -- per-bank capacities (halved to get the entry capacity)

    Returns (sizes, banks): the entry count per bank and the list of letters
    stored in each bank, or None when the groups do not fit.
    """
    bank_count = len(bank_frees)
    sizes = [0] * bank_count
    banks = [[] for _ in range(bank_count)]
    cur_bank = 0
    for letter in ATOZ:
        size = group_sizes[letter]
        while (cur_bank < bank_count
               and sizes[cur_bank] + size > bank_frees[cur_bank] // 2):
            cur_bank += 1
        if cur_bank >= bank_count:
            return None
        sizes[cur_bank] += size
        banks[cur_bank].append(letter)
    return sizes, banks


def build_wordlist(fulllist, bank_frees=None):
    """Pick the words that fit into the banks and bin them by first 2 letters.

    fulllist   -- WORD_LEN-letter a-z words, most important first
    bank_frees -- per-bank capacities; defaults to BANK_FREES

    Returns (bins, banks): bins maps every two-letter prefix to its accepted
    words in input order; banks lists, per bank, the first letters stored
    there. Words added by the gap-filling pass are printed to stdout.

    Raises ValueError if not even an empty list fits the given banks.
    """
    if bank_frees is None:
        bank_frees = BANK_FREES

    bins = {first + second: [] for first in ATOZ for second in ATOZ}
    group_sizes = dict.fromkeys(ATOZ, 0)

    # Start from the layout of an empty list so that a valid layout exists
    # even when no word is ever accepted.
    layout = _fit_banks(group_sizes, bank_frees)
    if layout is None:
        raise ValueError("banks cannot hold even an empty word list")
    sizes, banks = layout

    # first iteration: fit banks
    remaining = []
    for word in fulllist:
        key = word[:2]
        bins[key].append(word)
        group_sizes[word[0]] += 1
        layout = _fit_banks(group_sizes, bank_frees)
        if layout is None:
            # undo exactly the entry appended above
            bins[key].pop()
            group_sizes[word[0]] -= 1
            remaining.append(word)
        else:
            sizes, banks = layout

    # second iteration: fill gaps
    for word in remaining:
        for index, letters in enumerate(banks):
            if word[0] not in letters:
                continue
            # NOTE(review): sizes are counted in entries everywhere else, yet
            # a gap-filled word is charged 2 here. Kept as-is because it only
            # under-fills a bank; confirm whether 1 was intended.
            if (sizes[index] + 2) <= (bank_frees[index] // 2):
                print(word)
                bins[key_of(word)].append(word)
                sizes[index] += 2

    return bins, banks


def key_of(word):
    """Return the two-letter bin key of *word*."""
    return word[:2]


def _encode(word, is_answer):
    """Return the table entry for *word* as a '$xxxx' hex literal.

    Letters 2, 3 and 4 are stored as 1-26 in three 5-bit fields (bits 0-4,
    5-9 and 10-14); bit 15 is set when the word is in the answers list.
    """
    c2, c3, c4 = (ord(ch) - ord("a") + 1 for ch in word[2:5])
    flag = 0x8000 if is_answer else 0
    return "${:04x}".format(c2 | (c3 << 5) | (c4 << 10) | flag)


def render_asm(bins, banks, answers):
    """Return the assembly source for the binned words.

    bins    -- dict from two-letter prefix to list of words
    banks   -- per bank, the first letters whose words are stored there
    answers -- iterable of words that get the answer flag (bit 15)

    One section is written per bank. The first section additionally carries
    the table of per-letter labels and a 26x26 table holding, for every
    prefix, twice the number of words in its bin.
    """
    answer_set = set(answers)  # O(1) membership test per emitted entry
    pieces = [asm_header]
    for index, letters in enumerate(banks):
        body = []
        if index == 0:
            labels = ", ".join("wordlist_3_" + letter for letter in ATOZ)
            body.append(section_1_format.format(labels))
            for first in ATOZ:
                row = ", ".join(
                    "{:3}".format(len(bins[first + second]) * 2)
                    for second in ATOZ
                )
                body.append(" .byte {}\n".format(row))
            body.append("\n")
        for first in letters:
            body.append("wordlist_3_" + first)
            emitted = 0
            for second in ATOZ:
                # reverse the list to aid searching, since the game checks
                # from the last member first
                for word in reversed(bins[first + second]):
                    # start a new .word line every 16 entries
                    body.append("\n .word " if emitted % 16 == 0 else ", ")
                    body.append(_encode(word, word in answer_set))
                    emitted += 1
            body.append("\n")
        pieces.append(section_format.format(index + 1, "".join(body)))
    return "".join(pieces)


def main(argv=None):
    """Generate the word list assembly file.

    argv -- [words.txt, answers.txt, wordlist.asm]; defaults to sys.argv[1:]

    Prints the number of words that made it into the table. The output file
    is only opened once its full content is known, so a failure while
    processing does not leave a truncated file behind.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.exit(
            "usage: wordlistgen.py [words.txt] [answers.txt] [wordlist.asm]"
        )
    words_path, answers_path, out_path = args[:3]

    fulllist = _load_wordlist(words_path)
    anslist = _read_lines(answers_path)

    bins, banks = build_wordlist(fulllist)
    print(sum(len(members) for members in bins.values()))

    asm = render_asm(bins, banks, anslist)
    with open(out_path, "w") as outfile:
        outfile.write(asm)


if __name__ == "__main__":
    main()