Skip to content

Commit 07eb5fa

Browse files
committed
Added Czech wordlists, reduced English wordlist sizes
1 parent 67c4ece commit 07eb5fa

File tree

4 files changed

+55021
-8
lines changed

4 files changed

+55021
-8
lines changed

data-scripts/build_frequency_lists.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -29,12 +29,14 @@ def usage():
29 29
30 30   # maps dict name to num words. None value means "include all words"
31 31   DICTIONARIES = dict(
32    - us_tv_and_film = 30000,
   32 + us_tv_and_film = 15000,
33 33   english_wikipedia = 30000,
34    - passwords = 30000,
   34 + passwords = 15000,
35 35   surnames = 10000,
36 36   male_names = None,
37 37   female_names = None,
   38 + cs_passwords = 15000,
   39 + cs_tv_film_no_dia = 15000,
38 40   )
39 41
40 42   # returns {list_name: {token: rank}}, as tokens and ranks occur in each file.

0 commit comments

Comments
 (0)