forked from FooSoft/yomichan-import
-
Notifications
You must be signed in to change notification settings - Fork 5
/
kanjidic.go
178 lines (150 loc) · 5.79 KB
/
kanjidic.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
package yomitan
import (
"os"
"strconv"
jmdict "github.com/yomidevs/jmdict-go"
)
// kanjidicExtractKanji converts a single KANJIDIC character entry into a
// dbKanji record restricted to the requested meaning language.
//
// language is compared against each meaning's Language field: an empty
// string selects meanings whose Language is nil, any other value selects
// meanings whose Language points to an equal string.
//
// It returns nil when the entry has no ReadingMeaning section or when no
// meaning matches the requested language.
func kanjidicExtractKanji(entry jmdict.KanjidicCharacter, language string) *dbKanji {
	readingMeaning := entry.ReadingMeaning
	if readingMeaning == nil {
		return nil
	}

	result := dbKanji{
		Character: entry.Literal,
		Stats:     make(map[string]string),
	}

	// Keep only the meanings written in the requested language.
	for _, meaning := range readingMeaning.Meanings {
		var matches bool
		if meaning.Language == nil {
			matches = language == ""
		} else {
			matches = language == *meaning.Language
		}
		if matches {
			result.Meanings = append(result.Meanings, meaning.Meaning)
		}
	}
	if len(result.Meanings) == 0 {
		return nil
	}

	// Stats are written in a fixed order so that, should two sources share a
	// key, the later write wins.
	for _, dictNumber := range entry.DictionaryNumbers {
		result.Stats[dictNumber.Type] = dictNumber.Value
	}

	misc := entry.Misc
	if misc.Frequency != nil {
		result.Stats["freq"] = *misc.Frequency
	}
	if misc.JlptLevel != nil {
		result.Stats["jlpt"] = *misc.JlptLevel
	}
	if len(misc.StrokeCounts) > 0 {
		// Only the first listed stroke count is recorded.
		result.Stats["strokes"] = misc.StrokeCounts[0]
	}

	for _, codepoint := range entry.Codepoint {
		result.Stats[codepoint.Type] = codepoint.Value
	}
	for _, queryCode := range entry.QueryCode {
		result.Stats[queryCode.Type] = queryCode.Value
	}

	if misc.Grade != nil {
		result.Stats["grade"] = *misc.Grade

		// A non-numeric grade is still stored as a stat but produces no tag.
		if gradeNum, err := strconv.Atoi(*misc.Grade); err == nil {
			switch {
			case gradeNum >= 1 && gradeNum <= 8:
				result.addTags("jouyou")
			case gradeNum >= 9 && gradeNum <= 10:
				result.addTags("jinmeiyou")
			}
		}
	}

	// Readings of any type other than on'yomi and kun'yomi are ignored.
	for _, reading := range readingMeaning.Readings {
		if reading.Type == "ja_on" {
			result.Onyomi = append(result.Onyomi, reading.Value)
		} else if reading.Type == "ja_kun" {
			result.Kunyomi = append(result.Kunyomi, reading.Value)
		}
	}

	return &result
}
// kanjidicExportDb reads the KANJIDIC file at inputPath, converts its
// characters into kanji records for the requested language, and writes the
// resulting database to outputPath via writeDb.
//
// language selects which meanings are exported: "french", "spanish" and
// "portuguese" map to the codes "fr", "es" and "pt"; any other value maps to
// the empty code, which selects meanings that carry no language attribute.
// An empty title defaults to "KANJIDIC2"; the database version taken from the
// file header is always appended to the title in square brackets. stride and
// pretty are passed through to writeDb unchanged.
//
// It returns any error from opening the file, loading the dictionary, or
// writing the database.
func kanjidicExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
	file, err := os.Open(inputPath)
	if err != nil {
		return err
	}
	defer file.Close()

	dict, err := jmdict.LoadKanjidic(file)
	if err != nil {
		return err
	}

	// A language missing from the table yields the empty code.
	langTag := map[string]string{
		"french":     "fr",
		"spanish":    "es",
		"portuguese": "pt",
	}[language]

	var kanjiList dbKanjiList
	for _, character := range dict.Characters {
		// Entries without a usable meaning come back as nil and are skipped.
		if extracted := kanjidicExtractKanji(character, langTag); extracted != nil {
			kanjiList = append(kanjiList, *extracted)
		}
	}

	if title == "" {
		title = "KANJIDIC2"
	}
	dbVersion := dict.Header.DatabaseVersion
	title += " [" + dbVersion + "]"

	tagList := dbTagList{
		{Name: "jouyou", Notes: "included in list of regular-use characters", Category: "frequent", Order: -5},
		{Name: "jinmeiyou", Notes: "included in list of characters for use in personal names", Category: "frequent", Order: -5},
		{Name: "freq", Notes: "Frequency", Category: "misc"},
		{Name: "grade", Notes: "Grade level", Category: "misc"},
		{Name: "jlpt", Notes: "JLPT level", Category: "misc"},
		{Name: "strokes", Notes: "Stroke count", Category: "misc"},
		{Name: "jis208", Notes: "JIS X 0208-1997 kuten code", Category: "code"},
		{Name: "jis212", Notes: "JIS X 0212-1990 kuten code", Category: "code"},
		{Name: "jis213", Notes: "JIS X 0213-2000 kuten code", Category: "code"},
		{Name: "ucs", Notes: "Unicode hex code", Category: "code"},
		{Name: "deroo", Notes: "2001 Kanji", Category: "class"},
		{Name: "four_corner", Notes: "Four corner code", Category: "class"},
		{Name: "misclass", Notes: "Misclassification", Category: "class"},
		{Name: "sh_desc", Notes: "The Kanji Dictionary", Category: "class"},
		{Name: "skip", Notes: "SKIP code", Category: "class"},
		{Name: "busy_people", Notes: "Japanese For Busy People", Category: "index"},
		{Name: "crowley", Notes: "The Kanji Way to Japanese Language Power", Category: "index"},
		{Name: "gakken", Notes: "A New Dictionary of Kanji Usage", Category: "index"},
		{Name: "halpern_kkd", Notes: "Kodansha Kanji Dictionary", Category: "index"},
		{Name: "halpern_kkld", Notes: "Kanji Learners Dictionary", Category: "index"},
		{Name: "halpern_kkld_2ed", Notes: "Kanji Learners Dictionary", Category: "index"},
		{Name: "halpern_njecd", Notes: "New Japanese-English Character Dictionary", Category: "index"},
		{Name: "heisig", Notes: "Remembering The Kanji", Category: "index"},
		{Name: "heisig6", Notes: "Remembering The Kanji, Sixth Ed.", Category: "index"},
		{Name: "henshall", Notes: "A Guide To Remembering Japanese Characters", Category: "index"},
		{Name: "henshall3", Notes: "A Guide To Reading and Writing Japanese", Category: "index"},
		{Name: "jf_cards", Notes: "Japanese Kanji Flashcards", Category: "index"},
		{Name: "kanji_in_context", Notes: "Kanji in Context", Category: "index"},
		{Name: "kodansha_compact", Notes: "Kodansha Compact Kanji Guide", Category: "index"},
		{Name: "maniette", Notes: "Les Kanjis dans la tete", Category: "index"},
		{Name: "moro", Notes: "Daikanwajiten", Category: "index"},
		{Name: "nelson_c", Notes: "Modern Reader's Japanese-English Character Dictionary", Category: "index"},
		{Name: "nelson_n", Notes: "The New Nelson Japanese-English Character Dictionary", Category: "index"},
		{Name: "oneill_kk", Notes: "Essential Kanji", Category: "index"},
		{Name: "oneill_names", Notes: "Japanese Names", Category: "index"},
		{Name: "sakade", Notes: "A Guide To Reading and Writing Japanese", Category: "index"},
		{Name: "sh_kk", Notes: "Kanji and Kana", Category: "index"},
		{Name: "sh_kk2", Notes: "Kanji and Kana", Category: "index"},
		{Name: "tutt_cards", Notes: "Tuttle Kanji Cards", Category: "index"},
	}

	records := map[string]dbRecordList{
		"kanji": kanjiList.crush(),
		"tag":   tagList.crush(),
	}

	dbInfo := dbIndex{
		Title:       title,
		Revision:    "kanjidic2." + dbVersion,
		Sequenced:   false,
		Attribution: edrdgAttribution,
	}

	return writeDb(outputPath, dbInfo, records, stride, pretty)
}