Skip to content
This repository has been archived by the owner on Feb 25, 2023. It is now read-only.

Commit

Permalink
Add support for undocumented frequency and information tags
Browse files: browse the repository at this point in the history.
Custom dictionary files using the JMdict XML format may contain
nonstandard frequency and information tags. Such unrecognized tags are
now reported on standard output and kept as term tags.
  • Loading branch information
stephenmk committed Jan 30, 2023
1 parent aab0319 commit 0b328e1
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions jmdict_headword.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ func (h *headword) SetFlags(infoTags, freqTags []string) {
h.IsAteji = true
case "gikun":
h.IsGikun = true
default:
fmt.Println("Unknown information tag type: " + infoTag)
h.TermTags = append(h.TermTags, infoTag)
}
}
if h.IsOutdated && h.IsRareKanji {
Expand All @@ -138,27 +141,32 @@ func (h *headword) SetFlags(infoTags, freqTags []string) {
}

func (h *headword) SetTermTags(freqTags []string) {
h.TermTags = []string{}
if h.IsPriority {
h.TermTags = append(h.TermTags, priorityTagName)
}
knownFreqTags := []string{"ichi1", "ichi2", "gai1", "gai2", "spec1", "spec2"}
for _, tag := range freqTags {
isNewsFreqTag, _ := regexp.MatchString(`nf\d\d`, tag)
if isNewsFreqTag {
// nf tags are divided into ranks of 500
// (nf01 to nf48), but it will be easier
// for the user to read 1k, 2k, etc.
// (nf01 to nf48). Let's combine them into
// ranks of 1k (news1k, news2k, ..., news24k).
var i int
if _, err := fmt.Sscanf(tag, "nf%2d", &i); err == nil {
i = (i + (i % 2)) / 2
newsTag := "news" + strconv.Itoa(i) + "k"
h.TermTags = append(h.TermTags, newsTag)
}
} else if tag == "news1" || tag == "news2" {
// News tags are derived from the nf
// rankings, so these are not needed.
continue
} else {
tagWithoutTheNumber := tag[:len(tag)-1] // "ichi", "gai", or "spec"
} else if slices.Contains(knownFreqTags, tag) {
tagWithoutTheNumber := tag[:len(tag)-1]
h.TermTags = append(h.TermTags, tagWithoutTheNumber)
} else {
fmt.Println("Unknown frequency tag type: " + tag)
h.TermTags = append(h.TermTags, tag)
}
}
if h.IsIrregular {
Expand Down

0 comments on commit 0b328e1

Please sign in to comment.