From 43230c088c446a2f8cb160f488c863e3323a727d Mon Sep 17 00:00:00 2001 From: goropikari Date: Sat, 10 Sep 2022 23:23:33 +0900 Subject: [PATCH] support unicode --- Makefile | 2 +- README.md | 6 +- automata/common.go | 44 +- automata/dfa.go | 54 + automata/dot.go | 28 +- automata/nfa.go | 34 +- automata/nfa_test.go | 106 +- compiler/generator/export_test.go | 2 +- compiler/generator/generator.go | 351 ++-- compiler/generator/generator_test.go | 401 ++--- compiler/generator/parser.go | 58 +- compiler/generator/template.go | 71 +- compiler/regexp/ast_printer.go | 10 + compiler/regexp/code_generator.go | 169 +- compiler/regexp/compiler.go | 22 +- compiler/regexp/error.go | 5 + compiler/regexp/lexer.go | 16 +- compiler/regexp/parser.go | 146 +- compiler/regexp/parser_test.go | 15 +- go.mod | 3 + go.sum | 6 + main.go | 56 +- sample/README.md | 4 +- sample/main.go | 2422 +++++++++----------------- sample/main_test.go | 19 +- sample/sample.l | 6 +- sample/word_counter/README.md | 8 +- sample/word_counter/main.go | 234 +-- sample/word_counter/wc.l | 7 +- 29 files changed, 1816 insertions(+), 2489 deletions(-) create mode 100644 compiler/regexp/error.go diff --git a/Makefile b/Makefile index e85395a..6c7665d 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .PHONY: test -test: +test: build @go test -shuffle on $(shell go list ./... | grep -v sample) .PHONY: test-verbose diff --git a/README.md b/README.md index 9e6734b..3865d57 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ tlex is lexical analyzer generator such as Lex. This is toy implementation for my study, so don't use for production. -tlex supports only ASCII string, doesn't do unicode. +tlex supports Unicode. ```bash @@ -25,7 +25,7 @@ Usage of ./tlex: $ tlex -src sample.l -pkg main -o main.go $ go run main.go -func foo123bar() int { +func foo123barあいう () int { x := 1 * 10 + 123 - 1000 / 5432 y := float64(x) @@ -37,6 +37,8 @@ Keyword "func" Identifier "foo123bar" +Hiragana + "あいう" LParen "(" RParen diff --git a/automata/common.go b/automata/common.go index a1660e5..ffe1382 100644 --- a/automata/common.go +++ b/automata/common.go @@ -3,6 +3,7 @@ package automata import ( "crypto/sha256" stdmath "math" + "unicode" "github.com/goropikari/tlex/collection" "github.com/goropikari/tlex/utils/guid" @@ -21,11 +22,8 @@ func init() { } } -var unicodeRange = []Interval{ - NewInterval(0, 127), - NewInterval(49152, 57343), - NewInterval(14680064, 15728639), - NewInterval(4026531840, 4160749567), +var UnicodeRange = []Interval{ + NewInterval(0, int(unicode.MaxRune)), } type RegexID int @@ -36,19 +34,19 @@ type Nothing struct{} var nothing = Nothing{} type Interval struct { - l int - r int + L int + R int } func NewInterval(s, e int) Interval { return Interval{ - l: s, - r: e, + L: s, + R: e, } } func (x Interval) Overlap(y Interval) bool { - return y.l <= x.r && x.l <= y.r + return y.L <= x.R && x.L <= y.R } func (x Interval) Difference(y Interval) []Interval { @@ -57,11 +55,11 @@ func (x Interval) Difference(y Interval) []Interval { } ret := make([]Interval, 0, 2) - if x.l < y.l { - ret = append(ret, NewInterval(x.l, y.l-1)) + if x.L < y.L { + ret = append(ret, NewInterval(x.L, y.L-1)) } - if y.r < x.r { - ret = append(ret, NewInterval(y.r+1, x.r)) + if x.R > y.R { + ret = append(ret, NewInterval(y.R+1, x.R)) } return ret @@ -71,10 +69,10 @@ func (x Interval) Difference(y Interval) []Interval { func Disjoin(intvs []Interval) []Interval { pq := collection.NewPriorityQueue(func(x, y Interval) bool { // ascending order - if x.l != y.l { - return x.l > y.l + if x.L != y.L { + return x.L > y.L } - return x.r > y.r + return x.R > y.R }) for _, v := range intvs { @@ -89,17 +87,17 @@ func Disjoin(intvs []Interval) []Interval { pq.Pop() if t1.Overlap(t2) { - if t1.l < t2.l { - nx1 := NewInterval(t1.l, t2.l-1) - nx2 := NewInterval(t2.l, t1.r) - nx3 := NewInterval(t2.l, t2.r) + if t1.L < t2.L { + nx1 := NewInterval(t1.L, t2.L-1) + nx2 := NewInterval(t2.L, t1.R) + nx3 := NewInterval(t2.L, t2.R) pq.Push(nx1) pq.Push(nx2) pq.Push(nx3) } else { // t1.l == t2.l pq.Push(t1) - nx := NewInterval(t1.r+1, t2.r) - if t1.r+1 <= t2.r { + nx := NewInterval(t1.R+1, t2.R) + if t1.R+1 <= t2.R { pq.Push(nx) } } diff --git a/automata/dfa.go b/automata/dfa.go index fd9d652..a9d7a8d 100644 --- a/automata/dfa.go +++ b/automata/dfa.go @@ -12,6 +12,11 @@ func NewDFATransition() *DFATransition { } } +func (trans *DFATransition) GetMap(sid StateID) (map[Interval]StateID, bool) { + mp, ok := trans.delta[sid] + return mp, ok +} + func (trans *DFATransition) Set(from StateID, intv Interval, to StateID) { _, ok := trans.delta[from] if !ok { @@ -21,6 +26,17 @@ func (trans *DFATransition) Set(from StateID, intv Interval, to StateID) { trans.delta[from][intv] = to } +func (trans *DFATransition) step(from StateID, intv Interval) (StateID, bool) { + if mp, ok := trans.delta[from]; ok { + for t, to := range mp { + if t.Overlap(intv) { + return to, true + } + } + } + return 0, false +} + type DFA struct { size int intvs []Interval @@ -31,12 +47,50 @@ type DFA struct { stIDToRegID StateIDToRegexID } +func (dfa *DFA) GetInitState() StateID { + return dfa.initState +} + +func (dfa *DFA) GetFinStates() *collection.Set[StateID] { + return dfa.finStates +} + +func (dfa *DFA) GetStates() []StateID { + return dfa.states.Slice() +} + +func (dfa *DFA) GetRegexID(sid StateID) RegexID { + return dfa.stIDToRegID.Get(sid) +} + +func (dfa *DFA) GetTransitionTable() *DFATransition { + return dfa.trans +} + +func (dfa *DFA) Accept(s string) (RegexID, bool) { + rs := []rune(s) + currSid := dfa.initState + for _, r := range rs { + intv := NewInterval(int(r), int(r)) + nx, ok := dfa.trans.step(currSid, intv) + if !ok { + return 0, false + } + currSid = nx + } + return dfa.stIDToRegID.Get(currSid), dfa.finStates.Contains(currSid) +} + // ここで入る intv は dfa.intvs に入っていることを前提としている func (dfa *DFA) stepIntv(sid StateID, intv Interval) (stateID StateID, nonDeadState bool) { retID, ok := dfa.trans.delta[sid][intv] return retID, ok } +// state minimization for lexical analyzer +// Compilers: Principles, Techniques, and Tools, 2ed ed., ISBN 9780321486813 (Dragon book) +// p.181 Algorithm 3.39 +// p.184 3.9.7 State Minimization in Lexical Analyzers func (dfa *DFA) grouping() [][]StateID { regIDMap := map[RegexID][]StateID{} siter := dfa.states.Iterator() diff --git a/automata/dot.go b/automata/dot.go index d3f74d3..5af0d1f 100644 --- a/automata/dot.go +++ b/automata/dot.go @@ -65,7 +65,7 @@ func (nfa NFA) ToDot() (string, error) { edges := make(map[collection.Pair[StateID, StateID]]string) for from, mp := range nfa.trans.mp { for intv, tos := range mp { - symbols := fmt.Sprintf("[%c-%c]", intv.l, intv.r) + symbols := fmt.Sprintf("[%c-%c]", intv.L, intv.R) titer := tos.Iterator() for titer.HasNext() { to := titer.Next() @@ -167,7 +167,7 @@ func (nfa ImdNFA) ToDot() (string, error) { edges := make(map[collection.Pair[StateID, StateID]]string) for from, mp := range nfa.trans.mp { for intv, tos := range mp { - symbols := fmt.Sprintf("[%c-%c]", intv.l, intv.r) + symbols := fmt.Sprintf("[%c-%c]", intv.L, intv.R) titer := tos.iterator() for titer.HasNext() { to := titer.Next() @@ -222,8 +222,18 @@ func (nfa ImdNFA) ToDot() (string, error) { func (dfa DFA) ToDot() (string, error) { g := graphviz.New() - - ftBinary, _ := os.ReadFile("./ipaexg00401/ipaexg.ttf") + var ftBinary []byte + if exists("/usr/share/fonts/opentype/ipaexfont-gothic/ipaexg.ttf") { + ftBinary, _ = os.ReadFile("/usr/share/fonts/opentype/ipaexfont-gothic/ipaexg.ttf") + } else if exists("/usr/share/fonts/OTF/ipaexm.ttf") { + ftBinary, _ = os.ReadFile("/usr/share/fonts/OTF/ipaexm.ttf") + } else { + var err error + ftBinary, err = os.ReadFile("./ipaexg00401/ipaexg.ttf") + if err != nil { + panic(err) + } + } ft, _ := truetype.Parse(ftBinary) g.SetFontFace(func(size float64) (font.Face, error) { opt := &truetype.Options{ @@ -286,7 +296,10 @@ func (dfa DFA) ToDot() (string, error) { edges := make(map[collection.Pair[StateID, StateID]]string) for from, mp := range dfa.trans.delta { for intv, to := range mp { - symbols := fmt.Sprintf("[%s-%s]", string(rune(intv.l)), string(rune(intv.r))) + var lstr, rstr string + lstr = fmt.Sprintf("%v", intv.L) + rstr = fmt.Sprintf("%v", intv.R) + symbols := fmt.Sprintf("[%s-%s]", lstr, rstr) p := collection.NewPair(from, to) if _, ok := edges[p]; ok { edges[p] = edges[p] + "\n" + symbols @@ -323,3 +336,8 @@ func (dfa DFA) ToDot() (string, error) { return buf.String(), nil } + +func exists(filename string) bool { + _, err := os.Stat(filename) + return err == nil +} diff --git a/automata/nfa.go b/automata/nfa.go index 108e81a..34ee35d 100644 --- a/automata/nfa.go +++ b/automata/nfa.go @@ -8,18 +8,20 @@ type EpsilonTransition struct { mp map[StateID]*collection.Set[StateID] } -func NewEpsilonTransition(mp map[StateID]*collection.Set[StateID]) EpsilonTransition { +func NewEpsilonTransition() EpsilonTransition { return EpsilonTransition{ - mp: mp, + mp: make(map[StateID]*collection.Set[StateID]), } } -func (t EpsilonTransition) set(from, to StateID) { +func (t EpsilonTransition) Set(from, to StateID) EpsilonTransition { if _, ok := t.mp[from]; ok { t.mp[from].Insert(to) } else { t.mp[from] = collection.NewSet[StateID]().Insert(to) } + + return t } func (trans *EpsilonTransition) merge(other EpsilonTransition) { @@ -40,8 +42,24 @@ type NFATransition struct { mp map[StateID]map[Interval]*collection.Set[StateID] } -func NewTransition(mp map[StateID]map[Interval]*collection.Set[StateID]) NFATransition { - return NFATransition{mp: mp} +func NewNFATransition() NFATransition { + return NFATransition{ + mp: make(map[StateID]map[Interval]*collection.Set[StateID]), + } +} + +func (trans NFATransition) Set(from StateID, intv Interval, to StateID) NFATransition { + _, ok := trans.mp[from] + if !ok { + trans.mp[from] = make(map[Interval]*collection.Set[StateID]) + } + _, ok = trans.mp[from][intv] + if !ok { + trans.mp[from][intv] = collection.NewSet[StateID]() + } + trans.mp[from][intv].Insert(to) + + return trans } func (trans NFATransition) merge(other NFATransition) { @@ -122,7 +140,7 @@ func (nfa *NFA) Concat(other *NFA) *NFA { iiter := other.initStates.Iterator() for iiter.HasNext() { to := iiter.Next() - nfa.epsilonTrans.set(from, to) + nfa.epsilonTrans.Set(from, to) } } nfa.finStates = other.finStates @@ -137,12 +155,12 @@ func (nfa *NFA) Star() *NFA { fiter := nfa.finStates.Iterator() for fiter.HasNext() { from := fiter.Next() - nfa.epsilonTrans.set(from, sid) + nfa.epsilonTrans.Set(from, sid) } iiter := nfa.initStates.Iterator() for iiter.HasNext() { to := iiter.Next() - nfa.epsilonTrans.set(sid, to) + nfa.epsilonTrans.Set(sid, to) } states := collection.NewSet[StateID]().Insert(sid) diff --git a/automata/nfa_test.go b/automata/nfa_test.go index f14ddaa..68ba9db 100644 --- a/automata/nfa_test.go +++ b/automata/nfa_test.go @@ -17,14 +17,8 @@ func TestNFA(t *testing.T) { id1 := automata.NewStateID() nfa1 := automata.NewNFA( collection.NewSet[automata.StateID]().Insert(id0).Insert(id1), - automata.NewEpsilonTransition(make(map[automata.StateID]*collection.Set[automata.StateID])), - automata.NewTransition( - map[automata.StateID]map[automata.Interval]*collection.Set[automata.StateID]{ - id0: { - automata.NewInterval(65, 65): collection.NewSet[automata.StateID]().Insert(id1), - }, - }, - ), + automata.NewEpsilonTransition(), + automata.NewNFATransition().Set(id0, automata.NewInterval(65, 65), id1), collection.NewSet[automata.StateID]().Insert(id0), collection.NewSet[automata.StateID]().Insert(id1), ) @@ -36,19 +30,10 @@ func TestNFA(t *testing.T) { id5 := automata.NewStateID() nfa2 := automata.NewNFA( collection.NewSet[automata.StateID]().Insert(id2).Insert(id3).Insert(id4).Insert(id5), - automata.NewEpsilonTransition(map[automata.StateID]*collection.Set[automata.StateID]{ - id3: collection.NewSet[automata.StateID]().Insert(id4), - }), - automata.NewTransition( - map[automata.StateID]map[automata.Interval]*collection.Set[automata.StateID]{ - id2: { - automata.NewInterval(65, 65): collection.NewSet[automata.StateID]().Insert(id3), - }, - id4: { - automata.NewInterval(66, 66): collection.NewSet[automata.StateID]().Insert(id5), - }, - }, - ), + automata.NewEpsilonTransition(), + automata.NewNFATransition(). + Set(id2, automata.NewInterval(65, 65), id3). + Set(id4, automata.NewInterval(66, 66), id5), collection.NewSet[automata.StateID]().Insert(id2), collection.NewSet[automata.StateID]().Insert(id5), ) @@ -64,26 +49,11 @@ func TestNFA(t *testing.T) { id13 := automata.NewStateID() nfa3 := automata.NewNFA( collection.NewSet[automata.StateID]().Insert(id6).Insert(id7).Insert(id8).Insert(id9).Insert(id10).Insert(id11).Insert(id12).Insert(id13), - automata.NewEpsilonTransition(map[automata.StateID]*collection.Set[automata.StateID]{ - id6: collection.NewSet[automata.StateID]().Insert(id7).Insert(id9), - id8: collection.NewSet[automata.StateID]().Insert(id6), - id10: collection.NewSet[automata.StateID]().Insert(id11), - id11: collection.NewSet[automata.StateID]().Insert(id12), - id13: collection.NewSet[automata.StateID]().Insert(id11), - }), - automata.NewTransition( - map[automata.StateID]map[automata.Interval]*collection.Set[automata.StateID]{ - id7: { - automata.NewInterval(65, 65): collection.NewSet[automata.StateID]().Insert(id8), - }, - id9: { - automata.NewInterval(66, 66): collection.NewSet[automata.StateID]().Insert(id10), - }, - id12: { - automata.NewInterval(66, 66): collection.NewSet[automata.StateID]().Insert(id13), - }, - }, - ), + automata.NewEpsilonTransition(), + automata.NewNFATransition(). + Set(id7, automata.NewInterval(65, 65), id8). + Set(id9, automata.NewInterval(66, 66), id10). + Set(id12, automata.NewInterval(66, 66), id13), collection.NewSet[automata.StateID]().Insert(id6), collection.NewSet[automata.StateID]().Insert(id11), ) @@ -103,14 +73,9 @@ func TestNFA2(t *testing.T) { id1 := automata.NewStateID() nfa1 := automata.NewNFA( collection.NewSet[automata.StateID]().Insert(id0).Insert(id1), - automata.NewEpsilonTransition(make(map[automata.StateID]*collection.Set[automata.StateID])), - automata.NewTransition( - map[automata.StateID]map[automata.Interval]*collection.Set[automata.StateID]{ - id0: { - automata.NewInterval(12354, 12358): collection.NewSet[automata.StateID]().Insert(id1), - }, - }, - ), + automata.NewEpsilonTransition(), + automata.NewNFATransition(). + Set(id0, automata.NewInterval(12354, 12358), id1), collection.NewSet[automata.StateID]().Insert(id0), collection.NewSet[automata.StateID]().Insert(id1), ) @@ -122,19 +87,10 @@ func TestNFA2(t *testing.T) { id5 := automata.NewStateID() nfa2 := automata.NewNFA( collection.NewSet[automata.StateID]().Insert(id2).Insert(id3).Insert(id4).Insert(id5), - automata.NewEpsilonTransition(map[automata.StateID]*collection.Set[automata.StateID]{ - id3: collection.NewSet[automata.StateID]().Insert(id4), - }), - automata.NewTransition( - map[automata.StateID]map[automata.Interval]*collection.Set[automata.StateID]{ - id2: { - automata.NewInterval(12354, 12356): collection.NewSet[automata.StateID]().Insert(id3), - }, - id4: { - automata.NewInterval(66, 70): collection.NewSet[automata.StateID]().Insert(id5), - }, - }, - ), + automata.NewEpsilonTransition().Set(id3, id4), + automata.NewNFATransition(). + Set(id2, automata.NewInterval(12354, 12356), id3). + Set(id4, automata.NewInterval(66, 70), id5), collection.NewSet[automata.StateID]().Insert(id2), collection.NewSet[automata.StateID]().Insert(id5), ) @@ -150,32 +106,16 @@ func TestNFA2(t *testing.T) { id13 := automata.NewStateID() nfa3 := automata.NewNFA( collection.NewSet[automata.StateID]().Insert(id6).Insert(id7).Insert(id8).Insert(id9).Insert(id10).Insert(id11).Insert(id12).Insert(id13), - automata.NewEpsilonTransition(map[automata.StateID]*collection.Set[automata.StateID]{ - id6: collection.NewSet[automata.StateID]().Insert(id7).Insert(id9), - id8: collection.NewSet[automata.StateID]().Insert(id6), - id10: collection.NewSet[automata.StateID]().Insert(id11), - id11: collection.NewSet[automata.StateID]().Insert(id12), - id13: collection.NewSet[automata.StateID]().Insert(id11), - }), - automata.NewTransition( - map[automata.StateID]map[automata.Interval]*collection.Set[automata.StateID]{ - id7: { - automata.NewInterval(66, 68): collection.NewSet[automata.StateID]().Insert(id8), - }, - id9: { - automata.NewInterval(66, 66): collection.NewSet[automata.StateID]().Insert(id10), - }, - id12: { - automata.NewInterval(66, 66): collection.NewSet[automata.StateID]().Insert(id13), - }, - }, - ), + automata.NewEpsilonTransition().Set(id6, id7).Set(id6, id9).Set(id8, id6).Set(id10, id11).Set(id11, id12).Set(id13, id11), + automata.NewNFATransition(). + Set(id7, automata.NewInterval(66, 68), id8). + Set(id9, automata.NewInterval(66, 66), id10). + Set(id12, automata.NewInterval(66, 66), id13), collection.NewSet[automata.StateID]().Insert(id6), collection.NewSet[automata.StateID]().Insert(id11), ) nfa3.SetRegexID(3) - // a|ab|a*bb* nfa := nfa1.Sum(nfa2).Sum(nfa3).ToImdNFA().ToDFA().LexerMinimize() fmt.Println(nfa.ToDot()) } diff --git a/compiler/generator/export_test.go b/compiler/generator/export_test.go index 757a55d..8e08211 100644 --- a/compiler/generator/export_test.go +++ b/compiler/generator/export_test.go @@ -1,3 +1,3 @@ package generator -// var LexerNFA = lexerNFA +var LexerNFA = lexerNFA diff --git a/compiler/generator/generator.go b/compiler/generator/generator.go index 004f268..e316847 100644 --- a/compiler/generator/generator.go +++ b/compiler/generator/generator.go @@ -1,175 +1,180 @@ package generator -// type LexerTemplate struct { -// PackageName string -// EmbeddedTmpl string -// StateIDToRegexIDTmpl string -// FinStatesTmpl string -// TransitionTableTmpl string -// RegexActionsTmpl string -// UserCodeTmpl string -// } - -// func Generate(r *bufio.Reader, pkgName string, outfile string) { -// // parse lexer configuration -// parser := NewParser(r) -// def, rules, userCode := parser.Parse() - -// // compile regex and generate DFA -// regexs := make([]string, 0) -// actions := make([]string, 0) -// for _, v := range rules { -// regexs = append(regexs, v[0]) -// actions = append(actions, v[1]) -// } -// dfa := lexerDFA(regexs) -// stToID := make(map[automata.State]int) -// id := 1 -// stToID[dfa.GetInitState()] = id -// id++ -// for _, st := range dfa.GetStates() { -// if st == dfa.GetInitState() { -// continue -// } -// stToID[st] = id -// id++ -// } -// idToSt := make([]automata.State, id) -// idToRegexID := make([]automata.RegexID, id) -// for st, id := range stToID { -// idToSt[id] = st -// idToRegexID[id] = dfa.GetRegexID(st) -// } - -// // generate lexer file -// embeddedTmpl := def -// stateIDToRegexIDTmpl := genStIdToRegexID(idToRegexID) -// finStatesTmpl := genFinStates(idToSt, dfa.GetFinStates()) -// transitionTableTmpl := genTransitionTable(stToID, idToSt, dfa.GetTransitionTable()) -// regexActionsTmpl := genRegexActions(actions) -// userCodeTmpl := userCode - -// lexCfg := LexerTemplate{ -// PackageName: pkgName, -// EmbeddedTmpl: embeddedTmpl, -// StateIDToRegexIDTmpl: stateIDToRegexIDTmpl, -// FinStatesTmpl: finStatesTmpl, -// TransitionTableTmpl: transitionTableTmpl, -// RegexActionsTmpl: regexActionsTmpl, -// UserCodeTmpl: userCodeTmpl, -// } -// s := tmpl -// t := template.Must(template.New("lexer").Parse(s)) - -// var buf bytes.Buffer -// if err := t.Execute(&buf, lexCfg); err != nil { -// panic(err) -// } - -// f, err := os.OpenFile(outfile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) -// if err != nil { -// log.Fatal(err) -// } -// data, err := imports.Process(outfile, buf.Bytes(), nil) -// if err != nil { -// panic(err) -// } -// io.Copy(f, bytes.NewReader(data)) -// if err := f.Close(); err != nil { -// log.Fatal(err) -// } -// } - -// func genStIdToRegexID(idToRegexID []automata.RegexID) string { -// var buf bytes.Buffer -// for _, rid := range idToRegexID[1:] { -// buf.WriteString(fmt.Sprintf("%v,\n", rid)) -// } - -// return buf.String() -// } - -// func genFinStates(idToSt []automata.State, finStates *collection.Set[automata.State]) string { -// var buf bytes.Buffer -// for i, st := range idToSt { -// if finStates.Contains(st) { -// buf.WriteString(fmt.Sprintf("%v: {},\n", i)) -// } -// } - -// return buf.String() -// } - -// func genTransitionTable(stToID map[automata.State]int, idToSt []automata.State, delta *automata.DFATransition) string { -// tbl := make(map[int]map[byte]int) -// var buf bytes.Buffer -// iter := delta.Iterator() -// for iter.HasNext() { -// pair, to := iter.Next() -// from := pair.First -// b := pair.Second -// if _, ok := tbl[stToID[from]]; !ok { -// tbl[stToID[from]] = make(map[byte]int) -// } -// tbl[stToID[from]][b] = stToID[to] -// } - -// for fromID := 1; fromID <= len(stToID); fromID++ { -// if _, ok := tbl[fromID]; ok { -// buf.WriteString(fmt.Sprintf("%v: {\n", fromID)) -// for _, b := range automata.SupportedChars { -// if toID, ok2 := tbl[fromID][b]; ok2 { -// buf.WriteString(fmt.Sprintf("%v: %v,\n", b, toID)) -// } -// } -// buf.WriteString("},\n") -// } -// } - -// return buf.String() -// } - -// func lexerNFA(regexs []string) automata.NFA { -// nfas := make([]*automata.NFA, 0) -// for i, regex := range regexs { -// nfa := parse(regex) -// (&nfa).SetRegexID(automata.RegexID(i + 1)) -// nfas = append(nfas, &nfa) -// } - -// nfa := *nfas[0] -// for _, n := range nfas[1:] { -// nfa = nfa.SumWithRegexID(*n) -// } - -// return nfa -// } - -// func lexerDFA(regexs []string) automata.DFA { -// nfa := lexerNFA(regexs) - -// return nfa.ToImNFA().ToDFA().LexerMinimize().RemoveBH() -// } - -// func genRegexActions(actions []string) string { - -// var buf bytes.Buffer -// for i, v := range actions { -// buf.WriteString(fmt.Sprintf("case %v:\n", i+1)) -// buf.WriteString(v + "\n") -// buf.WriteString("goto yystart\n") -// } - -// return buf.String() -// } - -// func parse(regex string) automata.NFA { -// lex := regexp.NewLexer(regex) -// tokens := lex.Scan() -// parser := regexp.NewParser(tokens) -// ast, _ := parser.Parse() -// gen := regexp.NewCodeGenerator() -// ast.Accept(gen) - -// return gen.GetNFA() -// } +import ( + "bufio" + "bytes" + "fmt" + "io" + "log" + "os" + "text/template" + + "github.com/goropikari/tlex/automata" + "github.com/goropikari/tlex/collection" + "github.com/goropikari/tlex/compiler/regexp" + "golang.org/x/tools/imports" +) + +type LexerTemplate struct { + PackageName string + EmbeddedTmpl string + StateIDToRegexIDTmpl string + FinStatesTmpl string + TransitionTableTmpl string + RegexActionsTmpl string + UserCodeTmpl string +} + +func Generate(r *bufio.Reader, pkgName string, outfile string) { + // parse lexer configuration + parser := NewParser(r) + def, rules, userCode := parser.Parse() + + // compile regex and generate DFA + regexs := make([]string, 0) + actions := make([]string, 0) + for _, v := range rules { + regexs = append(regexs, v[0]) + actions = append(actions, v[1]) + } + dfa := lexerDFA(regexs) + oldstIDToNewStID := make(map[automata.StateID]automata.StateID) + id := automata.StateID(1) // state id = 0 is reserved for dead state. + oldstIDToNewStID[dfa.GetInitState()] = automata.StateID(id) + id++ + for _, st := range dfa.GetStates() { + if st == dfa.GetInitState() { + continue + } + oldstIDToNewStID[st] = id + id++ + } + idToRegexID := make([]automata.RegexID, id) + newStIDToOldStID := make([]automata.StateID, id) + for oldid, newid := range oldstIDToNewStID { + idToRegexID[newid] = dfa.GetRegexID(oldid) + newStIDToOldStID[newid] = oldid + } + + // generate lexer file + embeddedTmpl := def + stateIDToRegexIDTmpl := genStIdToRegexID(idToRegexID) + finStatesTmpl := genFinStates(newStIDToOldStID, dfa.GetFinStates()) + transitionTableTmpl := genTransitionTable(oldstIDToNewStID, newStIDToOldStID, dfa.GetTransitionTable()) + regexActionsTmpl := genRegexActions(actions) + userCodeTmpl := userCode + + lexCfg := LexerTemplate{ + PackageName: pkgName, + EmbeddedTmpl: embeddedTmpl, + StateIDToRegexIDTmpl: stateIDToRegexIDTmpl, + FinStatesTmpl: finStatesTmpl, + TransitionTableTmpl: transitionTableTmpl, + RegexActionsTmpl: regexActionsTmpl, + UserCodeTmpl: userCodeTmpl, + } + s := tmpl + t := template.Must(template.New("lexer").Parse(s)) + + var buf bytes.Buffer + if err := t.Execute(&buf, lexCfg); err != nil { + panic(err) + } + // t.Execute(os.Stdout, lexCfg) + + f, err := os.OpenFile(outfile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + log.Fatal(err) + } + data, err := imports.Process(outfile, buf.Bytes(), nil) + if err != nil { + panic(err) + } + io.Copy(f, bytes.NewReader(data)) + if err := f.Close(); err != nil { + log.Fatal(err) + } +} + +func genStIdToRegexID(idToRegexID []automata.RegexID) string { + var buf bytes.Buffer + for _, rid := range idToRegexID[1:] { + buf.WriteString(fmt.Sprintf("%v,\n", rid)) + } + + return buf.String() +} + +func genFinStates(newStIDToOldStID []automata.StateID, finStates *collection.Set[automata.StateID]) string { + var buf bytes.Buffer + for i, st := range newStIDToOldStID { + if finStates.Contains(st) { + buf.WriteString(fmt.Sprintf("%v: {},\n", i)) + } + } + + return buf.String() +} + +func genTransitionTable(oldIDToNewID map[automata.StateID]automata.StateID, newIDToOldID []automata.StateID, delta *automata.DFATransition) string { + var buf bytes.Buffer + for fromID := automata.StateID(1); fromID <= automata.StateID(len(oldIDToNewID)); fromID++ { + mp, ok := delta.GetMap(newIDToOldID[fromID]) + if !ok { + continue + } + buf.WriteString(fmt.Sprintf("%v: {\n", fromID)) + for intv, oldtoID := range mp { + toID := oldIDToNewID[oldtoID] + buf.WriteString(fmt.Sprintf("yyinterval{l: %v, r: %v}: %v,\n", intv.L, intv.R, toID)) + } + buf.WriteString("},\n") + } + + return buf.String() +} + +func lexerNFA(regexs []string) *automata.NFA { + nfas := make([]*automata.NFA, 0) + for i, regex := range regexs { + nfa := parse(regex) + nfa.SetRegexID(automata.RegexID(i + 1)) + nfas = append(nfas, nfa) + } + + nfa := nfas[0] + for _, n := range nfas[1:] { + nfa = nfa.Sum(n) + } + + return nfa +} + +func lexerDFA(regexs []string) *automata.DFA { + nfa := lexerNFA(regexs) + + return nfa.ToImdNFA().ToDFA().LexerMinimize() +} + +func genRegexActions(actions []string) string { + + var buf bytes.Buffer + for i, v := range actions { + buf.WriteString(fmt.Sprintf("case %v:\n", i+1)) + buf.WriteString(v + "\n") + buf.WriteString("goto yystart\n") + } + + return buf.String() +} + +func parse(regex string) *automata.NFA { + lex := regexp.NewLexer(regex) + tokens := lex.Scan() + parser := regexp.NewParser(tokens) + ast, _ := parser.Parse() + gen := regexp.NewCodeGenerator() + ast.Accept(gen) + + return gen.GetNFA() +} diff --git a/compiler/generator/generator_test.go b/compiler/generator/generator_test.go index 5fcf541..ab67cce 100644 --- a/compiler/generator/generator_test.go +++ b/compiler/generator/generator_test.go @@ -1,211 +1,212 @@ package generator_test -// func TestDFA_Accept(t *testing.T) { -// t.Parallel() +import ( + "fmt" + "testing" -// letter := "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)" -// digit := "(0|1|2|3|4|5|6|7|8|9)" -// digits := digit + digit + "*" -// id := fmt.Sprintf("%v(%v|%v)*", letter, letter, digit) + "github.com/goropikari/tlex/automata" + "github.com/goropikari/tlex/compiler/generator" + "github.com/stretchr/testify/require" +) -// regexs := []string{ -// digits, // regexID: 1 -// "if|then|begin|end|func", // regexID: 2 -// id, // regexID: 3 -// "\\+|\\-|\\*|/", // regexID: 4 -// "( |\n|\t|\r)", // regexID: 5 -// "\\.", // regexID: 6 -// ".", // regexID: 7 -// } +func TestDFA_Accept(t *testing.T) { + t.Parallel() -// tests := []struct { -// name string -// regexs []string -// given string -// // expected -// accept bool -// regexID automata.RegexID -// }{ -// { -// name: "digits", -// regexs: regexs, -// given: "123", -// accept: true, -// regexID: 1, -// }, -// { -// name: "keyword: if", -// regexs: regexs, -// given: "if", -// accept: true, -// regexID: 2, -// }, -// { -// name: "keyword: then", -// regexs: regexs, -// given: "then", -// accept: true, -// regexID: 2, -// }, -// { -// name: "keyword: begin", -// regexs: regexs, -// given: "begin", -// accept: true, -// regexID: 2, -// }, -// { -// name: "keyword: end", -// regexs: regexs, -// given: "end", -// accept: true, -// regexID: 2, -// }, -// { -// name: "keyword: func", -// regexs: regexs, -// given: "func", -// accept: true, -// regexID: 2, -// }, -// { -// name: "identifier", -// regexs: regexs, -// given: "ifhoge", -// accept: true, -// regexID: 3, -// }, -// { -// name: "identifier: hoge", -// regexs: regexs, -// given: "hoge", -// accept: true, -// regexID: 3, -// }, -// { -// name: "operator: +", -// regexs: regexs, -// given: "+", -// accept: true, -// regexID: 4, -// }, -// { -// name: "operator: -", -// regexs: regexs, -// given: "-", -// accept: true, -// regexID: 4, -// }, -// { -// name: "operator: *", -// regexs: regexs, -// given: "*", -// accept: true, -// regexID: 4, -// }, -// { -// name: "operator: /", -// regexs: regexs, -// given: "/", -// accept: true, -// regexID: 4, -// }, -// { -// name: "whitespace: space", -// regexs: regexs, -// given: " ", -// accept: true, -// regexID: 5, -// }, -// { -// name: "dot: .", -// regexs: regexs, -// given: ".", -// accept: true, -// regexID: 6, -// }, -// { -// name: "other: %", -// regexs: regexs, -// given: "%", -// accept: true, -// regexID: 7, -// }, -// { -// name: "dot2: ..", -// regexs: regexs, -// given: "..", -// accept: false, -// regexID: 0, -// }, -// { -// name: "identifier: start with digit", -// regexs: regexs, -// given: "0hoge", -// accept: false, -// regexID: 0, -// }, -// { -// name: "arbitrary character", -// regexs: []string{".*"}, -// given: "abc", -// accept: true, -// regexID: 1, -// }, -// } + letter := "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)" + digit := "(0|1|2|3|4|5|6|7|8|9)" + digits := digit + digit + "*" + id := fmt.Sprintf("%v(%v|%v)*", letter, letter, digit) -// for _, tt := range tests { -// tt := tt -// t.Run(tt.name, func(t *testing.T) { -// t.Parallel() + regexs := []string{ + digits, // regexID: 1 + "if|then|begin|end|func|あいう", // regexID: 2 + id, // regexID: 3 + "\\+|\\-|\\*|/", // regexID: 4 + "( |\n|\t|\r)", // regexID: 5 + "\\.", // regexID: 6 + ".", // regexID: 7 + } -// dfa := generator.LexerNFA(tt.regexs).ToImNFA().ToDFA().LexerMinimize().RemoveBH() + tests := []struct { + name string + regexs []string + given string + // expected + accept bool + regexID automata.RegexID + }{ + { + name: "digits", + regexs: regexs, + given: "123", + accept: true, + regexID: 1, + }, + { + name: "keyword: if", + regexs: regexs, + given: "if", + accept: true, + regexID: 2, + }, + { + name: "keyword: then", + regexs: regexs, + given: "then", + accept: true, + regexID: 2, + }, + { + name: "keyword: begin", + regexs: regexs, + given: "begin", + accept: true, + regexID: 2, + }, + { + name: "keyword: end", + regexs: regexs, + given: "end", + accept: true, + regexID: 2, + }, + { + name: "keyword: func", + regexs: regexs, + given: "func", + accept: true, + regexID: 2, + }, + { + name: "keyword: unicode", + regexs: regexs, + given: "あいう", + accept: true, + regexID: 2, + }, + { + name: "identifier", + regexs: regexs, + given: "ifhoge", + accept: true, + regexID: 3, + }, + { + name: "identifier: hoge", + regexs: regexs, + given: "hoge", + accept: true, + regexID: 3, + }, + { + name: "operator: +", + regexs: regexs, + given: "+", + accept: true, + regexID: 4, + }, + { + name: "operator: -", + regexs: regexs, + given: "-", + accept: true, + regexID: 4, + }, + { + name: "operator: *", + regexs: regexs, + given: "*", + accept: true, + regexID: 4, + }, + { + name: "operator: /", + regexs: regexs, + given: "/", + accept: true, + regexID: 4, + }, + { + name: "whitespace: space", + regexs: regexs, + given: " ", + accept: true, + regexID: 5, + }, + { + name: "dot: .", + regexs: regexs, + given: ".", + accept: true, + regexID: 6, + }, + { + name: "other: %", + regexs: regexs, + given: "%", + accept: true, + regexID: 7, + }, + { + name: "dot2: ..", + regexs: regexs, + given: "..", + accept: false, + regexID: 0, + }, + { + name: "identifier: start with digit", + regexs: regexs, + given: "0hoge", + accept: false, + regexID: 0, + }, + { + name: "arbitrary character", + regexs: []string{".*"}, + given: "abc", + accept: true, + regexID: 1, + }, + } -// regexID, accept := dfa.Accept(tt.given) + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() -// require.Equal(t, tt.accept, accept) -// require.Equal(t, tt.regexID, regexID) -// }) -// } -// } + dfa := generator.LexerNFA(tt.regexs).ToImdNFA().ToDFA().LexerMinimize() -// func TestDot(t *testing.T) { -// // // generate dot file -// s, _ := generator.LexerNFA([]string{"a", "abb", "a*bb*"}). -// // ToImNFA(). -// // ToDFA(). -// // LexerMinimize(). -// // RemoveBH(). -// ToDot() -// // s, _ := generator.LexerNFA([]string{ -// // "if|for|while|func|return", -// // "[a-zA-Z][a-zA-Z0-9]*", -// // "[1-9][0-9]*", -// // "[ \t\n\r]*", -// // "\\(", -// // "\\)", -// // "{", -// // "}", -// // "\\+|\\-|\\*|/|:=|==|!=", -// // ".", -// // }).ToImNFA().ToDFA().LexerMinimize().RemoveBH().ToDot() -// // s, _ := generator.LexerNFA([]string{".*"}). -// // ToImNFA(). -// // ToDFA(). -// // LexerMinimize(). -// // RemoveBH(). -// // ToDot() -// err := os.WriteFile("ex.dot", []byte(s), 0666) -// if err != nil { -// log.Fatal(err) -// } -// graph, err := graphviz.ParseBytes([]byte(s)) -// if err != nil { -// log.Fatal(err) -// } -// g := graphviz.New() -// if err := g.RenderFilename(graph, graphviz.PNG, "ex.png"); err != nil { -// log.Fatal(err) -// } + regexID, accept := dfa.Accept(tt.given) -// } + require.Equal(t, tt.accept, accept) + require.Equal(t, tt.regexID, regexID) + }) + } +} + +func TestDot(t *testing.T) { + // _, _ = generator.LexerNFA([]string{"a", "abb", "a*bb*"}). + // ToImdNFA(). + // ToDFA(). + // LexerMinimize(). + // ToDot() + generator.LexerNFA( + []string{ + "if|for|while|func|return", + "[a-zA-Z][a-zA-Z0-9]*", + "[1-9][0-9]*", + "[ \t\n\r]*", + "\\(", + "\\)", + "{", + "}", + "\\+|\\-|\\*|/|:=|==|!=", + ".", + }, + ). + ToImdNFA(). + ToDFA(). + LexerMinimize(). + ToDot() +} diff --git a/compiler/generator/parser.go b/compiler/generator/parser.go index 4b9b457..0810456 100644 --- a/compiler/generator/parser.go +++ b/compiler/generator/parser.go @@ -34,11 +34,11 @@ func (p *Parser) Parse() (def string, rules [][]string, userCode string) { } func (p *Parser) parseRules(ruleStr string) [][]string { - r := bytes.NewBufferString("\n" + ruleStr) + buf := bytes.NewBufferString("\n" + ruleStr) rules := make([][]string, 0) for { - rule := p.readRule(r) - blk := p.readBlock(r) + rule := p.readRule(buf) + blk := p.readBlock(buf) if blk == "" { break } @@ -48,25 +48,25 @@ func (p *Parser) parseRules(ruleStr string) [][]string { return rules } -func (p *Parser) readRule(r io.ByteReader) string { - var prev byte +func (p *Parser) readRule(reader io.RuneReader) string { + var prev rune for { - b, err := r.ReadByte() + r, _, err := reader.ReadRune() if err != nil { if errors.Is(err, io.EOF) { return "" } panic(err) } - if prev == '\n' && b == '"' { + if prev == '\n' && r == '"' { break } - prev = b + prev = r } - bs := make([]byte, 0) + rs := make([]rune, 0) for { - b, err := r.ReadByte() + r, _, err := reader.ReadRune() if err != nil { if errors.Is(err, io.EOF) { break @@ -74,74 +74,74 @@ func (p *Parser) readRule(r io.ByteReader) string { panic(err) } - switch b { + switch r { case '\\': if prev == '\\' { - bs = append(bs, b) + rs = append(rs, r) prev = 0 continue } - prev = b + prev = r continue case 'n': if prev == '\\' { - bs = append(bs, '\n') + rs = append(rs, '\n') prev = 0 continue } case 'r': if prev == '\\' { - bs = append(bs, '\r') + rs = append(rs, '\r') prev = 0 continue } case 't': if prev == '\\' { - bs = append(bs, '\t') + rs = append(rs, '\t') prev = 0 continue } case '"': if prev == '\\' { - prev = b - bs = append(bs, b) + prev = r + rs = append(rs, r) continue } - return string(bs) + return string(rs) } - prev = b - bs = append(bs, b) + prev = r + rs = append(rs, r) } return "" } -func (p *Parser) readBlock(r io.ByteReader) string { +func (p *Parser) readBlock(reader io.RuneReader) string { for { - b, err := r.ReadByte() + r, _, err := reader.ReadRune() if err != nil { if errors.Is(err, io.EOF) { return "" } panic(err) } - if b == '{' { + if r == '{' { break } } nparen := 1 - bs := []byte{'{'} + rs := []rune{'{'} for { - b, err := r.ReadByte() + r, _, err := reader.ReadRune() if err != nil { if errors.Is(err, io.EOF) { break } panic(err) } - bs = append(bs, b) - switch b { + rs = append(rs, r) + switch r { case '{': // コメント, 文字列中に { が使われていたときはインクリメントしない処理が本来は必要 nparen++ @@ -149,7 +149,7 @@ func (p *Parser) readBlock(r io.ByteReader) string { // コメント, 文字列中に { が使われていたときはデクリメントしない処理が本来は必要 nparen-- if nparen == 0 { - return string(bs) + return string(rs) } } } diff --git a/compiler/generator/template.go b/compiler/generator/template.go index e30020d..1ecc305 100644 --- a/compiler/generator/template.go +++ b/compiler/generator/template.go @@ -22,28 +22,42 @@ var ( // state id to regex id var yyStateIDToRegexID = []yyRegexID{ - 0, // state 0 は BH state - {{ .StateIDToRegexIDTmpl }} + 0, // state 0 is dead state + {{ .StateIDToRegexIDTmpl }} } var yyFinStates = map[yyStateID]struct{}{ - {{ .FinStatesTmpl }} + {{ .FinStatesTmpl }} } -var yyTransitionTable = map[yyStateID]map[byte]yyStateID{ - {{ .TransitionTableTmpl }} +type yyinterval struct { + l int + r int } -func yyNextStep(id yyStateID, b byte) yyStateID { +func (x yyinterval) overlap(y yyinterval) bool { + return y.l <= x.r && x.l <= y.r +} + +var yyTransitionTable = map[yyStateID]map[yyinterval]yyStateID{ + {{ .TransitionTableTmpl }} +} + +func yyNextStep(id yyStateID, r rune) yyStateID { if mp, ok := yyTransitionTable[id]; ok { - return mp[b] + t := yyinterval{l: int(r), r: int(r)} + for intv, sid := range mp { + if intv.overlap(t) { + return sid + } + } } return 0 } type yyLexer struct { - rs io.ReadSeeker + rs RuneReadSeeker beginPos int finPos int currPos int @@ -52,7 +66,12 @@ type yyLexer struct { YYText string } -func New(rs io.ReadSeeker) *yyLexer { +type RuneReadSeeker interface { + io.ReadSeeker + io.RuneScanner +} + +func New(rs RuneReadSeeker) *yyLexer { return &yyLexer{ rs: rs, beginPos: 0, @@ -63,23 +82,22 @@ func New(rs io.ReadSeeker) *yyLexer { } } -func (yylex *yyLexer) currByte() (byte, error) { - b := make([]byte, 1) - if _, err := yylex.rs.Read(b); err != nil { - return 0, err +func (yylex *yyLexer) currRune() (rune, int, error) { + ru, size, err := yylex.rs.ReadRune() + if err != nil { + return 0, 0, err } - if _, err := yylex.rs.Seek(int64(yylex.currPos), io.SeekStart); err != nil { - return 0, err + if err := yylex.rs.UnreadRune(); err != nil { + return 0, 0, err } - - return b[0], nil + return ru, size, nil } func (yylex *yyLexer) Next() (int, error) { yyEofCnt := 0 yystart: for { - yyb, err := yylex.currByte() + yyr, yysize, err := yylex.currRune() if err != nil { if errors.Is(err, io.EOF) { yyEofCnt++ @@ -90,9 +108,16 @@ yystart: return 0, err } finProcess: - yyNxStID := yyNextStep(yylex.currStateID, yyb) + yyNxStID := yyNextStep(yylex.currStateID, yyr) if yyNxStID == 0 { - yydata := make([]byte, yylex.finPos+1-yylex.beginPos) + if _, err := yylex.rs.Seek(int64(yylex.finPos), io.SeekStart); err != nil { + return 0, err + } + _, lastSize, err := yylex.currRune() + if err != nil { + return 0, err + } + yydata := make([]byte, yylex.finPos+lastSize-yylex.beginPos) if _, err := yylex.rs.Seek(int64(yylex.beginPos), io.SeekStart); err != nil { return 0, err } @@ -101,7 +126,7 @@ yystart: } yylex.YYText = string(yydata) YYText = yylex.YYText - yyNewCurrPos := yylex.finPos + 1 + yyNewCurrPos := yylex.finPos + lastSize yylex.beginPos = yyNewCurrPos yylex.finPos = yyNewCurrPos yylex.currPos = yyNewCurrPos @@ -112,7 +137,7 @@ yystart: switch regexID { case 0: return 0, ErrYYScan - {{ .RegexActionsTmpl }} + {{ .RegexActionsTmpl }} default: return 0, ErrYYScan } @@ -122,7 +147,7 @@ yystart: yylex.finRegexID = yyStateIDToRegexID[yyNxStID] } yylex.currStateID = yyNxStID - yylex.currPos++ + yylex.currPos+=yysize if _, err := yylex.rs.Seek(int64(yylex.currPos), io.SeekStart); err != nil { return 0, err } diff --git a/compiler/regexp/ast_printer.go b/compiler/regexp/ast_printer.go index 52710cb..7c41fc2 100644 --- a/compiler/regexp/ast_printer.go +++ b/compiler/regexp/ast_printer.go @@ -57,6 +57,16 @@ func (p *ASTPrinter) VisitDotExpr(expr DotExpr) { p.str += s } +func (p *ASTPrinter) VisitRangeExpr(expr RangeExpr) { + p.str += p.header("RangeExpr") + strs := make([]string, 0) + strs = append(strs, fmt.Sprintf("%v%v", repTab(p.depth+1), expr.neg)) + for _, intv := range expr.intvs { + strs = append(strs, fmt.Sprintf("%v[%v-%v]", repTab(p.depth+1), intv.l, intv.r)) + } + p.str += strings.Join(strs, "\n") + "\n" +} + func (p *ASTPrinter) header(name string) string { s := repTab(p.depth) s += name + "\n" diff --git a/compiler/regexp/code_generator.go b/compiler/regexp/code_generator.go index bf2e55a..4247dd7 100644 --- a/compiler/regexp/code_generator.go +++ b/compiler/regexp/code_generator.go @@ -1,71 +1,102 @@ package regexp -// type CodeGenerator struct { -// nfa automata.NFA -// } - -// func NewCodeGenerator() *CodeGenerator { -// return &CodeGenerator{} -// } - -// func (gen *CodeGenerator) GetNFA() automata.NFA { -// return gen.nfa -// } - -// func (gen *CodeGenerator) VisitSumExpr(expr SumExpr) { -// expr.lhs.Accept(gen) -// lhs := gen.nfa -// expr.rhs.Accept(gen) -// rhs := gen.nfa - -// gen.nfa = lhs.Sum(rhs) -// } - -// func (gen *CodeGenerator) VisitConcatExpr(expr ConcatExpr) { -// expr.lhs.Accept(gen) -// lhs := gen.nfa -// expr.rhs.Accept(gen) -// rhs := gen.nfa - -// gen.nfa = lhs.Concat(rhs) -// } - -// func (gen *CodeGenerator) VisitStarExpr(expr StarExpr) { -// expr.expr.Accept(gen) -// gen.nfa = gen.nfa.Star() -// } - -// func (gen *CodeGenerator) VisitSymbolExpr(expr SymbolExpr) { -// from := automata.NewState(automata.StateID(guid.New())) -// to := automata.NewState(automata.StateID(guid.New())) - -// gen.nfa = automata.NewNFA( -// collection.NewSet[automata.State]().Insert(from).Insert(to), -// automata.NFATransition{ -// collection.NewPair(from, expr.sym): collection.NewSet[automata.State]().Insert(to), -// }, -// collection.NewSet[automata.State]().Insert(from), -// collection.NewSet[automata.State]().Insert(to), -// ) -// } - -// func (gen *CodeGenerator) VisitDotExpr(expr DotExpr) { -// from := automata.NewState(automata.StateID(guid.New())) -// trans := make(automata.NFATransition) -// states := collection.NewSet[automata.State]().Insert(from) -// finStates := collection.NewSet[automata.State]() - -// for _, b := range automata.SupportedChars { -// to := automata.NewState(automata.StateID(guid.New())) -// states = states.Insert(to) -// finStates = finStates.Insert(to) -// trans[collection.NewPair(from, b)] = collection.NewSet[automata.State]().Insert(to) -// } - -// gen.nfa = automata.NewNFA( -// states, -// trans, -// collection.NewSet[automata.State]().Insert(from), -// finStates, -// ) -// } +import ( + "unicode" + + "github.com/goropikari/tlex/automata" + "github.com/goropikari/tlex/collection" +) + +type CodeGenerator struct { + nfa *automata.NFA +} + +func NewCodeGenerator() *CodeGenerator { + return &CodeGenerator{} +} + +func (gen *CodeGenerator) GetNFA() *automata.NFA { + return gen.nfa +} + +func (gen *CodeGenerator) VisitSumExpr(expr SumExpr) { + expr.lhs.Accept(gen) + lhs := gen.nfa + expr.rhs.Accept(gen) + rhs := gen.nfa + + gen.nfa = lhs.Sum(rhs) +} + +func (gen *CodeGenerator) VisitConcatExpr(expr ConcatExpr) { + expr.lhs.Accept(gen) + lhs := gen.nfa + expr.rhs.Accept(gen) + rhs := gen.nfa + + gen.nfa = lhs.Concat(rhs) +} + +func (gen *CodeGenerator) VisitStarExpr(expr StarExpr) { + expr.expr.Accept(gen) + gen.nfa = gen.nfa.Star() +} + +func (gen *CodeGenerator) VisitSymbolExpr(expr SymbolExpr) { + from := automata.NewStateID() + to := automata.NewStateID() + gen.nfa = automata.NewNFA( + collection.NewSet[automata.StateID]().Insert(from).Insert(to), + automata.NewEpsilonTransition(), + automata.NewNFATransition().Set(from, automata.NewInterval(int(expr.sym), int(expr.sym)), to), + collection.NewSet[automata.StateID]().Insert(from), + collection.NewSet[automata.StateID]().Insert(to), + ) +} + +func (gen *CodeGenerator) VisitRangeExpr(expr RangeExpr) { + from := automata.NewStateID() + to := automata.NewStateID() + trans := automata.NewNFATransition() + + intvs := expr.intervals() + for _, intv := range intvs { + trans.Set(from, intv, to) + } + + gen.nfa = automata.NewNFA( + collection.NewSet[automata.StateID]().Insert(from).Insert(to), + automata.NewEpsilonTransition(), + trans, + collection.NewSet[automata.StateID]().Insert(from), + collection.NewSet[automata.StateID]().Insert(to), + ) +} + +var dotRanges = []automata.Interval{ + automata.NewInterval(0, 9), + automata.NewInterval(11, int(unicode.MaxRune)), +} + +func (gen *CodeGenerator) VisitDotExpr(expr DotExpr) { + from := automata.NewStateID() + to := automata.NewStateID() + initStates := collection.NewSet[automata.StateID]().Insert(from) + trans := automata.NewNFATransition() + states := collection.NewSet[automata.StateID]().Insert(from).Insert(to) + finStates := collection.NewSet[automata.StateID]() + + for _, intv := range dotRanges { + states = states.Insert(to) + finStates = finStates.Insert(to) + trans.Set(from, intv, to) + } + + gen.nfa = automata.NewNFA( + states, + automata.NewEpsilonTransition(), + trans, + initStates, + finStates, + ) +} diff --git a/compiler/regexp/compiler.go b/compiler/regexp/compiler.go index 456974c..32ca835 100644 --- a/compiler/regexp/compiler.go +++ b/compiler/regexp/compiler.go @@ -1,13 +1,15 @@ package regexp -// func Compile(regexp string) automata.DFA { -// lex := NewLexer(regexp) -// tokens := lex.Scan() -// parser := NewParser(tokens) -// ast, _ := parser.Parse() -// gen := NewCodeGenerator() -// ast.Accept(gen) -// dfa := gen.GetNFA().ToDFA().LexerMinimize() +import "github.com/goropikari/tlex/automata" -// return dfa -// } +func Compile(regexp string) *automata.DFA { + lex := NewLexer(regexp) + tokens := lex.Scan() + parser := NewParser(tokens) + ast, _ := parser.Parse() + gen := NewCodeGenerator() + ast.Accept(gen) + dfa := gen.GetNFA().ToImdNFA().ToDFA().LexerMinimize() + + return dfa +} diff --git a/compiler/regexp/error.go b/compiler/regexp/error.go new file mode 100644 index 0000000..24fab09 --- /dev/null +++ b/compiler/regexp/error.go @@ -0,0 +1,5 @@ +package regexp + +import "errors" + +var ErrNotImplemented = errors.New("not implemented") diff --git a/compiler/regexp/lexer.go b/compiler/regexp/lexer.go index 33163cb..91c437e 100644 --- a/compiler/regexp/lexer.go +++ b/compiler/regexp/lexer.go @@ -26,10 +26,10 @@ const ( type Token struct { typ TokenType - val byte + val rune } -func NewToken(typ TokenType, val byte) Token { +func NewToken(typ TokenType, val rune) Token { return Token{typ: typ, val: val} } @@ -37,36 +37,36 @@ func (tok Token) GetType() TokenType { return tok.typ } -func (tok Token) GetByte() byte { +func (tok Token) GetRune() rune { return tok.val } type Lexer struct { - regexp []byte + regexp []rune tokens []Token pos int length int } func NewLexer(regexp string) *Lexer { - return &Lexer{regexp: []byte(regexp), pos: 0, length: len(regexp)} + return &Lexer{regexp: []rune(regexp), pos: 0, length: len(regexp)} } -func (lex *Lexer) peek() (byte, error) { +func (lex *Lexer) peek() (rune, error) { if lex.pos >= len(lex.regexp) { return 0, io.EOF } return lex.regexp[lex.pos], nil } -// func (lex *Lexer) next() (byte, error) { +// func (lex *Lexer) next() (rune, error) { // if lex.pos+1 >= lex.length { // return 0, io.EOF // } // return lex.regexp[lex.pos+1], nil // } -func (lex *Lexer) read() (byte, error) { +func (lex *Lexer) read() (rune, error) { b, err := lex.peek() if err != nil { return 0, err diff --git a/compiler/regexp/parser.go b/compiler/regexp/parser.go index 6643a06..d7d7af5 100644 --- a/compiler/regexp/parser.go +++ b/compiler/regexp/parser.go @@ -27,6 +27,7 @@ type NodeVisitor interface { VisitConcatExpr(ConcatExpr) VisitStarExpr(StarExpr) VisitSymbolExpr(SymbolExpr) + VisitRangeExpr(RangeExpr) VisitDotExpr(DotExpr) } @@ -86,11 +87,25 @@ func (p *Parser) sum() (RegexExpr, error) { return lhs, nil } +type interval struct { + l int + r int +} + +func newInterval(l, r int) interval { + return interval{l: l, r: r} +} + +func newIntervalRune(r rune) interval { + return newInterval(int(r), int(r)) +} + func (p *Parser) set() (RegexExpr, error) { neg := false - bs := make([]byte, 0) - var prev byte + var prev rune + deq := collection.NewDeque[interval]() + isFirst := true for { tok, err := p.peek() if err != nil { @@ -99,58 +114,55 @@ func (p *Parser) set() (RegexExpr, error) { switch tok.GetType() { case RSqBracketTokenType: if prev == '-' { - return nil, ErrParse + deq.PushBack(newIntervalRune('-')) } goto Out case NegationTokenType: - prev = tok.GetByte() - neg = true + ru := tok.GetRune() + if isFirst { + neg = true + } else { + deq.PushBack(newIntervalRune(ru)) + } + prev = ru case MinusTokenType: - prev = tok.GetByte() + if prev == '-' { + return nil, ErrParse + } + prev = tok.GetRune() default: - b := tok.GetByte() + ru := tok.GetRune() if prev == '-' { - from := bs[len(bs)-1] - if from > b { + if deq.Size() == 0 { return nil, ErrParse } - for t := from + 1; t < b; t++ { - bs = append(bs, t) + intv := deq.Back() + deq.PopBack() + if intv.l > int(ru) { + return nil, ErrParse } + intv.r = int(ru) + deq.PushBack(intv) + } else { + deq.PushBack(newIntervalRune(ru)) } - bs = append(bs, b) - prev = b + prev = ru } - _, _ = p.read() - } -Out: - var expr RegexExpr - if !neg { - expr = NewSymbolExpr(bs[0]) - if len(bs) == 1 { - return expr, nil - } - - for i := 1; i < len(bs); i++ { - rhs := NewSymbolExpr(bs[i]) - expr = NewSumExpr(expr, rhs) + if _, err := p.read(); err != nil { + return nil, err } - return expr, nil + isFirst = false } - ruSet := collection.NewSet[byte]() - for _, b := range bs { - ruSet.Insert(b) - } - for _, b := range automata.SupportedChars { - if !ruSet.Contains(b) { - if expr == nil { - expr = NewSymbolExpr(b) - } else { - expr = NewSumExpr(expr, NewSymbolExpr(b)) - } - } +Out: + var expr RegexExpr + intvs := make([]interval, 0) + for deq.Size() > 0 { + intv := deq.Front() + deq.PopFront() + intvs = append(intvs, intv) } + expr = NewRangeExpr(neg, intvs) return expr, nil } @@ -208,7 +220,7 @@ func (p *Parser) primary() (RegexExpr, error) { switch s.GetType() { case SymbolTokenType: - return NewSymbolExpr(s.GetByte()), nil + return NewSymbolExpr(s.GetRune()), nil case DotTokenType: return NewDotExpr(), nil case LParenTokenType: @@ -279,10 +291,10 @@ func (expr StarExpr) Accept(v NodeVisitor) { } type SymbolExpr struct { - sym byte + sym rune } -func NewSymbolExpr(sym byte) SymbolExpr { +func NewSymbolExpr(sym rune) SymbolExpr { return SymbolExpr{sym: sym} } @@ -290,6 +302,56 @@ func (expr SymbolExpr) Accept(v NodeVisitor) { v.VisitSymbolExpr(expr) } +type RangeExpr struct { + neg bool + intvs []interval +} + +func NewRangeExpr(neg bool, intvs []interval) RangeExpr { + return RangeExpr{neg: neg, intvs: intvs} +} + +func (expr RangeExpr) Accept(v NodeVisitor) { + v.VisitRangeExpr(expr) +} + +func (expr RangeExpr) intervals() []automata.Interval { + tmpIntvs := make([]automata.Interval, 0) + for _, intv := range expr.intvs { + tmpIntvs = append(tmpIntvs, automata.NewInterval(intv.l, intv.r)) + } + if !expr.neg { + return tmpIntvs + } + + deq := collection.NewDeque[automata.Interval]() + for _, intv := range automata.UnicodeRange { + deq.PushBack(intv) + } + + intvs := make([]automata.Interval, 0) + for deq.Size() > 0 { + fr := deq.Front() + deq.PopFront() + ok := true + for _, intv := range tmpIntvs { + if fr.Overlap(intv) { + ok = false + ls := fr.Difference(intv) + for _, t := range ls { + deq.PushBack(t) + } + break + } + } + if ok { + intvs = append(intvs, fr) + } + } + + return intvs +} + type DotExpr struct { } diff --git a/compiler/regexp/parser_test.go b/compiler/regexp/parser_test.go index 4f11e9f..8ba62ae 100644 --- a/compiler/regexp/parser_test.go +++ b/compiler/regexp/parser_test.go @@ -95,7 +95,7 @@ func TestParser_Lexer_Parse(t *testing.T) { }{ { name: "lexer & parser test", - given: "a(b|c*)de|fg*hi|.*", + given: "a(b|c*)de|fg*hi|.*|[^あ-おa-zαβ]", expected: ` SumExpr ConcatExpr @@ -126,9 +126,16 @@ SumExpr h SymbolExpr i - StarExpr - DotExpr - . + SumExpr + StarExpr + DotExpr + . + RangeExpr + true + [12354-12362] + [97-122] + [945-945] + [946-946] `, }, { diff --git a/go.mod b/go.mod index 6774b2a..cfe531e 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/stretchr/testify v1.8.0 golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e golang.org/x/image v0.0.0-20220902085622-e7cb96979f69 + golang.org/x/tools v0.1.12 ) require ( @@ -15,5 +16,7 @@ require ( github.com/fogleman/gg v1.3.0 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect + golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 6f162ed..319cd0c 100644 --- a/go.sum +++ b/go.sum @@ -28,12 +28,18 @@ golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2F golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.0.0-20220902085622-e7cb96979f69 h1:Lj6HJGCSn5AjxRAH2+r35Mir4icalbqku+CLUtjnvXY= golang.org/x/image v0.0.0-20220902085622-e7cb96979f69/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index 5d43c92..779dc5c 100644 --- a/main.go +++ b/main.go @@ -1,27 +1,37 @@ package main -// var ( -// pkgName string -// srcfile string -// outfile string -// ) +import ( + "bufio" + "flag" + "fmt" + "log" + "os" -// func main() { -// // go get github.com/pkg/profile -// // go tool pprof -http=":8081" cpu.pprof -// // defer profile.Start(profile.ProfilePath(".")).Stop() -// flag.StringVar(&pkgName, "pkg", "main", "generated go file package name") -// flag.StringVar(&srcfile, "src", "", "input lexer configuration file") -// flag.StringVar(&outfile, "o", "tlex.yy.go", "generated file path") -// flag.Parse() -// if srcfile == "" { -// fmt.Fprint(os.Stderr, "srcfile is required.\n") -// } + "github.com/goropikari/tlex/compiler/generator" +) -// f, err := os.OpenFile(srcfile, os.O_RDONLY, 0644) -// if err != nil { -// log.Fatal(err) -// } -// r := bufio.NewReader(f) -// generator.Generate(r, pkgName, outfile) -// } +var ( + pkgName string + srcfile string + outfile string +) + +func main() { + // go get github.com/pkg/profile + // go tool pprof -http=":8081" cpu.pprof + // defer profile.Start(profile.ProfilePath(".")).Stop() + flag.StringVar(&pkgName, "pkg", "main", "generated go file package name") + flag.StringVar(&srcfile, "src", "", "input lexer configuration file") + flag.StringVar(&outfile, "o", "tlex.yy.go", "generated file path") + flag.Parse() + if srcfile == "" { + fmt.Fprint(os.Stderr, "srcfile is required.\n") + } + + f, err := os.OpenFile(srcfile, os.O_RDONLY, 0644) + if err != nil { + log.Fatal(err) + } + r := bufio.NewReader(f) + generator.Generate(r, pkgName, outfile) +} diff --git a/sample/README.md b/sample/README.md index 1c003ee..de6cfa9 100644 --- a/sample/README.md +++ b/sample/README.md @@ -1,4 +1,4 @@ -Lexical analyzer generator configuration file is following structure. +The configuration file of tlex is following structure. ``` %{ @@ -13,5 +13,3 @@ USER CODE (OPTIONAL) ``` `yy` and `YY` prefix variable names are reserved word for generated lexical analyzer file. - - diff --git a/sample/main.go b/sample/main.go index 56faf41..e2f0e5d 100644 --- a/sample/main.go +++ b/sample/main.go @@ -21,6 +21,7 @@ const ( LBracket RBracket Operator + Hiragana ) type yyStateID = int @@ -34,35 +35,36 @@ var ( // state id to regex id var yyStateIDToRegexID = []yyRegexID{ - 0, // state 0 は BH state + 0, // state 0 is dead state 5, - 11, - 11, - 9, - 4, 3, - 1, + 10, 3, 3, - 10, 3, - 2, - 7, 3, 3, - 5, 3, + 1, 3, - 8, + 11, 3, - 6, + 2, 3, + 4, 3, 3, + 5, + 12, 3, + 7, + 8, + 12, 3, 3, 3, + 6, + 9, 3, 3, 3, @@ -71,6 +73,7 @@ var yyStateIDToRegexID = []yyRegexID{ } var yyFinStates = map[yyStateID]struct{}{ + 0: {}, 1: {}, 2: {}, 3: {}, @@ -104,1643 +107,828 @@ var yyFinStates = map[yyStateID]struct{}{ 31: {}, 32: {}, 33: {}, + 34: {}, +} + +type yyinterval struct { + l int + r int +} + +func (x yyinterval) overlap(y yyinterval) bool { + return y.l <= x.r && x.l <= y.r } -var yyTransitionTable = map[yyStateID]map[byte]yyStateID{ +var yyTransitionTable = map[yyStateID]map[yyinterval]yyStateID{ 1: { - 1: 2, - 2: 2, - 3: 2, - 4: 2, - 5: 2, - 6: 2, - 7: 2, - 8: 2, - 9: 16, - 10: 16, - 11: 2, - 12: 2, - 13: 16, - 14: 2, - 15: 2, - 16: 2, - 17: 2, - 18: 2, - 19: 2, - 20: 2, - 21: 2, - 22: 2, - 23: 2, - 24: 2, - 25: 2, - 26: 2, - 27: 2, - 28: 2, - 29: 2, - 30: 2, - 31: 2, - 32: 16, - 33: 3, - 34: 2, - 35: 2, - 36: 2, - 37: 2, - 38: 2, - 39: 2, - 40: 21, - 41: 13, - 42: 10, - 43: 10, - 44: 2, - 45: 10, - 46: 2, - 47: 10, - 48: 2, - 49: 5, - 50: 5, - 51: 5, - 52: 5, - 53: 5, - 54: 5, - 55: 5, - 56: 5, - 57: 5, - 58: 3, - 59: 2, - 60: 2, - 61: 3, - 62: 2, - 63: 2, - 64: 2, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 91: 2, - 92: 2, - 93: 2, - 94: 2, - 95: 2, - 96: 2, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 18, - 103: 6, - 104: 6, - 105: 32, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 22, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 26, - 120: 6, - 121: 6, - 122: 6, - 123: 19, - 124: 2, - 125: 4, - 126: 2, - 127: 2, + yyinterval{l: 13, r: 13}: 19, + yyinterval{l: 33, r: 33}: 24, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 14, r: 31}: 20, + yyinterval{l: 34, r: 39}: 20, + yyinterval{l: 91, r: 96}: 20, + yyinterval{l: 40, r: 40}: 28, + yyinterval{l: 58, r: 58}: 24, + yyinterval{l: 49, r: 51}: 16, + yyinterval{l: 10, r: 10}: 19, + yyinterval{l: 12437, r: 1114111}: 20, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 53, r: 53}: 16, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 44, r: 44}: 20, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 119, r: 119}: 25, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 61, r: 61}: 24, + yyinterval{l: 11, r: 12}: 20, + yyinterval{l: 48, r: 48}: 20, + yyinterval{l: 46, r: 46}: 20, + yyinterval{l: 123, r: 123}: 23, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 47, r: 47}: 3, + yyinterval{l: 55, r: 57}: 16, + yyinterval{l: 12353, r: 12436}: 12, + yyinterval{l: 125, r: 125}: 29, + yyinterval{l: 42, r: 42}: 3, + yyinterval{l: 9, r: 9}: 19, + yyinterval{l: 32, r: 32}: 19, + yyinterval{l: 0, r: 8}: 20, + yyinterval{l: 105, r: 105}: 27, + yyinterval{l: 102, r: 102}: 26, + yyinterval{l: 54, r: 54}: 16, + yyinterval{l: 59, r: 60}: 20, + yyinterval{l: 52, r: 52}: 16, + yyinterval{l: 62, r: 64}: 20, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 114, r: 114}: 21, + yyinterval{l: 45, r: 45}: 3, + yyinterval{l: 124, r: 124}: 20, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 126, r: 12352}: 20, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 41, r: 41}: 22, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 43, r: 43}: 3, }, - 3: { - 61: 10, + 2: { + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 112, r: 113}: 2, + }, + 4: { + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 117, r: 117}: 13, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 103, r: 103}: 2, }, 5: { - 48: 5, - 49: 5, - 50: 5, - 51: 5, - 52: 5, - 53: 5, - 54: 5, - 55: 5, - 56: 5, - 57: 5, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 52, r: 52}: 14, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 109, r: 109}: 2, }, 6: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 97, r: 97}: 15, }, 7: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 99, r: 99}: 10, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 118, r: 118}: 2, }, 8: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 28, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 101, r: 101}: 10, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 65, r: 90}: 2, }, 9: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 27, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 110, r: 110}: 10, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 108, r: 108}: 2, + }, + 10: { + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 110, r: 110}: 2, }, 11: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 7, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 116, r: 116}: 14, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 53, r: 53}: 2, }, 12: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 12353, r: 12436}: 12, + }, + 13: { + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 114, r: 114}: 9, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 115, r: 115}: 2, }, 14: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 29, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 111, r: 111}: 2, }, 15: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 30, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 116, r: 116}: 18, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 97, r: 97}: 2, }, 16: { - 9: 16, - 10: 16, - 13: 16, - 32: 16, + yyinterval{l: 52, r: 52}: 16, + yyinterval{l: 53, r: 53}: 16, + yyinterval{l: 54, r: 54}: 16, + yyinterval{l: 55, r: 57}: 16, + yyinterval{l: 48, r: 48}: 16, + yyinterval{l: 49, r: 51}: 16, }, 17: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 12, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 116, r: 116}: 4, }, 18: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 24, - 109: 6, - 110: 6, - 111: 31, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 33, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 54, r: 54}: 5, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 110, r: 110}: 2, }, - 20: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 7, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + 19: { + yyinterval{l: 9, r: 9}: 19, + yyinterval{l: 10, r: 10}: 19, + yyinterval{l: 13, r: 13}: 19, + yyinterval{l: 32, r: 32}: 19, }, - 22: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 14, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, - }, - 23: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 9, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + 21: { + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 101, r: 101}: 17, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 112, r: 113}: 2, }, 24: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 25, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 61, r: 61}: 3, }, 25: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 23, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 104, r: 104}: 34, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 116, r: 116}: 2, }, 26: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 15, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 111, r: 111}: 32, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 108, r: 108}: 33, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 117, r: 117}: 31, + yyinterval{l: 116, r: 116}: 2, }, 27: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 12, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, - }, - 28: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 7, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, - }, - 29: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 8, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 102, r: 102}: 10, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 110, r: 110}: 11, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 104, r: 104}: 2, }, 30: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 20, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 108, r: 108}: 8, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 119, r: 119}: 2, }, 31: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 6, - 111: 6, - 112: 6, - 113: 6, - 114: 7, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 110, r: 110}: 7, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 116, r: 116}: 2, }, 32: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 7, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 17, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 114, r: 114}: 10, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 111, r: 111}: 2, }, 33: { - 48: 6, - 49: 6, - 50: 6, - 51: 6, - 52: 6, - 53: 6, - 54: 6, - 55: 6, - 56: 6, - 57: 6, - 65: 6, - 66: 6, - 67: 6, - 68: 6, - 69: 6, - 70: 6, - 71: 6, - 72: 6, - 73: 6, - 74: 6, - 75: 6, - 76: 6, - 77: 6, - 78: 6, - 79: 6, - 80: 6, - 81: 6, - 82: 6, - 83: 6, - 84: 6, - 85: 6, - 86: 6, - 87: 6, - 88: 6, - 89: 6, - 90: 6, - 97: 6, - 98: 6, - 99: 6, - 100: 6, - 101: 6, - 102: 6, - 103: 6, - 104: 6, - 105: 6, - 106: 6, - 107: 6, - 108: 6, - 109: 6, - 110: 11, - 111: 6, - 112: 6, - 113: 6, - 114: 6, - 115: 6, - 116: 6, - 117: 6, - 118: 6, - 119: 6, - 120: 6, - 121: 6, - 122: 6, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 55, r: 57}: 2, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 105, r: 105}: 2, + yyinterval{l: 111, r: 111}: 6, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 53, r: 53}: 2, + }, + 34: { + yyinterval{l: 48, r: 48}: 2, + yyinterval{l: 102, r: 102}: 2, + yyinterval{l: 120, r: 122}: 2, + yyinterval{l: 111, r: 111}: 2, + yyinterval{l: 99, r: 99}: 2, + yyinterval{l: 114, r: 114}: 2, + yyinterval{l: 65, r: 90}: 2, + yyinterval{l: 117, r: 117}: 2, + yyinterval{l: 119, r: 119}: 2, + yyinterval{l: 106, r: 107}: 2, + yyinterval{l: 116, r: 116}: 2, + yyinterval{l: 54, r: 54}: 2, + yyinterval{l: 110, r: 110}: 2, + yyinterval{l: 100, r: 100}: 2, + yyinterval{l: 109, r: 109}: 2, + yyinterval{l: 52, r: 52}: 2, + yyinterval{l: 115, r: 115}: 2, + yyinterval{l: 53, r: 53}: 2, + yyinterval{l: 97, r: 97}: 2, + yyinterval{l: 103, r: 103}: 2, + yyinterval{l: 108, r: 108}: 2, + yyinterval{l: 105, r: 105}: 30, + yyinterval{l: 101, r: 101}: 2, + yyinterval{l: 118, r: 118}: 2, + yyinterval{l: 98, r: 98}: 2, + yyinterval{l: 49, r: 51}: 2, + yyinterval{l: 112, r: 113}: 2, + yyinterval{l: 104, r: 104}: 2, + yyinterval{l: 55, r: 57}: 2, }, } -func yyNextStep(id yyStateID, b byte) yyStateID { +func yyNextStep(id yyStateID, r rune) yyStateID { if mp, ok := yyTransitionTable[id]; ok { - return mp[b] + t := yyinterval{l: int(r), r: int(r)} + for intv, sid := range mp { + if intv.overlap(t) { + return sid + } + } } return 0 } type yyLexer struct { - rs io.ReadSeeker + rs RuneReadSeeker beginPos int finPos int currPos int @@ -1749,7 +937,12 @@ type yyLexer struct { YYText string } -func New(rs io.ReadSeeker) *yyLexer { +type RuneReadSeeker interface { + io.ReadSeeker + io.RuneScanner +} + +func New(rs RuneReadSeeker) *yyLexer { return &yyLexer{ rs: rs, beginPos: 0, @@ -1760,23 +953,22 @@ func New(rs io.ReadSeeker) *yyLexer { } } -func (yylex *yyLexer) currByte() (byte, error) { - b := make([]byte, 1) - if _, err := yylex.rs.Read(b); err != nil { - return 0, err +func (yylex *yyLexer) currRune() (rune, int, error) { + ru, size, err := yylex.rs.ReadRune() + if err != nil { + return 0, 0, err } - if _, err := yylex.rs.Seek(int64(yylex.currPos), io.SeekStart); err != nil { - return 0, err + if err := yylex.rs.UnreadRune(); err != nil { + return 0, 0, err } - - return b[0], nil + return ru, size, nil } func (yylex *yyLexer) Next() (int, error) { yyEofCnt := 0 yystart: for { - yyb, err := yylex.currByte() + yyr, yysize, err := yylex.currRune() if err != nil { if errors.Is(err, io.EOF) { yyEofCnt++ @@ -1787,9 +979,16 @@ yystart: return 0, err } finProcess: - yyNxStID := yyNextStep(yylex.currStateID, yyb) + yyNxStID := yyNextStep(yylex.currStateID, yyr) if yyNxStID == 0 { - yydata := make([]byte, yylex.finPos+1-yylex.beginPos) + if _, err := yylex.rs.Seek(int64(yylex.finPos), io.SeekStart); err != nil { + return 0, err + } + _, lastSize, err := yylex.currRune() + if err != nil { + return 0, err + } + yydata := make([]byte, yylex.finPos+lastSize-yylex.beginPos) if _, err := yylex.rs.Seek(int64(yylex.beginPos), io.SeekStart); err != nil { return 0, err } @@ -1798,7 +997,7 @@ yystart: } yylex.YYText = string(yydata) YYText = yylex.YYText - yyNewCurrPos := yylex.finPos + 1 + yyNewCurrPos := yylex.finPos + lastSize yylex.beginPos = yyNewCurrPos yylex.finPos = yyNewCurrPos yylex.currPos = yyNewCurrPos @@ -1859,6 +1058,11 @@ yystart: } goto yystart case 11: + { + return Hiragana, nil + } + goto yystart + case 12: { } goto yystart @@ -1872,7 +1076,7 @@ yystart: yylex.finRegexID = yyStateIDToRegexID[yyNxStID] } yylex.currStateID = yyNxStID - yylex.currPos++ + yylex.currPos += yysize if _, err := yylex.rs.Seek(int64(yylex.currPos), io.SeekStart); err != nil { return 0, err } @@ -1884,7 +1088,7 @@ yystart: // This part is optional func main() { program := ` -func foo123bar() int { +func foo123barあいう () int { x := 1 * 10 + 123 - 1000 / 5432 y := float64(x) @@ -1906,6 +1110,8 @@ func foo123bar() int { switch n { case Keyword: fmt.Println("Keyword") + case Hiragana: + fmt.Println("Hiragana") case Type: fmt.Println("Type") case Identifier: diff --git a/sample/main_test.go b/sample/main_test.go index b57daa3..b4398e9 100644 --- a/sample/main_test.go +++ b/sample/main_test.go @@ -9,7 +9,7 @@ import ( func TestLexer(t *testing.T) { program := ` -func foo000() int { +func foo000あいう() int { x := 1 * 10 + 123 - 1000 / 5432 return x @@ -26,6 +26,7 @@ func foo000() int { }{ {Keyword, "func"}, {Identifier, "foo000"}, + {Hiragana, "あいう"}, {LParen, "("}, {RParen, ")"}, {Type, "int"}, @@ -58,19 +59,19 @@ func foo000() int { } if len(expected) != len(ns) { - t.Error("type is different") + t.Error("the number of recognize token is different from expected length.") } for i, v := range expected { if v.typ != ns[i] { - t.Error("type is different") + t.Errorf("type is different: expected %v but %v", v.typ, ns[i]) } - } - if len(expected) != len(strs) { - t.Error("token is different") - } - for i, v := range expected { if v.text != strs[i] { - t.Error("token is different") + t.Errorf("token is different: expected %v but %v", v.text, strs[i]) } } + // for i, v := range expected { + // if v.text != strs[i] { + // t.Errorf("token is different: expected %v but %v", v.text, strs[i]) + // } + // } } diff --git a/sample/sample.l b/sample/sample.l index 4aabb2b..b2a10ac 100644 --- a/sample/sample.l +++ b/sample/sample.l @@ -18,6 +18,7 @@ const ( LBracket RBracket Operator + Hiragana ) %} @@ -33,13 +34,14 @@ const ( "{" { return LBracket, nil } "}" { return RBracket, nil } "\\+|\\-|\\*|/|:=|==|!=" { return Operator, nil } +"[ぁ-ゔ]*" { return Hiragana, nil } "." {} %% // This part is optional func main() { program := ` -func foo123bar() int { +func foo123barあいう () int { x := 1 * 10 + 123 - 1000 / 5432 y := float64(x) @@ -61,6 +63,8 @@ func foo123bar() int { switch n { case Keyword: fmt.Println("Keyword") + case Hiragana: + fmt.Println("Hiragana") case Type: fmt.Println("Type") case Identifier: diff --git a/sample/word_counter/README.md b/sample/word_counter/README.md index 79bb718..7e471f3 100644 --- a/sample/word_counter/README.md +++ b/sample/word_counter/README.md @@ -6,9 +6,9 @@ $ go run main.go ``` hello world hello tlex - +あいう αβγ ----------------- -number of lines: 2 -number of words: 4 -number of chars: 23 +number of lines: 3 +number of words: 6 +number of chars: 31 ``` diff --git a/sample/word_counter/main.go b/sample/word_counter/main.go index 6495338..18d27f1 100644 --- a/sample/word_counter/main.go +++ b/sample/word_counter/main.go @@ -26,172 +26,80 @@ var ( var yyStateIDToRegexID = []yyRegexID{ 0, // state 0 は BH state 1, + 2, + 1, 1, 3, - 2, } var yyFinStates = map[yyStateID]struct{}{ + 0: {}, 1: {}, 2: {}, 3: {}, 4: {}, + 5: {}, } -var yyTransitionTable = map[yyStateID]map[byte]yyStateID{ +type yyinterval struct { + l int + r int +} + +func (x yyinterval) overlap(y yyinterval) bool { + return y.l <= x.r && x.l <= y.r +} + +var yyTransitionTable = map[yyStateID]map[yyinterval]yyStateID{ 1: { - 9: 4, - 10: 3, - 13: 4, - 32: 4, - 40: 2, - 41: 2, - 48: 2, - 49: 2, - 50: 2, - 51: 2, - 52: 2, - 53: 2, - 54: 2, - 55: 2, - 56: 2, - 57: 2, - 65: 2, - 66: 2, - 67: 2, - 68: 2, - 69: 2, - 70: 2, - 71: 2, - 72: 2, - 73: 2, - 74: 2, - 75: 2, - 76: 2, - 77: 2, - 78: 2, - 79: 2, - 80: 2, - 81: 2, - 82: 2, - 83: 2, - 84: 2, - 85: 2, - 86: 2, - 87: 2, - 88: 2, - 89: 2, - 90: 2, - 97: 2, - 98: 2, - 99: 2, - 100: 2, - 101: 2, - 102: 2, - 103: 2, - 104: 2, - 105: 2, - 106: 2, - 107: 2, - 108: 2, - 109: 2, - 110: 2, - 111: 2, - 112: 2, - 113: 2, - 114: 2, - 115: 2, - 116: 2, - 117: 2, - 118: 2, - 119: 2, - 120: 2, - 121: 2, - 122: 2, + yyinterval{l: 9, r: 9}: 3, + yyinterval{l: 10, r: 10}: 5, + yyinterval{l: 11, r: 12}: 2, + yyinterval{l: 13, r: 13}: 3, + yyinterval{l: 14, r: 31}: 2, + yyinterval{l: 32, r: 32}: 4, + yyinterval{l: 33, r: 1114111}: 2, + yyinterval{l: 0, r: 8}: 2, }, 2: { - 40: 2, - 41: 2, - 48: 2, - 49: 2, - 50: 2, - 51: 2, - 52: 2, - 53: 2, - 54: 2, - 55: 2, - 56: 2, - 57: 2, - 65: 2, - 66: 2, - 67: 2, - 68: 2, - 69: 2, - 70: 2, - 71: 2, - 72: 2, - 73: 2, - 74: 2, - 75: 2, - 76: 2, - 77: 2, - 78: 2, - 79: 2, - 80: 2, - 81: 2, - 82: 2, - 83: 2, - 84: 2, - 85: 2, - 86: 2, - 87: 2, - 88: 2, - 89: 2, - 90: 2, - 97: 2, - 98: 2, - 99: 2, - 100: 2, - 101: 2, - 102: 2, - 103: 2, - 104: 2, - 105: 2, - 106: 2, - 107: 2, - 108: 2, - 109: 2, - 110: 2, - 111: 2, - 112: 2, - 113: 2, - 114: 2, - 115: 2, - 116: 2, - 117: 2, - 118: 2, - 119: 2, - 120: 2, - 121: 2, - 122: 2, + yyinterval{l: 33, r: 1114111}: 2, + yyinterval{l: 0, r: 8}: 2, + yyinterval{l: 9, r: 9}: 2, + yyinterval{l: 11, r: 12}: 2, + yyinterval{l: 13, r: 13}: 2, + yyinterval{l: 14, r: 31}: 2, + }, + 3: { + yyinterval{l: 9, r: 9}: 3, + yyinterval{l: 11, r: 12}: 2, + yyinterval{l: 13, r: 13}: 3, + yyinterval{l: 14, r: 31}: 2, + yyinterval{l: 32, r: 32}: 4, + yyinterval{l: 33, r: 1114111}: 2, + yyinterval{l: 0, r: 8}: 2, }, 4: { - 9: 4, - 13: 4, - 32: 4, + yyinterval{l: 32, r: 32}: 4, + yyinterval{l: 9, r: 9}: 4, + yyinterval{l: 13, r: 13}: 4, }, } -func yyNextStep(id yyStateID, b byte) yyStateID { +func yyNextStep(id yyStateID, r rune) yyStateID { if mp, ok := yyTransitionTable[id]; ok { - return mp[b] + t := yyinterval{l: int(r), r: int(r)} + for intv, sid := range mp { + if intv.overlap(t) { + return sid + } + } } return 0 } type yyLexer struct { - rs io.ReadSeeker + rs RuneReadSeeker beginPos int finPos int currPos int @@ -200,7 +108,12 @@ type yyLexer struct { YYText string } -func New(rs io.ReadSeeker) *yyLexer { +type RuneReadSeeker interface { + io.ReadSeeker + io.RuneScanner +} + +func New(rs RuneReadSeeker) *yyLexer { return &yyLexer{ rs: rs, beginPos: 0, @@ -211,23 +124,22 @@ func New(rs io.ReadSeeker) *yyLexer { } } -func (yylex *yyLexer) currByte() (byte, error) { - b := make([]byte, 1) - if _, err := yylex.rs.Read(b); err != nil { - return 0, err +func (yylex *yyLexer) currRune() (rune, int, error) { + ru, size, err := yylex.rs.ReadRune() + if err != nil { + return 0, 0, err } - if _, err := yylex.rs.Seek(int64(yylex.currPos), io.SeekStart); err != nil { - return 0, err + if err := yylex.rs.UnreadRune(); err != nil { + return 0, 0, err } - - return b[0], nil + return ru, size, nil } func (yylex *yyLexer) Next() (int, error) { yyEofCnt := 0 yystart: for { - yyb, err := yylex.currByte() + yyr, yysize, err := yylex.currRune() if err != nil { if errors.Is(err, io.EOF) { yyEofCnt++ @@ -238,9 +150,16 @@ yystart: return 0, err } finProcess: - yyNxStID := yyNextStep(yylex.currStateID, yyb) + yyNxStID := yyNextStep(yylex.currStateID, yyr) if yyNxStID == 0 { - yydata := make([]byte, yylex.finPos+1-yylex.beginPos) + if _, err := yylex.rs.Seek(int64(yylex.finPos), io.SeekStart); err != nil { + return 0, err + } + _, lastSize, err := yylex.currRune() + if err != nil { + return 0, err + } + yydata := make([]byte, yylex.finPos+lastSize-yylex.beginPos) if _, err := yylex.rs.Seek(int64(yylex.beginPos), io.SeekStart); err != nil { return 0, err } @@ -249,7 +168,7 @@ yystart: } yylex.YYText = string(yydata) YYText = yylex.YYText - yyNewCurrPos := yylex.finPos + 1 + yyNewCurrPos := yylex.finPos + lastSize yylex.beginPos = yyNewCurrPos yylex.finPos = yyNewCurrPos yylex.currPos = yyNewCurrPos @@ -262,13 +181,13 @@ yystart: return 0, ErrYYScan case 1: { - nc += len(YYText) - nw++ + nc++ } goto yystart case 2: { - nc++ + nc += len([]rune(YYText)) + nw++ } goto yystart case 3: @@ -287,7 +206,7 @@ yystart: yylex.finRegexID = yyStateIDToRegexID[yyNxStID] } yylex.currStateID = yyNxStID - yylex.currPos++ + yylex.currPos += yysize if _, err := yylex.rs.Seek(int64(yylex.currPos), io.SeekStart); err != nil { return 0, err } @@ -300,6 +219,7 @@ yystart: func main() { program := `hello world hello tlex +あいう αβγ ` fmt.Print(program) fmt.Println("-----------------") diff --git a/sample/word_counter/wc.l b/sample/word_counter/wc.l index 4d6cf43..8795be0 100644 --- a/sample/word_counter/wc.l +++ b/sample/word_counter/wc.l @@ -9,11 +9,11 @@ var nl = 0 %} %% -"[a-zA-Z0-9()]*" { - nc += len(YYText) +"[ \t\r]*" { nc++ } +"[^ \n]*" { + nc += len([]rune(YYText)) nw++ } -"[ \t\r]*" { nc++ } "\n" { nl++ nc++ @@ -24,6 +24,7 @@ var nl = 0 func main() { program := `hello world hello tlex +あいう αβγ ` fmt.Print(program) fmt.Println("-----------------")