-
Notifications
You must be signed in to change notification settings - Fork 0
/
article.go
137 lines (107 loc) · 3.4 KB
/
article.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package nlm
import (
"embed"
"fmt"
"io"
"math/rand"
"strings"
"gopkg.in/yaml.v3"
)
// articleFiles is the embedded file system holding the article sources
// matched by the pattern articles/*.txt. GetArticleList and GetNamedArticle
// read from it.
//go:embed articles/*.txt
var articleFiles embed.FS
// Article is a single article: the metadata from its YAML preamble plus the
// body text broken into paragraphs.
type Article struct {
// Title is read from the "title" key of the YAML preamble.
Title string `yaml:"title"`
// Subtitle is read from the "subtitle" key of the YAML preamble.
// GenerateArticle leaves it empty for generated articles.
Subtitle string `yaml:"subtitle"`
// Paragraphs holds the body text. LoadArticle fills it by splitting the
// body on blank lines ("\n\n") after the preamble has been parsed.
Paragraphs []string
}
// LoadArticle reads all of r and parses it into an Article.
//
// The input is expected to contain a YAML preamble placed between two
// "---\n" separators, followed by the article body:
//
//	---
//	title: Some title
//	subtitle: Some subtitle
//	---
//	First paragraph.
//
//	Second paragraph.
//
// The preamble is unmarshalled into the Article, and the body is split on
// blank lines ("\n\n") into Paragraphs; no trimming is performed. Only the
// first two separators are significant, so the body may itself contain
// further "---\n" sequences. Any text before the first separator is ignored.
//
// It returns an error if r cannot be read, if fewer than two separators are
// present, or if the preamble is not valid YAML. The YAML error is wrapped so
// callers can inspect it with errors.Is / errors.As.
func LoadArticle(r io.Reader) (*Article, error) {
	var buf strings.Builder
	if _, err := io.Copy(&buf, r); err != nil {
		return nil, err
	}

	// Cap the split at three pieces so that a "---\n" occurring inside the
	// body stays part of the body instead of invalidating the whole article.
	sections := strings.SplitN(buf.String(), "---\n", 3)
	if len(sections) != 3 {
		return nil, fmt.Errorf("article format is incorrect. Article needs to start with three dashes and have three dashes separating the preamble from the article")
	}

	var article Article
	if err := yaml.Unmarshal([]byte(sections[1]), &article); err != nil {
		return nil, fmt.Errorf("unable to parse preamble: %w", err)
	}

	// Assigned after unmarshalling so the body always determines Paragraphs.
	article.Paragraphs = strings.Split(sections[2], "\n\n")
	return &article, nil
}
// GetArticleList returns the names of all entries in the embedded "articles"
// directory. The names are bare file names (no directory prefix) and can be
// passed directly to GetNamedArticle.
//
// It returns an error, wrapping the underlying file system error, if the
// directory cannot be read.
func GetArticleList() ([]string, error) {
	entries, err := articleFiles.ReadDir("articles")
	if err != nil {
		return nil, fmt.Errorf("unable to load articles: %w", err)
	}

	filenames := make([]string, len(entries))
	for i, entry := range entries {
		filenames[i] = entry.Name()
	}
	return filenames, nil
}
// GetNamedArticle opens the embedded file articles/<name> and parses it with
// LoadArticle. name is a bare file name such as those returned by
// GetArticleList.
//
// Errors from opening or parsing the file are wrapped, so callers can still
// inspect the cause (for example errors.Is(err, fs.ErrNotExist) for an
// unknown article name).
func GetNamedArticle(name string) (*Article, error) {
	f, err := articleFiles.Open("articles/" + name)
	if err != nil {
		return nil, fmt.Errorf("error opening article: %w", err)
	}
	defer f.Close()

	article, err := LoadArticle(f)
	if err != nil {
		return nil, fmt.Errorf("error reading article: %w", err)
	}
	return article, nil
}
// ArticleBuilder bundles the Markov sources and starting prefixes used by
// GenerateArticle to produce random articles. NewArticleBuilder populates it
// from the embedded articles; the json tags give the field names used when
// the builder is encoded as JSON.
type ArticleBuilder struct {
// TitleMarkov is the Markov source built from the article titles.
TitleMarkov MarkovSource `json:"title_markov"`
// ContentMarkov is the Markov source built from the article paragraphs.
ContentMarkov MarkovSource `json:"content_markov"`
// TitleStarters holds the leading prefix of each article title; one is
// picked at random to start a generated title.
TitleStarters []string `json:"title_starters"`
// ContentStarters holds the leading prefix of each article paragraph; one
// is picked at random to start the generated content.
ContentStarters []string `json:"content_starters"`
}
// NewArticleBuilder creates a Markov article builder with a chain
// size of titlesize for titles and size for content.
//
// Every embedded article is loaded. Each title is fed to the title chain and
// terminated with EndOfDocument. Each paragraph is fed to the content chain,
// terminated with EndOfParagraph, except the last paragraph of an article,
// which is terminated with EndOfDocument. The leading prefix of every title
// and paragraph is recorded as a possible starting point for generation.
//
// It returns an error, wrapping the underlying cause, if the article list or
// any individual article cannot be loaded.
func NewArticleBuilder(titlesize, size int) (*ArticleBuilder, error) {
	articles, err := GetArticleList()
	if err != nil {
		return nil, fmt.Errorf("unable to get article list: %w", err)
	}

	titlemb := NewMarkovBuilder()
	contentmb := NewMarkovBuilder()

	// Deliberately non-nil: the builder carries json tags, and an empty
	// slice encodes as [] whereas a nil slice would encode as null.
	titleStarters := []string{}
	contentStarters := []string{}

	for _, articleName := range articles {
		article, err := GetNamedArticle(articleName)
		if err != nil {
			return nil, fmt.Errorf("unable to retrieve article %s: %w", articleName, err)
		}

		titleStarters = append(titleStarters, getPrefix(article.Title, titlesize))
		titlemb.AddText(article.Title, titlesize, EndOfDocument)

		last := len(article.Paragraphs) - 1
		for i, para := range article.Paragraphs {
			// Only the final paragraph closes the document; all others
			// merely close their paragraph.
			var endRune rune = EndOfParagraph
			if i == last {
				endRune = EndOfDocument
			}
			contentStarters = append(contentStarters, getPrefix(para, size))
			contentmb.AddText(para, size, endRune)
		}
	}

	return &ArticleBuilder{
		TitleMarkov:     titlemb.ConvertToSource(),
		ContentMarkov:   contentmb.ConvertToSource(),
		TitleStarters:   titleStarters,
		ContentStarters: contentStarters,
	}, nil
}
// getPrefix returns the first size runes of s as a string.
//
// The count is in runes, not bytes, so multi-byte characters are never cut in
// half. If s contains fewer than size runes, all of s is returned; a size of
// zero or less yields the empty string.
func getPrefix(s string, size int) string {
	if size <= 0 {
		return ""
	}
	r := []rune(s)
	// Clamp to the actual length: slicing past len(r) would either panic or,
	// when within the slice's capacity, pad the result with NUL runes.
	if size > len(r) {
		size = len(r)
	}
	return string(r[:size])
}
// GenerateArticle produces a random article from the builder's Markov
// sources.
//
// A starter is chosen at random from TitleStarters and passed to the title
// source together with titleSize; the first element of the generated text
// becomes the title. Likewise a starter chosen at random from ContentStarters
// is passed to the content source together with articleSize, and the whole
// generated result becomes the paragraphs. The subtitle is left empty.
//
// The method panics if either starter list is empty or if the title source
// generates no text.
func (ab *ArticleBuilder) GenerateArticle(titleSize, articleSize int) *Article {
	titleSeed := ab.TitleStarters[rand.Int()%len(ab.TitleStarters)]
	headline := ab.TitleMarkov.GenerateText(titleSeed, titleSize)[0]

	bodySeed := ab.ContentStarters[rand.Int()%len(ab.ContentStarters)]
	paragraphs := ab.ContentMarkov.GenerateText(bodySeed, articleSize)

	return &Article{
		Title:      headline,
		Subtitle:   "",
		Paragraphs: paragraphs,
	}
}