Releases: huantt/plaintext-extractor
Releases · huantt/plaintext-extractor
v1.1.0
v1.0.1 - Update how newlines are truncated
v1.0.1 fix(html): update how to truncate new line
V1.0.0 - Markdown & HTML Extractors
Usage
Markdown extractor
markdownContent := "# H1 \n*italic* **bold** `code` `not code [link](https://example.com) ![image](https://image.com/image.png) ~~strikethrough~~"
extractor := NewMarkdownExtractor()
output, err := extractor.PlainText(markdownContent)
if err != nil {
panic(err)
}
fmt.Println(output)
// Output: H1 \nitalic bold code `not code link image strikethrough
Custom Markdown Tag
markdownContent := "This is {color:#0A84FF}red{color}"
customTag := markdown.Tag{
Name: "color-custom-tag",
FullRegex: regexp.MustCompile("{color:[a-zA-Z0-9#]+}(.*?){color}"),
StartRegex: regexp.MustCompile("{color:[a-zA-Z0-9#]+}"),
EndRegex: regexp.MustCompile("{color}"),
}
markdownExtractor := NewMarkdownExtractor(customTag)
plaintextExtractor := plaintext.NewExtractor(markdownExtractor.PlainText)
plaintext, err := plaintextExtractor.PlainText(markdownContent)
if err != nil {
panic(err)
}
fmt.Println(plaintext)
// Output: This is red
HTML Extractor
html := `<div>This is a <a href="https://example.com">link</a></div>`
extractor := NewHtmlExtractor()
output, err := extractor.PlainText(html)
if err != nil {
panic(err)
}
fmt.Println(output)
// Output: This is a link
Multiple extractors
input := `<div> html </div> *markdown*`
markdownExtractor := markdown.NewExtractor()
htmlExtractor := html.NewExtractor()
extractor := NewExtractor(markdownExtractor.PlainText, htmlExtractor.PlainText)
output, err := extractor.PlainText(input)
if err != nil {
panic(err)
}
fmt.Println(output)
// Output: html markdown