Skip to content

Commit

Permalink
domutils: added MoveListItems
Browse files Browse the repository at this point in the history
MoveListItems moves non-"li" nodes into the previous "li" nodes
  • Loading branch information
JohannesKaufmann committed Dec 26, 2024
1 parent 9344542 commit 0191495
Show file tree
Hide file tree
Showing 5 changed files with 196 additions and 11 deletions.
53 changes: 53 additions & 0 deletions internal/domutils/list_items.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package domutils

import (
"context"
"strings"

"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)

// MoveListItems walks the tree rooted at n and makes sure that the direct
// children of every "ol"/"ul" element live inside an "li" element.
//
// A stray (non-"li") child is appended to the closest preceding "li" sibling.
// If no "li" has been seen yet, the stray child gets wrapped in a new "li"
// instead, and that new item is used for the stray children that follow.
// Text nodes consisting only of whitespace are left untouched.
//
// ctx is only handed on to the recursive calls.
func MoveListItems(ctx context.Context, n *html.Node) {
	isList := n.Type == html.ElementNode && (n.Data == "ol" || n.Data == "ul")

	if isList {
		// The most recent list item that stray nodes can be attached to.
		var lastItem *html.Node

		// Work on the collected children rather than walking the sibling
		// links, since the loop body re-parents nodes.
		for _, node := range dom.AllChildNodes(n) {
			switch {
			case node.Type == html.ElementNode && node.Data == "li":
				// A regular list item: nothing to fix, just remember it.
				lastItem = node

			case node.Type == html.TextNode && strings.TrimSpace(node.Data) == "":
				// Whitespace between the tags, most likely source formatting.

			case lastItem == nil:
				// Nothing to attach to yet, so the node gets an "li" of its own.
				wrapper := &html.Node{
					Type:     html.ElementNode,
					DataAtom: atom.Li,
					Data:     "li",
				}
				lastItem = dom.WrapNode(node, wrapper)

			default:
				// Lists are expected to hold *only* "li" nodes, but real-world
				// markup does not always comply. Detach the node first:
				// AppendChild panics for a node that still has a parent.
				n.RemoveChild(node)
				lastItem.AppendChild(node)
			}
		}
	}

	// Descend into every child, which also covers lists that were just moved.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		MoveListItems(ctx, child)
	}
}
123 changes: 123 additions & 0 deletions internal/domutils/list_items_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package domutils

import (
"context"
"testing"

"github.com/JohannesKaufmann/html-to-markdown/v2/internal/tester"
)

// TestMoveListItems is a table-driven test: every case parses an HTML
// snippet, runs MoveListItems on the result and compares the resulting tree
// with an expected textual representation.
func TestMoveListItems(t *testing.T) {
runs := []struct {
// desc is used as the subtest name.
desc string
// input is the HTML handed to tester.Parse.
input string
// expected is the tree representation checked by tester.ExpectRepresentation.
// All expected trees start at "body", so the parser presumably builds a
// full document around the snippet.
expected string
}{
// A well-formed list must come out unchanged.
{
desc: "not needed in normal list",
input: "<div><ul><li>A</li><li>B</li><li>C</li></ul></div>",
expected: `
├─body
│ ├─div
│ │ ├─ul
│ │ │ ├─li
│ │ │ │ ├─#text "A"
│ │ │ ├─li
│ │ │ │ ├─#text "B"
│ │ │ ├─li
│ │ │ │ ├─#text "C"
`,
},
// Stray non-whitespace text is appended to the preceding "li".
{
desc: "#text moves into the previous li",
input: "<ul><li>A</li>B</ul>",
expected: `
├─body
│ ├─ul
│ │ ├─li
│ │ │ ├─#text "A"
│ │ │ ├─#text "B"
`,
},
// A stray element is appended to the preceding "li" as well.
{
desc: "div moves into the previous li",
input: "<ul><li>A</li><div>B</div></ul>",
expected: `
├─body
│ ├─ul
│ │ ├─li
│ │ │ ├─#text "A"
│ │ │ ├─div
│ │ │ │ ├─#text "B"
`,
},
// A list placed directly inside another list becomes a child of the
// preceding "li".
{
desc: "ol moves into the previous li",
input: "<ul><li>A</li><ol><li>B</li></ol></ul>",
expected: `
├─body
│ ├─ul
│ │ ├─li
│ │ │ ├─#text "A"
│ │ │ ├─ol
│ │ │ │ ├─li
│ │ │ │ │ ├─#text "B"
`,
},
// Without any "li", the first stray node is wrapped in a new "li" and the
// following stray node joins it.
{
desc: "no existing li",
input: "<ul><span>A</span><span>B</span></ul>",
expected: `
├─body
│ ├─ul
│ │ ├─li
│ │ │ ├─span
│ │ │ │ ├─#text "A"
│ │ │ ├─span
│ │ │ │ ├─#text "B"
`,
},
// Multi-line input: the whitespace-only text nodes stay in place while the
// nested "ol" moves into the "li" before it.
// NOTE(review): the expected tree contains tab characters ("\n\t",
// "\n\t\t"), so the input literal below has to be tab-indented — confirm
// the indentation inside the raw string is intact in this copy of the file.
{
desc: "basic moved list",
input: `
<ol>
<li>One</li>
<li>Two</li>
<ol>
<li>Two point one</li>
<li>Two point two</li>
</ol>
</ol>
`,
expected: `
├─body
│ ├─ol
│ │ ├─#text "\n\t"
│ │ ├─li
│ │ │ ├─#text "One"
│ │ ├─#text "\n\t"
│ │ ├─li
│ │ │ ├─#text "Two"
│ │ │ ├─ol
│ │ │ │ ├─#text "\n\t\t"
│ │ │ │ ├─li
│ │ │ │ │ ├─#text "Two point one"
│ │ │ │ ├─#text "\n\t\t"
│ │ │ │ ├─li
│ │ │ │ │ ├─#text "Two point two"
│ │ │ │ ├─#text "\n\t"
│ │ ├─#text "\n\t"
│ │ ├─#text "\n"
`,
},
}
// Each case runs as its own subtest, named after desc.
for _, run := range runs {
t.Run(run.desc, func(t *testing.T) {
// NOTE(review): the meaning of the empty third argument is not visible
// here — check tester.Parse.
doc := tester.Parse(t, run.input, "")

// The tree is modified in place; MoveListItems has no return value.
MoveListItems(context.TODO(), doc)

// "output" is presumably a label used in the failure message — verify
// against tester.ExpectRepresentation.
tester.ExpectRepresentation(t, doc, "output", run.expected)
})
}
}
5 changes: 4 additions & 1 deletion plugin/commonmark/handle_pre_render.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,10 @@ func (c *commonmark) handlePreRender(ctx converter.Context, doc *html.Node) {
domutils.RemoveRedundant(doc, nameIsBothLink)
domutils.SwapTags(ctx, doc, nameIsBoldOrItalic, nameIsLink)

// - - - Headings - - - //
// - - - Heading - - - //
domutils.SwapTags(ctx, doc, nameIsLink, nameIsHeading)
domutils.LeafBlockAlternatives(ctx, doc)

// - - - List - - - //
domutils.MoveListItems(ctx, doc)
}
3 changes: 2 additions & 1 deletion plugin/commonmark/testdata/GoldenFiles/link.out.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ Wir freuen uns über eine [Mail](mailto:[email protected]?body=Hello%0AJohannes)!
<!--list with link-->

- [a(b)\[c\]](/page.html)
- [a\]](/page.html)

[a\]](/page.html)

<!--TODO: list with paragraph-->

Expand Down
23 changes: 14 additions & 9 deletions plugin/commonmark/testdata/GoldenFiles/list.out.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,20 @@ text between
<!-- with other elements inside the list -->

1. A 1 (div)
2. A 2 (#text)
3. A 3 (li)
4. A 4 (#text)
5. 1. B 1 (li)
2. 1. C 1 (li)
2. C 2 (div)
3. C 3 (div)
3. B 2 (div)
4. B 3 (li)

A 2 (#text)
2. A 3 (li) A 4 (#text)

1. B 1 (li)

1. C 1 (li)

C 2 (div)

C 3 (div)

B 2 (div)
2. B 3 (li)

<!--THE END-->

Expand Down

0 comments on commit 0191495

Please sign in to comment.