-
Notifications
You must be signed in to change notification settings - Fork 135
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MoveListItems moves non-"li" nodes into the previous "li" nodes
- Loading branch information
1 parent
9344542
commit 0191495
Showing
5 changed files
with
196 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package domutils | ||
|
||
import ( | ||
"context" | ||
"strings" | ||
|
||
"github.com/JohannesKaufmann/dom" | ||
"golang.org/x/net/html" | ||
"golang.org/x/net/html/atom" | ||
) | ||
|
||
// MoveListItems moves non-"li" nodes into the previous "li" nodes. | ||
func MoveListItems(ctx context.Context, n *html.Node) { | ||
if n.Type == html.ElementNode && (n.Data == "ol" || n.Data == "ul") { | ||
var previousLi *html.Node | ||
|
||
// Collect children to avoid modifying the slice while iterating. | ||
children := dom.AllChildNodes(n) | ||
|
||
for _, child := range children { | ||
if child.Type == html.ElementNode && child.Data == "li" { | ||
previousLi = child | ||
} else if child.Type == html.TextNode && strings.TrimSpace(child.Data) == "" { | ||
// Skip the node, probably just formatting of code | ||
} else { | ||
// We expect that inside an "ol"/"ul" there are *only* "li" nodes. | ||
// But sometimes that is not the case... | ||
|
||
if previousLi != nil { | ||
// There is a previous "li" node, | ||
// so we move this content into the other "li" node. | ||
n.RemoveChild(child) | ||
|
||
previousLi.AppendChild(child) | ||
} else { | ||
// There is no previous "li" node, | ||
// so we wrap this node with it's own "li" node. | ||
|
||
newNode := &html.Node{ | ||
Type: html.ElementNode, | ||
DataAtom: atom.Li, | ||
Data: "li", | ||
} | ||
previousLi = dom.WrapNode(child, newNode) | ||
} | ||
} | ||
} | ||
} | ||
|
||
for c := n.FirstChild; c != nil; c = c.NextSibling { | ||
MoveListItems(ctx, c) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
package domutils | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
|
||
"github.com/JohannesKaufmann/html-to-markdown/v2/internal/tester" | ||
) | ||
|
||
// TestMoveListItems is a table-driven test for MoveListItems. | ||
// Every case parses an HTML snippet, applies MoveListItems to the parsed | ||
// document and compares the resulting tree representation with the expectation. | ||
func TestMoveListItems(t *testing.T) { | ||
// desc names the subtest, input is the HTML snippet and | ||
// expected is the tree representation after the transformation. | ||
runs := []struct { | ||
desc string | ||
input string | ||
expected string | ||
}{ | ||
{ | ||
desc: "not needed in normal list", | ||
input: "<div><ul><li>A</li><li>B</li><li>C</li></ul></div>", | ||
expected: ` | ||
├─body | ||
│ ├─div | ||
│ │ ├─ul | ||
│ │ │ ├─li | ||
│ │ │ │ ├─#text "A" | ||
│ │ │ ├─li | ||
│ │ │ │ ├─#text "B" | ||
│ │ │ ├─li | ||
│ │ │ │ ├─#text "C" | ||
`, | ||
}, | ||
{ | ||
desc: "#text moves into the previous li", | ||
input: "<ul><li>A</li>B</ul>", | ||
expected: ` | ||
├─body | ||
│ ├─ul | ||
│ │ ├─li | ||
│ │ │ ├─#text "A" | ||
│ │ │ ├─#text "B" | ||
`, | ||
}, | ||
{ | ||
desc: "div moves into the previous li", | ||
input: "<ul><li>A</li><div>B</div></ul>", | ||
expected: ` | ||
├─body | ||
│ ├─ul | ||
│ │ ├─li | ||
│ │ │ ├─#text "A" | ||
│ │ │ ├─div | ||
│ │ │ │ ├─#text "B" | ||
`, | ||
}, | ||
{ | ||
desc: "ol moves into the previous li", | ||
input: "<ul><li>A</li><ol><li>B</li></ol></ul>", | ||
expected: ` | ||
├─body | ||
│ ├─ul | ||
│ │ ├─li | ||
│ │ │ ├─#text "A" | ||
│ │ │ ├─ol | ||
│ │ │ │ ├─li | ||
│ │ │ │ │ ├─#text "B" | ||
`, | ||
}, | ||
{ | ||
desc: "no existing li", | ||
input: "<ul><span>A</span><span>B</span></ul>", | ||
expected: ` | ||
├─body | ||
│ ├─ul | ||
│ │ ├─li | ||
│ │ │ ├─span | ||
│ │ │ │ ├─#text "A" | ||
│ │ │ ├─span | ||
│ │ │ │ ├─#text "B" | ||
`, | ||
}, | ||
{ | ||
desc: "basic moved list", | ||
input: ` | ||
<ol> | ||
<li>One</li> | ||
<li>Two</li> | ||
<ol> | ||
<li>Two point one</li> | ||
<li>Two point two</li> | ||
</ol> | ||
</ol> | ||
`, | ||
expected: ` | ||
├─body | ||
│ ├─ol | ||
│ │ ├─#text "\n\t" | ||
│ │ ├─li | ||
│ │ │ ├─#text "One" | ||
│ │ ├─#text "\n\t" | ||
│ │ ├─li | ||
│ │ │ ├─#text "Two" | ||
│ │ │ ├─ol | ||
│ │ │ │ ├─#text "\n\t\t" | ||
│ │ │ │ ├─li | ||
│ │ │ │ │ ├─#text "Two point one" | ||
│ │ │ │ ├─#text "\n\t\t" | ||
│ │ │ │ ├─li | ||
│ │ │ │ │ ├─#text "Two point two" | ||
│ │ │ │ ├─#text "\n\t" | ||
│ │ ├─#text "\n\t" | ||
│ │ ├─#text "\n" | ||
`, | ||
}, | ||
} | ||
// Run every case as its own subtest, named after its description. | ||
for _, run := range runs { | ||
t.Run(run.desc, func(t *testing.T) { | ||
doc := tester.Parse(t, run.input, "") | ||
|
||
// The context is not relevant for these cases, so a placeholder is passed. | ||
MoveListItems(context.TODO(), doc) | ||
|
||
tester.ExpectRepresentation(t, doc, "output", run.expected) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -79,7 +79,8 @@ Wir freuen uns über eine [Mail](mailto:[email protected]?body=Hello%0AJohannes)! | |
<!--list with link--> | ||
|
||
- [a(b)\[c\]](/page.html) | ||
- [a\]](/page.html) | ||
|
||
[a\]](/page.html) | ||
|
||
<!--TODO: list with paragraph--> | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters