Skip to content

Commit

Permalink
Merge PR #120
Browse files Browse the repository at this point in the history
  • Loading branch information
zhengchun committed Dec 2, 2024
2 parents bb12302 + 6ad6686 commit 3603825
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 53 deletions.
107 changes: 56 additions & 51 deletions node.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package xmlquery

import (
"bufio"
"encoding/xml"
"fmt"
"html"
"io"
"strings"
)

Expand Down Expand Up @@ -153,32 +155,34 @@ type indentation struct {
level int
hasChild bool
indent string
b *strings.Builder
w io.Writer
}

func newIndentation(indent string, b *strings.Builder) *indentation {
func newIndentation(indent string, w io.Writer) *indentation {
if indent == "" {
return nil
}
return &indentation{
indent: indent,
b: b,
w: w,
}
}

func (i *indentation) NewLine() {
if i == nil {
return
}
i.b.WriteString("\n")
io.WriteString(i.w, "\n")
}

func (i *indentation) Open() {
if i == nil {
return
}
i.b.WriteString("\n")
i.b.WriteString(strings.Repeat(i.indent, i.level))

io.WriteString(i.w, "\n")
io.WriteString(i.w, strings.Repeat(i.indent, i.level))

i.level++
i.hasChild = false
}
Expand All @@ -189,119 +193,120 @@ func (i *indentation) Close() {
}
i.level--
if i.hasChild {
i.b.WriteString("\n")
i.b.WriteString(strings.Repeat(i.indent, i.level))
io.WriteString(i.w, "\n")
io.WriteString(i.w, strings.Repeat(i.indent, i.level))
}
i.hasChild = true
}

func outputXML(b *strings.Builder, n *Node, preserveSpaces bool, config *outputConfiguration, indent *indentation) {
func outputXML(w io.Writer, n *Node, preserveSpaces bool, config *outputConfiguration, indent *indentation) {
preserveSpaces = calculatePreserveSpaces(n, preserveSpaces)
switch n.Type {
case TextNode:
b.WriteString(html.EscapeString(n.sanitizedData(preserveSpaces)))
io.WriteString(w, html.EscapeString(n.sanitizedData(preserveSpaces)))
return
case CharDataNode:
b.WriteString("<![CDATA[")
b.WriteString(n.Data)
b.WriteString("]]>")
io.WriteString(w, "<![CDATA[")
io.WriteString(w, n.Data)
io.WriteString(w, "]]>")
return
case CommentNode:
if !config.skipComments {
b.WriteString("<!--")
b.WriteString(n.Data)
b.WriteString("-->")
io.WriteString(w, "<!--")
io.WriteString(w, n.Data)
io.WriteString(w, "-->")
}
return
case NotationNode:
indent.NewLine()
fmt.Fprintf(b, "<!%s>", n.Data)
fmt.Fprintf(w, "<!%s>", n.Data)
return
case DeclarationNode:
b.WriteString("<?" + n.Data)
io.WriteString(w, "<?" + n.Data)
default:
indent.Open()
if n.Prefix == "" {
b.WriteString("<" + n.Data)
io.WriteString(w, "<" + n.Data)
} else {
fmt.Fprintf(b, "<%s:%s", n.Prefix, n.Data)
fmt.Fprintf(w, "<%s:%s", n.Prefix, n.Data)
}
}

for _, attr := range n.Attr {
if attr.Name.Space != "" {
fmt.Fprintf(b, ` %s:%s=`, attr.Name.Space, attr.Name.Local)
fmt.Fprintf(w, ` %s:%s=`, attr.Name.Space, attr.Name.Local)
} else {
fmt.Fprintf(b, ` %s=`, attr.Name.Local)
fmt.Fprintf(w, ` %s=`, attr.Name.Local)
}
b.WriteByte('"')
b.WriteString(html.EscapeString(attr.Value))
b.WriteByte('"')

fmt.Fprintf(w, `"%v"`, html.EscapeString(attr.Value))
}
if n.Type == DeclarationNode {
b.WriteString("?>")
io.WriteString(w, "?>")
} else {
if n.FirstChild != nil || !config.emptyElementTagSupport {
b.WriteString(">")
io.WriteString(w, ">")
} else {
b.WriteString("/>")
io.WriteString(w, "/>")
indent.Close()
return
}
}
for child := n.FirstChild; child != nil; child = child.NextSibling {
outputXML(b, child, preserveSpaces, config, indent)
outputXML(w, child, preserveSpaces, config, indent)
}
if n.Type != DeclarationNode {
indent.Close()
if n.Prefix == "" {
fmt.Fprintf(b, "</%s>", n.Data)
fmt.Fprintf(w, "</%s>", n.Data)
} else {
fmt.Fprintf(b, "</%s:%s>", n.Prefix, n.Data)
fmt.Fprintf(w, "</%s:%s>", n.Prefix, n.Data)
}
}
}

// OutputXML returns the node's XML text, including tag names.
func (n *Node) OutputXML(self bool) string {

config := &outputConfiguration{
printSelf: true,
emptyElementTagSupport: false,
if self {
return n.OutputXMLWithOptions(WithOutputSelf())
}
preserveSpaces := calculatePreserveSpaces(n, false)
var b strings.Builder
if self && n.Type != DocumentNode {
outputXML(&b, n, preserveSpaces, config, newIndentation(config.useIndentation, &b))
} else {
for n := n.FirstChild; n != nil; n = n.NextSibling {
outputXML(&b, n, preserveSpaces, config, newIndentation(config.useIndentation, &b))
}
}

return b.String()
return n.OutputXMLWithOptions()
}

// OutputXMLWithOptions renders the node as XML text, including tag names,
// using the supplied output options. The output is produced by
// WriteWithOptions into an in-memory buffer and returned as a string.
func (n *Node) OutputXMLWithOptions(opts ...OutputOption) string {
	buf := new(strings.Builder)
	n.WriteWithOptions(buf, opts...)
	return buf.String()
}

// Write writes the XML form of n to writer.
//
// When self is true the WithOutputSelf option is passed to WriteWithOptions;
// otherwise WriteWithOptions is called with no options, i.e. its defaults.
func (n *Node) Write(writer io.Writer, self bool) {
	if self {
		n.WriteWithOptions(writer, WithOutputSelf())
		// Stop here: falling through would serialize the node a second time
		// with the default options and duplicate the output.
		return
	}
	n.WriteWithOptions(writer)
}

// WriteWithOptions writes xml with given options to given writer.
func (n *Node) WriteWithOptions(writer io.Writer, opts ...OutputOption) {
config := &outputConfiguration{}
// Set the options
for _, opt := range opts {
opt(config)
}
pastPreserveSpaces := config.preserveSpaces
preserveSpaces := calculatePreserveSpaces(n, pastPreserveSpaces)
var b strings.Builder
b := bufio.NewWriter(writer)
defer b.Flush()

if config.printSelf && n.Type != DocumentNode {
outputXML(&b, n, preserveSpaces, config, newIndentation(config.useIndentation, &b))
outputXML(b, n, preserveSpaces, config, newIndentation(config.useIndentation, b))
} else {
for n := n.FirstChild; n != nil; n = n.NextSibling {
outputXML(&b, n, preserveSpaces, config, newIndentation(config.useIndentation, &b))
outputXML(b, n, preserveSpaces, config, newIndentation(config.useIndentation, b))
}
}

return b.String()
}

// AddAttr adds a new attribute specified by 'key' and 'val' to a node 'n'.
Expand Down
82 changes: 80 additions & 2 deletions node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,7 @@ func TestSelectElement(t *testing.T) {
t.Fatalf("n is nil")
}

var ns []*Node
ns = aaa.SelectElements("CCC")
ns := aaa.SelectElements("CCC")
if len(ns) != 2 {
t.Fatalf("len(ns)!=2")
}
Expand All @@ -365,6 +364,23 @@ func TestEscapeOutputValue(t *testing.T) {

}

// TestEscapeValueWrite checks that text containing escaped markup characters
// is still escaped after the document is serialized through Node.Write.
func TestEscapeValueWrite(t *testing.T) {
	data := `<AAA>&lt;*&gt;</AAA>`

	root, err := Parse(strings.NewReader(data))
	if err != nil {
		// Abort immediately: root is not usable when parsing failed.
		t.Fatal(err)
	}

	var b strings.Builder
	root.Write(&b, true)
	escapedInnerText := b.String()
	if !strings.Contains(escapedInnerText, "&lt;*&gt;") {
		t.Fatal("Inner Text has not been escaped")
	}
}

func TestUnnecessaryEscapeOutputValue(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<class_list xml:space="preserve">
Expand All @@ -391,6 +407,34 @@ func TestUnnecessaryEscapeOutputValue(t *testing.T) {

}

// TestUnnecessaryEscapeValueWrite checks that Node.Write does not turn tab or
// newline characters into numeric character references.
func TestUnnecessaryEscapeValueWrite(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<class_list xml:space="preserve">
<student>
<name> Robert </name>
<grade>A+</grade>
</student>
</class_list>`

	root, err := Parse(strings.NewReader(data))
	if err != nil {
		// Abort immediately: root is not usable when parsing failed.
		t.Fatal(err)
	}

	var b strings.Builder
	root.Write(&b, true)
	escapedInnerText := b.String()

	// &#x9 is the character reference for a horizontal tab.
	if strings.Contains(escapedInnerText, "&#x9") {
		t.Fatal("\\t has been escaped unnecessarily")
	}

	// &#xA is the character reference for a line feed.
	if strings.Contains(escapedInnerText, "&#xA") {
		t.Fatal("\\n has been escaped unnecessarily")
	}
}

func TestHtmlUnescapeStringOriginString(t *testing.T) {
// has escape html character and \t
data := `<?xml version="1.0" encoding="utf-8"?>
Expand All @@ -412,6 +456,29 @@ func TestHtmlUnescapeStringOriginString(t *testing.T) {

}

// TestHtmlUnescapeStringOriginStringWrite checks that an escaped ampersand
// survives serialization through Node.Write as plain escaped text and can be
// unescaped afterwards.
func TestHtmlUnescapeStringOriginStringWrite(t *testing.T) {
	// Input contains an escaped HTML character followed by whitespace.
	// NOTE(review): the assertion below expects two tab characters after
	// "&amp;#48;" — confirm the fixture's whitespace matches (tabs may have
	// been collapsed in transit).
	data := `<?xml version="1.0" encoding="utf-8"?>
<example xml:space="preserve"><word>&amp;#48; </word></example>`

	root, err := Parse(strings.NewReader(data))
	if err != nil {
		// Abort immediately: root is not usable when parsing failed.
		t.Fatal(err)
	}

	var b strings.Builder
	root.Write(&b, false)
	escapedInnerText := b.String()
	unescapeString := html.UnescapeString(escapedInnerText)
	if strings.Contains(unescapeString, "&amp;") {
		t.Fatal("&amp; need unescape")
	}
	if !strings.Contains(escapedInnerText, "&amp;#48;\t\t") {
		t.Fatal("Inner Text should keep plain text")
	}
}

func TestOutputXMLWithNamespacePrefix(t *testing.T) {
s := `<?xml version="1.0" encoding="UTF-8"?><S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/"><S:Body></S:Body></S:Envelope>`
doc, _ := Parse(strings.NewReader(s))
Expand All @@ -420,6 +487,17 @@ func TestOutputXMLWithNamespacePrefix(t *testing.T) {
}
}

// TestWriteWithNamespacePrefix checks that a document using a namespace prefix
// is reproduced exactly when written back out through Node.Write.
func TestWriteWithNamespacePrefix(t *testing.T) {
	s := `<?xml version="1.0" encoding="UTF-8"?><S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/"><S:Body></S:Body></S:Envelope>`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// Surface parse failures instead of failing later on a nil document.
		t.Fatal(err)
	}
	var b strings.Builder
	doc.Write(&b, false)
	if s != b.String() {
		t.Fatal("xml document missing some characters")
	}
}


func TestQueryWithPrefix(t *testing.T) {
s := `<?xml version="1.0" encoding="UTF-8"?><S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/"><S:Body test="1"><ns2:Fault xmlns:ns2="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ns3="http://www.w3.org/2003/05/soap-envelope"><faultcode>ns2:Client</faultcode><faultstring>This is a client fault</faultstring></ns2:Fault></S:Body></S:Envelope>`
doc, _ := Parse(strings.NewReader(s))
Expand Down

0 comments on commit 3603825

Please sign in to comment.