-
-
Notifications
You must be signed in to change notification settings - Fork 74
/
node.go
189 lines (169 loc) · 4.36 KB
/
node.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
package idr
import (
"fmt"
"strings"
"sync"
"sync/atomic"
)
// NodeType enumerates the kinds of Node that can appear in an IDR tree.
type NodeType uint

// The known NodeType values, numbered from 0 in declaration order.
const (
	// DocumentNode marks the root Node of an IDR tree.
	DocumentNode NodeType = iota
	// ElementNode marks an element Node of an IDR tree.
	ElementNode
	// TextNode marks a text/data Node of an IDR tree.
	TextNode
	// AttributeNode marks an attribute Node of an IDR tree.
	AttributeNode
)

// String returns the constant's name for a known NodeType, and a
// "(unknown NodeType: N)" placeholder for every other value.
func (nt NodeType) String() string {
	// Lookup table indexed by the NodeType value itself.
	names := [...]string{
		DocumentNode:  "DocumentNode",
		ElementNode:   "ElementNode",
		TextNode:      "TextNode",
		AttributeNode: "AttributeNode",
	}
	if nt < NodeType(len(names)) {
		return names[nt]
	}
	return fmt.Sprintf("(unknown NodeType: %d)", nt)
}
// Node is a single element/data node in an IDR (intermediate data representation) tree
// ingested and created by the omniparser.
// Credit: this is by and large a copy, with some adaptation, of
// https://github.com/antchfx/xmlquery/blob/master/node.go. The reasons we keep our own struct:
// - one struct represents XML/JSON/EDI/CSV/txt/etc., whereas antchfx's work has one struct
//   (in each repo) for each format.
// - Node allocation recycling.
// - more stability
type Node struct {
	// ID uniquely identifies a Node, whether it is newly created or recycled and reused
	// from the node allocation cache. A *Node's pointer address was sometimes used as a
	// unique ID before, but that is no longer sufficiently unique now that sync.Pool is
	// used for node allocation caching.
	ID int64

	// Parent is the Node this Node is a child of; nil when it has none.
	Parent *Node

	// FirstChild is the head of this Node's child list; nil when there are no children.
	FirstChild *Node

	// LastChild is the tail of this Node's child list.
	LastChild *Node

	// PrevSibling is the Node preceding this one in its parent's child list.
	PrevSibling *Node

	// NextSibling is the Node following this one in its parent's child list.
	NextSibling *Node

	// Type is the kind of this Node.
	Type NodeType

	// Data is the Node's string payload; for a TextNode it is the text InnerText collects.
	Data string

	// FormatSpecific is cleared to nil whenever the Node is reset.
	// NOTE(review): presumably holds data specific to the input format - confirm with callers.
	FormatSpecific interface{}
}
var (
	// nodeCaching lets tests switch node caching on/off. It is not exported;
	// it is initialized to true, so production code always caches.
	nodeCaching = true

	// nodePool is the allocation cache that Nodes are drawn from and returned to.
	nodePool sync.Pool
)
// allocNode allocates a brand new Node and resets it, which stamps it with a
// fresh ID and leaves every other field at its zero value.
func allocNode() *Node {
	fresh := new(Node)
	fresh.reset()
	return fresh
}
func resetNodePool() {
nodePool = sync.Pool{
New: func() interface{} {
return allocNode()
},
}
}
// init sets up the node pool at package initialization by delegating to
// resetNodePool.
func init() {
resetNodePool()
}
// CreateNode returns a generic *Node of the given type carrying the given data.
// The Node is taken from the node pool when caching is on and freshly allocated otherwise.
func CreateNode(ntype NodeType, data string) *Node {
	var node *Node
	if nodeCaching {
		// A Node coming out of the pool has already been reset.
		node = nodePool.Get().(*Node)
	} else {
		node = allocNode()
	}
	node.Type, node.Data = ntype, data
	return node
}
// nodeID is the most recently issued Node ID; it is advanced atomically by newNodeID.
var nodeID int64

// newNodeID atomically increments nodeID and returns the new value.
func newNodeID() int64 {
	const step = 1
	return atomic.AddInt64(&nodeID, step)
}
// reset returns the Node to a blank state: it receives a fresh ID, and its tree
// links, Type, Data and FormatSpecific all go back to their zero values.
func (n *Node) reset() {
	*n = Node{ID: newNodeID()}
}
// InnerText concatenates, in document order, the Data of every TextNode found in
// the subtree rooted at n.
// Note (in an XML IDR tree) the text under any AttributeNode child is not included.
func (n *Node) InnerText() string {
	var sb strings.Builder
	var walk func(cur *Node)
	walk = func(cur *Node) {
		if cur.Type == TextNode {
			sb.WriteString(cur.Data)
			return
		}
		for kid := cur.FirstChild; kid != nil; kid = kid.NextSibling {
			if kid.Type == AttributeNode {
				// Attribute subtrees never contribute to the inner text.
				continue
			}
			walk(kid)
		}
	}
	walk(n)
	return sb.String()
}
// AddChild appends 'n' to the end of the child list of 'parent', making it the
// new last child.
func AddChild(parent, n *Node) {
	n.Parent, n.NextSibling = parent, nil
	if parent.FirstChild != nil {
		// Non-empty child list: hook n in after the current tail.
		tail := parent.LastChild
		tail.NextSibling = n
		n.PrevSibling = tail
	} else {
		// Empty child list: n becomes the only child.
		parent.FirstChild = n
		n.PrevSibling = nil
	}
	parent.LastChild = n
}
// RemoveAndReleaseTree unlinks a node, together with its subtree, from the IDR tree it
// is in, and then releases the resources (Node allocation) held by the node and its subtree.
// A node without a parent is released without any unlinking.
func RemoveAndReleaseTree(n *Node) {
	if p := n.Parent; p != nil {
		isFirst, isLast := p.FirstChild == n, p.LastChild == n
		switch {
		case isFirst && isLast:
			// n is the only child.
			p.FirstChild, p.LastChild = nil, nil
		case isFirst:
			p.FirstChild = n.NextSibling
			n.NextSibling.PrevSibling = nil
		case isLast:
			p.LastChild = n.PrevSibling
			n.PrevSibling.NextSibling = nil
		default:
			// n sits between two siblings; splice them together.
			n.PrevSibling.NextSibling = n.NextSibling
			n.NextSibling.PrevSibling = n.PrevSibling
		}
	}
	recycle(n)
}
// recycle resets n and every node of its subtree and hands them back to the node
// pool, children before their parent. It does nothing when node caching is off.
func recycle(n *Node) {
	if nodeCaching {
		child := n.FirstChild
		for child != nil {
			// Grab the sibling link first: recycling the child wipes its NextSibling.
			sibling := child.NextSibling
			recycle(child)
			child = sibling
		}
		n.reset()
		nodePool.Put(n)
	}
}