Skip to content

Commit

Permalink
fix: check if token decoding is valid or EOF
Browse files — browse the repository at this point in the history
  • Loading branch information
HarshNarayanJha committed Nov 8, 2024
1 parent 8d67026 commit 0898a6c
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion internal/pkg/crawl/extractor/xml.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,13 +23,25 @@ func XML(resp *http.Response) (URLs []*url.URL, sitemap bool, err error) {
sitemap = true
}

decoder := xml.NewDecoder(strings.NewReader(string(xmlBody)))
reader := strings.NewReader(string(xmlBody))
decoder := xml.NewDecoder(reader)

var (
startElement xml.StartElement
currentNode *LeafNode
leafNodes []LeafNode
)

// try to decode one token to see if stream is open
_, err = decoder.Token()
if err != nil {
return nil, sitemap, err
}

// seek back to 0 if we are still here
reader.Seek(0, 0)
decoder = xml.NewDecoder(reader)

for {
tok, err := decoder.Token()
if err == io.EOF {
Expand Down

0 comments on commit 0898a6c

Please sign in to comment.