From 0898a6cf897b767358bee0c7fc6979278f6b0a5e Mon Sep 17 00:00:00 2001 From: Harsh Narayan Jha <50262541+HarshNarayanJha@users.noreply.github.com> Date: Fri, 8 Nov 2024 13:25:48 +0530 Subject: [PATCH] fix: check if token decoding is valid or EOF --- internal/pkg/crawl/extractor/xml.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/internal/pkg/crawl/extractor/xml.go b/internal/pkg/crawl/extractor/xml.go index 3d21b0ba..c435ba9d 100644 --- a/internal/pkg/crawl/extractor/xml.go +++ b/internal/pkg/crawl/extractor/xml.go @@ -23,13 +23,25 @@ func XML(resp *http.Response) (URLs []*url.URL, sitemap bool, err error) { sitemap = true } - decoder := xml.NewDecoder(strings.NewReader(string(xmlBody))) + reader := strings.NewReader(string(xmlBody)) + decoder := xml.NewDecoder(reader) + var ( startElement xml.StartElement currentNode *LeafNode leafNodes []LeafNode ) + // probe the first token so that an empty (io.EOF) or unparsable body is returned as an error before the main loop + _, err = decoder.Token() + if err != nil { + return nil, sitemap, err + } + + // the probe succeeded: rewind the reader and create a fresh decoder so the loop starts again from the first token + reader.Seek(0, 0) + decoder = xml.NewDecoder(reader) + for { tok, err := decoder.Token() if err == io.EOF {