Skip to content

Commit

Permalink
Merge pull request #700 from SSWConsulting/optimise-unscannable-links
Browse files (browse the repository at this point in the history)
Optimise calls to Unscannable Links endpoint
  • Loading branch information
tombui99 authored Oct 17, 2023
2 parents 6a554cb + 7193144 commit b98c9dc
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions docker/sswlinkauditor.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,15 +46,15 @@ func getHref(t html.Token) (ok bool, href string) {
return
}

-func check(link Link, linkch chan LinkStatus, number int) {
+func check(link Link, linkch chan LinkStatus, number int, unscannableLinks []string) {
fmt.Println("CHEC", number, link.url)

client := &http.Client{
Timeout: 1 * time.Minute,
}
method := "HEAD"

-if isLinkUnscannable(link.url) {
+if isLinkUnscannable(link.url, unscannableLinks) {
method = "GET"
}

Expand Down Expand Up @@ -214,8 +214,7 @@ func writeResultFile(allUrls map[string]LinkStatus) {
f.Close()
}

-func isLinkUnscannable(a string) bool {
-unscannableLinks := getUnscannableLinks();
+func isLinkUnscannable(a string, unscannableLinks []string) bool {
for _, b := range unscannableLinks {
if strings.HasPrefix(strings.ToLower(a), strings.ToLower(b)) {
return true
Expand All @@ -236,7 +235,6 @@ func getUnscannableLinks() []string {

var linksList []string
json.Unmarshal(respBody, &linksList)

return linksList
}

Expand All @@ -255,6 +253,8 @@ func main() {

start := time.Now()

unscannableLinks := getUnscannableLinks();

chUrls := make(chan Link)
chAllUrls := make(chan LinkStatus)

Expand Down Expand Up @@ -292,7 +292,7 @@ func main() {
if strings.Index(link.url, startUrl.url) == 0 && link.linkType == "a" && !isResourceFile(link.url) {
crawl(link, chUrls, chAllUrls, crawling)
} else {
-check(link, chAllUrls, crawling)
+check(link, chAllUrls, crawling, unscannableLinks)
}

<-concurrentGoroutines
Expand All @@ -302,7 +302,7 @@ func main() {
if strings.Index(link.url, startUrl.url) == 0 && link.linkType == "a" && !isResourceFile(link.url) {
go crawl(link, chUrls, chAllUrls, crawling)
} else {
-go check(link, chAllUrls, crawling)
+go check(link, chAllUrls, crawling, unscannableLinks)
}
}

Expand Down

0 comments on commit b98c9dc

Please sign in to comment.