Skip to content

Commit

Permalink
use ARC cache
Browse files Browse the repository at this point in the history
  • Loading branch information
dogancanbakir committed Oct 15, 2024
1 parent 49d7ccf commit 769c562
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ type Runner struct {
browser *Browser
errorPageClassifier *errorpageclassifier.ErrorPageClassifier
pHashClusters []pHashCluster
simHashes map[uint64]struct{}
simHashes gcache.Cache[uint64, struct{}]
httpApiEndpoint *Server
}

Expand Down Expand Up @@ -361,7 +361,7 @@ func New(options *Options) (*Runner, error) {
}

runner.errorPageClassifier = errorpageclassifier.New()
runner.simHashes = make(map[uint64]struct{})
runner.simHashes = gcache.New[uint64, struct{}](1000).ARC().Build()

if options.HttpApiEndpoint != "" {
apiServer := NewServer(options.HttpApiEndpoint, options)
Expand Down Expand Up @@ -519,18 +519,19 @@ func (r *Runner) seen(k string) bool {

func (r *Runner) duplicate(resp []byte) bool {
respSimHash := simhash.Simhash(simhash.NewWordFeatureSet(resp))
if _, exists := r.simHashes[respSimHash]; exists {
if r.simHashes.Has(respSimHash) {
gologger.Warning().Msgf("Skipping duplicate response with simhash %d\n", respSimHash)
return true
}
for simHash := range r.simHashes {

for simHash := range r.simHashes.GetALL(false) {
// lower threshold for increased precision
if simhash.Compare(simHash, respSimHash) <= 3 {
gologger.Warning().Msgf("Skipping near-duplicate response with simhash %d\n", respSimHash)
return true
}
}
r.simHashes[respSimHash] = struct{}{}
r.simHashes.Set(respSimHash, struct{}{})

Check failure on line 534 in runner/runner.go

View workflow job for this annotation

GitHub Actions / Lint Test

Error return value of `r.simHashes.Set` is not checked (errcheck)
return false
}

Expand Down

0 comments on commit 769c562

Please sign in to comment.