Skip to content

Commit

Permalink
feat: add jitter between http requests
Browse files Browse the repository at this point in the history
Signed-off-by: knqyf263 <[email protected]>
  • Loading branch information
knqyf263 committed Sep 26, 2024
1 parent 1084be2 commit 8d76a91
Showing 1 changed file with 30 additions and 24 deletions.
54 changes: 30 additions & 24 deletions pkg/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"io"
"log/slog"
"math/rand"
"net/http"
"path/filepath"
"strings"
Expand Down Expand Up @@ -156,13 +157,9 @@ loop:
}

func (c *Crawler) Visit(ctx context.Context, url string) error {
req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return xerrors.Errorf("unable to new HTTP request: %w", err)
}
resp, err := c.http.Do(req)
resp, err := c.httpGet(ctx, url)
if err != nil {
return xerrors.Errorf("http get error (%s): %w", url, err)
return xerrors.Errorf("http get error: %w", err)
}
defer resp.Body.Close()

Expand Down Expand Up @@ -290,13 +287,9 @@ func (c *Crawler) crawlSHA1(ctx context.Context, baseURL string, meta *Metadata,
}

func (c *Crawler) sha1Urls(ctx context.Context, url string) ([]string, error) {
req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, xerrors.Errorf("unable to new HTTP request: %w", err)
}
resp, err := c.http.Do(req)
resp, err := c.httpGet(ctx, url)
if err != nil {
return nil, xerrors.Errorf("http get error (%s): %w", url, err)
return nil, xerrors.Errorf("http get error: %w", err)
}
defer func() { _ = resp.Body.Close() }()

Expand Down Expand Up @@ -328,13 +321,9 @@ func (c *Crawler) parseMetadata(ctx context.Context, url string) (*Metadata, err
return nil, nil
}

req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
resp, err := c.httpGet(ctx, url)
if err != nil {
return nil, xerrors.Errorf("unable to new HTTP request: %w", err)
}
resp, err := c.http.Do(req)
if err != nil {
return nil, xerrors.Errorf("http get error (%s): %w", url, err)
return nil, xerrors.Errorf("http get error: %w", err)
}
defer resp.Body.Close()

Expand Down Expand Up @@ -363,13 +352,9 @@ func (c *Crawler) parseMetadata(ctx context.Context, url string) (*Metadata, err
}

func (c *Crawler) fetchSHA1(ctx context.Context, url string) ([]byte, error) {
req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
resp, err := c.httpGet(ctx, url)
if err != nil {
return nil, xerrors.Errorf("unable to new HTTP request: %w", err)
}
resp, err := c.http.Do(req)
if err != nil {
return nil, xerrors.Errorf("http get error (%s): %w", url, err)
return nil, xerrors.Errorf("http get error: %w", err)
}
defer func() { _ = resp.Body.Close() }()

Expand Down Expand Up @@ -408,6 +393,27 @@ func (c *Crawler) fetchSHA1(ctx context.Context, url string) ([]byte, error) {
return sha1b, nil
}

// httpGet issues a GET request for url through the crawler's HTTP client and
// returns the response. On success the caller owns resp.Body and must close it.
//
// Before sending, it pauses for a short random interval (see randomSleep) to
// space out requests and avoid HTTP 429 responses. If ctx is already done, it
// returns immediately with the context error wrapped, skipping both the pause
// and the request.
func (c *Crawler) httpGet(ctx context.Context, url string) (*http.Response, error) {
	// Bail out early on a cancelled or expired context: randomSleep takes no
	// context, so without this check every pending call would still wait out
	// its jitter before the request fails anyway.
	if err := ctx.Err(); err != nil {
		return nil, xerrors.Errorf("http error (%s): %w", url, err)
	}

	// Sleep for a while to avoid 429 error
	randomSleep()

	req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, xerrors.Errorf("unable to create a HTTP request: %w", err)
	}
	resp, err := c.http.Do(req)
	if err != nil {
		return nil, xerrors.Errorf("http error (%s): %w", url, err)
	}
	return resp, nil
}

// randomSleep blocks the calling goroutine for a uniformly random duration in
// the range [0, 100ms), adding jitter between consecutive HTTP requests.
//
// It draws from the package-level math/rand generator, which is safe for
// concurrent use and randomly seeded by the runtime. Building a new generator
// per call seeded from the clock's nanosecond field is wasteful and can hand
// identical delays to goroutines that read the same clock tick.
func randomSleep() {
	const maxJitter = 100 * time.Millisecond
	time.Sleep(time.Duration(rand.Int63n(int64(maxJitter))))
}

func versionFromSha1URL(artifactId, sha1URL string) string {
ss := strings.Split(sha1URL, "/")
fileName := ss[len(ss)-1]
Expand Down

0 comments on commit 8d76a91

Please sign in to comment.