Use context when scraping
mono0x committed Aug 18, 2024
1 parent 6b7c367 commit fbb8840
Showing 11 changed files with 42 additions and 18 deletions.
3 changes: 2 additions & 1 deletion scraper/source.go
@@ -1,12 +1,13 @@
 package scraper
 
 import (
+	"context"
 	"net/url"
 
 	"github.com/gorilla/feeds"
 )
 
 type Source interface {
 	Name() string
-	Scrape(query url.Values) (*feeds.Feed, error)
+	Scrape(ctx context.Context, query url.Values) (*feeds.Feed, error)
 }
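
With context on the interface, a caller can bound an entire scrape with a deadline. A minimal caller-side sketch, assuming only the interface above and the gorilla/feeds types (scrapeWithTimeout is illustrative, not part of this commit):

package example

import (
	"context"
	"net/url"
	"time"

	"github.com/gorilla/feeds"
)

// Source mirrors the interface above so the sketch stands alone.
type Source interface {
	Name() string
	Scrape(ctx context.Context, query url.Values) (*feeds.Feed, error)
}

// scrapeWithTimeout cancels the scrape, and every HTTP request made
// beneath it, once the deadline elapses.
func scrapeWithTimeout(src Source, query url.Values, d time.Duration) (*feeds.Feed, error) {
	ctx, cancel := context.WithTimeout(context.Background(), d)
	defer cancel()
	return src.Scrape(ctx, query)
}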
7 changes: 3 additions & 4 deletions scraper/source/googlecalendar/source.go
@@ -43,25 +43,24 @@ func (s *source) Name() string {
 	return "google-calendar"
 }
 
-func (s *source) Scrape(query url.Values) (*feeds.Feed, error) {
+func (s *source) Scrape(ctx context.Context, query url.Values) (*feeds.Feed, error) {
 	calendarID := query.Get("id")
 	if calendarID == "" {
 		return &feeds.Feed{}, nil
 	}
-	events, err := s.fetch(calendarID)
+	events, err := s.fetch(ctx, calendarID)
 	if err != nil {
 		return nil, err
 	}
 	return s.render(events, calendarID)
 }
 
-func (s *source) fetch(calendarID string) (*calendar.Events, error) {
+func (s *source) fetch(ctx context.Context, calendarID string) (*calendar.Events, error) {
 	config, err := google.JWTConfigFromJSON(([]byte)(os.Getenv("GOOGLE_CLIENT_CREDENTIALS")), calendar.CalendarReadonlyScope)
 	if err != nil {
 		return nil, fmt.Errorf("%w", err)
 	}
 
-	ctx := context.Background()
 	ctx = context.WithValue(ctx, oauth2.HTTPClient, s.httpClient)
 
 	client := config.Client(ctx)
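
fetch previously minted its own context.Background(); now the caller's ctx flows in, and oauth2.HTTPClient is x/oauth2's context key for handing config.Client a custom *http.Client. How the remainder of fetch uses the client is not shown in this commit; a hedged sketch, assuming the stock google.golang.org/api/calendar/v3 client and an illustrative List call:

// Sketch only: the service setup and List parameters are assumptions,
// not code from this commit.
srv, err := calendar.NewService(ctx, option.WithHTTPClient(client))
if err != nil {
	return nil, fmt.Errorf("%w", err)
}
// Context(ctx) lets cancellation abort the Calendar API call in flight.
return srv.Events.List(calendarID).Context(ctx).Do()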
9 changes: 7 additions & 2 deletions scraper/source/impresswatchcolumn/source.go
@@ -1,6 +1,7 @@
 package impresswatchcolumn
 
 import (
+	"context"
 	"fmt"
 	"html"
 	"net/http"
@@ -42,7 +43,7 @@ func (*source) Name() string {
 	return "impress-watch-column"
 }
 
-func (s *source) Scrape(query url.Values) (*feeds.Feed, error) {
+func (s *source) Scrape(ctx context.Context, query url.Values) (*feeds.Feed, error) {
 	site := query.Get("site")
 	column := query.Get("column")
 	if site == "" || column == "" {
@@ -54,7 +55,11 @@ func (s *source) Scrape(query url.Values) (*feeds.Feed, error) {
 
 	r := strings.NewReplacer("{site}", site, "{column}", column)
 
-	res, err := s.httpClient.Get(r.Replace(s.baseURL + endpoint))
+	req, err := http.NewRequestWithContext(ctx, "GET", r.Replace(s.baseURL+endpoint), nil)
+	if err != nil {
+		return nil, fmt.Errorf("%w", err)
+	}
+	res, err := s.httpClient.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("%w", err)
 	}
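
http.NewRequestWithContext ties the request to the caller's context, so an expired or cancelled context aborts the fetch mid-flight instead of letting it hang. A minimal standalone sketch of that behavior, using only the standard library:

package main

import (
	"context"
	"fmt"
	"net/http"
	"net/http/httptest"
	"time"
)

func main() {
	// A deliberately slow server.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		time.Sleep(5 * time.Second)
	}))
	defer server.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, "GET", server.URL, nil)
	if err != nil {
		panic(err)
	}
	// Fails with "context deadline exceeded" instead of blocking for 5s.
	_, err = server.Client().Do(req)
	fmt.Println(err)
}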
3 changes: 2 additions & 1 deletion scraper/source/impresswatchcolumn/source_test.go
@@ -1,6 +1,7 @@
 package impresswatchcolumn
 
 import (
+	"context"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
@@ -31,7 +32,7 @@ func TestScrape(t *testing.T) {
 	v := url.Values{}
 	v.Set("site", "k-tai")
 	v.Set("column", "stapa")
-	feed, err := source.Scrape(v)
+	feed, err := source.Scrape(context.Background(), v)
 	if err != nil {
 		t.Fatal(err)
 	}
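
The tests pass context.Background() since nothing needs cancelling; a variant with a deadline would fail a hung scrape fast instead of running into the suite timeout. A hedged sketch of that alternative (not in this commit):

// Sketch only: fail the test quickly if the scrape stalls.
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
feed, err := source.Scrape(ctx, v)
if err != nil {
	t.Fatal(err)
}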
9 changes: 7 additions & 2 deletions scraper/source/kittychaninfo/source.go
@@ -1,6 +1,7 @@
 package kittychaninfo
 
 import (
+	"context"
 	"fmt"
 	"io"
 	"net/http"
@@ -48,8 +49,12 @@ func (s *source) Name() string {
 	return "kittychan-info"
 }
 
-func (s *source) Scrape(url.Values) (*feeds.Feed, error) {
-	res, err := s.httpClient.Get(s.baseURL + endpoint)
+func (s *source) Scrape(ctx context.Context, _ url.Values) (*feeds.Feed, error) {
+	req, err := http.NewRequestWithContext(ctx, "GET", s.baseURL+endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("%w", err)
+	}
+	res, err := s.httpClient.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("%w", err)
 	}
3 changes: 2 additions & 1 deletion scraper/source/kittychaninfo/source_test.go
@@ -1,6 +1,7 @@
 package kittychaninfo
 
 import (
+	"context"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
@@ -33,7 +34,7 @@ func TestScrape(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	feed, err := source.Scrape(url.Values{})
+	feed, err := source.Scrape(context.Background(), url.Values{})
 	if err != nil {
 		t.Fatal(err)
 	}
9 changes: 7 additions & 2 deletions scraper/source/lalapiroomevent/source.go
@@ -1,6 +1,7 @@
 package lalapiroomevent
 
 import (
+	"context"
 	"fmt"
 	"net/http"
 	"net/url"
@@ -38,8 +39,12 @@ func (*source) Name() string {
 	return "lalapi-room-event"
 }
 
-func (s *source) Scrape(query url.Values) (*feeds.Feed, error) {
-	res, err := s.httpClient.Get(s.baseURL + endpoint)
+func (s *source) Scrape(ctx context.Context, query url.Values) (*feeds.Feed, error) {
+	req, err := http.NewRequestWithContext(ctx, "GET", s.baseURL+endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("%w", err)
+	}
+	res, err := s.httpClient.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("%w", err)
 	}
3 changes: 2 additions & 1 deletion scraper/source/lalapiroomevent/source_test.go
@@ -1,6 +1,7 @@
 package lalapiroomevent
 
 import (
+	"context"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
@@ -28,7 +29,7 @@ func TestScrape(t *testing.T) {
 	source := NewSource(server.Client())
 	source.baseURL = server.URL
 
-	feed, err := source.Scrape(url.Values{})
+	feed, err := source.Scrape(context.Background(), url.Values{})
 	if err != nil {
 		t.Fatal(err)
 	}
9 changes: 7 additions & 2 deletions scraper/source/yuyakekoyakenews/source.go
@@ -1,6 +1,7 @@
 package yuyakekoyakenews
 
 import (
+	"context"
 	"fmt"
 	"net/http"
 	"net/url"
@@ -36,8 +37,12 @@ func (s *source) Name() string {
 	return "yuyakekoyake-news"
 }
 
-func (s *source) Scrape(url.Values) (*feeds.Feed, error) {
-	res, err := s.httpClient.Get(s.baseURL + endpoint)
+func (s *source) Scrape(ctx context.Context, _ url.Values) (*feeds.Feed, error) {
+	req, err := http.NewRequestWithContext(ctx, "GET", s.baseURL+endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("%w", err)
+	}
+	res, err := s.httpClient.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("%w", err)
 	}
3 changes: 2 additions & 1 deletion scraper/source/yuyakekoyakenews/source_test.go
@@ -1,6 +1,7 @@
 package yuyakekoyakenews
 
 import (
+	"context"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
@@ -28,7 +29,7 @@ func TestScrape(t *testing.T) {
 	source := NewSource(server.Client())
 	source.baseURL = server.URL
 
-	feed, err := source.Scrape(url.Values{})
+	feed, err := source.Scrape(context.Background(), url.Values{})
 	if err != nil {
 		t.Fatal(err)
 	}
2 changes: 1 addition & 1 deletion server/server.go
@@ -47,7 +47,7 @@ func NewHandler(sources []scraper.Source) (http.Handler, error) {
 		return
 	}
 
-	feed, err := source.Scrape(r.URL.Query())
+	feed, err := source.Scrape(r.Context(), r.URL.Query())
 	if err != nil {
 		log.Printf("%v: %+v\n", reflect.TypeOf(source), err)
 		w.WriteHeader(http.StatusServiceUnavailable)
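
The handler now hands each source the request's own context, which net/http cancels when the client disconnects or the server shuts down. If a per-scrape deadline were also wanted, one option (not in this commit, and assuming context and time imports in server.go) would be to wrap that context:

// Sketch only: bound each scrape to 30 seconds on top of the
// client-disconnect cancellation that r.Context() already provides.
ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second)
defer cancel()
feed, err := source.Scrape(ctx, r.URL.Query())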
