-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathspider.go
67 lines (50 loc) · 1.22 KB
/
spider.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
package scrapejsp
import (
"context"
"encoding/json"
"fmt"
"log"
"github.com/tech-engine/goscrapy/cmd/gos"
"github.com/tech-engine/goscrapy/pkg/core"
)
// Spider is the scraper type for this package. It embeds
// gos.ICoreSpider[*Record], so that interface's methods are promoted onto
// Spider and can be called directly on a *Spider value.
type Spider struct {
gos.ICoreSpider[*Record]
}
// NewSpider creates the goscrapy engine for *Record items, registers the
// package-level MIDDLEWARES and PIPELINES on it, and calls the engine's
// Start(ctx) in a background goroutine.
//
// It returns the Spider wrapping that engine together with a receive-only
// channel on which the value returned by Start is delivered exactly once.
// The channel has a buffer of one so the background goroutine can always
// finish, even when the caller never reads from the channel. The channel is
// never closed.
func NewSpider(ctx context.Context) (*Spider, <-chan error) {
	// To use proxies, build the engine with a custom client instead:
	//
	//	proxies := core.WithProxies("proxy_url1", "proxy_url2", ...)
	//	engine := gos.New[*Record]().WithClient(
	//		gos.DefaultClient(proxies),
	//	)
	//
	// The local is named engine rather than core so that it does not shadow
	// the imported core package inside this function.
	engine := gos.New[*Record]()

	// Add middlewares
	engine.MiddlewareManager.Add(MIDDLEWARES...)

	// Add pipelines
	engine.PipelineManager.Add(PIPELINES...)

	// Buffered: the send below must not block forever if nobody receives,
	// otherwise the goroutine would leak after Start returns.
	errCh := make(chan error, 1)
	go func() {
		errCh <- engine.Start(ctx)
	}()

	return &Spider{ICoreSpider: engine}, errCh
}
// StartRequest is the entrypoint to the spider. It creates a new request,
// points it at a fixed jsonplaceholder todo URL, and passes it to s.Request
// with s.parse as the callback. Neither ctx nor job is used at the moment.
func (s *Spider) StartRequest(ctx context.Context, job *Job) {
	const todoURL = "https://jsonplaceholder.typicode.com/todos/1"

	request := s.NewRequest()
	// request.Meta("JOB", job)
	request.Url(todoURL)

	s.Request(request, s.parse)
}
// Close currently does nothing: the body is empty and ctx is unused.
// NOTE(review): presumably a shutdown hook expected by the framework —
// confirm against the caller before adding cleanup logic here.
func (s *Spider) Close(ctx context.Context) {
}
// parse is the response callback registered by StartRequest. It prints the
// HTTP status code, decodes the response body as JSON into a Record, and
// yields the record to the pipelines.
//
// A body that cannot be decoded is logged and skipped rather than treated as
// fatal, so one malformed response does not terminate the whole process.
func (s *Spider) parse(ctx context.Context, resp core.IResponseReader) {
	// Trailing newline keeps the status on its own line instead of running
	// into whatever is printed next.
	fmt.Printf("status: %d\n", resp.StatusCode())

	var data Record
	if err := json.Unmarshal(resp.Bytes(), &data); err != nil {
		log.Printf("scrapejsp: decoding response body: %v", err)
		return
	}

	// to push to pipelines
	s.Yield(&data)
}