-
-
Notifications
You must be signed in to change notification settings - Fork 101
/
link_fetcher.go
124 lines (97 loc) · 2.2 KB
/
link_fetcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package main
import (
"fmt"
"mime"
"net/url"
"strings"
)
// linkFetcher fetches links through an HTTP client, turns the responses into
// pages using the configured page parsers, and caches the outcome per URL.
type linkFetcher struct {
// client performs the requests issued by sendRequest.
client httpClient
// pageParsers are tried in order; the first one returning a non-nil page wins.
pageParsers []pageParser
// cache holds either a fetchResult or an error, keyed by the URL string
// passed to sendRequestWithCache.
cache cache
// options configures the fetcher; IgnoreFragments disables the fragment
// existence check performed by Fetch.
options linkFetcherOptions
}
// fetchResult is the value stored in the cache for a request that completed
// without an error.
type fetchResult struct {
// StatusCode is the status code reported by the response.
StatusCode int
// Page is the parsed page, or nil when no page parser produced one.
Page page
}
// newLinkFetcher builds a linkFetcher around the given HTTP client, page
// parsers and options, and equips it with a fresh cache.
func newLinkFetcher(c httpClient, ps []pageParser, o linkFetcherOptions) *linkFetcher {
	return &linkFetcher{
		client:      c,
		pageParsers: ps,
		cache:       newCache(),
		options:     o,
	}
}
// Fetch retrieves the link u and returns the response status code together
// with the parsed page (nil when no parser produced one), or an error.
//
// The fragment is split off before the request is sent. Unless the fragment
// check is skipped (no page, IgnoreFragments set, empty fragment, or a ":~:"
// text fragment), the fragment must be present in the page's Fragments;
// otherwise an "id #... not found" error is returned.
func (f *linkFetcher) Fetch(u string) (int, page, error) {
	target, fragment, err := separateFragment(u)
	if err != nil {
		return 0, nil, err
	}

	status, pg, err := f.sendRequestWithCache(target)
	if err != nil {
		return 0, nil, err
	}

	// TODO Support text fragments.
	skipFragmentCheck := pg == nil ||
		f.options.IgnoreFragments ||
		fragment == "" ||
		strings.HasPrefix(fragment, ":~:")

	if !skipFragmentCheck {
		if _, found := pg.Fragments()[fragment]; !found {
			return 0, nil, fmt.Errorf("id #%v not found", fragment)
		}
	}

	return status, pg, nil
}
// sendRequestWithCache returns the outcome of requesting u, consulting the
// cache first. When LoadOrStore hands back a store function, the request is
// actually sent and its outcome (a fetchResult on success, the error
// otherwise) is recorded through that function. When the store function is
// nil, the loaded value is replayed: an error is returned as is, anything
// else is treated as a fetchResult.
func (f *linkFetcher) sendRequestWithCache(u string) (int, page, error) {
	cached, store := f.cache.LoadOrStore(u)

	if store != nil {
		// Nothing cached yet: perform the request and record its outcome.
		status, pg, err := f.sendRequest(u)
		if err != nil {
			store(err)
		} else {
			store(fetchResult{StatusCode: status, Page: pg})
		}

		return status, pg, err
	}

	if cachedErr, isErr := cached.(error); isErr {
		return 0, nil, cachedErr
	}

	result := cached.(fetchResult)

	return result.StatusCode, result.Page, nil
}
// sendRequest performs an uncached request for the URL string s.
//
// It parses s, issues a Get through the client, extracts the media type from
// the Content-Type header (left empty when the header is blank), reads the
// body, and offers the result to each page parser in turn. The first non-nil
// page is returned with the response status code; if no parser yields a
// page, the status code is returned with a nil page. Any failure along the
// way is returned as (0, nil, err).
func (f *linkFetcher) sendRequest(s string) (int, page, error) {
	requestURL, err := url.Parse(s)
	if err != nil {
		return 0, nil, err
	}

	res, err := f.client.Get(requestURL, nil)
	if err != nil {
		return 0, nil, err
	}

	mediaType := ""

	if header := strings.TrimSpace(res.Header("Content-Type")); header != "" {
		if mediaType, _, err = mime.ParseMediaType(header); err != nil {
			return 0, nil, err
		}
	}

	body, err := res.Body()
	if err != nil {
		return 0, nil, err
	}

	for _, parser := range f.pageParsers {
		// Each parser receives a freshly parsed copy of the URL reported by
		// the response.
		pageURL, err := url.Parse(res.URL())
		if err != nil {
			return 0, nil, err
		}

		pg, err := parser.Parse(pageURL, mediaType, body)
		if err != nil {
			return 0, nil, err
		}

		if pg != nil {
			return res.StatusCode(), pg, nil
		}
	}

	return res.StatusCode(), nil, nil
}
// separateFragment splits the URL string s into the URL without its fragment
// and the (decoded) fragment itself. It returns an error when s cannot be
// parsed as a URL.
func separateFragment(s string) (string, string, error) {
	parsed, err := url.Parse(s)
	if err != nil {
		return "", "", err
	}

	fragment := parsed.Fragment
	parsed.Fragment = ""

	return parsed.String(), fragment, nil
}