-
Notifications
You must be signed in to change notification settings - Fork 5
/
iconhash.go
221 lines (194 loc) · 4.56 KB
/
iconhash.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
package gofofa
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"github.com/sirupsen/logrus"
"github.com/twmb/murmur3"
"github.com/vincent-petithory/dataurl"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"io/ioutil"
"net/http"
"net/url"
"os"
"strings"
)
// mmh3Hash32 returns the icon hash of raw as a signed decimal string.
//
// raw is base64-encoded with the standard alphabet and wrapped into lines of
// at most 76 characters, each terminated by a single '\n'. The 32-bit
// MurmurHash3 of that text is reinterpreted as an int32 and formatted in base
// 10, so the result may be negative.
//
// The line layout is the MIME-style one that Python's base64.encodebytes
// produces, which is the input the widely used favicon-hash recipe feeds to
// mmh3. Empty input therefore hashes the empty string.
func mmh3Hash32(raw []byte) string {
	const lineLen = 76

	encoded := base64.StdEncoding.EncodeToString(raw)

	var wrapped bytes.Buffer
	wrapped.Grow(len(encoded) + len(encoded)/lineLen + 1)
	for len(encoded) > 0 {
		n := lineLen
		if len(encoded) < n {
			n = len(encoded)
		}
		wrapped.WriteString(encoded[:n])
		// Exactly one newline per line, including the last one. Appending a
		// further unconditional newline would yield a blank extra line (and a
		// different hash) whenever the encoded length is a multiple of 76.
		wrapped.WriteByte('\n')
		encoded = encoded[n:]
	}

	return fmt.Sprintf("%d", int32(murmur3.Sum32(wrapped.Bytes())))
}
// isImageContent reports whether contentType names an image media type, i.e.
// whether it starts with "image/".
//
// The comparison ignores letter case because media type names are
// case-insensitive, so "Image/PNG" is accepted just like "image/png".
// Parameters after the type (for example "; charset=...") do not affect the
// result since only the prefix is inspected.
func isImageContent(contentType string) bool {
	const prefix = "image/"
	return len(contentType) >= len(prefix) &&
		strings.EqualFold(contentType[:len(prefix)], prefix)
}
// fileIconHash computes the icon hash of a file on the local filesystem.
//
// url is used as a filesystem path. The file is read in full, its content type
// is sniffed with http.DetectContentType, and the hash is computed only when
// that type is an image. A read failure is returned unchanged; non-image
// content yields a "content is not a image" error. hash is empty whenever err
// is non-nil.
func fileIconHash(url string) (hash string, err error) {
	logrus.Debug("load local file:", url)

	content, err := os.ReadFile(url)
	if err != nil {
		return "", err
	}

	detected := http.DetectContentType(content)
	logrus.Debug("local file format:", detected)

	if !isImageContent(detected) {
		return "", errors.New("content is not a image")
	}
	return mmh3Hash32(content), nil
}
// fetchURLContent downloads iconUrl with an HTTP GET and returns the response
// body together with its content type.
//
// The Content-Type response header is used verbatim when it is present;
// otherwise the type is sniffed from the body with http.DetectContentType.
// The HTTP status code is not inspected. When the request or the body read
// fails, the error is returned and contentType is left empty.
func fetchURLContent(iconUrl string) (data []byte, contentType string, err error) {
	resp, err := http.Get(iconUrl)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		// Whatever was read before the failure is still handed back.
		return body, "", err
	}

	// Prefer the type declared by the server over sniffing.
	if declared := resp.Header.Get("Content-type"); len(declared) > 0 {
		return body, declared, nil
	}
	return body, http.DetectContentType(body), nil
}
// ExtractIconFromHtml scans an HTML document and returns the href of the first
// <link> element that declares itself as an icon.
//
// A <link> qualifies when its rel attribute contains the token "icon"
// (compared case-insensitively) and it carries a non-empty href. rel is
// treated as a whitespace-separated token list, so values such as
// "shortcut icon" match while "apple-touch-icon" does not. The href is
// returned exactly as written in the document; it may be relative or a data:
// URI. An empty string is returned when no such element is found before the
// tokenizer reports the end of input or an error.
func ExtractIconFromHtml(data []byte) string {
	z := html.NewTokenizer(bytes.NewReader(data))
	for {
		switch z.Next() {
		case html.ErrorToken:
			// End of the document (or a tokenizer error): nothing found.
			return ""
		case html.StartTagToken, html.SelfClosingTagToken:
			name, hasAttr := z.TagName()
			if atom.Lookup(name) != atom.Link {
				continue
			}

			var (
				href     string
				isIcon   bool
				rejected bool
			)
			for hasAttr && !rejected {
				var k, v []byte
				k, v, hasAttr = z.TagAttr()
				switch string(k) {
				case "rel":
					// strings.Fields splits on any run of whitespace, so
					// tokens separated by tabs or newlines are recognised too.
					for _, tok := range strings.Fields(string(v)) {
						if strings.EqualFold(tok, "icon") {
							isIcon = true
							break
						}
					}
					// A rel without the icon token disqualifies this tag;
					// stop reading its remaining attributes.
					rejected = !isIcon
				case "href":
					href = string(v)
				}
			}

			if isIcon && href != "" {
				return href
			}
		}
	}
}
// IconHash computes the icon hash for iconUrl, which may be one of:
//
//   - a path to a local icon file: the file is read and hashed;
//   - the URL of a remote icon: it is downloaded and hashed;
//   - the URL of a web page: the favicon link is extracted from the HTML
//     (inline data: URIs are supported), downloaded and hashed, falling back
//     to /favicon.ico on the same scheme and host.
//
// An error is returned when iconUrl is neither an existing path nor contains
// "://", when it cannot be parsed as a URL, when an inline data: icon cannot
// be decoded, or when none of the attempts yields image content.
func IconHash(iconUrl string) (hash string, err error) {
	// Anything that can be stat'ed is treated as a local file. Every Stat
	// failure, not only "does not exist", falls through to URL handling.
	if _, statErr := os.Stat(iconUrl); statErr == nil {
		return fileIconHash(iconUrl)
	}

	if !strings.Contains(iconUrl, "://") {
		return "", errors.New("icon url is not valid url")
	}

	base, err := url.Parse(iconUrl)
	if err != nil {
		return "", err
	}

	// First attempt: the URL itself may already point at an image.
	body, ctype, err := fetchURLContent(iconUrl)
	if isImageContent(ctype) {
		return mmh3Hash32(body), err
	}

	// Second attempt: treat the response as a page and look for its favicon.
	var iconRef string
	if strings.Contains(ctype, "html") {
		logrus.Debug("try to parse favicon url")
		iconRef = ExtractIconFromHtml(body)
	}

	if iconRef != "" {
		logrus.Debug("parsed favicon url from html:", iconRef)

		// The icon may be embedded directly as a base64 data: URI.
		if strings.HasPrefix(iconRef, "data:image") {
			embedded, decodeErr := dataurl.DecodeString(iconRef)
			if decodeErr != nil {
				return "", decodeErr
			}
			if isImageContent(embedded.MediaType.ContentType()) {
				return mmh3Hash32(embedded.Data), nil
			}
		}

		// Otherwise resolve the (possibly relative) reference against the
		// page URL and download it.
		ref, refErr := url.Parse(iconRef)
		if refErr != nil {
			logrus.Debug("parsed favicon url is not valid:", refErr)
		} else {
			body, ctype, err = fetchURLContent(base.ResolveReference(ref).String())
			if isImageContent(ctype) {
				return mmh3Hash32(body), err
			}
		}
	}

	// Last resort: the conventional location at the site root.
	logrus.Debug("try default favicon.ico")
	body, ctype, err = fetchURLContent(base.Scheme + "://" + base.Host + "/favicon.ico")
	if isImageContent(ctype) {
		return mmh3Hash32(body), err
	}

	return "", errors.New("can not find any icon")
}