-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfitscrap.go
76 lines (55 loc) · 1.43 KB
/
fitscrap.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"github.com/gocolly/colly"
)
// pageInfo holds the counters built up during a crawl. Both maps are keyed by
// the scraped text and store how many times that text was seen.
type pageInfo struct {
	// Heading counts occurrences of each article heading.
	Heading map[string]int
	// Excerpt counts occurrences of each article excerpt.
	Excerpt map[string]int
}

// pages is an ordered collection of scraped pageInfo values.
type pages struct {
	// articles is unexported, so encoding/json will not serialize it.
	articles []pageInfo
}

// AddItem appends item to the collection and returns the updated slice.
//
// The returned slice shares its backing array with the collection, so callers
// that need an independent copy must clone it.
func (p *pages) AddItem(item pageInfo) []pageInfo {
	p.articles = append(p.articles, item)
	return p.articles
}
// main registers crawl as the handler for "/" on the default mux and serves
// HTTP on port 3000.
func main() {
	http.HandleFunc("/", crawl)

	// ListenAndServe only returns on failure (e.g. the port is already in
	// use); surface that error and exit non-zero instead of dropping it.
	log.Fatal(http.ListenAndServe(":3000", nil))
}
func crawl(w http.ResponseWriter, r *http.Request) {
scrappedPage := &pageInfo{Heading: make(map[string]int), Excerpt: make(map[string]int)}
pages := make([]pageInfo, 20)
url := "https://hackernoon.com/"
collector := colly.NewCollector(
colly.AllowedDomains("hackernoon.com", "medium.com"),
)
collector.OnResponse(func(r *colly.Response) {
log.Println("response received")
})
collector.OnError(func(r *colly.Response, err error) {
log.Println("error:", err)
})
collector.OnHTML(".js-trackedPost a", func(e *colly.HTMLElement) {
heading := e.ChildText("h3")
excerpt := e.ChildText("div.u-contentSansThin")
if heading != "" && excerpt != "" {
scrappedPage.Heading[heading]++
scrappedPage.Excerpt[excerpt]++
pages = append({Heading: "ef", Excerpt: "fe"})
jsonOutput, err := json.Marshal(scrappedPage)
if err != nil {
fmt.Println(err)
return
}
// fmt.Println(string(jsonOutput))
w.Header().Set("Content-Type", "application/json")
w.Write(jsonOutput)
}
})
collector.Visit(url)
}