-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler.js
110 lines (92 loc) · 3.25 KB
/
crawler.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
const dns = require('./checkers/dns')
const http = require('./checkers/http')
const sni = require('./checkers/sni')
const fs = require('fs')
const PQueue = require('p-queue')
const LineByLineReader = require('line-by-line')
const csvWriter = require('csv-write-stream')
// ---- Command-line configuration ---------------------------------------------
// argv[2]: input file (required), argv[3]: concurrency (default 10),
// argv[4]: report path (default: out.csv next to this script).

// Path of the input file that the line reader below consumes.
const filePath = process.argv[2]
if (!filePath) {
  // Fail before any output file is opened, with a message that says what is missing.
  throw new Error('usage: node <script> <input-file> [concurrency] [report-path]')
}

// Number of tasks the queue may run at once. An explicit radix keeps inputs such
// as "0x10" from being read as hexadecimal; anything that is not a positive
// integer is rejected here instead of surfacing later as NaN inside the queue.
const concurrencyLimit = Number.parseInt(process.argv[3] || '10', 10)
if (!Number.isInteger(concurrencyLimit) || concurrencyLimit < 1) {
  throw new TypeError(
    `concurrency must be a positive integer, got "${process.argv[3]}"`
  )
}

// Upper bound on queued (not yet started) tasks; compared against `queue.size`
// elsewhere in this file to decide when to pause the reader.
const concurrencyPendingLimit = concurrencyLimit * 10

// Destination of the CSV report.
const reportPath = process.argv[4] || `${__dirname}/out.csv`

// Shared task queue limited to `concurrencyLimit` concurrent tasks.
const queue = new PQueue({ concurrency: concurrencyLimit })

// CA certificate, read once at startup and handed to the SNI checker.
const ca = fs.readFileSync(`${__dirname}/ca.pem`)

// Line-oriented reader over the input file.
const lr = new LineByLineReader(filePath)

// CSV serializer whose output is streamed into the report file.
const writer = csvWriter()
writer.pipe(fs.createWriteStream(reportPath))
// True only while this script has paused the reader for backpressure. Without
// it, the poller below would call resume() and log "reader is resumed" on every
// tick while the queue is short, even though nothing had been paused.
let readerPaused = false

// For every input line: split it into `rank,hostname` and enqueue a check task.
lr.on('line', (line) => {
  const [rank, hostname] = line.split(',')
  if (!rank || !hostname) {
    console.warn(`invalid line ${rank},${hostname}`)
    return
  }
  queue.add(createTask(rank, hostname))
  // Backpressure: stop reading once too many tasks are waiting in the queue.
  if (queue.size >= concurrencyPendingLimit) {
    console.log('reader is paused')
    readerPaused = true
    lr.pause()
  }
})

// Poll every 500 ms and resume the reader once the backlog has drained below
// the limit. Only acts if the reader was actually paused by the handler above.
const interval = setInterval(() => {
  if (readerPaused && queue.size < concurrencyPendingLimit) {
    console.log('reader is resumed')
    readerPaused = false
    lr.resume()
  }
}, 500)

// Once the whole file has been read there is nothing left to resume, so stop
// polling; tasks already queued keep running.
lr.on('end', () => {
  console.log('All lines are read, file is closed now.')
  clearInterval(interval)
})
/**
 * Builds a queue task that probes one host and writes the outcome to the report.
 *
 * The returned async function starts the DNS, HTTP and SNI checks together and
 * writes one row to `writer`. If anything in that path throws, a new task with
 * `tries + 1` is added to `queue`; when `tries > 5` the queue is also paused
 * and restarted after 5 seconds. Once `tries > 10`, a row with every result
 * field set to 'unknown' is written instead of retrying.
 *
 * @param {string} rank - Rank column of the input line, copied into the row.
 * @param {string} hostname - Host to check.
 * @param {number} [tries=0] - Number of earlier failed attempts for this host.
 * @returns {Function} Async function suitable for `queue.add`.
 */
function createTask(rank, hostname, tries = 0) {
  const UNKNOWN = 'unknown'

  // Row written when the retry budget is used up.
  const unknownRow = () => ({
    rank,
    hostname,
    dns: UNKNOWN,
    dns_ip: UNKNOWN,
    http: UNKNOWN,
    http_status: UNKNOWN,
    sni: UNKNOWN,
    sni_error: UNKNOWN
  })

  // Pauses the shared queue and schedules its restart 5 seconds later.
  const pauseQueueTemporarily = (err) => {
    queue.pause()
    console.log('queue is paused:', rank, hostname, err)
    setTimeout(() => {
      queue.start()
      console.log('queue is resumed')
    }, 5000)
  }

  return async () => {
    try {
      const [dnsResult, httpResult, sniResult] = await Promise.all([
        dns.check(hostname, [process.env.DNS_HOST]),
        http.check(hostname, { host: process.env.HTTP_HOST }),
        sni.check(hostname, ca, { host: process.env.TLS_HOST })
      ])

      // A check counts as passed when it is not flagged as filtered and
      // returned the expected marker value.
      const dnsPassed = !dnsResult.filtered && dnsResult.ip === '1.2.3.4'
      const httpPassed = !httpResult.filtered && httpResult.body === 'OK'
      const sniPassed = !sniResult.filtered && sniResult.cn === '1.2.3.4'

      // Key order is kept as-is because it is the order handed to the writer.
      const row = {
        rank,
        hostname,
        dns: dnsPassed,
        dns_ip: dnsResult.filtered ? dnsResult.ip : '',
        http: httpPassed,
        http_status: httpResult.status,
        sni: sniPassed,
        sni_error: sniResult.error || ''
      }
      console.log(row)
      writer.write(row)
    } catch (err) {
      if (tries > 10) {
        const row = unknownRow()
        console.warn(row)
        writer.write(row)
        return
      }

      queue.add(createTask(rank, hostname, tries + 1))
      console.log(`retry task: ${rank},${hostname}`, err)

      if (!(tries > 5)) {
        return
      }
      pauseQueueTemporarily(err)
    }
  }
}