-
Notifications
You must be signed in to change notification settings - Fork 1
/
rss.grabber.js
80 lines (69 loc) · 1.84 KB
/
rss.grabber.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
var https = require('https');
var http = require('http');
var url = require('url');
/**
 * Downloads a list of feed URLs one after another and hands every response
 * body to a user supplied handler.
 *
 * @constructor
 * @param {function(string, {baseUrl: string, host: string}): *} dataHandler
 *     Called with the response body and the URL context of the feed. A truthy
 *     return value is stored in the result under the feed URL.
 * @param {Object} [options] Accepted for interface compatibility; not read
 *     anywhere in this constructor.
 */
var RssCreator = function(dataHandler, options) {
    /**
     * Chooses the stream encoding from the response headers.
     *
     * No Content-Type header or an explicit UTF-8 charset yields 'utf8'.
     * Any other charset, or a Content-Type without a charset, yields 'binary'
     * so the handler receives the bytes undecoded (one character per byte).
     *
     * @param {Object} headers Response headers (lower-cased names).
     * @returns {string} 'utf8' or 'binary'.
     */
    function detectEncoding(headers) {
        var contentType = headers && headers['content-type'];
        if (!contentType) {
            return 'utf8';
        }
        // Stop at quotes, semicolons and whitespace so trailing parameters
        // are not mistaken for part of the charset name.
        var matchResult = String(contentType).match(/charset\s*=\s*"?([^";\s]+)/i);
        if (!matchResult) {
            return 'binary';
        }
        var charset = matchResult[1].toLowerCase();
        // "UTF-8" is the usual spelling in headers; "utf8" is Node's alias.
        return (charset === 'utf8' || charset === 'utf-8') ? 'utf8' : 'binary';
    }

    /**
     * Collects the complete response body as a string.
     *
     * @param {Object} response Readable HTTP response.
     * @param {function(string)} callback Receives the body on 'end'.
     */
    function readFullResponse(response, callback) {
        // Always install a decoder: with 'utf8' this keeps multi-byte
        // characters intact when they are split across chunks.
        response.setEncoding(detectEncoding(response.headers));
        var data = '';
        response.on('readable', function() {
            var chunk;
            // read() returns null once the buffer is drained (and on the
            // final 'readable' before 'end'); never append that null.
            while ((chunk = response.read()) !== null) {
                data += chunk;
            }
        });
        response.on('end', function() {
            callback(data);
        });
    }

    /**
     * Derives the URL context that is passed to the data handler.
     *
     * @param {string} feedUrl URL of the feed.
     * @returns {{baseUrl: string, host: string}} baseUrl is the feed URL up
     *     to and including the last '/' of its path; host is protocol + host.
     */
    function createBaseUrl(feedUrl) {
        var parsedUrl = url.parse(feedUrl);
        var origin = parsedUrl.protocol + '//' + parsedUrl.host;
        // Use the pathname only: a '/' inside the query string must not
        // influence the directory of the feed.
        var pathname = parsedUrl.pathname || '';
        var lastSlash = pathname.lastIndexOf('/');
        var resultUrl = {};
        if (lastSlash !== -1) {
            resultUrl.baseUrl = origin + pathname.substring(0, lastSlash + 1);
        } else {
            resultUrl.baseUrl = feedUrl + '/';
        }
        resultUrl.host = origin;
        return resultUrl;
    }

    /**
     * Fetches the last pending URL, records its outcome and continues with
     * the remaining ones; invokes the callback once the queue is empty.
     *
     * @param {Array<string>} pending Private queue, consumed from the end.
     * @param {Object} result Outcome per feed URL (handler value or Error).
     * @param {function(Object)} callback Receives result when done.
     */
    function scrape(pending, result, callback) {
        if (pending.length === 0) {
            callback(result);
            return;
        }

        var feedUrl = pending.pop();
        // Guards against advancing the queue twice for the same URL, e.g.
        // when both the request and the response report an error.
        var settled = false;

        function fail(error) {
            if (settled) {
                return;
            }
            settled = true;
            result[feedUrl] = error;
            scrape(pending, result, callback);
        }

        // Decide by scheme only; "https" elsewhere in the URL is irrelevant.
        var requestHttp = /^https:/i.test(feedUrl) ? https : http;
        var request;
        try {
            request = requestHttp.get(feedUrl, function(res) {
                res.on('error', fail);
                readFullResponse(res, function(data) {
                    if (settled) {
                        return;
                    }
                    settled = true;
                    var feed = dataHandler(data, createBaseUrl(feedUrl));
                    if (feed) {
                        result[feedUrl] = feed;
                    }
                    scrape(pending, result, callback);
                });
            });
        } catch (e) {
            // get() throws synchronously for malformed URLs; record that
            // like any other request error instead of aborting the run.
            if (settled) {
                throw e;
            }
            fail(e);
            return;
        }
        request.on('error', fail);
    }

    /**
     * Grabs all given feeds sequentially.
     *
     * @param {Array<string>} urls Feed URLs; the array is not modified.
     * @param {function(Object)} callback Receives an object keyed by feed
     *     URL holding the handler's truthy return value or the Error that
     *     occurred for that URL.
     */
    this.grab = function(urls, callback) {
        // Work on a copy with a fresh result object so that the caller's
        // array stays intact and separate grab() calls do not share state.
        scrape((urls || []).slice(), {}, callback);
    };
};
// The constructor itself is the module's export (CommonJS).
module.exports = RssCreator;