diff --git a/app/bilicrawler-0.1.1.js b/app/bilicrawler-0.1.1.js
new file mode 100644
index 0000000..5ce5aba
--- /dev/null
+++ b/app/bilicrawler-0.1.1.js
@@ -0,0 +1,103 @@
+const superagent = require('superagent');
+var moment = require('moment');
+moment.locale('zh-cn');
+
+// Element with id "log-process"; run() appends its progress messages to this
+// element's innerHTML.
+const logit = document.getElementById('log-process')
+
+
+
+/**
+ * Request the next package from the /getPackage endpoint.
+ *
+ * @returns {Promise<string|undefined>} Resolves with the raw response text
+ *   (the caller JSON-parses it); rejects with the superagent error when the
+ *   request fails.
+ */
+const getPackageAsync = () => {
+  const url = `http://45.32.68.44:16123/getPackage`;
+  return new Promise((resolve, reject) => {
+    superagent.get(url).end((err, res) => {
+      // Return after rejecting so resolve() is not also invoked on the
+      // already-settled promise.
+      if (err) return reject(err);
+      return resolve(res && res.text);
+    });
+  });
+};
+/**
+ * POST a finished package to the /uploadPackage endpoint as a form.
+ *
+ * @param {*} pid - Package id, sent as the `pid` field.
+ * @param {Array} cardList - Collected cards, sent JSON-serialized as the
+ *   `package` field.
+ * @returns {Promise<string|undefined>} Resolves with the response text, or a
+ *   falsy value when there is no response. Never rejects: request errors,
+ *   including the `.timeout(3000)` expiry, are not reported to the caller.
+ */
+const uploadPackageAsync = (pid, cardList) => {
+  const endpoint = `http://45.32.68.44:16123/uploadPackage`;
+  const payload = { pid, package: JSON.stringify(cardList) };
+  return new Promise((resolve) => {
+    superagent
+      .post(endpoint)
+      .type('form')
+      .send(payload)
+      .timeout(3000)
+      .end((_err, res) => resolve(res && res.text));
+  });
+};
+/**
+ * Fetch the card of one user from the bilibili web-interface card API.
+ *
+ * @param {*} mid - Member id, interpolated into the `mid` query parameter.
+ * @returns {Promise<string|undefined>} Resolves with the raw response text, or
+ *   a falsy value when there is no response. Never rejects: request errors are
+ *   dropped, so callers must treat a falsy result as a failed fetch.
+ */
+const fetchUserInfo = (mid) => {
+  const endpoint = `http://api.bilibili.com/x/web-interface/card?mid=${mid}`;
+  return new Promise((resolve) => {
+    superagent.get(endpoint).end((_err, res) => {
+      resolve(res && res.text);
+    });
+  });
+};
+// Pause helper: returns a promise that resolves (with no value) once the
+// setTimeout of `ms` fires.
+const sleep = (ms) => new Promise((wake) => {
+  setTimeout(wake, ms);
+});
+// Inclusive integer range: rangeArray(0, 4) => [0, 1, 2, 3, 4].
+const rangeArray = (start, end) => {
+  const size = end - start + 1;
+  return [...new Array(size).keys()].map((offset) => offset + start);
+};
+// Mids covered by one package, in blocks of a thousand: package N spans
+// N * 1000 + 1 through (N + 1) * 1000 inclusive.
+const packageArray = (packageId) => {
+  const firstMid = packageId * 1000 + 1;
+  const lastMid = (packageId + 1) * 1000;
+  return rangeArray(firstMid, lastMid);
+};
+// Current moment() time formatted as 'YYYY-MM-DD HH:mm:ss'; used to stamp log
+// lines and the `ctime` field of fetched cards.
+const nowstr = () => {
+  const now = moment();
+  return now.format('YYYY-MM-DD HH:mm:ss');
+};
+/**
+ * Fetch the user card for every mid of a package and upload the package once
+ * all of them have been collected.
+ *
+ * @param {*} pid - Package id, forwarded to uploadPackageAsync.
+ * @param {Array} mids - Pending mids. Consumed in place: entries are removed
+ *   as they are fetched, and whatever could not be fetched is left in it.
+ * @returns {Promise<void>} Settles after the upload, or after the failure has
+ *   been logged when not every card could be collected.
+ */
+const packageFetchInsertAsync = async (pid, mids) => {
+  const midSize = mids.length;
+  const cardList = [];
+  let loopCount = 0;
+  while (mids.length > 0) {
+    loopCount++;
+    // Give up once the budget of two passes over the package is spent.
+    if (loopCount > midSize * 2) break;
+    // Declared outside the try block so the catch handler can see it; a `let`
+    // inside `try` is out of scope in `catch`, and reading it there throws a
+    // ReferenceError that aborts the whole package.
+    const mid = mids.pop();
+    try {
+      const rs = await fetchUserInfo(mid);
+      if (rs) {
+        const data = JSON.parse(rs).data;
+        data.card.mid = mid;
+        data.card.archive_count = data.archive_count;
+        data.card.ctime = nowstr();
+        cardList.push(data.card);
+      } else {
+        // Re-queue at the front: pop() takes from the back, so pushing the mid
+        // back there would retry it immediately and let one failing mid eat
+        // the entire retry budget before the rest are attempted.
+        mids.unshift(mid);
+      }
+    } catch (error) {
+      // Covers malformed JSON and responses without `data.card`.
+      mids.unshift(mid);
+      console.error(`mid=${mid}`, error);
+    }
+    await sleep(60);
+  }
+  await sleep(1000);
+  // Upload only complete packages; otherwise report the mids still pending.
+  if (cardList.length === midSize) {
+    await uploadPackageAsync(pid, cardList);
+  } else {
+    console.error(`${nowstr()} failed to fetch info, mids=${mids}`);
+  }
+};
+
+//function exec () {
+// Click handler that drives the crawl: repeatedly asks the server for a
+// package, crawls it, and mirrors progress to the console and to the `logit`
+// element, until the server answers with pid -1.
+// NOTE(review): several string literals below are split across two physical
+// lines in this patch text; presumably a line-break escape or tag was lost.
+// Confirm against the real file before applying.
+const run = async () => {
+ console.log(nowstr() + " Start to fetch member info.")
+ logit.innerHTML += nowstr() + " Start to fetch member info.
";
+
+ for (;;) {
+ // A rejection from getPackageAsync or a JSON.parse failure is not caught
+ // here, so it ends the loop as a rejected promise.
+ const data = await getPackageAsync();
+ const pid = JSON.parse(data).pid;
+ // pid -1 stops the crawl (loose equality, so the string "-1" matches too);
+ // presumably the server's "no more packages" signal. TODO confirm.
+ if (pid == -1) break
+
+ const mids = packageArray(pid)
+ console.log(`${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`);
+
+ logit.innerHTML += `${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`;
+ logit.innerHTML += "
";
+
+ // `rs` is never read: packageFetchInsertAsync has no return value. The
+ // "Send package" messages below are therefore emitted even when the
+ // package was incomplete and the upload was skipped.
+ const rs = await packageFetchInsertAsync(pid, mids)
+ console.log(`${nowstr()} Send package ${pid}`);
+ logit.innerHTML += `${nowstr()} Send package ${pid}`;
+ logit.innerHTML += "
";
+ }
+ console.log(nowstr() + ` End fetch.`);
+ logit.innerHTML += nowstr() + ` End fetch.`;
+ logit.innerHTML += "
"
+}
+// start code
+// run();
+//}
+
+
+// Start the crawl when the element matching '#btn-run' is clicked.
+document.querySelector('#btn-run').addEventListener('click', run)
\ No newline at end of file
diff --git a/app/bilicrawler.js b/app/bilicrawler.js
index 5ce5aba..0c69949 100644
--- a/app/bilicrawler.js
+++ b/app/bilicrawler.js
@@ -1,11 +1,9 @@
+const logit = document.getElementById('log-process')
+
const superagent = require('superagent');
var moment = require('moment');
moment.locale('zh-cn');
-const logit = document.getElementById('log-process')
-
-
-
const getPackageAsync = () => {
const url = `http://45.32.68.44:16123/getPackage`;
return new Promise((resolve, reject) => {
@@ -44,37 +42,42 @@ const packageFetchInsertAsync = async (pid, mids) => {
loopCount++
// 循环两遍未结束,强行退出
if (loopCount > midSize * 2) break
+ let mid = mids.pop();
try {
- let mid = mids.pop();
- let rs = await fetchUserInfo(mid);
- if (rs) {
- const data = JSON.parse(rs).data;
- data.card.mid = mid;
- data.card.archive_count = data.archive_count;
- data.card.ctime = nowstr()
- cardList.push(data.card);
- } else {
+ fetchUserInfo(mid).then(rs => {
+ if (rs) {
+ const data = JSON.parse(rs).data;
+ data.card.mid = mid;
+ data.card.archive_count = data.archive_count;
+ data.card.ctime = nowstr()
+ cardList.push(data.card);
+ } else {
+ mids.push(mid)
+ }
+ }).catch(err => {
mids.push(mid)
- }
+ console.error(`mid=${mid}`, err)
+ });
+
} catch (error) {
mids.push(mid)
console.error(`mid=${mid}`, error)
}
- await sleep(60)
+ await sleep(210) //ms
}
- await sleep(1000)
+ // await sleep(1000)
if (cardList.length === midSize) {
await uploadPackageAsync(pid, cardList)
+ console.log(`${nowstr()} Send package ${pid}`);
} else {
console.error(`${nowstr()} failed to fetch info, mids=${mids}`);
+ logit.innerHTML += `${nowstr()} failed to fetch info, mids=${mids}`;
+ logit.innerHTML += "
";
}
}
-//function exec () {
const run = async () => {
console.log(nowstr() + " Start to fetch member info.")
- logit.innerHTML += nowstr() + " Start to fetch member info.
";
-
for (;;) {
const data = await getPackageAsync();
const pid = JSON.parse(data).pid;
@@ -83,13 +86,10 @@ const run = async () => {
const mids = packageArray(pid)
console.log(`${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`);
- logit.innerHTML += `${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`;
+ logit.innerHTML += `${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`;
logit.innerHTML += "
";
- const rs = await packageFetchInsertAsync(pid, mids)
- console.log(`${nowstr()} Send package ${pid}`);
- logit.innerHTML += `${nowstr()} Send package ${pid}`;
- logit.innerHTML += "
";
+ await packageFetchInsertAsync(pid, mids)
}
console.log(nowstr() + ` End fetch.`);
logit.innerHTML += nowstr() + ` End fetch.`;
@@ -97,7 +97,6 @@ const run = async () => {
}
// start code
// run();
-//}
document.querySelector('#btn-run').addEventListener('click', run)
\ No newline at end of file
diff --git a/app/package.json b/app/package.json
index 033d0f0..2b2c32a 100755
--- a/app/package.json
+++ b/app/package.json
@@ -1,6 +1,6 @@
{
"name": "bilispider",
- "version": "0.1.1",
+ "version": "0.1.2",
"description": "Bilibili spider to crawl data",
"main": "main.js",
"scripts": {