Skip to content

Commit

Permalink
Merge pull request #6 from emptymalei/master
Browse files Browse the repository at this point in the history
integrate new script into electron app
  • Loading branch information
emptymalei authored Feb 23, 2018
2 parents d9032df + 3a5c71f commit 938ad4e
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 26 deletions.
103 changes: 103 additions & 0 deletions app/bilicrawler-0.1.1.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
const superagent = require('superagent');
var moment = require('moment');
moment.locale('zh-cn');

// Element with id "log-process"; run() appends its progress messages to this
// element's innerHTML.
const logit = document.getElementById('log-process')



/**
 * Request the next package descriptor from the task server.
 *
 * @returns {Promise<string|undefined>} Resolves with the raw response text
 *   (run() parses it as JSON and reads `pid`); rejects with the superagent
 *   error when the request fails.
 */
const getPackageAsync = () => {
  const url = `http://45.32.68.44:16123/getPackage`;
  const request = superagent.get(url);
  return new Promise((resolve, reject) => {
    request.end((err, res) => {
      if (err) {
        // Settle exactly once: stop here instead of falling through to resolve().
        reject(err);
        return;
      }
      resolve(res && res.text);
    });
  });
};
/**
 * Upload the collected user cards of one package to the task server.
 *
 * The request is form-encoded and aborted by superagent after 3000 ms.
 * Upload is best-effort: the returned promise never rejects. On failure the
 * error is logged and the promise resolves with whatever response text is
 * available (usually undefined).
 *
 * @param {number} pid - Package id being uploaded.
 * @param {Array<Object>} cardList - Cards to upload; sent JSON-serialized in
 *   the `package` form field.
 * @returns {Promise<string|undefined>} Response text, or undefined on failure.
 */
const uploadPackageAsync = (pid, cardList) => {
  const url = `http://45.32.68.44:16123/uploadPackage`;
  const data = {
    pid: pid,
    package: JSON.stringify(cardList)
  };
  return new Promise((resolve) => {
    superagent.post(url).type('form').send(data).timeout(3000).end((err, res) => {
      if (err) {
        // Keep resolving as before, but do not drop the failure silently.
        console.error(`uploadPackage failed, pid=${pid}`, err);
      }
      resolve(res && res.text);
    });
  });
};
/**
 * Fetch the user card for one member id from the bilibili web API.
 *
 * Never rejects: request errors are ignored and the promise resolves with the
 * response text when a response exists, otherwise with the falsy `res` value,
 * which callers treat as "retry".
 *
 * @param {number|string} mid - Member id, interpolated into the query string.
 * @returns {Promise<string|undefined>} Raw response text, if any.
 */
const fetchUserInfo = (mid) => {
  const url = `http://api.bilibili.com/x/web-interface/card?mid=${mid}`;
  return new Promise((resolve) => {
    superagent.get(url).end((err, res) => {
      const body = res && res.text;
      resolve(body);
    });
  });
};
// Sleep helper: returns a promise that resolves (with no value) after `ms` milliseconds.
const sleep = (ms) => new Promise((wake) => {
  setTimeout(wake, ms);
});
// Inclusive integer range: rangeArray(0, 4) => [0, 1, 2, 3, 4].
// An empty range (end < start) yields [] instead of throwing a RangeError
// from a negative array length.
const rangeArray = (start, end) =>
  Array.from({ length: Math.max(0, end - start + 1) }, (_, offset) => start + offset)
// Build the mid range covered by one package, in blocks of one thousand:
// package N spans N * 1000 + 1 through (N + 1) * 1000, both inclusive.
const packageArray = (packageId) => {
  const firstMid = packageId * 1000 + 1
  const lastMid = (packageId + 1) * 1000
  return rangeArray(firstMid, lastMid)
}
// Current time formatted by moment as 'YYYY-MM-DD HH:mm:ss'; used as the log timestamp.
const nowstr = () => {
  const now = moment()
  return now.format('YYYY-MM-DD HH:mm:ss')
}
/**
 * Fetch the user card for every mid of a package and upload the package once
 * every card has been collected.
 *
 * Mids are popped from the end of `mids`; a mid whose fetch returns nothing or
 * whose response cannot be processed is pushed back and retried. The loop
 * gives up after more than `2 * mids.length` iterations, in which case the
 * package is not uploaded and the remaining mids are logged.
 *
 * @param {number} pid - Package id, forwarded to uploadPackageAsync.
 * @param {Array<number>} mids - Mids still to process. Mutated in place.
 * @returns {Promise<void>}
 */
const packageFetchInsertAsync = async (pid, mids) => {
  const midSize = mids.length
  const cardList = []
  let loopCount = 0
  while (mids.length > 0) {
    loopCount++
    // Retry budget: bail out if two passes over the package were not enough.
    if (loopCount > midSize * 2) break
    // Declared outside the try block so the catch handler can re-queue and
    // log it; block-scoping it inside `try` made the handler itself throw.
    const mid = mids.pop()
    try {
      const rs = await fetchUserInfo(mid)
      if (rs) {
        const data = JSON.parse(rs).data
        data.card.mid = mid
        data.card.archive_count = data.archive_count
        data.card.ctime = nowstr()
        cardList.push(data.card)
      } else {
        // Empty response: put the mid back so it is retried.
        mids.push(mid)
      }
    } catch (error) {
      // Malformed or unexpected response: re-queue and keep going.
      mids.push(mid)
      console.error(`mid=${mid}`, error)
    }
    // Short pause between consecutive API requests.
    await sleep(60)
  }
  await sleep(1000)
  if (cardList.length === midSize) {
    await uploadPackageAsync(pid, cardList)
  } else {
    console.error(`${nowstr()} failed to fetch info, mids=${mids}`)
  }
}

//function exec () {
// Main crawl loop: keeps requesting packages from the task server and
// processing them until the server answers with pid -1. Each progress message
// is written to the console and appended to the `logit` element.
const run = async () => {
  // Append a raw HTML fragment to the on-page log.
  const appendLog = (html) => {
    logit.innerHTML += html;
  };

  console.log(nowstr() + " Start to fetch member info.")
  appendLog(nowstr() + " Start to fetch member info. <br>");

  while (true) {
    const response = await getPackageAsync();
    const { pid } = JSON.parse(response);
    // Loose comparison kept from the original: matches both -1 and "-1".
    if (pid == -1) break

    const mids = packageArray(pid)
    const firstMid = mids[0]
    const lastMid = mids[mids.length - 1]
    console.log(`${nowstr()} Get package ${pid}, fetch mids [${firstMid}, ${lastMid}]`);
    appendLog(`${nowstr()} Get package ${pid}, fetch mids [${firstMid}, ${lastMid}]`);
    appendLog("<br>");

    await packageFetchInsertAsync(pid, mids)
    console.log(`${nowstr()} Send package ${pid}`);
    appendLog(`${nowstr()} Send package ${pid}`);
    appendLog("<br>");
  }

  console.log(nowstr() + ` End fetch.`);
  appendLog(nowstr() + ` End fetch.`);
  appendLog("<br>")
}
// start code
// run();
//}


// Start the crawl loop (run) when the element matching '#btn-run' is clicked.
document.querySelector('#btn-run').addEventListener('click', run)
49 changes: 24 additions & 25 deletions app/bilicrawler.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
const logit = document.getElementById('log-process')

const superagent = require('superagent');
var moment = require('moment');
moment.locale('zh-cn');

const logit = document.getElementById('log-process')



const getPackageAsync = () => {
const url = `http://45.32.68.44:16123/getPackage`;
return new Promise((resolve, reject) => {
Expand Down Expand Up @@ -44,37 +42,42 @@ const packageFetchInsertAsync = async (pid, mids) => {
loopCount++
// 循环两遍未结束,强行退出
if (loopCount > midSize * 2) break
let mid = mids.pop();
try {
let mid = mids.pop();
let rs = await fetchUserInfo(mid);
if (rs) {
const data = JSON.parse(rs).data;
data.card.mid = mid;
data.card.archive_count = data.archive_count;
data.card.ctime = nowstr()
cardList.push(data.card);
} else {
fetchUserInfo(mid).then(rs => {
if (rs) {
const data = JSON.parse(rs).data;
data.card.mid = mid;
data.card.archive_count = data.archive_count;
data.card.ctime = nowstr()
cardList.push(data.card);
} else {
mids.push(mid)
}
}).catch(err => {
mids.push(mid)
}
console.error(`mid=${mid}`, err)
});

} catch (error) {
mids.push(mid)
console.error(`mid=${mid}`, error)
}
await sleep(60)
await sleep(210) //ms
}
await sleep(1000)
// await sleep(1000)
if (cardList.length === midSize) {
await uploadPackageAsync(pid, cardList)
console.log(`${nowstr()} Send package ${pid}`);
} else {
console.error(`${nowstr()} failed to fetch info, mids=${mids}`);
logit.innerHTML += `${nowstr()} failed to fetch info, mids=${mids}`;
logit.innerHTML += "<br>";
}
}

//function exec () {
const run = async () => {
console.log(nowstr() + " Start to fetch member info.")
logit.innerHTML += nowstr() + " Start to fetch member info. <br>";

for (;;) {
const data = await getPackageAsync();
const pid = JSON.parse(data).pid;
Expand All @@ -83,21 +86,17 @@ const run = async () => {
const mids = packageArray(pid)
console.log(`${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`);

logit.innerHTML += `${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`;
logit.innerHTML += `${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`;
logit.innerHTML += "<br>";

const rs = await packageFetchInsertAsync(pid, mids)
console.log(`${nowstr()} Send package ${pid}`);
logit.innerHTML += `${nowstr()} Send package ${pid}`;
logit.innerHTML += "<br>";
await packageFetchInsertAsync(pid, mids)
}
console.log(nowstr() + ` End fetch.`);
logit.innerHTML += nowstr() + ` End fetch.`;
logit.innerHTML += "<br>"
}
// start code
// run();
//}


document.querySelector('#btn-run').addEventListener('click', run)
2 changes: 1 addition & 1 deletion app/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "bilispider",
"version": "0.1.1",
"version": "0.1.2",
"description": "Bilibili spider to crawl data",
"main": "main.js",
"scripts": {
Expand Down

0 comments on commit 938ad4e

Please sign in to comment.