-
Notifications
You must be signed in to change notification settings - Fork 2
/
bt_test.js
135 lines (100 loc) · 3.12 KB
/
bt_test.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// viewportSize is the actual size of the headless browser window;
// clipRect is the portion of the page that is captured in a screenshot.
//page.clipRect = { top: 0, left: 0, width: 1024, height: 768 };
//the rest of the code is the same as the previous example
// PhantomJS modules used by this script.
var wp = require('webpage');
var fs = require('fs');

// Single headless page shared by the rest of the script.
var page = wp.create();
// Give up on any individual resource after 3000 (milliseconds, per the PhantomJS settings docs).
page.settings.resourceTimeout = 3000;
//page.settings.javascriptEnabled = false;
// Only the markup is inspected, so image downloads are skipped.
page.settings.loadImages = false;

// Remote listing URL prefix and paging state.
// NOTE(review): these three are not referenced by the code visible here — confirm before removing.
var page_root = 'http://bt.aisex.com/bt/thread.php?fid=4&page=';
var num_of_pages = 10;
var page_urls = [];

// Locally saved copy of a listing page, addressed through a file:// URL.
var file_path = 'C:/StudyProj/mybase.html';
var html_path = 'file:///' + file_path;
/**
 * Collect the href attribute of every anchor matching 'tr.tr3 h3 a' on the
 * page currently loaded in the global `page`, log how many were found, and
 * terminate PhantomJS.
 *
 * Reads the globals `page` and `phantom`. Always ends by calling
 * phantom.exit(): with no argument when the link list was obtained, and
 * with exit code 1 when page.evaluate() produced no result.
 */
function parse_page()
{
    console.log('Start Parsing');
    // The inner function runs inside the page context; only its return value
    // crosses back into the PhantomJS context.
    var mv_urls = page.evaluate(
        function()
        {
            return [].map.call(
                document.querySelectorAll('tr.tr3 h3 a'),
                function(a_tag)
                {
                    return a_tag.getAttribute('href');
                }
            );
        }
    );
    console.log('End Parsing');

    // Without this guard a null/undefined result would throw on `.length`
    // below, phantom.exit() would never be reached, and the process would hang.
    if (!mv_urls)
    {
        console.log('Parsing failed: page.evaluate returned no result');
        phantom.exit(1);
        return;
    }

    console.log('found links='+mv_urls.length);
    // Uncomment to dump every link that was found:
    // for(var i = 0; i< mv_urls.length; ++i)
    // {
    //     console.log(mv_urls[i]);
    // }
    phantom.exit();
}
// // function OpenTopPage()
// // {
// // var bookUrls = page.evaluate(
// // function()
// // {
// // return [].map.call(
// // document.querySelectorAll('div.col-md-12.article a.title-link'),
// // function(a)
// // {
// // return a.getAttribute('href')
// // }
// // )
// // }
// // )
// // var num_links = bookUrls.length
// // for(var i = 0;i < num_links;++i)
// // {
// // var bookUrl = bookUrls[i]
// // if(bookUrl.length > 0 )
// // {
// // urls.push(url_root + bookUrls[i])
// // }
// // }
// // console.log( 'found books = ' + urls.length )
// // }
// function OpenNextTopPage()
// {
// page.close()
// ++topPageCount;
// if(topPageCount >= num_of_pages)
// {
// console.log(' Final Top page is reached, will visit each child page to get book ')
// count = 0;
// fs.write("books.txt", urls.join('\n'), {mode: 'w', charset: 'UTF-8'})
// phantom.exit()
// }
// console.log(' page count = ' + topPageCount)
// page = wp.create()
// page.open(topPageUrls[topPageCount], function( status )
// {
// console.log("visit "+ topPageUrls[topPageCount] + ' Status:' + status )
// OpenTopPage()
// window.setTimeout(OpenNextTopPage, 500)
// }
// )
// }
// phantom.onError = function(msg, trace) {
// var msgStack = ['PHANTOM ERROR: ' + msg];
// if (trace && trace.length) {
// msgStack.push('TRACE:');
// trace.forEach(function(t) {
// msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line + (t.function ? ' (in function ' + t.function +')' : ''));
// });
// }
// console.error(msgStack.join('\n'));
// phantom.exit(1);
// };
// Entry point: load the locally saved page, then extract its links.
// The callback receives the load status; anything other than 'success'
// means there is no usable document, so report it and exit non-zero
// instead of going on to parse.
page.open( html_path, function(status) {
    if (status !== 'success')
    {
        console.log('Failed to load ' + html_path + ' (status: ' + status + ')');
        phantom.exit(1);
        return;
    }
    console.log(page.title);
    parse_page();
});