-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathsearch_engine_crawler.php
62 lines (50 loc) · 1022 Bytes
/
search_engine_crawler.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
<?php
/*
Author : Giriraj Namachivayam
DOC : 29-jan-2014
License : Free to use
Description : Extract Search Engine results in single page by using HTML DOM.
*/
/**
 * Fetches paginated result pages from a search engine and prints the
 * matching DOM elements as a single numbered list.
 *
 * Usage: set the public properties below, then call SimpleCrawler().
 *
 * Depends on a global file_get_html() function that is not defined in this
 * file (presumably the PHP Simple HTML DOM Parser - confirm it is included
 * by the caller before this class is used).
 */
class search_engine_crawler {
    /** Display name printed as the heading (e.g. Google, Bing, Yahoo, Ask, Amazon). */
    public $searchEngineName = '';

    /** Search URL prefix; the query string is appended directly to it. */
    public $searchEnginelink = '';

    /** Exclusive upper bound for the pagination offset; pages are requested in steps of 10. */
    public $maxcount;

    /**
     * Search query. It is concatenated into the URL as-is, so the caller is
     * responsible for URL-encoding it (it is NOT encoded here, to avoid
     * double-encoding values that are already encoded).
     */
    public $q;

    /** Selector handed to find() for the elements to print (e.g. h3.r, h3, div.classname, div#divid). */
    public $linkDomId;

    /** Name of the pagination query parameter (e.g. start, b, page, num). */
    public $start;

    /**
     * Crawl the configured search engine and echo the results.
     *
     * Prints the engine name as an <h1>, then requests one page per offset
     * 0, 10, 20, ... below $maxcount and echoes the innertext of every
     * element matching $linkDomId, numbered from (offset + 1) on each page.
     *
     * A page that cannot be loaded (file_get_html() did not return an
     * object) is skipped instead of aborting the whole crawl with a fatal
     * "call to a member function on bool" error.
     *
     * @return void Output is written directly with print/echo.
     */
    public function SimpleCrawler() {
        print "<h1>".$this->searchEngineName."</h1>";

        for ($offset = 0; $offset < $this->maxcount; $offset += 10) {
            $url = $this->searchEnginelink.$this->q."&".$this->start."=".$offset;

            $page = file_get_html($url);
            if (!is_object($page)) {
                // Fetch or parse failed for this offset; keep going with the next page.
                continue;
            }

            // Numbering restarts from the page offset so rows stay aligned
            // with the engine's own result positions.
            $row = $offset;
            foreach ($page->find($this->linkDomId) as $element) {
                $row++;
                echo $row.". ".$element->innertext.'<BR>';
            }

            // Release the parsed document before fetching the next page; guarded
            // because the DOM implementation is supplied from outside this file.
            if (method_exists($page, 'clear')) {
                $page->clear();
            }
        }
    }
}
?>