forked from terminusfoundation/watcher
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.php
33 lines (27 loc) · 969 Bytes
/
index.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
<?php
require 'vendor/autoload.php';
use Goutte\Client;
// Entry point for the scrape tester. Dispatches on the `verb` query parameter;
// the only implemented verb is `test`, which fetches `url` (or a default
// target) and echoes the HTML of the fetched document.

// Settings for the filtered-extraction mode that is currently commented out
// inside the `test` branch below.
$css_selector = "a.title.may-blank";
$thing_to_scrape = "_text";
$client = new Client();

// TODO: set up an API. Design brainstorm:
// VERBS:
// - test: test a scrape (pass `url` and any number of `filter`, `filterxpath`, `extract`, ... methods)
// - add: like test, but store the scrape configuration in the database
// - ...

// filter_input() yields null when `verb` is absent (and false when it is not a
// scalar), so a missing parameter no longer triggers an undefined-key notice.
$verb = filter_input(INPUT_GET, 'verb');

if ($verb === 'test') {
    // Use the caller's URL when given, otherwise fall back to the default
    // target. Both paths now reach the request below; previously the default
    // was assigned but never fetched.
    $url = filter_has_var(INPUT_GET, 'url')
        ? filter_input(INPUT_GET, 'url', FILTER_SANITIZE_URL)
        : "https://industra.space";

    // Only well-formed http(s) URLs are handed to the HTTP client.
    // NOTE(review): this does not block requests to internal/private hosts;
    // add an allow-list before exposing this endpoint publicly.
    $scheme = is_string($url) ? strtolower((string) parse_url($url, PHP_URL_SCHEME)) : '';
    $isHttpUrl = is_string($url)
        && filter_var($url, FILTER_VALIDATE_URL) !== false
        && in_array($scheme, ['http', 'https'], true);

    if (!$isHttpUrl) {
        http_response_code(400);
        echo "<h1>BAD URL</h1>";
        echo "The url parameter must be an absolute http:// or https:// URL.";
    } else {
        try {
            $crawler = $client->request('GET', $url);
            $output = $crawler;
            // Alternative extraction modes kept for the planned API:
            //$output = $crawler->filterXPath('descendant-or-self::body');
            //$output = $crawler->filter($css_selector)->extract($thing_to_scrape);

            // NOTE(review): the fetched markup is echoed unescaped, which
            // appears to be the intent of the `test` verb; confirm before
            // serving this to untrusted users.
            echo $output->html();
        } catch (\Exception $e) {
            // Covers transport failures as well as html() being called on an
            // empty document, so the caller gets an error page instead of a
            // fatal error.
            http_response_code(502);
            echo "<h1>SCRAPE FAILED</h1>";
            echo htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
        }
    }
} else {
    echo "<h1>BAD VERB</h1>";
    echo "ALLOWED VERBS: test<br />";
    echo "EXAMPLE: http://localhost/?verb=test&url=https://google.com";
}