-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.php
63 lines (46 loc) · 1.69 KB
/
parser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
<?php
require_once dirname(__FILE__) . '/vendor/autoload.php';
use XPathSelector\Selector;
use League\Csv\Writer;
// Script entry point: load the XML document, flatten every item into a row,
// then write the rows out as CSV. The functions used here are declared below.
$contractsDocument = loadContracts();
$contractRows = parseContracts($contractsDocument);
outputCsv($contractRows);
/**
 * Reads data.xml from this script's directory and hands its contents to
 * Selector::loadXML().
 *
 * @return mixed whatever Selector::loadXML() returns for the file contents
 * @throws RuntimeException when data.xml cannot be read
 */
function loadContracts() {
    $path = dirname(__FILE__) . '/data.xml';
    $data = file_get_contents($path);
    // file_get_contents() reports failure by returning false (plus a warning).
    // Stop here with a clear message instead of passing false to the XML parser.
    if ($data === false) {
        throw new RuntimeException('Unable to read ' . $path);
    }
    return Selector::loadXML($data);
}
/**
 * Converts every node matched by the XPath `//item` into one flat associative array.
 *
 * Each row starts with the trimmed `title`, `link` (stored as "url") and
 * `dc:creator` (stored as "department") of the item, followed by whatever
 * key/value pairs parseDescription() extracts from the item's `description`.
 * On a key clash array_merge() lets the description's value win.
 *
 * @param mixed $xs selector exposing findAll(); the node list it returns must expose map()
 * @return mixed the result of map() over the matched items
 */
function parseContracts($xs) {
    $toRow = function($item) {
        $fixedFields = [
            'title' => trim($item->find('title')->extract()),
            'url' => trim($item->find('link')->extract()),
            'department' => trim($item->find('dc:creator')->extract()),
        ];
        $descriptionFields = parseDescription($item->find('description')->extract());

        return array_merge($fixedFields, $descriptionFields);
    };

    $items = $xs->findAll('//item');

    return $items->map($toRow);
}
/**
 * Extracts key/value pairs from the HTML fragment of an item's description.
 *
 * The markup gives no signal of heading vs value, so every `<td>` is taken in
 * document order and cells are paired up: 1st = key, 2nd = its value,
 * 3rd = next key, and so on. All cell text is trimmed.
 *
 * If the same key appears twice, the later value wins. If the fragment has an
 * odd number of cells, the trailing key gets an empty-string value. Previously
 * the unequal key and value lists made array_combine() fail for the whole item.
 *
 * @param string $description HTML fragment passed to Selector::loadHTML()
 * @return array map of cell heading => cell value (empty when there are no cells)
 */
function parseDescription($description) {
    $xs = Selector::loadHTML($description);
    $fields = [];
    // Holds a key that has been read but not yet matched with its value cell.
    $pendingKey = null;
    foreach ($xs->findAll('//td') as $cell) {
        $text = trim($cell->extract());
        if ($pendingKey === null) {
            $pendingKey = $text;
        } else {
            $fields[$pendingKey] = $text;
            $pendingKey = null;
        }
    }
    // Odd cell count: keep the dangling heading rather than dropping it or failing.
    if ($pendingKey !== null) {
        $fields[$pendingKey] = '';
    }
    return $fields;
}
/**
 * Writes the parsed contracts to contracts.csv in this script's directory.
 *
 * Layout: a "sep=," hint line, then a header row taken from the keys of the
 * first contract, then one row of values per contract. Columns therefore only
 * line up if every contract carries the same keys in the same order.
 *
 * When $contracts has no first element, only the hint line is written.
 * Previously the header step failed on the missing $contracts[0].
 *
 * @param array $contracts list of associative arrays, one per contract
 * @return void
 */
function outputCsv($contracts) {
    // NOTE(review): the path is wrapped in an SplFileObject before being handed to
    // createFromPath(); whether that is accepted depends on the installed league/csv
    // version — confirm before upgrading the dependency.
    $writer = Writer::createFromPath(new SplFileObject(dirname(__FILE__) . '/contracts.csv', 'a+'), 'w');
    // tell excel that the COMMA separated values file is separated with... commas
    $writer->insertOne('sep=,');
    // Nothing to export: without a first row there are no headers to derive.
    if (!isset($contracts[0])) {
        return;
    }
    // headers
    $writer->insertOne(array_keys($contracts[0]));
    // rows
    foreach ($contracts as $contract) {
        $writer->insertOne(array_values($contract));
    }
}