-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathHarvestTextRazor.php
34 lines (26 loc) · 933 Bytes
/
HarvestTextRazor.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
<?php
/**
 * Runs a fixed set of TextRazor extractors over one text file and stores the raw responses.
 *
 * Usage: php <this script> <text-file>
 *
 * The file content is split with \DimeExtraction\Chunker::getChunks(). Every chunk is
 * analysed once per extractor, and each response is serialize()d into
 * textrazor-out/<extractor>-<chunk key>-<input basename> next to this script.
 *
 * The API key is read from "textrazor.key", resolved against the current working directory.
 * Any failure is reported on STDERR and ends the script with exit status 1.
 */
require_once 'vendor/autoload.php';

// Without this guard a missing argument only shows up as an undefined-offset notice
// followed by a misleading "no text".
if (!isset($argv[1])) {
    fwrite(STDERR, 'usage: php ' . basename(__FILE__) . " <text-file>\n");
    exit(1);
}
$inputPath = $argv[1];

// Key files commonly end with a newline; trim it so it is not sent as part of the key.
$apiKey = file_get_contents('textrazor.key');
if ($apiKey === false || trim($apiKey) === '') {
    fwrite(STDERR, "could not read API key from textrazor.key\n");
    exit(1);
}
TextRazorSettings::setApiKey(trim($apiKey));

// Compare explicitly: a plain truthiness test would reject a file that contains only "0".
$text = file_get_contents($inputPath);
if ($text === false || $text === '') {
    fwrite(STDERR, "no text\n");
    exit(1);
}

$modes = ['entities', 'topics', 'words', 'phrases', 'dependency-trees', 'relations', /*'entailments', */'senses'];
// NOTE(review): getChunks() is assumed to return an array of strings - confirm against Chunker.
$chunks = \DimeExtraction\Chunker::getChunks($text);

$outDir = __DIR__ . '/textrazor-out';
// is_dir() is checked again after a failed mkdir() so that a directory created by a
// concurrent run in the meantime is not treated as an error.
if (!is_dir($outDir) && !mkdir($outDir) && !is_dir($outDir)) {
    fwrite(STDERR, "could not create $outDir\n");
    exit(1);
}

// Loop-invariant values, computed once.
$sourceName = basename($inputPath);
$chunkCount = count($chunks);

foreach ($modes as $mode) {
    echo "running $mode on " . $chunkCount . " chunks...\n";

    // A fresh client per mode, so exactly one extractor is active for each request.
    $tr = new TextRazor();
    $tr->addExtractor($mode);

    foreach ($chunks as $i => $chunk) {
        try {
            $response = $tr->analyze($chunk);
        } catch (Exception $e) {
            // Stop at the first failed request; results already written are left in place.
            fwrite(STDERR, "Exception: " . $e->getMessage() . "\n");
            exit(1);
        }

        $outFile = $outDir . '/' . $mode . '-' . $i . '-' . $sourceName;
        if (file_put_contents($outFile, serialize($response)) === false) {
            fwrite(STDERR, "could not write $outFile\n");
            exit(1);
        }
    }
}