forked from clarin-eric/ParlaMint
-
Notifications
You must be signed in to change notification settings - Fork 0
/
validate.pl
executable file
·43 lines (39 loc) · 1.15 KB
/
validate.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/perl
# Validate ParlaMint corpora (either samples or complete corpora)
# with the ParlaMint ODD derived schema.
#
# Usage: validate.pl (samples|master) <input-directory>
#
#   samples  validate <input-directory>/ParlaMint-*/ParlaMint-*.xml
#   master   validate the ParlaMint-*.xml files found directly in, or one
#            level below, <input-directory>/ParlaMint-*.TEI and
#            <input-directory>/ParlaMint-*.TEI.ana
#
# Each selected file is passed to Jing together with ParlaMint.odd.rng,
# which is looked up in the directory of this script. Files whose name
# matches the skip pattern (taxonomy|list) are not validated.
# Progress and failures are reported on STDERR; a failed validation does
# not stop the run.
use strict;
use warnings;
use utf8;
use open ':utf8';
binmode(STDIN, ':utf8');
binmode(STDOUT, ':utf8');
binmode(STDERR, ':utf8');
use FindBin qw($Bin);
use File::Basename qw(basename);
use File::Glob qw(bsd_glob);

# First argument: which corpus layout to look for.
my $what = shift;
$what = '' unless defined $what;

# Glob patterns, relative to the input directory. They are kept as a list
# (not one space-separated string) so that each one can be prefixed with
# the input directory on its own.
my @masks;
if ($what eq 'samples') {
    @masks = ('ParlaMint-*/ParlaMint-*.xml');
}
elsif ($what eq 'master') {
    @masks = (
        'ParlaMint-*.TEI/ParlaMint-*.xml',
        'ParlaMint-*.TEI/*/ParlaMint-*.xml',
        'ParlaMint-*.TEI.ana/ParlaMint-*.xml',
        'ParlaMint-*.TEI.ana/*/ParlaMint-*.xml',
    );
}
else {
    die "First parameter must be 'samples' or 'master'\n";
}

# File names matching this pattern are skipped.
my $skip_re = qr/(taxonomy|list)/;

# Second argument: top level directory that contains the corpora.
my $in_dir = shift;
unless (defined $in_dir && -d $in_dir) {
    die "Second parameter must be top level input directory\n";
}

#Execution
# The command is kept as a list so that system() runs it without a shell;
# paths containing spaces or shell metacharacters are then passed intact.
my @jing   = ('java', '-jar', '/usr/share/java/jing.jar');
my $schema = "$Bin/ParlaMint.odd.rng";

# bsd_glob() expands exactly one pattern and does not split it on
# whitespace, so every pattern is anchored at $in_dir.
my @in_files = map { bsd_glob("$in_dir/$_") } @masks;

foreach my $in_file (@in_files) {
    my $f_name = basename($in_file);
    # Test the file name only, so that a directory called e.g. "lists"
    # does not cause every file below it to be skipped.
    next if $f_name =~ $skip_re;
    print STDERR "INFO: Validating $f_name\n";
    system(@jing, $schema, $in_file) == 0
        or print STDERR "ERROR: Validation of $f_name failed!\n";
}