forked from clarin-eric/ParlaMint
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparlamint-tei2vert.pl
executable file
·47 lines (40 loc) · 1.3 KB
/
parlamint-tei2vert.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/perl
use warnings;
use utf8;
use FindBin qw($Bin);
use File::Spec;
# Command-line arguments:
#   1. the root TEI file (the one with the teiHeader and the xi:include
#      elements), made absolute so later steps do not depend on the
#      current directory;
#   2. the directory that receives the converted .vert files.
# Extra arguments are ignored.
die "Usage: $0 <root TEI file> <output directory>\n"
    unless @ARGV >= 2;
my $rootFile = File::Spec->rel2abs(shift);
my $outDir = shift;

# Directory part of the root file; xi:include hrefs are appended to it.
my ($rootDir) = $rootFile =~ m|(.+)/|;

binmode(STDERR, ':utf8');

# Create the output directory with Perl's own mkdir rather than through the
# shell, so names with spaces or shell metacharacters are handled and a
# failure is reported instead of being silently ignored.
if (-e $outDir) {
    die "Output location $outDir exists but is not a directory\n"
        unless -d $outDir;
}
else {
    mkdir($outDir)
        or die "Can't create output directory $outDir: $!\n";
}

# External tools: the Saxon command line, plus the stylesheet and the
# post-processing script that are looked up in this script's own
# directory ($Bin).
my $Saxon = 'java -jar /usr/share/java/saxon.jar';
my $TEI2VERT = "$Bin/parlamint2xmlvert.xsl";
my $POLISH = "$Bin/parlamint-xml2vert.pl";

die "Can't find root TEI file with teiHeader: $rootFile\n"
    unless -e $rootFile;
# Collect the component files that the root file pulls in through
# xi:include elements. Each href is taken relative to the directory of the
# root file ($rootDir).
my @inFiles;
{
    open(my $rootFh, '<:utf8', $rootFile)
        or die "Can't open root TEI file $rootFile: $!\n";

    # Read the file one tag at a time: with ">" as the record separator
    # every record ends where a tag closes, so an xi:include element and its
    # attributes arrive in a single record. The change is local to this
    # block so that reads elsewhere keep the default line-based behaviour.
    local $/ = ">";

    while (my $chunk = <$rootFh>) {
        # Any whitespace may follow the element name in XML.
        next unless $chunk =~ m|<xi:include\s|;

        # XML allows both double- and single-quoted attribute values.
        my (undef, $href) = $chunk =~ m|href=(["'])(.+?)\1| or
            die "Can't find href in xi:include!\n";
        push(@inFiles, "$rootDir/$href");
    }
    close $rootFh;
}
print STDERR "WARN: No xi:include elements found in $rootFile\n"
    unless @inFiles;
# Convert every component file: run it through the XSLT stylesheet with
# Saxon (passing the root file as the "hdr" parameter), pipe the result
# through the polishing script and store it as <name>.vert in $outDir.

# Common stem of a component file name, up to but excluding its extension.
my $nameStem = qr|ParlaMint-[A-Z]{2}(?:-[A-Z0-9]{1,3})?(?:-[a-z]{2,3})?_[^/]+|;

# Wrap one argument in single quotes for the shell, so that paths with
# spaces or shell metacharacters reach the programs unchanged.
my $shQuote = sub {
    my ($arg) = @_;
    $arg =~ s/'/'\\''/g;
    return "'$arg'";
};

foreach my $inFile (@inFiles) {
    my $fName;
    if (($fName) = $inFile =~ m|($nameStem)\.ana\.xml|) {
        print STDERR "INFO: Converting $fName\n";
    }
    elsif (($fName) = $inFile =~ m|($nameStem)\.xml|) {
        # A file without the .ana suffix is still converted, but flagged.
        print STDERR "INFO: Debug conversion of $fName\n";
    }
    else {
        die "Weird input file $inFile\n";
    }

    my $outFile = "$outDir/$fName.vert";

    # $Saxon is a complete command line ("java -jar ..."), so it is left
    # unquoted; every path-carrying argument is quoted individually.
    my $command = join(' ',
        $Saxon,
        $shQuote->("hdr=$rootFile"),
        $shQuote->("-xsl:$TEI2VERT"),
        $shQuote->($inFile),
        '|', $shQuote->($POLISH),
        '>', $shQuote->($outFile));
    #print STDERR "\$ $command\n";

    # NOTE(review): the command is a shell pipeline, so the status seen here
    # is that of the last command ($POLISH); a failure of Saxon alone is
    # not detected by this check.
    my $status = system($command);
    die "ERROR: Conversion to vert for $inFile failed!\n"
        if $status;
}