-
Notifications
You must be signed in to change notification settings - Fork 5
/
gram2dtd.pl
64 lines (59 loc) · 1.62 KB
/
gram2dtd.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/perl
# Copyright (C) 2005 Kevin P. Scannell <[email protected]>
# This is free software; see the file COPYING for copying conditions. There is
# NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# This script is called from a language pack directory via the
# Makefile target "gram-xx.dtd"
use strict;
use warnings;
# Generate a DTD describing the grammatical tags of one language pack.
#
# Usage: gram2dtd.pl LANGCODE
#
# Input:  "pos-LANGCODE.txt", opened relative to the current directory.
#         Blank lines and lines starting with '#' are ignored.  Every other
#         line may begin with a numeric code followed by whitespace, and is
#         then expected to look like "<T attr=... attr=...>", where T is a
#         single capital letter.
# Output: the DTD, written to standard output.
# Errors: dies if no language code is given or the tag file cannot be
#         opened; lines that contain no recognizable tag are reported on
#         standard error and skipped.

my $lang = $ARGV[0];
defined $lang && length $lang
    or die "Usage: $0 LANGCODE\n";

# %HoH maps each tag letter to a hash of counters.  The special key '>'
# (which can never be an attribute name, since the attribute text is
# matched with [^>]*) counts how many times the tag itself was seen; every
# other key is an attribute name mapped to the number of times it was seen
# on that tag.
my %HoH = ();

# 'X' is always declared, even if the tag file never mentions it.
$HoH{'X'}{'>'} = 1;

my $posfile = "pos-$lang.txt";
open(my $pos_fh, '<', $posfile)
    or die "$0: cannot open $posfile: $!\n";

while (my $line = <$pos_fh>) {
    chomp $line;
    $line =~ s/^#.*$//;          # discard whole-line comments
    next unless $line =~ /\S/;   # nothing left on this line
    $line =~ s/^[0-9]+\s+//;     # strip the leading numeric code, if any

    my ($tag, $attrs) = $line =~ m/^<([A-Z]) *([^>]*)>/;
    unless (defined $tag) {
        # Without this guard an undefined tag would be recorded under the
        # empty string and produce a malformed <!ELEMENT> declaration.
        warn "$0: $posfile line $.: no tag found, skipping\n";
        next;
    }

    $HoH{$tag}{'>'}++;
    next unless $attrs;

    # Each space-separated token is "name" or "name=value"; only the name
    # part is counted.
    while ($attrs =~ m/([^ ]+)/g) {
        my $name = $1;
        $name =~ s/[ =].*//;
        next unless length $name;   # token began with '=': no usable name
        $HoH{$tag}{$name}++;
    }
}
close $pos_fh;

my @sorted   = sort keys %HoH;
my $gramtags = join ' | ', @sorted;

print "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n";
print "<!ENTITY % tag \"( $gramtags )\">\n";
print "<!ENTITY % mycontent \"( #PCDATA | $gramtags | E | B )*\">\n";
print "<!ELEMENT teacs ( line )+>\n";
print "<!ELEMENT line \%mycontent; >\n";

foreach my $t (@sorted) {
    print "<!ELEMENT $t (#PCDATA)>\n";

    # Everything except the '>' counter is a real attribute name.
    my @attrs = grep { $_ ne '>' } sort keys %{ $HoH{$t} };
    next unless @attrs;

    print "<!ATTLIST $t\n";
    foreach my $attr (@attrs) {
        # An attribute seen on every occurrence of the tag is REQUIRED;
        # one seen on only some occurrences is IMPLIED.
        my $keyword = $HoH{$t}{'>'} == $HoH{$t}{$attr} ? 'REQUIRED' : 'IMPLIED';
        print "\t\t$attr CDATA #", $keyword, "\n";
    }
    print ">\n";
}

print "<!ELEMENT B (#PCDATA | Z )*>\n";
print "<!ELEMENT E \%mycontent; >\n";
print "<!ATTLIST E msg CDATA #REQUIRED>\n";
print "<!ELEMENT Z (\%tag;)* >\n";
exit 0;