forked from clarin-eric/ParlaMint
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathministers-tsv2tei.xsl
185 lines (175 loc) · 7.08 KB
/
ministers-tsv2tei.xsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
<?xml version='1.0' encoding='UTF-8'?>
<!-- Insert minister affiliations from TSV into the TEI root file.
Note that all existing minister affiliations in the TEI are removed.
-->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns="http://www.tei-c.org/ns/1.0"
xmlns:tei="http://www.tei-c.org/ns/1.0"
xmlns:fn="http://www.w3.org/2005/xpath-functions"
exclude-result-prefixes="fn tei">
<!-- Stylesheet parameter naming the TSV file; it is handed to unparsed-text() when the TSV is parsed -->
<xsl:param name="tsv"/>
<xsl:output encoding="utf-8" indent="yes" method="xml" omit-xml-declaration="no" version="1.0"/>
<!-- Index of every TEI element that carries an xml:id, keyed by that id -->
<xsl:key name="id" use="@xml:id" match="tei:*"/>
<!-- The corpus profileDesc, reached from the initial context node -->
<xsl:variable name="profileDesc" select="tei:teiCorpus/tei:teiHeader/tei:profileDesc"/>
<!-- Key(s) of the setting name typed as country or region.
     NOTE: we have to discuss how to name "regions" in setting! -->
<xsl:variable name="corpusCountry"
              select="$profileDesc/tei:settingDesc/tei:setting/tei:name[@type = ('country', 'region')]/@key"/>
<!-- Parse TSV into a
listPerson/person[@xml:id]/affiliation[@role='minister'][@from][@to][@ref][@ana]
structure -->
<xsl:variable name="data">
  <!-- Temporary tree built from the TSV file named by $tsv.
       Every accepted TSV row yields one person element (same xml:id as in the
       corpus) holding one minister affiliation, produced by the parse-minister
       template.
       Expected columns, in order: country, person ID, role, from, to,
       government, ministry, role name (local language), role name (English).
       The first five columns must be present; the remaining ones are optional.
       Rows are handled as follows:
       * rows without a tab, and rows starting with "Country" (the header), are skipped;
       * a country different from the corpus country, a role other than
         "minister", or a row not matching the column pattern stops the
         transformation;
       * a person ID that is not a person in the corpus profileDesc is reported
         and the row is skipped. -->
  <listPerson>
    <xsl:variable name="text" select="unparsed-text($tsv, 'UTF-8')"/>
    <!-- Split the file into rows on line ends (LF or CRLF). Splitting on a
         space would cut every row whose fields contain spaces into pieces. -->
    <xsl:for-each select="tokenize($text, '\r?\n')">
      <xsl:if test="matches(., '\t') and not(matches(., '^Country'))">
        <xsl:analyze-string select="."
                            regex="^([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]*)\t([^\t]*)\t?([^\t]*)\t?([^\t]*)\t?([^\t]*)\t?([^\t]*).*">
          <xsl:matching-substring>
            <xsl:variable name="country" select="regex-group(1)"/>
            <xsl:variable name="personID" select="regex-group(2)"/>
            <xsl:variable name="role" select="regex-group(3)"/>
            <xsl:variable name="from" select="regex-group(4)"/>
            <xsl:variable name="to" select="regex-group(5)"/>
            <xsl:variable name="government" select="regex-group(6)"/>
            <xsl:variable name="ministry" select="regex-group(7)"/>
            <xsl:variable name="name-xx" select="regex-group(8)"/>
            <xsl:variable name="name-en" select="regex-group(9)"/>
            <!-- The TSV must belong to the corpus being processed -->
            <xsl:if test="$country != $corpusCountry">
              <xsl:message terminate="yes"
                           select="concat('FATAL: TEI corpus country = ', $corpusCountry,
                                   ' does not match TSV country = ', $country,
                                   ' in TSV line ', .)"/>
            </xsl:if>
            <!-- Only minister rows are supported -->
            <xsl:if test="$role != 'minister'">
              <xsl:message terminate="yes"
                           select="concat('FATAL: Role ', $role,
                                   ' does not match minister! TSV is: ', .)"/>
            </xsl:if>
            <xsl:choose>
              <!-- The ID has to resolve to a person inside the corpus profileDesc -->
              <xsl:when test="not(key('id', $personID, $profileDesc)/self::tei:person)">
                <xsl:message terminate="no"
                             select="concat('WARN: Person ', $personID,
                                     ' not found in TEI corpus, skipping! TSV is: ', .)"/>
              </xsl:when>
              <xsl:otherwise>
                <!--xsl:message select="concat('INFO: Found minister ', $personID)"/-->
                <xsl:call-template name="parse-minister">
                  <xsl:with-param name="personID" select="$personID"/>
                  <xsl:with-param name="from" select="$from"/>
                  <xsl:with-param name="to" select="$to"/>
                  <xsl:with-param name="government" select="$government"/>
                  <xsl:with-param name="ministry" select="$ministry"/>
                  <xsl:with-param name="name-xx" select="$name-xx"/>
                  <xsl:with-param name="name-en" select="$name-en"/>
                </xsl:call-template>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:matching-substring>
          <xsl:non-matching-substring>
            <xsl:message terminate="yes"
                         select="concat('FATAL: Bad line in TSV: ', .)"/>
          </xsl:non-matching-substring>
        </xsl:analyze-string>
      </xsl:if>
    </xsl:for-each>
  </listPerson>
</xsl:variable>
<xsl:template match="/">
  <!-- Write a single space ahead of the document content, then process all
       top-level nodes.
       NOTE(review): the space may have been meant as a line break; confirm
       against the upstream stylesheet. -->
  <xsl:value-of select="' '"/>
  <xsl:apply-templates select="node()"/>
</xsl:template>
<xsl:template match="tei:listPerson/tei:person">
  <!-- Copy a corpus person and append the minister affiliations that the TSV
       supplies for it. Entries are looked up in the $data tree by this
       person's xml:id; nothing is appended when there is no such entry. -->
  <xsl:variable name="tsvEntries" select="key('id', @xml:id, $data)"/>
  <xsl:copy>
    <!-- Existing attributes and content first (old minister affiliations are
         dropped by their own template), new affiliations last -->
    <xsl:apply-templates select="@*, node()"/>
    <xsl:if test="exists($tsvEntries/self::tei:person)">
      <xsl:message select="concat('INFO: Inserting minister affiliation(s) for ', @xml:id)"/>
      <xsl:for-each select="$tsvEntries/tei:affiliation">
        <xsl:message select="concat('INFO: Inserting affiliation ',
                             @ana, ' from ', @from, ' to ', @to)"/>
        <xsl:copy-of select="."/>
      </xsl:for-each>
    </xsl:if>
  </xsl:copy>
</xsl:template>
<!-- Drop minister affiliations already present in the TEI: the template only
     logs the removal and writes nothing to the result -->
<xsl:template match="tei:affiliation[@role = 'minister']">
  <xsl:variable name="owner" select="parent::tei:person/@xml:id"/>
  <xsl:message select="concat('INFO: Removing minister affiliation for ',
                       $owner, ': ', @ana, ' from ', @from, ' to ', @to)"/>
</xsl:template>
<!-- Default for elements: shallow copy, then process attributes followed by
     child elements, text and comments (processing instructions are not
     passed on) -->
<xsl:template match="*">
  <xsl:copy>
    <xsl:apply-templates select="@*, node() except processing-instruction()"/>
  </xsl:copy>
</xsl:template>
<!-- Attributes and comments are reproduced unchanged -->
<xsl:template match="@* | comment()">
  <xsl:copy-of select="."/>
</xsl:template>
<xsl:template name="parse-minister">
  <!-- Build one person element holding a single minister affiliation from the
       fields of one TSV row.
       Parameters (all strings taken from the TSV; a value that is empty,
       whitespace only, or "-" counts as absent):
         personID    xml:id given to the generated person
         from, to    copied to affiliation/@from and affiliation/@to
         government  ID looked up in the corpus profileDesc; the xml:id of its
                     ancestor org becomes affiliation/@ref, and the ID itself
                     goes into affiliation/@ana
         ministry    ID(s) added to affiliation/@ana
         name-xx     role name without a language mark, written as roleName
         name-en     English role name, written as roleName with xml:lang "en"
       The context item at the call site is appended to the error message; the
       caller in this stylesheet invokes the template with the TSV row as
       context item. -->
  <xsl:param name="personID"/>
  <xsl:param name="from"/>
  <xsl:param name="to"/>
  <xsl:param name="government"/>
  <xsl:param name="ministry"/>
  <xsl:param name="name-xx"/>
  <xsl:param name="name-en"/>
  <person xml:id="{$personID}">
    <affiliation role="minister">
      <xsl:if test="normalize-space($from) and $from != '-'">
        <xsl:attribute name="from" select="$from"/>
      </xsl:if>
      <xsl:if test="normalize-space($to) and $to != '-'">
        <xsl:attribute name="to" select="$to"/>
      </xsl:if>
      <!-- Re-insert # in references to IDs for affiliation/@ref -->
      <xsl:if test="normalize-space($government) and $government != '-'">
        <xsl:variable name="org" select="key('id', $government, $profileDesc)/
                                         ancestor::tei:org/@xml:id"/>
        <xsl:choose>
          <xsl:when test="normalize-space($org)">
            <xsl:attribute name="ref" select="concat('#', $org)"/>
          </xsl:when>
          <xsl:otherwise>
            <!-- Report the problem and leave @ref out instead of writing an
                 empty pointer -->
            <xsl:message terminate="no"
                         select="concat('ERROR: Cant find government organisation for term ',
                                 $government, '! TSV is: ', .)"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:if>
      <xsl:if test="(normalize-space($government) and $government != '-') or
                    (normalize-space($ministry) and $ministry != '-')">
        <!-- @ana lists the government and ministry IDs, each prefixed with #;
             IDs inside one field are separated by single spaces -->
        <xsl:variable name="ana">
          <xsl:if test="normalize-space($government) and $government != '-'">
            <xsl:value-of select="concat('#', replace($government, ' ', ' #'))"/>
          </xsl:if>
          <xsl:text> </xsl:text>
          <xsl:if test="normalize-space($ministry) and $ministry != '-'">
            <xsl:value-of select="concat('#', replace($ministry, ' ', ' #'))"/>
          </xsl:if>
        </xsl:variable>
        <!-- normalize-space drops the separator when only one part is present -->
        <xsl:attribute name="ana" select="normalize-space($ana)"/>
      </xsl:if>
      <xsl:if test="(normalize-space($name-xx) and $name-xx != '-')">
        <roleName>
          <xsl:value-of select="normalize-space($name-xx)"/>
        </roleName>
      </xsl:if>
      <xsl:if test="(normalize-space($name-en) and $name-en != '-')">
        <roleName xml:lang="en">
          <xsl:value-of select="normalize-space($name-en)"/>
        </roleName>
      </xsl:if>
    </affiliation>
  </person>
</xsl:template>
</xsl:stylesheet>