Skip to content

Commit

Permalink
Add additional data to speech entry in database #8
Browse files Browse the repository at this point in the history
  • Loading branch information
JetamZ committed Oct 17, 2024
1 parent e22089d commit f99d76e
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 16 deletions.
11 changes: 8 additions & 3 deletions DatabaseCommunication/DatabaseInserter.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def insert_persons(self, persons):
if len(p.sex) > 1:
p.sex = 'U'

cursor.execute(PersonCommands.INSERT_ALL,(p.personID, p.sex, p.birth,))
cursor.execute(PersonCommands.INSERT_ALL,(p.personID, p.sex, p.birth))
self.__insert_name_records(p.name_records,p.personID, cursor)
self.connection.commit()

Expand Down Expand Up @@ -114,14 +114,19 @@ def insert_speeches(self, speeches):
with self.connection.cursor() as cursor:
for author in speeches:
for s in speeches[author]:
cursor.execute(SpeechCommands.INSERT_ALL, (s.when,
cursor.execute(SpeechCommands.INSERT_ALL, (s.speechID,
s.when,
str(s.tokens),
str(s.sentences),
str(s.named_entity_refferences),
s.role[1:],
s.speakerID[1:],
s.total_duration,
s.earliest_timeline,
s.latest_timeline))
s.latest_timeline,
s.unaligned_tokens,
s.time_spoken,
s.time_silent,
s.time_unknown))

self.connection.commit()
6 changes: 5 additions & 1 deletion DatabaseCommunication/DatabaseTableCreator.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def create_tables(self):
""",
"""
CREATE TABLE IF NOT EXISTS speech (
id SERIAL PRIMARY KEY,
id VARCHAR(100) PRIMARY KEY,
date DATE,
token_count INTEGER,
sentence_count INTEGER,
Expand All @@ -58,6 +58,10 @@ def create_tables(self):
total_duration REAL,
earliest_timestamp VARCHAR(100),
latest_timestamp VARCHAR(100),
unaligned_tokens INTEGER,
time_spoken REAL,
time_silent REAL,
time_unknown REAL,
FOREIGN KEY (person_id)
REFERENCES Person (person_id)
)
Expand Down
4 changes: 2 additions & 2 deletions DatabaseCommunication/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ class OrganisationCommands(StrEnum):

class SpeechCommands(StrEnum):
INSERT_ALL = """
INSERT INTO speech(date, token_count, sentence_count, named_entity_count, role, person_id, total_duration, earliest_timestamp, latest_timestamp)
VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)
INSERT INTO speech(id, date, token_count, sentence_count, named_entity_count, role, person_id, total_duration, earliest_timestamp, latest_timestamp, unaligned_tokens, time_spoken, time_silent, time_unknown)
VALUES(%s,%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING
"""
26 changes: 16 additions & 10 deletions MetadataExtraction/timestampsCSV.xslt
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,24 @@
<!-- ID - ID of the speaker if the Type is 'S', or of the token if the Type is 'T' -->
<!-- begin - marks the beginning of the token in audio -->
<!-- end - marks the end of the token in audio -->
<xsl:text>Type,ID,Speech,Begin,End,Time&#10;</xsl:text>
<xsl:text>Type,ID,Begin,End,Duration,Time&#10;</xsl:text>
<xsl:apply-templates select="tei:text/tei:body/tei:div/tei:u" />
<xsl:for-each select="//tei:w">
<xsl:call-template name="word" />
</xsl:for-each>

<!-- Artificially insert one "speaker" row so that the information about the last speech is stored. -->
<xsl:text>S,END,END,END,END,END</xsl:text>
</xsl:template>


<!-- Keep the ID of a speaker -->
<xsl:template match="tei:u">
<xsl:text>S,</xsl:text>
<xsl:value-of select="@who"/>
<xsl:text>,,&#10;</xsl:text>
<xsl:text>,</xsl:text>
<xsl:value-of select="@xml:id" />
<xsl:text>,,,&#10;</xsl:text>
<xsl:for-each select="descendant::tei:w">
<xsl:call-template name="word" />
</xsl:for-each>
</xsl:template>

<!-- <xsl:template match="tei:w"> -->
Expand All @@ -36,10 +41,6 @@
<xsl:value-of select="@xml:id" />
<xsl:text>,</xsl:text>

<!-- Get the utterance the tag belongs to -->
<xsl:value-of select="substring-before(substring-after(@xml:id, 'u'), '.p')" />
<xsl:text>,</xsl:text>

<!-- Get the start timestamp of the tag -->
<xsl:variable name="startSynch" select="preceding-sibling::tei:anchor[1]/@synch" />
<xsl:value-of select="key('whenByID', substring($startSynch, 2))/@interval" />
Expand All @@ -49,7 +50,12 @@
<xsl:variable name="endSynch" select="following-sibling::tei:anchor[1]/@synch" />
<xsl:value-of select="key('whenByID', substring($endSynch, 2))/@interval" />
<xsl:text>,</xsl:text>


<!-- Get the duration of the tag -->
<xsl:variable name="first" select="key('whenByID', substring($endSynch, 2))/@interval " />
<xsl:variable name="second" select="key('whenByID', substring($startSynch, 2))/@interval " />
<xsl:value-of select="$first - $second" />
<xsl:text>,</xsl:text>
<!-- Get the time the speech was given-->
<xsl:variable name="sinceRef" select="key('whenByID', substring($startSynch, 2))/@since" />
<xsl:value-of select="key('whenByID', substring($sinceRef, 2))/@absolute" />
Expand Down

0 comments on commit f99d76e

Please sign in to comment.