Skip to content

Commit

Permalink
Add first batch of tests for speech and timestamps loading #9
Browse files Browse the repository at this point in the history
  • Loading branch information
JetamZ committed Oct 20, 2024
1 parent 7aec1f4 commit bf0b96a
Show file tree
Hide file tree
Showing 9 changed files with 25,098 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---SPEECH---
ID: ps2013-001-01-000-999.u1
author: #MiroslavaNemcova.1952
role: #chair
when: 2013-11-25
tokens: 25
sentences: 12
named entity refferences: 18
total duration: 172760.0
total spoken: 15200.0
time silent: 68160.0
time unknown: 89400.0
unaligned tokens: 1
earliest timeline: 2013-11-25T13:58:00
latest timeline: 2013-11-25T13:58:00

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---SPEECH---
ID: ps2013-001-01-000-999.u1
author: #MiroslavaNemcova.1952
role: #chair
when: 2013-11-25
tokens: 175
sentences: 12
named entity refferences: 18
total duration: 0
total spoken: 0
time silent: 0
time unknown: 0
unaligned tokens: 175
earliest timeline: None
latest timeline: None

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---SPEECH---
ID: ps2013-001-01-002-002.u1
author: #MiroslavaNemcova.1952
role: #chair
when: 2013-11-25
tokens: 148
sentences: 12
named entity refferences: 11
total duration: 143120.0
total spoken: 58880.0
time silent: 12440.0
time unknown: 71800.0
unaligned tokens: 17
earliest timeline: 2013-11-25T13:58:00
latest timeline: 2013-11-25T13:58:00
---SPEECH---
ID: ps2013-001-01-002-002.u3
author: #MiroslavaNemcova.1952
role: #chair
when: 2013-11-25
tokens: 49
sentences: 3
named entity refferences: 5
total duration: 68900.0
total spoken: 22930.0
time silent: 11510.0
time unknown: 34460.0
unaligned tokens: 1
earliest timeline: 2013-11-25T14:38:00
latest timeline: 2013-11-25T14:38:00
---SPEECH---
ID: ps2013-001-01-002-002.u2
author: #LubomirZaoralek.1956
role: #regular
when: 2013-11-25
tokens: 877
sentences: 206
named entity refferences: 604
total duration: 1865080.0
total spoken: 323020.0
time silent: 1209490.0
time unknown: 332570.0
unaligned tokens: 161
earliest timeline: 2013-11-25T13:58:00
latest timeline: 2013-11-25T14:38:00

16 changes: 16 additions & 0 deletions test/MetadataExtraction/examples/expected/expectedReal.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---SPEECH---
ID: ps2013-001-01-000-999.u1
author: #MiroslavaNemcova.1952
role: #chair
when: 2013-11-25
tokens: 175
sentences: 12
named entity refferences: 18
total duration: 190520.0
total spoken: 75000.0
time silent: -26850.0
time unknown: 142370.0
unaligned tokens: 14
earliest timeline: 2013-11-25T13:58:00
latest timeline: 2013-11-25T13:58:00

1,276 changes: 1,276 additions & 0 deletions test/MetadataExtraction/examples/inputs/encapsulatedWords.xml

Large diffs are not rendered by default.

953 changes: 953 additions & 0 deletions test/MetadataExtraction/examples/inputs/missingTimestamps.xml

Large diffs are not rendered by default.

19,797 changes: 19,797 additions & 0 deletions test/MetadataExtraction/examples/inputs/multipleSpeeches.xml

Large diffs are not rendered by default.

2,928 changes: 2,928 additions & 0 deletions test/MetadataExtraction/examples/inputs/real.xml

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions test/MetadataExtraction/tester.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import subprocess
import difflib
import argparse


# Command-line interface of the tester: "--s" is a boolean switch that
# defaults to False and becomes True when the flag is present.
args_parser = argparse.ArgumentParser()
args_parser.add_argument(
    "--s",
    help="Set this flag to test speeches",
    action="store_true",
)

# Speech-parser test cases. Each entry is a 4-tuple:
#   (display name, "--file=..." argument, "--wd=..." argument, expected-output file)
# The two "--..." strings are passed to the parser script unchanged.
test_cases_speeches = [
    (
        "REAL",
        "--file=examples/inputs/real.xml",
        "--wd=../../MetadataExtraction",
        "examples/expected/expectedReal.txt",
    ),
    (
        "MISSING TIMESTAMPS",
        "--file=examples/inputs/missingTimestamps.xml",
        "--wd=../../MetadataExtraction",
        "examples/expected/expectedMissingTimestamps.txt",
    ),
    (
        "ENCAPSULATED WORDS",
        "--file=examples/inputs/encapsulatedWords.xml",
        "--wd=../../MetadataExtraction",
        "examples/expected/expectedEncapsulatedWords.txt",
    ),
    (
        "MULTIPLE SPEECHES",
        "--file=examples/inputs/multipleSpeeches.xml",
        "--wd=../../MetadataExtraction",
        "examples/expected/expectedMultipleSpeeches.txt",
    ),
]

# Interpreter used to launch the parser and the parser script itself.
# NOTE(review): both paths are relative, so they presumably resolve against the
# directory the tester is started from, and the interpreter path looks
# machine-specific -- confirm before running elsewhere.
my_venv = "../../../bakalarka/bin/python3"
speech_parser = "../../MetadataExtraction/speechParser2.py"

def test_speeches():
    """Run the speech parser on every case in ``test_cases_speeches``.

    For each case the parser script ``speech_parser`` is launched with the
    interpreter ``my_venv`` and the case's two command-line arguments. The
    parser's standard output and the contents of the expected-output file are
    both stripped of surrounding whitespace and compared for equality.

    Prints ``PASSED`` for a match. Otherwise prints ``FAILED`` followed by a
    unified diff (expected vs. actual) and, when available, the parser's
    non-zero exit code and whatever it wrote to standard error, so that a
    crash inside the parser is not mistaken for a plain output mismatch.

    Raises:
        OSError: if the interpreter or an expected-output file cannot be
            opened (propagated from ``subprocess.run`` / ``open``).
    """
    for test_name, test_file, test_wd, expected_output in test_cases_speeches:
        process = subprocess.run(
            [my_venv, speech_parser, test_file, test_wd],
            capture_output=True,
            text=True,
        )
        actual_output = process.stdout.strip()

        # Read the fixture with an explicit encoding so the comparison does
        # not depend on the locale of the machine running the tests.
        with open(expected_output, "r", encoding="utf-8") as f:
            expected = f.read().strip()

        if actual_output == expected:
            print(f"Test {test_name}: PASSED!")
            continue

        diff = difflib.unified_diff(
            expected.splitlines(),
            actual_output.splitlines(),
            fromfile="expected",
            tofile="actual",
            lineterm="",
        )

        print(f"Test {test_name}: FAILED!")
        print("-------------------------")
        for line in diff:
            print(line)

        # Surface parser-side problems that the stdout diff alone would hide.
        if process.returncode != 0:
            print(f"Parser exited with code {process.returncode}")
        parser_errors = process.stderr.strip()
        if parser_errors:
            print("Parser stderr:")
            print(parser_errors)
        print("-------------------------")

def main(args):
    """Run the test groups selected on the command line.

    ``args`` is the parsed argument namespace. The speech parser tests are
    run, framed by a start and an end banner, only when ``args.s`` is truthy;
    otherwise the function does nothing.
    """
    if not args.s:
        return

    print("####TESTING SPEECH PARSER####")
    test_speeches()
    print("########## DONE #############")


# Parse the command line only when the file is executed as a script.
if __name__ == "__main__":
    parsed_args = args_parser.parse_args()
    main(parsed_args)

0 comments on commit bf0b96a

Please sign in to comment.