Add first batch of tests for speech and timestamps loading #9

ufal · Oct 20, 2024 · bf0b96a · bf0b96a
1 parent 7aec1f4
commit bf0b96a
Show file tree

Hide file tree

Showing 9 changed files with 25,098 additions and 0 deletions.
diff --git a/test/MetadataExtraction/examples/expected/expectedEncapsulatedWords.txt b/test/MetadataExtraction/examples/expected/expectedEncapsulatedWords.txt
@@ -0,0 +1,16 @@
+---SPEECH---
+ID: ps2013-001-01-000-999.u1
+author: #MiroslavaNemcova.1952
+role: #chair
+when: 2013-11-25
+tokens: 25
+sentences: 12
+named entity refferences: 18
+total duration: 172760.0
+total spoken: 15200.0
+time silent: 68160.0
+time unknown: 89400.0
+unaligned tokens: 1
+earliest timeline: 2013-11-25T13:58:00
+latest timeline: 2013-11-25T13:58:00
+
diff --git a/test/MetadataExtraction/examples/expected/expectedMissingTimestamps.txt b/test/MetadataExtraction/examples/expected/expectedMissingTimestamps.txt
@@ -0,0 +1,16 @@
+---SPEECH---
+ID: ps2013-001-01-000-999.u1
+author: #MiroslavaNemcova.1952
+role: #chair
+when: 2013-11-25
+tokens: 175
+sentences: 12
+named entity refferences: 18
+total duration: 0
+total spoken: 0
+time silent: 0
+time unknown: 0
+unaligned tokens: 175
+earliest timeline: None
+latest timeline: None
+
diff --git a/test/MetadataExtraction/examples/expected/expectedMultipleSpeeches.txt b/test/MetadataExtraction/examples/expected/expectedMultipleSpeeches.txt
@@ -0,0 +1,46 @@
+---SPEECH---
+ID: ps2013-001-01-002-002.u1
+author: #MiroslavaNemcova.1952
+role: #chair
+when: 2013-11-25
+tokens: 148
+sentences: 12
+named entity refferences: 11
+total duration: 143120.0
+total spoken: 58880.0
+time silent: 12440.0
+time unknown: 71800.0
+unaligned tokens: 17
+earliest timeline: 2013-11-25T13:58:00
+latest timeline: 2013-11-25T13:58:00
+---SPEECH---
+ID: ps2013-001-01-002-002.u3
+author: #MiroslavaNemcova.1952
+role: #chair
+when: 2013-11-25
+tokens: 49
+sentences: 3
+named entity refferences: 5
+total duration: 68900.0
+total spoken: 22930.0
+time silent: 11510.0
+time unknown: 34460.0
+unaligned tokens: 1
+earliest timeline: 2013-11-25T14:38:00
+latest timeline: 2013-11-25T14:38:00
+---SPEECH---
+ID: ps2013-001-01-002-002.u2
+author: #LubomirZaoralek.1956
+role: #regular
+when: 2013-11-25
+tokens: 877
+sentences: 206
+named entity refferences: 604
+total duration: 1865080.0
+total spoken: 323020.0
+time silent: 1209490.0
+time unknown: 332570.0
+unaligned tokens: 161
+earliest timeline: 2013-11-25T13:58:00
+latest timeline: 2013-11-25T14:38:00
+
diff --git a/test/MetadataExtraction/examples/expected/expectedReal.txt b/test/MetadataExtraction/examples/expected/expectedReal.txt
@@ -0,0 +1,16 @@
+---SPEECH---
+ID: ps2013-001-01-000-999.u1
+author: #MiroslavaNemcova.1952
+role: #chair
+when: 2013-11-25
+tokens: 175
+sentences: 12
+named entity refferences: 18
+total duration: 190520.0
+total spoken: 75000.0
+time silent: -26850.0
+time unknown: 142370.0
+unaligned tokens: 14
+earliest timeline: 2013-11-25T13:58:00
+latest timeline: 2013-11-25T13:58:00
+
diff --git a/test/MetadataExtraction/examples/inputs/encapsulatedWords.xml b/test/MetadataExtraction/examples/inputs/encapsulatedWords.xml
diff --git a/test/MetadataExtraction/examples/inputs/missingTimestamps.xml b/test/MetadataExtraction/examples/inputs/missingTimestamps.xml
diff --git a/test/MetadataExtraction/examples/inputs/multipleSpeeches.xml b/test/MetadataExtraction/examples/inputs/multipleSpeeches.xml
diff --git a/test/MetadataExtraction/examples/inputs/real.xml b/test/MetadataExtraction/examples/inputs/real.xml
diff --git a/test/MetadataExtraction/tester.py b/test/MetadataExtraction/tester.py
@@ -0,0 +1,50 @@
+import subprocess
+import difflib
+import argparse
+
+
+# Command-line interface of the tester; currently a single boolean switch
+# ("--s") that enables the speech-parser tests.
+args_parser = argparse.ArgumentParser()
+args_parser.add_argument("--s",action="store_true", help="Set this flag to test speeches")
+
+# Speech-parser test cases. test_speeches() unpacks each tuple as
+# (test name, "--file=..." argument, "--wd=..." argument, path of the file
+# holding the expected output). The two "--..." strings are passed to the
+# parser script verbatim.
+# NOTE(review): "--wd" presumably selects the parser's working directory - confirm
+# against speechParser2.py.
+test_cases_speeches = [
+    ("REAL","--file=examples/inputs/real.xml", "--wd=../../MetadataExtraction", "examples/expected/expectedReal.txt"),
+    ("MISSING TIMESTAMPS","--file=examples/inputs/missingTimestamps.xml", "--wd=../../MetadataExtraction", "examples/expected/expectedMissingTimestamps.txt"),
+    ("ENCAPSULATED WORDS","--file=examples/inputs/encapsulatedWords.xml", "--wd=../../MetadataExtraction", "examples/expected/expectedEncapsulatedWords.txt"),
+    ("MULTIPLE SPEECHES", "--file=examples/inputs/multipleSpeeches.xml",  "--wd=../../MetadataExtraction", "examples/expected/expectedMultipleSpeeches.txt"),    
+]
+
+
+# Interpreter used to launch the parser, and the parser script itself; both
+# are handed to subprocess.run() in test_speeches().
+# NOTE(review): these are relative paths, and my_venv points at a specific
+# developer virtual environment outside the repository - the script presumably
+# has to be started from test/MetadataExtraction on a machine that has that
+# environment; confirm or make configurable.
+my_venv = "../../../bakalarka/bin/python3"
+speech_parser = "../../MetadataExtraction/speechParser2.py"
+
+def test_speeches():
+    for test_name, test_file, test_wd, expected_output in test_cases_speeches:
+        expected = ""
+        process = subprocess.run([my_venv, speech_parser, test_file, test_wd], capture_output=True, text=True)
+
+        actual_output = process.stdout.strip()
+
+        with open(expected_output, 'r') as f:
+            expected = f.read().strip()
+
+        if (actual_output == expected):
+            print(f"Test {test_name}: PASSED!")
+        else:
+            diff = difflib.unified_diff(expected.splitlines(), actual_output.splitlines(),
+                                    fromfile='expected',tofile='actual',lineterm='')
+
+            print(f"Test {test_name}: FAILED!")
+            print(f"-------------------------")
+            for line in diff:
+                print(line)
+            print(f"-------------------------")
+
+def main(args):
+    if args.s:
+        print("####TESTING SPEECH PARSER####")
+        test_speeches()
+        print("########## DONE #############")
+
+# Script entry point: parse the command line and run the selected tests only
+# when this file is executed directly (not when it is imported).
+if __name__ == "__main__":
+    main(args_parser.parse_args())
+