-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add first batch of tests for speech and timestamps loading #9
- Loading branch information
Showing
9 changed files
with
25,098 additions
and
0 deletions.
There are no files selected for viewing
16 changes: 16 additions & 0 deletions
16
test/MetadataExtraction/examples/expected/expectedEncapsulatedWords.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
---SPEECH--- | ||
ID: ps2013-001-01-000-999.u1 | ||
author: #MiroslavaNemcova.1952 | ||
role: #chair | ||
when: 2013-11-25 | ||
tokens: 25 | ||
sentences: 12 | ||
named entity refferences: 18 | ||
total duration: 172760.0 | ||
total spoken: 15200.0 | ||
time silent: 68160.0 | ||
time unknown: 89400.0 | ||
unaligned tokens: 1 | ||
earliest timeline: 2013-11-25T13:58:00 | ||
latest timeline: 2013-11-25T13:58:00 | ||
|
16 changes: 16 additions & 0 deletions
16
test/MetadataExtraction/examples/expected/expectedMissingTimestamps.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
---SPEECH--- | ||
ID: ps2013-001-01-000-999.u1 | ||
author: #MiroslavaNemcova.1952 | ||
role: #chair | ||
when: 2013-11-25 | ||
tokens: 175 | ||
sentences: 12 | ||
named entity refferences: 18 | ||
total duration: 0 | ||
total spoken: 0 | ||
time silent: 0 | ||
time unknown: 0 | ||
unaligned tokens: 175 | ||
earliest timeline: None | ||
latest timeline: None | ||
|
46 changes: 46 additions & 0 deletions
46
test/MetadataExtraction/examples/expected/expectedMultipleSpeeches.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
---SPEECH--- | ||
ID: ps2013-001-01-002-002.u1 | ||
author: #MiroslavaNemcova.1952 | ||
role: #chair | ||
when: 2013-11-25 | ||
tokens: 148 | ||
sentences: 12 | ||
named entity refferences: 11 | ||
total duration: 143120.0 | ||
total spoken: 58880.0 | ||
time silent: 12440.0 | ||
time unknown: 71800.0 | ||
unaligned tokens: 17 | ||
earliest timeline: 2013-11-25T13:58:00 | ||
latest timeline: 2013-11-25T13:58:00 | ||
---SPEECH--- | ||
ID: ps2013-001-01-002-002.u3 | ||
author: #MiroslavaNemcova.1952 | ||
role: #chair | ||
when: 2013-11-25 | ||
tokens: 49 | ||
sentences: 3 | ||
named entity refferences: 5 | ||
total duration: 68900.0 | ||
total spoken: 22930.0 | ||
time silent: 11510.0 | ||
time unknown: 34460.0 | ||
unaligned tokens: 1 | ||
earliest timeline: 2013-11-25T14:38:00 | ||
latest timeline: 2013-11-25T14:38:00 | ||
---SPEECH--- | ||
ID: ps2013-001-01-002-002.u2 | ||
author: #LubomirZaoralek.1956 | ||
role: #regular | ||
when: 2013-11-25 | ||
tokens: 877 | ||
sentences: 206 | ||
named entity refferences: 604 | ||
total duration: 1865080.0 | ||
total spoken: 323020.0 | ||
time silent: 1209490.0 | ||
time unknown: 332570.0 | ||
unaligned tokens: 161 | ||
earliest timeline: 2013-11-25T13:58:00 | ||
latest timeline: 2013-11-25T14:38:00 | ||
|
16 changes: 16 additions & 0 deletions
16
test/MetadataExtraction/examples/expected/expectedReal.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
---SPEECH--- | ||
ID: ps2013-001-01-000-999.u1 | ||
author: #MiroslavaNemcova.1952 | ||
role: #chair | ||
when: 2013-11-25 | ||
tokens: 175 | ||
sentences: 12 | ||
named entity refferences: 18 | ||
total duration: 190520.0 | ||
total spoken: 75000.0 | ||
time silent: -26850.0 | ||
time unknown: 142370.0 | ||
unaligned tokens: 14 | ||
earliest timeline: 2013-11-25T13:58:00 | ||
latest timeline: 2013-11-25T13:58:00 | ||
|
1,276 changes: 1,276 additions & 0 deletions
1,276
test/MetadataExtraction/examples/inputs/encapsulatedWords.xml
Large diffs are not rendered by default.
Oops, something went wrong.
953 changes: 953 additions & 0 deletions
953
test/MetadataExtraction/examples/inputs/missingTimestamps.xml
Large diffs are not rendered by default.
Oops, something went wrong.
19,797 changes: 19,797 additions & 0 deletions
19,797
test/MetadataExtraction/examples/inputs/multipleSpeeches.xml
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import subprocess | ||
import difflib | ||
import argparse | ||
|
||
|
||
# Command-line interface of the test runner; `--s` enables the speech tests.
args_parser = argparse.ArgumentParser(
    description="Run regression tests that compare script output with expected files.",
)
args_parser.add_argument(
    "--s",
    action="store_true",
    help="Set this flag to test speeches",
)
|
||
# Speech-parser test cases. Each entry is a 4-tuple:
#   (display name, --file argument, --wd argument, path of the expected output)
# The two arguments are passed to the parser script unchanged; the expected
# output path is opened by the test runner itself.
test_cases_speeches = [
    (
        "REAL",
        "--file=examples/inputs/real.xml",
        "--wd=../../MetadataExtraction",
        "examples/expected/expectedReal.txt",
    ),
    (
        "MISSING TIMESTAMPS",
        "--file=examples/inputs/missingTimestamps.xml",
        "--wd=../../MetadataExtraction",
        "examples/expected/expectedMissingTimestamps.txt",
    ),
    (
        "ENCAPSULATED WORDS",
        "--file=examples/inputs/encapsulatedWords.xml",
        "--wd=../../MetadataExtraction",
        "examples/expected/expectedEncapsulatedWords.txt",
    ),
    (
        "MULTIPLE SPEECHES",
        "--file=examples/inputs/multipleSpeeches.xml",
        "--wd=../../MetadataExtraction",
        "examples/expected/expectedMultipleSpeeches.txt",
    ),
]
|
||
|
||
# Interpreter used to launch the parser. The path is relative, so it resolves
# against the current working directory of the test run.
# NOTE(review): presumably a developer-local virtualenv - confirm it exists on
# the machine running the tests.
my_venv = "../../../bakalarka/bin/python3"
# Script under test; also relative to the current working directory.
speech_parser = "../../MetadataExtraction/speechParser2.py"
|
||
def test_speeches():
    """Run every speech-parser test case and print a verdict for each.

    For each entry of ``test_cases_speeches`` the parser script
    (``speech_parser``) is started with the interpreter ``my_venv`` and the
    case's two command-line arguments. Its stdout, stripped of surrounding
    whitespace, is compared with the stripped contents of the expected-output
    file.

    A match prints ``Test <name>: PASSED!``. A mismatch prints
    ``Test <name>: FAILED!`` followed by a unified diff (expected -> actual).
    When the parser exited with a non-zero code or wrote to stderr, that
    information is printed after the diff, so that a crashed parser is not
    mistaken for an ordinary output difference.

    Raises:
        OSError: If the interpreter cannot be started or an expected-output
            file cannot be opened.
    """
    separator = "-------------------------"

    for test_name, test_file, test_wd, expected_output in test_cases_speeches:
        process = subprocess.run(
            [my_venv, speech_parser, test_file, test_wd],
            capture_output=True,
            text=True,
        )
        actual_output = process.stdout.strip()

        with open(expected_output, 'r') as f:
            expected = f.read().strip()

        if actual_output == expected:
            print(f"Test {test_name}: PASSED!")
            continue

        print(f"Test {test_name}: FAILED!")
        print(separator)
        diff = difflib.unified_diff(
            expected.splitlines(),
            actual_output.splitlines(),
            fromfile='expected',
            tofile='actual',
            lineterm='',
        )
        for line in diff:
            print(line)

        # Surface the reason for a failure that the stdout diff cannot show:
        # an abnormal exit status or diagnostics written to stderr.
        if process.returncode != 0:
            print(f"parser exit code: {process.returncode}")
        stderr_text = process.stderr.strip()
        if stderr_text:
            print("parser stderr:")
            print(stderr_text)
        print(separator)
|
||
def main(args):
    """Run the test groups selected on the command line.

    Args:
        args: Parsed command-line arguments. Only the boolean attribute ``s``
            is read: when it is true the speech-parser tests are run between
            a start banner and an end banner; otherwise nothing is done.
    """
    if not args.s:
        return

    print("####TESTING SPEECH PARSER####")
    test_speeches()
    print("########## DONE #############")
|
||
# Parse the command line and run the tests only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main(args_parser.parse_args())
|