-
Notifications
You must be signed in to change notification settings - Fork 0
/
align.py
105 lines (92 loc) · 4.21 KB
/
align.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python
# coding=utf-8
import sys
import os
import json
from aeneas.executetask import ExecuteTask
from aeneas.task import Task
from aeneas.textfile import TextFile
from aeneas.language import Language
from aeneas.textfile import TextFragment
from json_to_lines import get_par_lines
from audioread import audio_open
from word_time_distribution import distribute_words
from read_aeneas_json import parse_aeneas
def total_FA(soundfile, mylines, myhead, mytail, config=None):
    """Run an Aeneas forced alignment as a library call.

    This function isn't in use, currently, as we haven't managed to get
    reliable results in this way.

    Args:
        soundfile: Path assigned to the task's ``audio_file_path_absolute``.
        mylines: Iterable of ``(identifier, frag_text)`` pairs; each pair
            becomes one ``TextFragment`` (``frag_text`` is passed both as the
            fragment's lines and as its filtered lines).
        myhead: Value inserted as ``is_audio_file_head_length``.
        mytail: Value inserted as ``is_audio_file_tail_length``.
        config: Optional extra configuration, appended to the config string
            after a ``|`` separator.

    Returns:
        The ``fragments`` of the task's sync map after execution.
    """
    # Build the common part of the configuration once; the optional extra
    # configuration is simply appended after a separator.
    settings = (
        u"task_language=nor|is_text_type=plain|os_task_file_format=json"
        u"|is_audio_file_head_length=%s|is_audio_file_tail_length=%s"
        % (myhead, mytail)
    )
    if config is not None:
        settings = settings + (u"|%s" % (config,))
    config_string = settings
    print(config_string)

    # Create the Task object and point it at the audio file.
    job = Task(config_string=config_string)
    print(job)
    job.audio_file_path_absolute = soundfile

    # Assemble the text to align, one fragment per input line.
    text = TextFile()
    print(text)
    for frag_id, frag_lines in mylines:
        fragment = TextFragment(frag_id, Language.NOR, frag_lines, frag_lines)
        text.add_fragment(fragment)
    job.text_file = text
    print(len(job.text_file))

    ExecuteTask(job).execute()
    return job.sync_map.fragments
def compute_alignments(soundfile, asr_dict, config=None):
    """Create a list of dicts with paragraph ids and timecodes via Aeneas.

    Used when running Aeneas as a library. Since we haven't managed to run
    Aeneas as a library, this function is not in use. In the alternative
    pipeline where Aeneas is run in the terminal, this function is replaced
    by read_aeneas_json.parse_aeneas.

    Args:
        soundfile: Path to the audio file to align against.
        asr_dict: Transcription passed to ``get_par_lines`` to obtain the
            paragraphs (each with ``id``, ``string``, ``start`` and ``end``).
        config: Optional extra Aeneas configuration forwarded to ``total_FA``.

    Returns:
        A list of ``{'id', 'start', 'end'}`` dicts, one per aligned
        paragraph, with the times scaled back by the same factor the
        paragraph times were divided by. Empty if there are no paragraphs.
    """
    # Paragraph times are divided by this factor before being handed to
    # Aeneas -- presumably nanoseconds -> seconds; confirm against
    # get_par_lines.
    bil = 1000000000
    # Amount of audio kept before the first and after the last paragraph.
    margin = 3

    paragraphs = get_par_lines(asr_dict)
    if not paragraphs:
        # Nothing to align; avoids an IndexError on paragraphs[0] below.
        return []

    with audio_open(soundfile) as sf:
        duration = sf.duration

    speechstart = paragraphs[0]['start'] / bil
    speechend = paragraphs[-1]['end'] / bil

    # Clamp at zero: when speech begins (or ends) within `margin` of the
    # file boundary the plain subtraction would yield a negative head/tail
    # length, which is not a meaningful amount of audio to skip.
    head = max(0.0, speechstart - margin)
    tail = max(0.0, duration - speechend - margin)

    allines = [(par['id'], [par['string']]) for par in paragraphs]
    align = total_FA(soundfile, allines, head, tail, config)

    # Drop the first and last fragments -- presumably the head and tail
    # fragments Aeneas adds around the speech; confirm against Aeneas output.
    align_only_speech = align[1:-1]

    return [
        {
            'id': par['id'],
            'start': int(fragment.begin * bil),
            'end': int(fragment.end * bil),
        }
        for par, fragment in zip(paragraphs, align_only_speech)
    ]
def realign_json(googledict, aeneasdict):
    """Combine a Google Cloud StT transcription with Aeneas timecodes.

    Takes as input a Google Cloud StT transcription and the same
    transcription forcefully aligned with Aeneas, both in the form of loaded
    json files. Returns a dict, compatible with Google Cloud StT and
    Språklabben, which corresponds to the Google transcription, but with the
    paragraph start and end timecodes from Aeneas, and with the timecodes of
    all other words generated by the heuristics in
    word_time_distribution.distribute_words.

    Args:
        googledict: Loaded transcription with a ``'paragraphs'`` list; each
            paragraph provides ``id``, ``speaker`` and ``words``.
        aeneasdict: Loaded Aeneas alignment, passed to ``parse_aeneas``.

    Returns:
        ``{'paragraphs': [...]}`` where each paragraph has ``id``,
        ``speaker``, ``startTime`` and the re-timed ``words``.

    Raises:
        IndexError: If ``parse_aeneas`` yields fewer alignments than there
            are paragraphs.
    """
    alignments = parse_aeneas(googledict, aeneasdict)
    paragraphs = []
    for n, par in enumerate(googledict['paragraphs']):
        alignment = alignments[n]

        # Work on copies of the word dicts so the caller's googledict is not
        # modified as a side effect (a shallow copy suffices for the
        # top-level time keys set here).
        words = [dict(word) for word in par['words']]
        if words:
            # Anchor the paragraph boundaries to the Aeneas timecodes, then
            # let the heuristics spread the remaining words in between.
            words[0]['startTime'] = alignment['start']
            words[-1]['endTime'] = alignment['end']
            words = distribute_words(words)

        paragraphs.append({
            'id': par['id'],
            'speaker': par['speaker'],
            'startTime': alignment['start'],
            'words': words,
        })
    return {'paragraphs': paragraphs}
if __name__ == "__main__":
    # Command-line entry point: read the Google transcription and the Aeneas
    # alignment, merge them and write the realigned transcription as json.
    try:
        googlejson = sys.argv[1]
        aeneasjson = sys.argv[2]
        outfile = sys.argv[3]
    except IndexError:
        sys.exit("Please provide filenames: python align.py googlejson aeneasjson outfile")

    # JSON is read and written as UTF-8 explicitly. The output is dumped with
    # ensure_ascii=False, so non-ASCII characters are written verbatim and
    # must not depend on the platform's default locale encoding.
    with open(googlejson, 'r', encoding='utf-8') as google:
        googledict = json.load(google)
    with open(aeneasjson, 'r', encoding='utf-8') as aeneas:
        aeneasdict = json.load(aeneas)

    newdict = realign_json(googledict, aeneasdict)

    with open(outfile, 'w', encoding='utf-8') as out:
        json.dump(newdict, out, ensure_ascii=False)