# frozen_string_literal: true

usage 'generate-vtt [options] json recognition file'
aliases :generate_vtt, :vtt
summary 'generate a caption file from an audio recognition'
description 'Converts an IBM Watson JSON audio recognition file to a WebVTT caption file'

flag nil, :force, 'force creation of a caption file'

class GenerateVTT < ::Nanoc::CLI::CommandRunner
  # One recognized word with its start/end time and the Watson speaker label.
  Utterance = Struct.new(:word, :start_time, :end_time, :speaker)
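
  # For example (hypothetical values), the Watson timestamp triple
  # ["hello", 0.34, 0.71] plus speaker label 0 becomes
  # Utterance.new("hello", 0.34, 0.71, 0).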

  # A single WebVTT cue covering one final recognition result, possibly with
  # several speakers.
  class VTTCue
    attr_reader :identifier, :start_time, :end_time, :text

    # Builds a WebVTT voice span such as "<v %SPEAKER_0>hello there</v>",
    # prefixed with a cue-internal timestamp tag when start_time is given.
    def self.v(speaker, text, start_time = nil)
      v_span = String.new
      v_span << "<v %SPEAKER_#{speaker}>#{text}</v>"
      v_span.prepend("<#{start_time}>") if start_time
      v_span
    end

    # identifier is the 1-based cue number; utterances are the timed words in
    # the cue. Consecutive words by the same speaker are chunked into a single
    # voice span; every speaker change after the first gets a timestamp tag.
    def initialize(identifier, utterances)
      @identifier = identifier
      @start_time = WebVTT::Timestamp.new(utterances.first.start_time)
      @end_time = WebVTT::Timestamp.new(utterances.last.end_time)
      @text = utterances
              .chunk { |utt| utt.speaker }
              .map.with_index do |(speaker, ch_utts), idx|
                self.class.v(speaker,
                             ch_utts.map { |u| u.word }.join(' '),
                             idx == 0 ? nil : WebVTT::Timestamp.new(ch_utts.first.start_time))
              end.join
    end

    def to_webvtt
      cue = String.new
      cue << "\ncue-#{identifier}"
      cue << "\n#{start_time} --> #{end_time}"
      cue << "\n#{text}\n"
      cue
    end
    alias to_s to_webvtt
  end
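
  # For illustration only (hypothetical speakers and timings), a cue rendered
  # by VTTCue#to_webvtt looks roughly like:
  #
  #   cue-1
  #   00:00:01.000 --> 00:00:04.500
  #   <v %SPEAKER_0>hello everyone</v><00:00:03.200><v %SPEAKER_1>hi</v>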

  def run
    require 'yajl'
    require 'webvtt'

    # Extract arguments
    if arguments.length != 1
      raise Nanoc::Int::Errors::GenericTrivial, "usage: #{command.usage}"
    end
    recognition_path = arguments[0]

    # Check for file existence and force flag
    meeting_dir, recognition_name = File.split(recognition_path)
    output_file = File.join(meeting_dir, File.basename(recognition_name, '.*') + '.vtt')
    if File.exist?(output_file) && !options[:force]
      raise(
        Nanoc::Int::Errors::GenericTrivial,
        "The transcript was not created because '#{output_file}' already exists. " \
        'Re-run the command using --force to create the transcript anyway.',
      )
    end

    # Setup notifications
    Nanoc::Int::NotificationCenter.on(:file_created) do |file_path|
      Nanoc::CLI::Logger.instance.file(:high, :create, file_path)
    end

    recognition_file = File.open(recognition_path)
    parser = Yajl::Parser.new(symbolize_keys: true)
    recognition = parser.parse(recognition_file)
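
    # Assumed shape of the Watson speech-to-text JSON, as consumed below:
    # :results holds the recognized utterances, each with :alternatives whose
    # :timestamps entries are [word, start_seconds, end_seconds] triples, and
    # :speaker_labels is a parallel, word-by-word list carrying a :speaker id.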
    utterance_map = {}
    speaker_labels = recognition[:speaker_labels].to_enum
    recognition[:results].each do |result|
      next unless result[:final]
      alternative = result[:alternatives].first
      utterances = alternative[:timestamps].map do |u|
        speaker_label = speaker_labels.next
        Utterance.new(*u, speaker_label[:speaker])
      end
      start_time = utterances.first.start_time
      utterance_map[start_time] = utterances
    end

    # One cue per final result, numbered from 1 in transcript order.
    cues = utterance_map.values
                        .map.with_index(1) { |u, i| VTTCue.new(i, u) }

    meeting_date = recognition_path[/([0-9]{4}-[0-9]{2}-[0-9]{2})/]
    header = <<~EOS
      WEBVTT - SCWG WG4 Telecon (#{meeting_date})
      lang: en

      NOTE This file was generated for the FORCE11 Scholarly Commons Working
      Group 4 through a semi-automatic process. It now represents the
      authoritative transcript of this telecon, and should be edited for
      correction, clarification, and diplomacy. Other representations of the
      meeting will be generated from this document. Except in the case of
      elision or redaction of content, please keep time intervals and other cue
      metadata intact.

      NOTE For further information about the WebVTT syntax and data model,
      visit: https://w3c.github.io/webvtt/

      NOTE A few specific directions to consider:
      - Use the `newthought` class on a voice span to split up one speaker's text into separate thoughts.
      - Use the `unintelligible` class to mark something as unintelligible.
      - Use lang spans to mark up words from languages other than English.
      - Use the `question` class to mark up questions for the person being interviewed.
    EOS
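
    # Hypothetical examples of the markup those NOTEs describe (WebVTT applies
    # classes with dot notation; these are editing conventions, not emitted by
    # this command):
    #   <v.newthought %SPEAKER_0>Moving on to the next item ...</v>
    #   <v.unintelligible %SPEAKER_1>...</v>
    #   <lang fr>déjà vu</lang>
    #   <v.question %SPEAKER_2>Could you expand on that?</v>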

    content = header + cues.map(&:to_webvtt).compact.join
    write(output_file, content)
  end

  private

  # Writes the caption file and posts a notification so the creation is logged.
  def write(filename, content)
    File.write(filename, content)
    Nanoc::Int::NotificationCenter.post(:file_created, filename)
  end
end
runner GenerateVTT
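
# Illustrative invocations (hypothetical paths; the YYYY-MM-DD date found in
# the path is echoed into the WEBVTT header, and the .vtt file is written next
# to the input JSON):
#   nanoc generate-vtt meetings/2017-03-14/recognition.json
#   nanoc vtt --force meetings/2017-03-14/recognition.json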