-
Notifications
You must be signed in to change notification settings - Fork 0
/
create-video-manifest.py
70 lines (58 loc) · 2.03 KB
/
create-video-manifest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import json
import math
import datetime
from pysubparser import parser
import click
import sys
import unicodedata
import re
def slugify(value, allow_unicode=False):
    """
    Turn an arbitrary value into a slug safe for use in file names and URLs.

    Adapted from Django's ``django.utils.text.slugify``.

    The value is first coerced to ``str``. With ``allow_unicode`` false (the
    default) it is NFKD-normalized and every non-ASCII character is dropped;
    otherwise it is NFKC-normalized and non-ASCII word characters survive.
    The text is then lowercased, stripped of everything that is not a word
    character, whitespace, or a hyphen, and each run of whitespace/hyphens is
    collapsed into a single hyphen. Leading and trailing hyphens and
    underscores are removed from the result.
    """
    text = str(value)
    if not allow_unicode:
        # Decompose accented characters so the base letter survives the
        # ASCII round-trip while the combining marks are discarded.
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    else:
        text = unicodedata.normalize("NFKC", text)

    without_punctuation = re.sub(r"[^\w\s-]", "", text.lower())
    hyphenated = re.sub(r"[-\s]+", "-", without_punctuation)
    return hyphenated.strip("-_")
def get_microseconds(t):
    """
    Return the time of day held by ``t`` as a total number of microseconds.

    ``t`` must expose ``hour``, ``minute``, ``second`` and ``microsecond``
    attributes (presumably a ``datetime.time`` -- confirm against the
    subtitle parser in use).
    """
    # Fold hours and minutes down to whole seconds first, then scale once.
    whole_seconds = (t.hour * 60 + t.minute) * 60 + t.second
    return whole_seconds * 1000_000 + t.microsecond
@click.command()
@click.option("--sub-file", type=click.File("r"), required=True)
@click.option("--output-manifest-file", type=click.File("w"), required=True)
def cli(sub_file, output_manifest_file):
    """Write a JSON clip manifest with one entry per subtitle in SUB_FILE.

    Each entry holds the clip's start time and length in whole seconds, the
    subtitle text as its title, and a target file name built from the
    slugified text plus the start second.
    """
    # Only the path is handed to the parser; the handle click opened is not
    # read from here.
    subtitles = parser.parse(sub_file.name)

    output_json = []
    for subtitle in subtitles:
        start_us = get_microseconds(subtitle.start)
        end_us = get_microseconds(subtitle.end)

        # Snap the clip outwards to whole-second boundaries: floor the start
        # and ceil the end. The length must be measured from the *floored*
        # start; ceil(end - start) alone can stop short of the subtitle's end
        # when the start's fractional part pushes the end past the next
        # second (e.g. 1.9s -> 3.1s needs 3 seconds from second 1, not 2).
        start_time_in_seconds = start_us // 1000_000
        end_time_in_seconds = -(-end_us // 1000_000)  # integer ceiling
        length_in_seconds = end_time_in_seconds - start_time_in_seconds

        clip_filename = f"{slugify(subtitle.text)}-{start_time_in_seconds}.mp4"
        output_json.append(
            {
                "start_time": start_time_in_seconds,
                "length": length_in_seconds,
                "rename_to": clip_filename,
                "title": subtitle.text,
            }
        )

    json.dump(output_json, output_manifest_file, indent=4, sort_keys=True)
# Invoke the click command only when this file is executed as a script, so
# importing the module has no side effects.
if __name__ == "__main__":
    cli()