-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* basic changes to allow files box * basic imports, yay! * video_scene_timelines to work on video imports! * add is_compatible_with checks to processors that cannot run on new media top_datasets * more is_compatible fixes * necessary function for checking media_types * enable more processors on media datasets * consolidate user_input file type * detect mimetype from filename best I can do without downloading all the files first. * handle zip archives; allow log and metadata files * do not count metadata or log files in num_files * move machine learning processors so they can be imported elsewhere * audio_to_text datasource * When validating zip file uploads, send list of file attributes instead of the first 128K of the zip file * Check type of files in zip when uploading media * Skip useless files when uploading media as zip * check multiple zip types in JS * js !=== python * fix media_type for loose file imports; fix extension for audio_to_text preset; fix merge for some processors w/ media_type --------- Co-authored-by: Stijn Peeters <[email protected]>
- Loading branch information
Showing
50 changed files
with
599 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Use default data source init function | ||
from common.lib.helpers import init_datasource | ||
|
||
# Internal identifier for this data source | ||
DATASOURCE = "upload-audio-to-text" | ||
# Human-readable name for this data source (presumably shown in the interface; confirm against init_datasource) | ||
NAME = "Upload Audio to Text" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
""" | ||
Audio Upload to Text | ||
This data source acts similarly to a Preset, but because it needs SearchMedia's validate_query and after_create methods | ||
to run, chaining that processor does not work (Presets essentially only run the process and after_process methods | ||
of their processors and skip those two datasource-only methods). | ||
""" | ||
|
||
from datasources.media_import.import_media import SearchMedia | ||
from processors.machine_learning.whisper_speech_to_text import AudioToText | ||
|
||
|
||
class AudioUploadToText(SearchMedia):
    """
    Data source that uploads audio files and chains a speech-to-text processor.

    The upload itself is delegated to SearchMedia; after validation, an
    "audio-to-text" processor is queued on the result via the "next" key of
    the query, in the same way a preset would chain processors.
    """
    type = "upload-audio-to-text-search"  # job ID
    category = "Search"  # category
    title = "Convert speech to text"  # title displayed in UI
    description = "Upload your own audio and use OpenAI's Whisper model to create transcripts"  # description displayed in UI

    @classmethod
    def is_compatible_with(cls, module=None, user=None):
        """
        Determine whether this data source can be used.

        Delegates entirely to AudioToText.is_compatible_with.

        :param module:  Passed through to AudioToText.is_compatible_with
        :param user:  Passed through to AudioToText.is_compatible_with
        :return:  Whatever AudioToText.is_compatible_with returns
        """
        # TODO: False here does not appear to actually remove the datasource from the "Create dataset" page so
        #  technically this method is not necessary; if we can adjust that behavior, it ought to function as intended

        # Ensure the Whisper model is available
        return AudioToText.is_compatible_with(module=module, user=user)

    @classmethod
    def get_options(cls, parent_dataset=None, user=None):
        """
        Combine the media upload options with the Whisper processor options.

        Options from AudioToText take precedence over SearchMedia options
        that share a key. The "intro" and "advanced" help texts are then
        overridden for this data source.

        :param parent_dataset:  Passed through to both get_options calls
        :param user:  Passed through to both get_options calls
        :return dict:  Merged options
        :raises KeyError:  If the merged options lack "intro" or "advanced"
        """
        # We need both sets of options for this datasource. Merge into a NEW
        # dict rather than calling update() on SearchMedia's return value, so
        # that a dict shared by the parent class is never modified.
        options = {
            **SearchMedia.get_options(parent_dataset=parent_dataset, user=user),
            **AudioToText.get_options(parent_dataset=parent_dataset, user=user),
        }

        # TODO: there are some odd formatting issues if we use those derived options
        #  The intro help text is not displayed correct (does not wrap)
        #  Advanced Settings uses []() links which do not work on the "Create dataset" page, so we adjust

        # Replace the nested option entries with shallow copies before
        # overriding "help"; assigning into the originals could leak these
        # texts into SearchMedia or AudioToText.
        options["intro"] = {
            **options["intro"],
            "help": ("Upload audio files here to convert speech to text. "
                     "4CAT will use OpenAI's Whisper model to create transcripts."
                     "\n\nFor information on using advanced settings: [Command Line Arguments (CLI)](https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/transcribe.py#LL374C3-L374C3)"),
        }
        options["advanced"] = {**options["advanced"], "help": "Advanced Settings"}

        return options

    @staticmethod
    def validate_query(query, request, user):
        """
        Validate the upload and queue the speech-to-text processor.

        :param query:  Submitted query parameters; must support .copy()
        :param request:  Passed through to SearchMedia.validate_query
        :param user:  Passed through to SearchMedia.validate_query
        :return:  The query returned by SearchMedia.validate_query, with a
          "next" entry that chains the "audio-to-text" processor
        """
        # We need SearchMedia's validate_query to upload the media
        media_query = SearchMedia.validate_query(query, request, user)

        # Here's the real trick: act like a preset and add another processor to the pipeline
        media_query["next"] = [{
            "type": "audio-to-text",
            "parameters": query.copy(),
        }]
        return media_query
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Use default data source init function | ||
from common.lib.helpers import init_datasource | ||
|
||
# Internal identifier for this data source | ||
DATASOURCE = "media-import" | ||
# Human-readable name for this data source (presumably shown in the interface; confirm against init_datasource) | ||
NAME = "Import/upload Media files" |
Oops, something went wrong.