Skip to content

Commit

Permalink
DMI Service Manager sanitize filenames if needed
Browse files Browse the repository at this point in the history
  • Loading branch information
dale-wahl committed Sep 5, 2023
1 parent f34917a commit 24663f9
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 17 deletions.
34 changes: 27 additions & 7 deletions common/lib/dmi_service_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import time
from json import JSONDecodeError
from werkzeug.utils import secure_filename

import requests
from pathlib import Path
Expand All @@ -31,7 +32,10 @@ class DsmOutOfMemory(DmiServiceManagerException):

class DmiServiceManager:
"""
Class to manage interactions with a DMI Service Manager server.
Found here:
https://github.com/digitalmethodsinitiative/dmi_service_manager
"""
def __init__(self, processor):
"""
Expand Down Expand Up @@ -245,26 +249,30 @@ def send_files(self, file_collection_name, results_name, files_to_upload, dir_wi
to_upload_filenames = [filename for filename in files_to_upload if filename not in uploaded_files]
total_files_to_upload = len(to_upload_filenames)

if total_files_to_upload > 0 or results_name not in existing_files:
api_upload_endpoint = f"{self.server_address}send_files"

# Check if results folder exists
if results_name not in existing_files:
total_files_to_upload += 1
# Create a blank file to upload into results folder
empty_placeholder = f"4CAT_{results_name}_blank.txt"
with open(dir_with_files.joinpath(empty_placeholder), 'w') as file:
file.write('')
to_upload_filenames = [empty_placeholder] + to_upload_filenames

if total_files_to_upload > 0:
api_upload_endpoint = f"{self.server_address}send_files"

self.processor.dataset.update_status(f"Uploading {total_files_to_upload} files")
files_uploaded = 0
while to_upload_filenames:
upload_file = to_upload_filenames.pop()
# Upload files
if files_uploaded == 0:
upload_file = empty_placeholder
# Upload a blank file to create the results folder
# Upload a blank results file to results folder
response = requests.post(api_upload_endpoint,
files=[(results_name, open(dir_with_files.joinpath(empty_placeholder), 'rb'))],
files=[(results_name, open(dir_with_files.joinpath(upload_file), 'rb'))],
data=data, timeout=120)
else:
upload_file = to_upload_filenames.pop()
# All other files uploading to general upload folder belonging to parent dataset collection
response = requests.post(api_upload_endpoint,
files=[('4cat_uploads', open(dir_with_files.joinpath(upload_file), 'rb'))],
data=data, timeout=120)
Expand Down Expand Up @@ -312,6 +320,18 @@ def download_results(self, filenames_to_download, folder_name, local_output_dir,
with open(local_output_dir.joinpath(filename), 'wb') as file:
file.write(file_response.content)

def sanitize_filenames(self, filename):
    """
    Return the filename under which the DMI Service Manager will know a file.

    A remote server sanitizes the names of uploaded files, so any path sent
    to it afterwards has to use the same sanitized name. A local server
    reads the files as they are, so the name is passed through untouched.

    :param filename:  Name of the file as known to 4CAT
    :return:  The filename to reference in requests to the server
    :raises DmiServiceManagerException:  If `local_or_remote` is neither
    'local' nor 'remote'
    """
    mode = self.local_or_remote

    if mode == "remote":
        # Mirror the sanitization the server applies on upload
        return secure_filename(filename)

    if mode == "local":
        return filename

    # Any other value is a misconfiguration
    raise DmiServiceManagerException("dmi_service_manager.local_or_remote setting must be 'local' or 'remote'")

@staticmethod
def get_folder_name(dataset):
"""
Expand Down
2 changes: 1 addition & 1 deletion processors/audio/whisper_speech_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def process(self):
setting = setting if setting[:2] == "--" else "--" + setting.lstrip("-")
data["args"].extend([setting, str(value)])
# Finally, add audio files to args
data["args"].extend([f"data/{path_to_files.joinpath(filename)}" for filename in audio_filenames])
data["args"].extend([f"data/{path_to_files.joinpath(dmi_service_manager.sanitize_filenames(filename))}" for filename in audio_filenames])

# Send request to DMI Service Manager
self.dataset.update_status(f"Requesting service from DMI Service Manager...")
Expand Down
2 changes: 1 addition & 1 deletion processors/conversion/text_from_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def process(self):
data = {'args': ['--model', self.parameters.get("model_type"),
'--output_dir', f"data/{path_to_results}",
'--images']}
data["args"].extend([f"data/{path_to_files.joinpath(filename)}" for filename in image_filenames])
data["args"].extend([f"data/{path_to_files.joinpath(dmi_service_manager.sanitize_filenames(filename))}" for filename in image_filenames])

# Send request to DMI Service Manager
self.dataset.update_status(f"Requesting service from DMI Service Manager...")
Expand Down
15 changes: 7 additions & 8 deletions processors/visualisation/clip_categorize_images.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
"""
OpenAI CLIP categorize images
"""
import datetime
import os
import json
import time
import requests
from pathlib import Path
from json import JSONDecodeError


from backend.lib.processor import BasicProcessor
from common.lib.dmi_service_manager import DmiServiceManager, DmiServiceManagerException, DsmOutOfMemory
from common.lib.exceptions import ProcessorException, ProcessorInterruptedException
from common.lib.exceptions import ProcessorInterruptedException
from common.lib.user_input import UserInput
from common.config_manager import config

Expand Down Expand Up @@ -151,7 +146,11 @@ def process(self):
dmi_service_manager = DmiServiceManager(processor=self)

# Check GPU memory available
gpu_memory, info = dmi_service_manager.check_gpu_memory_available("clip")
try:
gpu_memory, info = dmi_service_manager.check_gpu_memory_available("clip")
except DmiServiceManagerException as e:
self.dataset.finish_with_error(str(e))
return
if not gpu_memory:
if info.get("reason") == "GPU not enabled on this instance of DMI Service Manager":
self.dataset.update_status("DMI Service Manager GPU not enabled; using CPU")
Expand All @@ -176,7 +175,7 @@ def process(self):
}

# Finally, add image files to args
data["args"].extend([f"data/{path_to_files.joinpath(filename)}" for filename in image_filenames])
data["args"].extend([f"data/{path_to_files.joinpath(dmi_service_manager.sanitize_filenames(filename))}" for filename in image_filenames])

# Send request to DMI Service Manager
self.dataset.update_status(f"Requesting service from DMI Service Manager...")
Expand Down

0 comments on commit 24663f9

Please sign in to comment.