Skip to content

Commit

Permalink
Firooz1/feat/informative email with file name (#34)
Browse files Browse the repository at this point in the history
This merge resolves #37, #33, and #21.

- Allows submitting multiple audio files in addition to a single file or a folder.
- Replaces array job submission of multiple audio files with multiple single file submissions. Results in cleaner code.
- Customised and more informative result emails to users. 
- OOD app has an option to send result files as email attachments.
- Add buttons to OOD app for result and log folders. Makes the complete stage user friendly.
- Adds smtp server error handling.
- Email sender changed to [email protected]
- Fix typos.
  • Loading branch information
hsnfirooz authored Dec 18, 2024
1 parent 13f3fd5 commit 54a11be
Show file tree
Hide file tree
Showing 9 changed files with 452 additions and 261 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ Alternatively, process multiple audio files in a folder
speech2text audio-files/
```

Using the latter option submits the files as an [array job](https://scicomp.aalto.fi/triton/tut/array/). See [src/submit.py](src/submit.py) for details about the submissions.
See [src/submit.py](src/submit.py) for details about the submissions.

The audio file(s) can be in any common audio (.wav, .mp3, .aff, etc.) or video (.mp4, .mov, etc.) format.

Expand Down
54 changes: 50 additions & 4 deletions bin/deploy-data/ood/completed.md.erb
Original file line number Diff line number Diff line change
@@ -1,4 +1,50 @@
## Submission is done!
Your job has been submitted successfully. You will get an email when transcirption is done. <br>
To access the log folder, click in the session id. <br>
The results will be in the Path folder you selected.
<%-
  # Base URL of the OnDemand file browser; filesystem paths are appended to it
  # to build the folder links rendered further down in this template.
  fs_base_url = "https://ondemand.triton.aalto.fi/pun/sys/dashboard/files/fs"
%>

<%-
  require 'yaml'

  # Define the path to the connection.yml file
  connection_file_path = "#{staged_root}/connection.yml"

  # Parse the YAML file to get the value of the key 'audio_path'.
  # An empty file or a missing key falls back to an empty string instead of
  # raising inside File.file?.
  connection_file_content = YAML.load_file(connection_file_path) || {}
  audio_path = connection_file_content['audio_path'].to_s

  # form.js writes several selected files into the field as one string joined
  # with ', '. Such a string is not an existing path, so use its first entry;
  # a single existing file or folder is left untouched.
  # NOTE(review): assumes all selected files live in the same folder — confirm.
  audio_path = audio_path.split(', ').first.to_s unless File.exist?(audio_path)

  # Results are linked next to the audio: the folder itself, or the folder
  # that contains the selected file.
  audio_path = File.dirname(audio_path) if File.file?(audio_path)
  audio_path = File.join(audio_path, 'results')
%>

### Submission is done!

<body>
Your job has been submitted successfully. You will get an email when transcription is done. <br>

<div class="btn-group btn-group-toggle" data-toggle="buttons" style="display: flex; justify-content: space-between; gap: 10px;">
<label class="btn btn-info" style="flex-grow: 1;">
<input type="radio" name="options" id="results" autocomplete="off" onclick="window.open('<%= "#{fs_base_url}/#{audio_path}" %>', '_blank')">
<i class="fa fa-folder"></i> Results Folder
</label>
<label class="btn btn-warning" style="flex-grow: 1;">
<input type="radio" name="options" id="logs" autocomplete="off" onclick="window.open('<%= "#{fs_base_url}/#{staged_root}" %>', '_blank')">
<i class="fa fa-folder"></i> Log Folder
</label>
<label class="btn btn-dark" style="margin-left: auto;">
<input type="radio" name="options" id="contact" autocomplete="off" onclick="window.open('mailto:[email protected]', '_blank')">
<i class="fa fa-envelope"></i> Contact Support
</label>
</div>

<div>
<details class="abstract">
<summary>Script Log (Debug)</summary>
<div class="highlight" style="overflow: auto; max-width: 100%;">
<pre><span></span>
<iframe src="<%= "#{fs_base_url}#{staged_root}/output.log" %>" width="100%" height="300" frameBorder="0" style="border: none;"></iframe>
</pre>
</div>
</details>
</div>

</body>
97 changes: 93 additions & 4 deletions bin/deploy-data/ood/form.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,116 @@
/**
* Decode the audio_path to replace '%20' with space
*/

// File path in OOD has a prefix
const OOD_PREFIX_PATH = "/pun/sys/dashboard/files/fs/";

/**
 * Copy the files currently selected in the path selector table into the
 * audio path form field.
 *
 * Every selected row exposes its location through the `api-url` data
 * attribute. The OOD file-browser prefix is removed from that value and the
 * rest is URI-decoded (e.g. '%20' becomes a space). The resulting paths are
 * written to the field as a single string separated by ', '. Rows without an
 * `api-url` value are skipped; with no selection the field is set to ''.
 */
function decode_audio_path() {
  // Look the field up once and reuse it for the final write.
  const pathField = $("#batch_connect_session_context_audio_path");
  const selectedFiles = [];

  // Find all rows with the 'selected' class
  $("#batch_connect_session_context_audio_path_path_selector_table tr.selected").each((_, row) => {
    const apiUrl = $(row).data("api-url");
    if (!apiUrl) {
      return;
    }
    // Remove the OOD prefix, then undo the URL encoding of the remainder.
    selectedFiles.push(decodeURIComponent(apiUrl.replace(OOD_PREFIX_PATH, "")));
  });

  pathField.val(selectedFiles.join(", "));
}


/**
 * Show or hide the confidentiality notice of the "send attachments" option.
 *
 * When `isChecked` is truthy the form group of the checkbox gets a blue
 * border and a warning paragraph (`#confidential-warning`) is appended to it.
 * Otherwise the warning is removed and the inline styles are reset.
 *
 * The function is idempotent: an existing warning is always removed first,
 * so calling it repeatedly with the same state never stacks several
 * elements with the same id.
 *
 * @param {boolean} isChecked - Current state of the attachments checkbox.
 */
function toggle_data_warning(isChecked) {
  // Select the label associated with the checkbox
  const label = $("label[for='batch_connect_session_context_send_attachments']");
  const warningMessage = `
    <div id="confidential-warning" style="color: blue; margin-top: 5px;">
      We recommend this only if your audio files do not include any confidential data.
    </div>
  `;

  // Start from a clean state in both branches.
  $("#confidential-warning").remove();

  if (isChecked) {
    // Add a border box around the form group
    label.closest(".form-group").css({
      border: "1px solid blue",
      padding: "2px",
    });
    $("#batch_connect_session_context_send_attachments")
      .closest(".form-group")
      .append(warningMessage);
  } else {
    // Reset the label color and border to its default
    label.css("color", "");
    label.closest(".form-group").css({
      border: "",
      padding: "",
    });
  }
}


audio_path.val(decodeURIComponent(audio_path.val()))
/**
 * Submit-time guard for the audio path field.
 *
 * Cancels the event and alerts the user when the field has no usable value.
 * A value made only of whitespace is treated as empty as well, because it
 * cannot name a file or folder.
 *
 * @param {Event} event - The event that triggered the validation; its
 *   default action is prevented when the field is empty.
 */
function validate_AudioPath(event) {
  const rawValue = $("#batch_connect_session_context_audio_path").val();
  // val() may yield undefined/null when the field is missing.
  const audioPath = rawValue == null ? "" : String(rawValue).trim();
  if (audioPath === "") {
    event.preventDefault();
    alert("The audio path field cannot be empty.");
  }
}


/**
 * Show or hide the container of a form element.
 *
 * The direct parent of the element matched by `form_id` is shown when
 * `show` is exactly `true`; any other value (including the string 'false')
 * hides it.
 *
 * @param {string} form_id - jQuery selector of the form element.
 * @param {boolean} show - `true` to show the parent, anything else to hide it.
 */
function toggle_visibilty_of_form_group(form_id, show) {
  const container = $(form_id).parent();
  if (show === true) {
    container.show();
    return;
  }
  container.hide();
}


/**
* Sets the event handler for file selector button.
* Triggering the handler based on the field change doesn't work
* as the field doesn't get the focus.
*/
function set_audio_path_handler() {
/**
 * Attach every handler the form needs:
 *  - the file selector button refreshes the audio path field,
 *  - the attachments checkbox toggles the confidentiality warning,
 *  - the submit button validates the audio path,
 *  - the advanced settings checkbox shows or hides the model selector.
 */
function add_event_handlers() {
  $("#batch_connect_session_context_audio_path_path_selector_button").click(decode_audio_path);

  const attachmentsBox = $("#batch_connect_session_context_send_attachments");
  attachmentsBox.change(() => {
    const wantsAttachments = attachmentsBox.is(":checked");
    toggle_data_warning(wantsAttachments);
  });

  $("input[type='submit'][name='commit']").click(validate_AudioPath);

  const advancedBox = $("#batch_connect_session_context_advance_options");
  advancedBox.change(() => {
    const showAdvanced = advancedBox.is(":checked");
    toggle_visibilty_of_form_group("#batch_connect_session_context_model_selector", showAdvanced);
  });
}


/**
 * Install the event handlers and set the initial visibility of the form
 * once the document is ready.
 */
$(document).ready(function () {
  // NOTE(review): set_audio_path_handler looks like the earlier name of
  // add_event_handlers — confirm that both calls are meant to stay.
  set_audio_path_handler();
  add_event_handlers();

  // Hide the advanced settings at the beginning. Pass a real boolean: the
  // string 'false' is truthy and only hid the group because of how the
  // callee compared it.
  toggle_visibilty_of_form_group("#batch_connect_session_context_model_selector", false);
});
25 changes: 24 additions & 1 deletion bin/deploy-data/ood/form.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ cacheable: false
form:
- audio_path
- email_field
- send_attachments
- language_field
- advance_options
- model_selector


attributes:
audio_path:
widget: "path_selector"
Expand All @@ -20,6 +21,7 @@ attributes:
help: |
Select the file for transcription OR a folder containing multiple audio files.
language_field:
label: Language
widget: select
Expand All @@ -39,6 +41,27 @@ attributes:
Only works with aalto email.
You will get an email when the transcription is done.
send_attachments:
label: "Receive transcribed files as email attachments?"
display: true
widget: check_box
checked_value: True
unchecked_value: False
help: |
Select this option to receive the transcribed files by email.
advance_options:
label: "Advance Settings"
display: false
widget: check_box
checked_value: true
unchecked_value: false
help: |
Show advance settings.
model_selector:
label: Model
widget: select
Expand Down
4 changes: 4 additions & 0 deletions bin/deploy-data/ood/submit.yml.erb
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
---
batch_connect:
template: "basic"
conn_file: "connection.yml"
conn_params:
- audio_path
script_wrapper: |
export SPEECH2TEXT_LANGUAGE=<%= language_field %>
export SPEECH2TEXT_EMAIL=<%= email_field %>
export audio_path="<%= audio_path %>"
export SPEECH2TEXT_WHISPER_MODEL=<%= model_selector %>
export SPEECH2TEXT_EMAIL_ATTACHMENTS=<%= send_attachments %>
export SPEECH2TEXT_ONDEMAND=True
%s

Expand Down
145 changes: 145 additions & 0 deletions src/email_notification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication

from pathlib import Path, PosixPath

AALTO_SMTP_SERVER = 'smtp.aalto.fi'
RSE_EMAIL = '[email protected]'

def sendemail(to: str,
              file_name: PosixPath,
              file_path: str,
              subject: str,
              sender: str,
              send_attachments: bool,
              job_id: int = None):
    """Send the HTML notification email for one transcription job.

    A falsy ``job_id`` means the job succeeded: the mail links to the results
    folder and, when ``send_attachments`` is true, carries the result files.
    A truthy ``job_id`` means the job failed: the mail links to the log
    folder and, when ``send_attachments`` is true, carries the log files.

    Args:
        to: Recipient address.
        file_name: Name of the audio file the job was run for.
        file_path: Folder used for the OnDemand link and for locating the
            files to attach.
        subject: Subject line of the email.
        sender: Address used in the ``From`` header.
        send_attachments: Whether files are attached to the email.
        job_id: Identifier of the failed job; leave unset for a successful job.

    Connection and SMTP errors are printed instead of raised, so a mail
    server problem does not crash the caller. Errors raised while reading
    the files to attach are not handled here.
    """
    # 'mixed' is the multipart subtype for a body followed by attachments;
    # 'alternative' would declare every part to be a rendition of the same
    # content.
    msg = MIMEMultipart('mixed')

    msg['Subject'] = subject
    msg['From'] = sender
    msg['To'] = to

    attachments: list[MIMEApplication] = []

    # Job successful
    if not job_id:
        attachment_note = ""
        if send_attachments:
            attachment_note = "\nTranscripted results are also attached to this email."
            attachments = get_result_files(file_name, file_path)

        body = f"""
<html>
  <body>
    <p>Hi,</p>
    <p>Your transcription job for file '{file_name}' is now completed.</p>
    <p>Transcripted files are available inside the <a href="{get_ood_url(file_path)}">results folder</a>.{attachment_note}</p>
    <p>If you have any questions or feedback, please reply to this email or visit our <a href="https://scicomp.aalto.fi/help/garage/">daily garage</a>, every day at 13:00 EET.</p>
    <p>Best,</p>
    <p>Aalto Scientific Computing</p>
  </body>
</html>
"""

    # Job failed
    else:
        # Only promise attached logs when they are really going to be attached.
        attachment_note = ""
        if send_attachments:
            attachment_note = " and are also attached to this email"
            attachments = get_log_files(file_name, file_path, job_id)

        body = f"""
<html>
  <body>
    <p>Hi,</p>
    <p>Your transcription job #{job_id} for file '{file_name}' has failed :(</p>
    <p>Log files are available inside the <a href="{get_ood_url(file_path)}">log folder</a>{attachment_note}.</p>
    <p>Please reply to this email so our team can investigate the issue or visit our <a href="https://scicomp.aalto.fi/help/garage/">daily garage</a>, every day at 13:00 EET.</p>
    <p>Best,</p>
    <p>Aalto Scientific Computing</p>
  </body>
</html>
"""

    msg.attach(MIMEText(body, 'html'))

    for attachment in attachments:
        msg.attach(attachment)

    try:
        # The context manager closes the connection itself, and nothing is
        # touched when the connection could not be opened in the first place.
        with smtplib.SMTP(AALTO_SMTP_SERVER) as smtp:
            smtp.send_message(msg)
    except (smtplib.SMTPException, OSError) as e:
        # OSError covers refused connections, DNS failures and timeouts.
        print(f"Failed to send email. Error {e}")


def get_result_files(file_name: PosixPath, file_path:str):
    """Build the email attachments for the transcription results.

    Reads ``<stem>.txt`` and ``<stem>.csv`` from ``file_path``, where
    ``<stem>`` is ``file_name`` without its last suffix, and wraps each file
    in a ``MIMEApplication`` part. The attachments are named after the full
    ``file_name`` followed by the result extension.

    Returns:
        A list with the text part first and the CSV part second.
    """
    stem = Path(file_name).stem
    parts = []

    for extension in ("txt", "csv"):
        attachment_name = f"{file_name}.{extension}"
        with open(f"{file_path}/{stem}.{extension}", 'rb') as handle:
            part = MIMEApplication(handle.read(), Name=attachment_name)
        part['Content-Disposition'] = f'attachment; filename="{attachment_name}"'
        parts.append(part)

    return parts

def get_ood_url(file_path: str):
    """Return the OnDemand file-browser URL for a filesystem path.

    The path is percent-encoded (``/`` is kept) so that spaces, ``#``, ``?``,
    quotes and non-ASCII characters in folder names yield a valid link
    instead of breaking the URL or the HTML attribute it is embedded in.

    Args:
        file_path: Path on the cluster filesystem, e.g. ``/scratch/x/results``.

    Returns:
        The URL as a string.
    """
    from urllib.parse import quote

    OOD_BASE_URL = "https://ondemand.triton.aalto.fi"
    OOD_DATAROOT = "/pun/sys/dashboard/files/fs"

    return OOD_BASE_URL + OOD_DATAROOT + quote(str(file_path))


def get_log_files(file_name: PosixPath, file_path:str, job_id: int):
    """Build the email attachments for the logs of a job.

    Reads ``speech2text_<file_name>_<job_id>.err`` and the matching ``.out``
    file from ``file_path`` and wraps each one in a ``MIMEApplication`` part
    named ``<file_name>_<job_id>.<extension>``.

    Returns:
        A list with the ``.err`` part first and the ``.out`` part second.
    """
    parts = []

    for extension in ("err", "out"):
        attachment_name = f"{file_name}_{job_id}.{extension}"
        with open(f"{file_path}/speech2text_{attachment_name}", 'rb') as handle:
            part = MIMEApplication(handle.read(), Name=attachment_name)
        part['Content-Disposition'] = f'attachment; filename="{attachment_name}"'
        parts.append(part)

    return parts


def _str_to_bool(value: str) -> bool:
    """Convert a command-line flag value such as 'True' or 'false' to a bool.

    ``type=bool`` cannot be used with argparse for this: ``bool('False')`` is
    ``True`` because every non-empty string is truthy.
    """
    import argparse

    normalized = value.strip().lower()
    if normalized in ('true', '1', 'yes', 'y', 'on'):
        return True
    if normalized in ('false', '0', 'no', 'n', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def main():
    """Parse the command line and send the job notification email.

    ``--to``, ``--file_name``, ``--file_path`` and ``--email_subject`` are
    required. ``--sender`` defaults to ``RSE_EMAIL``, ``--attachment``
    defaults to ``False`` and ``--job_id`` defaults to ``None``.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Send email notification for job completion.')
    parser.add_argument('--to', type=str, required=True, help='Email recipient.')
    parser.add_argument('--file_name', type=str, required=True, help='The audio file name to include in the email.')
    parser.add_argument('--file_path', type=str, required=True, help='The file path for creating ondemand url to the result folder.')
    parser.add_argument('--email_subject', type=str, required=True, help='Email subject')
    parser.add_argument('--sender', type=str, default=RSE_EMAIL, help='The sender email address.')
    # Parsed explicitly so that "--attachment False" really disables attachments.
    parser.add_argument('--attachment', type=_str_to_bool, default=False, help='Send results via email.')
    parser.add_argument('--job_id', type=str, required=False, help='The job ID to include in the email.')
    args = parser.parse_args()

    sendemail(args.to, args.file_name, args.file_path, args.email_subject, args.sender, args.attachment, args.job_id)

if __name__ == '__main__':
    main()
Loading

0 comments on commit 54a11be

Please sign in to comment.