Added gradio interface #11

Open · wants to merge 1 commit into base: main
81 changes: 50 additions & 31 deletions Dockerfile
@@ -4,53 +4,72 @@ MAINTAINER Ivan Bondarenko <[email protected]>
ENV TZ=UTC
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime

RUN apt-get update

RUN apt-get install -y apt-utils && \
apt-get install -y gcc && \
apt-get install -y make && \
apt-get install -y autoconf && \
apt-get install -y automake && \
apt-get install -y apt-transport-https && \
apt-get install -y build-essential && \
apt-get install -y git g++ autoconf-archive libtool && \
apt-get install -y python3-setuptools python3-dev && \
apt-get install -y cmake-data && \
apt-get install -y vim && \
apt-get install -y wget && \
apt-get install -y libbz2-dev && \
apt-get install -y ffmpeg && \
apt-get install -y tar zip unzip && \
apt-get install -y zlib1g zlib1g-dev lzma liblzma-dev && \
apt-get install -y libboost-all-dev
# Install the required packages and clean up the apt cache
RUN apt-get update && \
apt-get install -y --no-install-recommends \
apt-utils \
gcc \
make \
autoconf \
automake \
apt-transport-https \
build-essential \
git \
g++ \
autoconf-archive \
libtool \
python3-setuptools \
python3-dev \
cmake-data \
vim \
wget \
libbz2-dev \
ffmpeg \
tar \
zip \
unzip \
zlib1g \
zlib1g-dev \
lzma \
liblzma-dev \
libboost-all-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=11.0"

RUN python3 --version
RUN pip3 --version
# Check the Python and pip versions
RUN python3 --version && pip3 --version

RUN mkdir /usr/src/pisets
RUN mkdir /usr/src/huggingface_cached
# Create the required directories
RUN mkdir -p /usr/src/pisets /usr/src/huggingface_cached

COPY ./server_ru.py /usr/src/pisets/server_ru.py
COPY ./download_models.py /usr/src/pisets/download_models.py
COPY ./requirements.txt /usr/src/pisets/requirements.txt
# Copy the application files
COPY ./server_ru.py /usr/src/pisets/
COPY ./download_models.py /usr/src/pisets/
COPY ./requirements.txt /usr/src/pisets/
COPY ./asr/ /usr/src/pisets/asr/
COPY ./utils/ /usr/src/pisets/utils/
COPY ./vad/ /usr/src/pisets/vad/
COPY ./wav_io/ /usr/src/pisets/wav_io/

WORKDIR /usr/src/pisets

RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install -r requirements.txt
# Install the Python dependencies and purge the pip cache
RUN python3 -m pip install --upgrade pip && \
python3 -m pip install -r requirements.txt && \
python3 -m pip cache purge

RUN export HF_HOME=/usr/src/huggingface_cached
RUN export PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128
# Set the environment variables
ENV HF_HOME=/usr/src/huggingface_cached
ENV PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128

# Verify that the transformers library works (python3, for consistency with the other RUN steps)
RUN python3 -c "from transformers import pipeline; print(pipeline('sentiment-analysis', model='philschmid/tiny-bert-sst2-distilled')('we love you'))"

# Run the model download script
RUN python3 download_models.py ru

ENTRYPOINT ["python3", "server_ru.py"]
44 changes: 44 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,44 @@
services:
model_service:
image: bond005/pisets:0.3
build:
context: .
dockerfile: Dockerfile
environment:
- HF_HOME=/usr/src/huggingface_cached
- PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128
volumes:
      - ./data:/usr/src/huggingface_cached  # Example volume mount for the Hugging Face cache
networks:
      - internal_network  # Attach to the internal network
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:80/ready" ]  # Probe the /ready endpoint (the server listens on port 80 by default, not 5000)
      interval: 30s  # Interval between checks
      timeout: 10s  # Timeout for each check
      retries: 3  # Number of retries before the container is considered unhealthy
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1 # alternatively, use `count: all` for all GPUs
capabilities: [ gpu ]


web_client:
image: bond005/pisets:web_client-0.2
build:
context: .
dockerfile: web_client.Dockerfile
environment:
- PISETS_API_HOST=model_service
- PISETS_API_PORT=80
ports:
- "80:80" # Порт, на котором будет доступен веб-клиент
networks:
      - internal_network  # Attach to the internal network
depends_on:
- model_service

networks:
  internal_network:  # Internal network definition
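
For reference, a readiness probe equivalent to this healthcheck can also be written in Python rather than curl — a minimal sketch, assuming the model_service port is reachable on localhost:80 (the /ready endpoint comes from server_ru.py):

import sys

import requests


def wait_until_ready(base_url, retries=3, timeout=10.0):
    # Mirrors the compose healthcheck: GET /ready, 3 retries, 10-second timeout each.
    for _ in range(retries):
        try:
            if requests.get(f"{base_url}/ready", timeout=timeout).status_code == 200:
                return True
        except requests.RequestException:
            pass
    return False


if __name__ == '__main__':
    sys.exit(0 if wait_until_ready("http://localhost:80") else 1)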
54 changes: 35 additions & 19 deletions server_ru.py
@@ -1,21 +1,20 @@
import asyncio
import logging
import os
import tempfile
import uuid

from flask import Flask, request, jsonify, send_file
import numpy as np
import uuid
import asyncio
from docx import Document
from flask import Flask, request, jsonify, send_file

from wav_io.wav_io import transform_to_wavpcm, load_sound
from wav_io.wav_io import TARGET_SAMPLING_FREQUENCY
from asr.asr import initialize_model_for_speech_recognition
from asr.asr import initialize_model_for_speech_classification
from asr.asr import initialize_model_for_speech_recognition
from asr.asr import initialize_model_for_speech_segmentation
from asr.asr import transcribe as transcribe_speech
from utils.utils import time_to_str

from docx import Document
from wav_io.wav_io import TARGET_SAMPLING_FREQUENCY
from wav_io.wav_io import transform_to_wavpcm, load_sound

speech_to_srt_logger = logging.getLogger(__name__)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
@@ -61,6 +60,11 @@ def ready():
return 'OK'


@app.route('/statuses', methods=['GET'])
async def statuses():
return jsonify(task_status)


@app.route('/transcribe', methods=['POST'])
async def transcribe():
task_id = str(uuid.uuid4())
@@ -74,9 +78,12 @@ async def transcribe():
file = request.files['audio']
if file.filename == '':
speech_to_srt_logger.error('400: No audio file provided for upload')
task_status[task_id] = jsonify(
{"status": "Error", "status_code": 400, "message": "No audio file provided for upload"})
return task_status[task_id]
task_status[task_id] = {
"status": "Error",
"status_code": 400,
"message": "No audio file provided for upload"
}
return jsonify(task_status[task_id])

point_pos = file.filename.rfind('.')
if point_pos > 0:
@@ -85,9 +92,11 @@ async def transcribe():
src_file_ext = ''
if len(src_file_ext) == 0:
speech_to_srt_logger.error('400: Unknown type of the file provided for upload')
task_status[task_id] = jsonify(
{"status": "Error", "status_code": 400, "message": "Unknown type of the file provided for upload"})
return task_status[task_id]
task_status[task_id] = {
"status": "Error",
"status_code": 400,
"message": "Unknown type of the file provided for upload"}
return jsonify(task_status[task_id])
tmp_audio_name = ''
tmp_wav_name = ''
err_msg = ''
@@ -141,12 +150,12 @@ async def transcribe():
speech_to_srt_logger.info(f'The sound "{file.filename}" is empty.')
else:
speech_to_srt_logger.error(task_id)
await asyncio.create_task(create_result_file(input_sound, segmenter, vad, asr, task_id))
await asyncio.create_task(create_result_file(input_sound, segmenter, vad, asr, task_id, filename=file.filename))

return jsonify({'task_id': task_id})


async def create_result_file(input_sound, segmenter, vad, asr, task_id):
async def create_result_file(input_sound, segmenter, vad, asr, task_id, filename=None):
texts_with_timestamps = transcribe_speech(input_sound, segmenter, vad, asr, MIN_FRAME_SIZE, MAX_FRAME_SIZE)
output_filename = task_id + '.docx'
doc = Document()
@@ -158,15 +167,20 @@ async def create_result_file(input_sound, segmenter, vad, asr, task_id):
result_path = os.path.join(RESULTS_FOLDER, output_filename)
doc.save(result_path)

task_status[task_id] = jsonify({'status': 'Ready', 'status_code': 200, 'result_path': result_path})
task_status[task_id] = {
'status': 'Ready',
'status_code': 200,
'result_path': result_path,
'source_filename': filename
}


@app.route('/status/<task_id>', methods=['GET'])
def get_status(task_id):
status = task_status.get(task_id, None)
if status is None:
return jsonify({'error': 'Task not found'}), 404
return status
return jsonify(status)


@app.route('/download_result/<task_id>', methods=['GET'])
@@ -179,4 +193,6 @@ def download_result(task_id):


if __name__ == '__main__':
app.run(host='0.0.0.0', port=8040)
host = os.getenv("PISETS_API_HOST", "0.0.0.0")
port = int(os.getenv("PISETS_API_PORT", "80"))
app.run(host=host, port=port)
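
Taken together, the changed endpoints form an asynchronous task API: POST /transcribe returns a task_id immediately, /status/<task_id> (or /statuses for all tasks) reports progress, and /download_result/<task_id> serves the finished .docx. A minimal sketch of a client exercising this flow — the file name test.wav, the base URL, and the 5-second polling interval are assumptions for illustration:

import time

import requests

API_URL = "http://localhost:80"  # assumption: the server's default port from server_ru.py

# 1. Submit an audio file for transcription and receive a task_id.
with open("test.wav", "rb") as f:
    task_id = requests.post(f"{API_URL}/transcribe", files={"audio": f}).json()["task_id"]

# 2. Poll the task status until it is Ready (or an error is reported).
while True:
    status = requests.get(f"{API_URL}/status/{task_id}").json()
    if "error" in status or status.get("status") in ("Ready", "Error"):
        break
    time.sleep(5)

# 3. Download the resulting .docx document.
if status.get("status") == "Ready":
    result = requests.get(f"{API_URL}/download_result/{task_id}")
    with open(f"{task_id}.docx", "wb") as out:
        out.write(result.content)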
8 changes: 8 additions & 0 deletions web_client.Dockerfile
@@ -0,0 +1,8 @@
FROM python:3.10-alpine

RUN pip install --no-cache-dir gradio==5.9.1 requests==2.32.3 pandas==2.2.3

COPY web_client.py /app/web_client.py

EXPOSE 80
CMD ["python3.10", "/app/web_client.py"]
113 changes: 113 additions & 0 deletions web_client.py
@@ -0,0 +1,113 @@
import os
import tempfile
from pathlib import Path

import gradio as gr
import pandas as pd
import requests

# Build the API base URL from the PISETS_API_HOST / PISETS_API_PORT env vars set in docker-compose.yml
API_HOST = os.getenv("PISETS_API_HOST", "localhost")
API_PORT = os.getenv("PISETS_API_PORT", "8040")
API_URL = f"http://{API_HOST}:{API_PORT}"
status_columns = ["ID задачи", "Имя файла", "Статус", "Скачать"]


def upload_audio(audio_file_path):
if audio_file_path is None:
return "Please upload an audio file."
audio_file_path = Path(audio_file_path)

    # Send the audio file for processing (the context manager closes the file handle)
    with open(audio_file_path, 'rb') as f:
        response = requests.post(f"{API_URL}/transcribe", files={'audio': f})

if response.status_code == 200:
task_id = response.json()['task_id']
return task_id
else:
return f"Error: {response.text}"


def get_all_statuses():
response = requests.get(f"{API_URL}/statuses")

if response.status_code == 200:
statuses = response.json()
        # Build a list of dicts for the DataFrame
tasks_list = []
for task_id, task_info in statuses.items():
if not isinstance(task_info, dict):
task_info = task_info.json()
            # Extract the status from the API response
status = task_info.get('status', 'Unknown')
source_filename = task_info.get('source_filename', 'Unknown')

tasks_list.append({
status_columns[0]: task_id,
status_columns[1]: source_filename,
status_columns[2]: status,
status_columns[3]: "⬇️" if status == "Ready" else "-"
})

        # Build the DataFrame
df = pd.DataFrame(tasks_list)
return df
else:
return pd.DataFrame(columns=status_columns)


def download_result(task_id, filename):
if not task_id:
return None

response = requests.get(f"{API_URL}/download_result/{task_id}")

if response.status_code == 200:
        # Save the result to a temporary file
        directory = tempfile.mkdtemp(suffix=task_id)
output_path = Path(directory) / f"{filename}.docx"
with open(output_path, 'wb') as f:
f.write(response.content)
return_path = str(output_path.absolute().resolve())
return return_path
else:
return None


def handle_download_click(evt: gr.SelectData, statuses_df):
    # Get the task_id and file name from the selected row
task_id = statuses_df.iloc[evt.index[0]][status_columns[0]]
filename = statuses_df.iloc[evt.index[0]][status_columns[1]]
return download_result(task_id, filename)


with gr.Blocks() as demo:
gr.Markdown("# Сервис автоматического распознавания речи")

with gr.Row():
with gr.Column():
audio_input = gr.Audio(type="filepath", label="Загрузка файла")
upload_button = gr.Button("Отправить на обработку")

with gr.Row():
with gr.Column():
status_table = gr.Dataframe(
headers=status_columns,
interactive=False,
)
download_output = gr.File(label="Скачанные результаты")

    # Event handlers
upload_button.click(
fn=upload_audio,
inputs=audio_input,
outputs=None
)

status_table.select(
fn=handle_download_click,
inputs=status_table,
outputs=download_output
)

    # Refresh the status table every second
    status_table.attach_load_event(get_all_statuses, 1, None)

demo.launch(server_name="0.0.0.0", server_port=80, ssl_verify=False, debug=False)