From 940faf43c2a2d4176ef64e765b86791507cb5b5e Mon Sep 17 00:00:00 2001 From: "andrey.zubkov@evraz.com" Date: Wed, 18 Dec 2024 14:03:16 +0700 Subject: [PATCH] Added gradio interface (and web-client.Dockerfile) for web-UI interaction with API, plus docker-compose.yml --- Dockerfile | 81 ++++++++++++++++++------------ docker-compose.yml | 44 ++++++++++++++++ server_ru.py | 54 +++++++++++++------- web_client.Dockerfile | 8 +++ web_client.py | 113 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 250 insertions(+), 50 deletions(-) create mode 100644 docker-compose.yml create mode 100644 web_client.Dockerfile create mode 100644 web_client.py diff --git a/Dockerfile b/Dockerfile index 89847be..3378a33 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,39 +4,52 @@ MAINTAINER Ivan Bondarenko ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime -RUN apt-get update - -RUN apt-get install -y apt-utils && \ - apt-get install -y gcc && \ - apt-get install -y make && \ - apt-get install -y autoconf && \ - apt-get install -y automake && \ - apt-get install -y apt-transport-https && \ - apt-get install -y build-essential && \ - apt-get install -y git g++ autoconf-archive libtool && \ - apt-get install -y python3-setuptools python3-dev && \ - apt-get install -y cmake-data && \ - apt-get install -y vim && \ - apt-get install -y wget && \ - apt-get install -y libbz2-dev && \ - apt-get install -y ffmpeg && \ - apt-get install -y tar zip unzip && \ - apt-get install -y zlib1g zlib1g-dev lzma liblzma-dev && \ - apt-get install -y libboost-all-dev +# Установка необходимых пакетов и очистка кэша +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + apt-utils \ + gcc \ + make \ + autoconf \ + automake \ + apt-transport-https \ + build-essential \ + git \ + g++ \ + autoconf-archive \ + libtool \ + python3-setuptools \ + python3-dev \ + cmake-data \ + vim \ + wget \ + libbz2-dev \ + ffmpeg \ + tar \ + zip \ + unzip \ + zlib1g \ + zlib1g-dev \ + lzma \ + 
liblzma-dev \ + libboost-all-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* ENV NVIDIA_VISIBLE_DEVICES all ENV NVIDIA_DRIVER_CAPABILITIES compute,utility ENV NVIDIA_REQUIRE_CUDA "cuda>=11.0" -RUN python3 --version -RUN pip3 --version +# Проверка версий Python и pip +RUN python3 --version && pip3 --version -RUN mkdir /usr/src/pisets -RUN mkdir /usr/src/huggingface_cached +# Создание необходимых директорий +RUN mkdir -p /usr/src/pisets /usr/src/huggingface_cached -COPY ./server_ru.py /usr/src/pisets/server_ru.py -COPY ./download_models.py /usr/src/pisets/download_models.py -COPY ./requirements.txt /usr/src/pisets/requirements.txt +# Копирование файлов +COPY ./server_ru.py /usr/src/pisets/ +COPY ./download_models.py /usr/src/pisets/ +COPY ./requirements.txt /usr/src/pisets/ COPY ./asr/ /usr/src/pisets/asr/ COPY ./utils/ /usr/src/pisets/utils/ COPY ./vad/ /usr/src/pisets/vad/ @@ -44,13 +57,19 @@ COPY ./wav_io/ /usr/src/pisets/wav_io/ WORKDIR /usr/src/pisets -RUN python3 -m pip install --upgrade pip -RUN python3 -m pip install -r requirements.txt +# Установка зависимостей Python и очистка кэша pip +RUN python3 -m pip install --upgrade pip && \ + python3 -m pip install -r requirements.txt && \ + python3 -m pip cache purge -RUN export HF_HOME=/usr/src/huggingface_cached -RUN export PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128 +# Установка переменных окружения +ENV HF_HOME=/usr/src/huggingface_cached +ENV PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128 + +# Проверка работы библиотеки transformers RUN python -c "from transformers import pipeline; print(pipeline('sentiment-analysis', model='philschmid/tiny-bert-sst2-distilled')('we love you'))" +# Запуск скрипта для загрузки моделей RUN python3 download_models.py ru -ENTRYPOINT ["python3", "server_ru.py"] +ENTRYPOINT ["python3", "server_ru.py"] \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 
index 0000000..f6215ec
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,44 @@
+services:
+  model_service:
+    image: bond005/pisets:0.3
+    build:
+      context: .
+      dockerfile: Dockerfile
+    environment:
+      - HF_HOME=/usr/src/huggingface_cached
+      - PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128
+    volumes:
+      - ./data:/usr/src/huggingface_cached # Example volume mount for the Hugging Face cache
+    networks:
+      - internal_network # Attach to the internal network
+    healthcheck:
+      test: [ "CMD", "curl", "-f", "http://localhost:80/ready" ] # Probe /ready; server_ru.py listens on PISETS_API_PORT (default 80)
+      interval: 30s # Interval between checks
+      timeout: 10s # Timeout for each check
+      retries: 3 # Failed attempts before the container is considered unhealthy
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1 # alternatively, use `count: all` for all GPUs
+              capabilities: [ gpu ]
+
+
+  web_client:
+    image: bond005/pisets:web_client-0.2
+    build:
+      context: .
+ dockerfile: web_client.Dockerfile + environment: + - PISETS_API_HOST=model_service + - PISETS_API_PORT=80 + ports: + - "80:80" # Порт, на котором будет доступен веб-клиент + networks: + - internal_network # Подключение к внутренней сети + depends_on: + - model_service + +networks: + internal_network: # Определение внутренней сети \ No newline at end of file diff --git a/server_ru.py b/server_ru.py index 5cea57e..a6af22b 100644 --- a/server_ru.py +++ b/server_ru.py @@ -1,21 +1,20 @@ +import asyncio import logging import os import tempfile +import uuid -from flask import Flask, request, jsonify, send_file import numpy as np -import uuid -import asyncio +from docx import Document +from flask import Flask, request, jsonify, send_file -from wav_io.wav_io import transform_to_wavpcm, load_sound -from wav_io.wav_io import TARGET_SAMPLING_FREQUENCY -from asr.asr import initialize_model_for_speech_recognition from asr.asr import initialize_model_for_speech_classification +from asr.asr import initialize_model_for_speech_recognition from asr.asr import initialize_model_for_speech_segmentation from asr.asr import transcribe as transcribe_speech from utils.utils import time_to_str - -from docx import Document +from wav_io.wav_io import TARGET_SAMPLING_FREQUENCY +from wav_io.wav_io import transform_to_wavpcm, load_sound speech_to_srt_logger = logging.getLogger(__name__) logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', @@ -61,6 +60,11 @@ def ready(): return 'OK' +@app.route('/statuses', methods=['GET']) +async def statuses(): + return jsonify(task_status) + + @app.route('/transcribe', methods=['POST']) async def transcribe(): task_id = str(uuid.uuid4()) @@ -74,9 +78,12 @@ async def transcribe(): file = request.files['audio'] if file.filename == '': speech_to_srt_logger.error('400: No audio file provided for upload') - task_status[task_id] = jsonify( - {"status": "Error", "status_code": 400, "message": "No audio file provided for upload"}) - return 
task_status[task_id] + task_status[task_id] = { + "status": "Error", + "status_code": 400, + "message": "No audio file provided for upload" + } + return jsonify(task_status[task_id]) point_pos = file.filename.rfind('.') if point_pos > 0: @@ -85,9 +92,11 @@ async def transcribe(): src_file_ext = '' if len(src_file_ext) == 0: speech_to_srt_logger.error('400: Unknown type of the file provided for upload') - task_status[task_id] = jsonify( - {"status": "Error", "status_code": 400, "message": "Unknown type of the file provided for upload"}) - return task_status[task_id] + task_status[task_id] = { + "status": "Error", + "status_code": 400, + "message": "Unknown type of the file provided for upload"} + return jsonify(task_status[task_id]) tmp_audio_name = '' tmp_wav_name = '' err_msg = '' @@ -141,12 +150,12 @@ async def transcribe(): speech_to_srt_logger.info(f'The sound "{file.filename}" is empty.') else: speech_to_srt_logger.error(task_id) - await asyncio.create_task(create_result_file(input_sound, segmenter, vad, asr, task_id)) + await asyncio.create_task(create_result_file(input_sound, segmenter, vad, asr, task_id, filename=file.filename)) return jsonify({'task_id': task_id}) -async def create_result_file(input_sound, segmenter, vad, asr, task_id): +async def create_result_file(input_sound, segmenter, vad, asr, task_id, filename=None): texts_with_timestamps = transcribe_speech(input_sound, segmenter, vad, asr, MIN_FRAME_SIZE, MAX_FRAME_SIZE) output_filename = task_id + '.docx' doc = Document() @@ -158,7 +167,12 @@ async def create_result_file(input_sound, segmenter, vad, asr, task_id): result_path = os.path.join(RESULTS_FOLDER, output_filename) doc.save(result_path) - task_status[task_id] = jsonify({'status': 'Ready', 'status_code': 200, 'result_path': result_path}) + task_status[task_id] = { + 'status': 'Ready', + 'status_code': 200, + 'result_path': result_path, + 'source_filename': filename + } @app.route('/status/', methods=['GET']) @@ -166,7 +180,7 @@ def 
get_status(task_id):
     status = task_status.get(task_id, None)
     if status is None:
         return jsonify({'error': 'Task not found'}), 404
-    return status
+    return jsonify(status)
 
 
 @app.route('/download_result/', methods=['GET'])
@@ -179,4 +193,6 @@ def download_result(task_id):
 
 
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=8040)
+    host = os.getenv("PISETS_API_HOST", "0.0.0.0")
+    port = int(os.getenv("PISETS_API_PORT", "80"))
+    app.run(host=host, port=port)
diff --git a/web_client.Dockerfile b/web_client.Dockerfile
new file mode 100644
index 0000000..7938e30
--- /dev/null
+++ b/web_client.Dockerfile
@@ -0,0 +1,8 @@
+FROM python:3.10-alpine
+
+RUN pip install --no-cache-dir gradio==5.9.1 requests==2.32.3 pandas==2.2.3
+
+COPY web_client.py /app/web_client.py
+
+EXPOSE 80
+CMD ["python3.10", "/app/web_client.py"]
\ No newline at end of file
diff --git a/web_client.py b/web_client.py
new file mode 100644
index 0000000..e5cee29
--- /dev/null
+++ b/web_client.py
@@ -0,0 +1,113 @@
+import os
+import tempfile
+from pathlib import Path
+
+import gradio as gr
+import pandas as pd
+import requests
+
+API_URL = os.getenv("PISETS_API_URL") or "http://{}:{}".format(  # explicit URL wins; else host/port from compose
+    os.getenv("PISETS_API_HOST", "localhost"), os.getenv("PISETS_API_PORT", "8040"))
+status_columns = ["ID задачи", "Имя файла", "Статус", "Скачать"]
+
+
+def upload_audio(audio_file_path):
+    if audio_file_path is None:
+        return "Please upload an audio file."
+    audio_file_path = Path(audio_file_path)
+
+    # Send the audio file for processing; `with` closes the handle once the upload finishes
+    with open(audio_file_path, 'rb') as audio_file:
+        response = requests.post(f"{API_URL}/transcribe", files={'audio': audio_file})
+
+    if response.status_code == 200:
+        return response.json()['task_id']
+    else:
+        return f"Error: {response.text}"
+
+
+def get_all_statuses():
+    response = requests.get(f"{API_URL}/statuses")
+
+    if response.status_code == 200:
+        statuses = response.json()
+        # Build a list of dicts, one per task, for the DataFrame
+        tasks_list = []
+        for task_id, task_info in statuses.items():
+            if not isinstance(task_info, dict):
+                task_info = {}  # decoded JSON has no .json(); fall back to the 'Unknown' defaults below
+            # Extract the status from the API response
+            status = task_info.get('status', 'Unknown')
+            source_filename = task_info.get('source_filename', 'Unknown')
+
+            tasks_list.append({
+                status_columns[0]: task_id,
+                status_columns[1]: source_filename,
+                status_columns[2]: status,
+                status_columns[3]: "⬇️" if status == "Ready" else "-"
+            })
+
+        # Create the DataFrame
+        df = pd.DataFrame(tasks_list)
+        return df
+    else:
+        return pd.DataFrame(columns=status_columns)
+
+
+def download_result(task_id, filename):
+    if not task_id:
+        return None
+
+    response = requests.get(f"{API_URL}/download_result/{task_id}")
+
+    if response.status_code == 200:
+        # Save the result to a file inside a fresh temporary directory
+        directory = tempfile.mkdtemp(task_id)
+        output_path = Path(directory) / f"{filename}.docx"
+        with open(output_path, 'wb') as f:
+            f.write(response.content)
+        return_path = str(output_path.absolute().resolve())
+        return return_path
+    else:
+        return None
+
+
+def handle_download_click(evt: gr.SelectData, statuses_df):
+    # Read task_id and the file name from the selected row
+    task_id = statuses_df.iloc[evt.index[0]][status_columns[0]]
+    filename = statuses_df.iloc[evt.index[0]][status_columns[1]]
+    return download_result(task_id, filename)
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Сервис автоматического распознавания речи")
+
+    with gr.Row():
+        with gr.Column():
+            audio_input = 
gr.Audio(type="filepath", label="Загрузка файла") + upload_button = gr.Button("Отправить на обработку") + + with gr.Row(): + with gr.Column(): + status_table = gr.Dataframe( + headers=status_columns, + interactive=False, + ) + download_output = gr.File(label="Скачанные результаты") + + # События + upload_button.click( + fn=upload_audio, + inputs=audio_input, + outputs=None + ) + + status_table.select( + fn=handle_download_click, + inputs=status_table, + outputs=download_output + ) + + status_table.attach_load_event(get_all_statuses, 1, None, ) + +demo.launch(server_name="0.0.0.0", server_port=80, ssl_verify=False, debug=False)