Added gradio interface #11

Open · wants to merge 1 commit into base: main
81 changes: 50 additions & 31 deletions Dockerfile
@@ -4,53 +4,72 @@ MAINTAINER Ivan Bondarenko <[email protected]>
ENV TZ=UTC
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime

RUN apt-get update

RUN apt-get install -y apt-utils && \
apt-get install -y gcc && \
apt-get install -y make && \
apt-get install -y autoconf && \
apt-get install -y automake && \
apt-get install -y apt-transport-https && \
apt-get install -y build-essential && \
apt-get install -y git g++ autoconf-archive libtool && \
apt-get install -y python3-setuptools python3-dev && \
apt-get install -y cmake-data && \
apt-get install -y vim && \
apt-get install -y wget && \
apt-get install -y libbz2-dev && \
apt-get install -y ffmpeg && \
apt-get install -y tar zip unzip && \
apt-get install -y zlib1g zlib1g-dev lzma liblzma-dev && \
apt-get install -y libboost-all-dev
# Install the required packages and clean up the apt cache
RUN apt-get update && \
apt-get install -y --no-install-recommends \
apt-utils \
gcc \
make \
autoconf \
automake \
apt-transport-https \
build-essential \
git \
g++ \
autoconf-archive \
libtool \
python3-setuptools \
python3-dev \
cmake-data \
vim \
wget \
libbz2-dev \
ffmpeg \
tar \
zip \
unzip \
zlib1g \
zlib1g-dev \
lzma \
liblzma-dev \
libboost-all-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=11.0"

RUN python3 --version
RUN pip3 --version
# Check the Python and pip versions
RUN python3 --version && pip3 --version

RUN mkdir /usr/src/pisets
RUN mkdir /usr/src/huggingface_cached
# Create the required directories
RUN mkdir -p /usr/src/pisets /usr/src/huggingface_cached

COPY ./server_ru.py /usr/src/pisets/server_ru.py
COPY ./download_models.py /usr/src/pisets/download_models.py
COPY ./requirements.txt /usr/src/pisets/requirements.txt
# Copy the application files
COPY ./server_ru.py /usr/src/pisets/
COPY ./download_models.py /usr/src/pisets/
COPY ./requirements.txt /usr/src/pisets/
COPY ./asr/ /usr/src/pisets/asr/
COPY ./utils/ /usr/src/pisets/utils/
COPY ./vad/ /usr/src/pisets/vad/
COPY ./wav_io/ /usr/src/pisets/wav_io/

WORKDIR /usr/src/pisets

RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install -r requirements.txt
# Install the Python dependencies and purge the pip cache
RUN python3 -m pip install --upgrade pip && \
python3 -m pip install -r requirements.txt && \
python3 -m pip cache purge

RUN export HF_HOME=/usr/src/huggingface_cached
RUN export PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128
# Set the environment variables
ENV HF_HOME=/usr/src/huggingface_cached
ENV PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128

# Verify that the transformers library works (python3, for consistency with the other RUN steps)
RUN python3 -c "from transformers import pipeline; print(pipeline('sentiment-analysis', model='philschmid/tiny-bert-sst2-distilled')('we love you'))"

# Run the model download script
RUN python3 download_models.py ru

ENTRYPOINT ["python3", "server_ru.py"]
44 changes: 44 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,44 @@
services:
model_service:
image: bond005/pisets:0.3
build:
context: .
dockerfile: Dockerfile
environment:
- HF_HOME=/usr/src/huggingface_cached
- PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128
volumes:
      - ./data:/usr/src/huggingface_cached  # Example volume mount for the Hugging Face cache
networks:
      - internal_network  # Attach to the internal network
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:80/ready" ]  # Probe the /ready endpoint (the server listens on port 80 by default, not 5000)
      interval: 30s  # Interval between checks
      timeout: 10s  # Timeout for each check
      retries: 3  # Number of retries before the container is considered unhealthy
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1 # alternatively, use `count: all` for all GPUs
capabilities: [ gpu ]


web_client:
image: bond005/pisets:web_client-0.2
build:
context: .
dockerfile: web_client.Dockerfile
environment:
- PISETS_API_HOST=model_service
- PISETS_API_PORT=80
ports:
- "80:80" # Порт, на котором будет доступен веб-клиент
networks:
      - internal_network  # Attach to the internal network
depends_on:
- model_service

networks:
  internal_network:  # Internal network definition
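
For reference, a readiness probe equivalent to this healthcheck can also be written in Python rather than curl — a minimal sketch, assuming the model_service port is reachable on localhost:80 (the /ready endpoint comes from server_ru.py):

import sys

import requests


def wait_until_ready(base_url, retries=3, timeout=10.0):
    # Mirrors the compose healthcheck: GET /ready, 3 retries, 10-second timeout each.
    for _ in range(retries):
        try:
            if requests.get(f"{base_url}/ready", timeout=timeout).status_code == 200:
                return True
        except requests.RequestException:
            pass
    return False


if __name__ == '__main__':
    sys.exit(0 if wait_until_ready("http://localhost:80") else 1)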
54 changes: 35 additions & 19 deletions server_ru.py
@@ -1,21 +1,20 @@
import asyncio
import logging
import os
import tempfile
import uuid

from flask import Flask, request, jsonify, send_file
import numpy as np
import uuid
import asyncio
from docx import Document
from flask import Flask, request, jsonify, send_file

from wav_io.wav_io import transform_to_wavpcm, load_sound
from wav_io.wav_io import TARGET_SAMPLING_FREQUENCY
from asr.asr import initialize_model_for_speech_recognition
from asr.asr import initialize_model_for_speech_classification
from asr.asr import initialize_model_for_speech_recognition
from asr.asr import initialize_model_for_speech_segmentation
from asr.asr import transcribe as transcribe_speech
from utils.utils import time_to_str

from docx import Document
from wav_io.wav_io import TARGET_SAMPLING_FREQUENCY
from wav_io.wav_io import transform_to_wavpcm, load_sound

speech_to_srt_logger = logging.getLogger(__name__)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
@@ -61,6 +60,11 @@ def ready():
return 'OK'


@app.route('/statuses', methods=['GET'])
async def statuses():
return jsonify(task_status)


@app.route('/transcribe', methods=['POST'])
async def transcribe():
task_id = str(uuid.uuid4())
@@ -74,9 +78,12 @@ async def transcribe():
file = request.files['audio']
if file.filename == '':
speech_to_srt_logger.error('400: No audio file provided for upload')
task_status[task_id] = jsonify(
{"status": "Error", "status_code": 400, "message": "No audio file provided for upload"})
return task_status[task_id]
task_status[task_id] = {
"status": "Error",
"status_code": 400,
"message": "No audio file provided for upload"
}
return jsonify(task_status[task_id])

point_pos = file.filename.rfind('.')
if point_pos > 0:
@@ -85,9 +92,11 @@ async def transcribe():
src_file_ext = ''
if len(src_file_ext) == 0:
speech_to_srt_logger.error('400: Unknown type of the file provided for upload')
task_status[task_id] = jsonify(
{"status": "Error", "status_code": 400, "message": "Unknown type of the file provided for upload"})
return task_status[task_id]
task_status[task_id] = {
"status": "Error",
"status_code": 400,
"message": "Unknown type of the file provided for upload"}
return jsonify(task_status[task_id])
tmp_audio_name = ''
tmp_wav_name = ''
err_msg = ''
@@ -141,12 +150,12 @@ async def transcribe():
speech_to_srt_logger.info(f'The sound "{file.filename}" is empty.')
else:
speech_to_srt_logger.error(task_id)
await asyncio.create_task(create_result_file(input_sound, segmenter, vad, asr, task_id))
await asyncio.create_task(create_result_file(input_sound, segmenter, vad, asr, task_id, filename=file.filename))

return jsonify({'task_id': task_id})


async def create_result_file(input_sound, segmenter, vad, asr, task_id):
async def create_result_file(input_sound, segmenter, vad, asr, task_id, filename=None):
texts_with_timestamps = transcribe_speech(input_sound, segmenter, vad, asr, MIN_FRAME_SIZE, MAX_FRAME_SIZE)
output_filename = task_id + '.docx'
doc = Document()
@@ -158,15 +167,20 @@ async def create_result_file(input_sound, segmenter, vad, asr, task_id):
result_path = os.path.join(RESULTS_FOLDER, output_filename)
doc.save(result_path)

task_status[task_id] = jsonify({'status': 'Ready', 'status_code': 200, 'result_path': result_path})
task_status[task_id] = {
'status': 'Ready',
'status_code': 200,
'result_path': result_path,
'source_filename': filename
}


@app.route('/status/<task_id>', methods=['GET'])
def get_status(task_id):
status = task_status.get(task_id, None)
if status is None:
return jsonify({'error': 'Task not found'}), 404
return status
return jsonify(status)


@app.route('/download_result/<task_id>', methods=['GET'])
@@ -179,4 +193,6 @@ def download_result(task_id):


if __name__ == '__main__':
app.run(host='0.0.0.0', port=8040)
host = os.getenv("PISETS_API_HOST", "0.0.0.0")
port = int(os.getenv("PISETS_API_PORT", "80"))
app.run(host=host, port=port)
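
Taken together, the changed endpoints form an asynchronous task API: POST /transcribe returns a task_id immediately, /status/<task_id> (or /statuses for all tasks) reports progress, and /download_result/<task_id> serves the finished .docx. A minimal sketch of a client exercising this flow — the file name test.wav, the base URL, and the 5-second polling interval are assumptions for illustration:

import time

import requests

API_URL = "http://localhost:80"  # assumption: the server's default port from server_ru.py

# 1. Submit an audio file for transcription and receive a task_id.
with open("test.wav", "rb") as f:
    task_id = requests.post(f"{API_URL}/transcribe", files={"audio": f}).json()["task_id"]

# 2. Poll the task status until it is Ready (or an error is reported).
while True:
    status = requests.get(f"{API_URL}/status/{task_id}").json()
    if "error" in status or status.get("status") in ("Ready", "Error"):
        break
    time.sleep(5)

# 3. Download the resulting .docx document.
if status.get("status") == "Ready":
    result = requests.get(f"{API_URL}/download_result/{task_id}")
    with open(f"{task_id}.docx", "wb") as out:
        out.write(result.content)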
8 changes: 8 additions & 0 deletions web_client.Dockerfile
@@ -0,0 +1,8 @@
FROM python:3.10-alpine

RUN pip install --no-cache-dir gradio==5.9.1 requests==2.32.3 pandas==2.2.3

COPY web_client.py /app/web_client.py

EXPOSE 80
CMD ["python3.10", "/app/web_client.py"]
113 changes: 113 additions & 0 deletions web_client.py
@@ -0,0 +1,113 @@
import os
import tempfile
from pathlib import Path

import gradio as gr
import pandas as pd
import requests

# Build the API base URL from the PISETS_API_HOST / PISETS_API_PORT env vars set in docker-compose.yml
API_HOST = os.getenv("PISETS_API_HOST", "localhost")
API_PORT = os.getenv("PISETS_API_PORT", "8040")
API_URL = f"http://{API_HOST}:{API_PORT}"
status_columns = ["ID задачи", "Имя файла", "Статус", "Скачать"]


def upload_audio(audio_file_path):
if audio_file_path is None:
return "Please upload an audio file."
audio_file_path = Path(audio_file_path)

    # Send the audio file for processing (the context manager closes the file handle)
    with open(audio_file_path, 'rb') as f:
        response = requests.post(f"{API_URL}/transcribe", files={'audio': f})

if response.status_code == 200:
task_id = response.json()['task_id']
return task_id
else:
return f"Error: {response.text}"


def get_all_statuses():
response = requests.get(f"{API_URL}/statuses")

if response.status_code == 200:
statuses = response.json()
        # Build a list of dicts for the DataFrame
tasks_list = []
for task_id, task_info in statuses.items():
if not isinstance(task_info, dict):
task_info = task_info.json()
            # Extract the status from the API response
status = task_info.get('status', 'Unknown')
source_filename = task_info.get('source_filename', 'Unknown')

tasks_list.append({
status_columns[0]: task_id,
status_columns[1]: source_filename,
status_columns[2]: status,
status_columns[3]: "⬇️" if status == "Ready" else "-"
})

        # Build the DataFrame
df = pd.DataFrame(tasks_list)
return df
else:
return pd.DataFrame(columns=status_columns)


def download_result(task_id, filename):
if not task_id:
return None

response = requests.get(f"{API_URL}/download_result/{task_id}")

if response.status_code == 200:
        # Save the result to a temporary file
        directory = tempfile.mkdtemp(suffix=task_id)
output_path = Path(directory) / f"{filename}.docx"
with open(output_path, 'wb') as f:
f.write(response.content)
return_path = str(output_path.absolute().resolve())
return return_path
else:
return None


def handle_download_click(evt: gr.SelectData, statuses_df):
    # Get the task_id and file name from the selected row
task_id = statuses_df.iloc[evt.index[0]][status_columns[0]]
filename = statuses_df.iloc[evt.index[0]][status_columns[1]]
return download_result(task_id, filename)


with gr.Blocks() as demo:
gr.Markdown("# Сервис автоматического распознавания речи")

with gr.Row():
with gr.Column():
audio_input = gr.Audio(type="filepath", label="Загрузка файла")
upload_button = gr.Button("Отправить на обработку")

with gr.Row():
with gr.Column():
status_table = gr.Dataframe(
headers=status_columns,
interactive=False,
)
download_output = gr.File(label="Скачанные результаты")

    # Event handlers
upload_button.click(
fn=upload_audio,
inputs=audio_input,
outputs=None
)

status_table.select(
fn=handle_download_click,
inputs=status_table,
outputs=download_output
)

    # Refresh the status table every second
    status_table.attach_load_event(get_all_statuses, 1, None)

demo.launch(server_name="0.0.0.0", server_port=80, ssl_verify=False, debug=False)