Commit 2f09515

Merge pull request #1 from DliDAM/fastapi_kr

[Final Merge] Fastapi_kr branch to Main branch

BangProx authored Dec 3, 2024
2 parents 13731e3 + f42ea35 commit 2f09515
Showing 690 changed files with 28,885 additions and 221 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -4,6 +4,6 @@ WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt -v

CMD [ "uvicorn", "main:app","--proxy-headers", "--host", "0.0.0.0", "--port","80" ]
1 change: 0 additions & 1 deletion docker-compose.yml
@@ -12,4 +12,3 @@ services:
      - .:/code
    environment:
      - ENVIRONMENT=production

27 changes: 27 additions & 0 deletions m4a2wav.py
@@ -0,0 +1,27 @@
import os
from pydub import AudioSegment

def convert_m4a_to_wav(input_path):
    # Walk every file under the input path
    for root, dirs, files in os.walk(input_path):
        for file in files:
            # Only process .m4a files
            if file.endswith('.m4a'):
                # Full path of the source file
                m4a_path = os.path.join(root, file)
                # Build the .wav filename (change the extension only)
                wav_path = os.path.splitext(m4a_path)[0] + '.wav'

                try:
                    # Load the m4a file
                    audio = AudioSegment.from_file(m4a_path, format='m4a')
                    # Export it as wav
                    audio.export(wav_path, format='wav')
                    print(f'Converted: {file}')
                except Exception as e:
                    print(f'Conversion failed: {file} - {str(e)}')

if __name__ == "__main__":
    # Folder containing the files to convert
    input_folder = "/Users/bangbyeonghun/Documents/projects/grad/ai-server/tortoise/tortoise/voices/rose"
    convert_m4a_to_wav(input_folder)
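
pydub delegates m4a decoding to ffmpeg, so an ffmpeg build with AAC support must be on PATH for this script to work. As a minimal sketch, converting a single hypothetical file looks like this:

from pydub import AudioSegment

# Hypothetical single-file conversion; requires ffmpeg on PATH for m4a input.
src = "sample.m4a"
dst = "sample.wav"

audio = AudioSegment.from_file(src, format="m4a")
audio.export(dst, format="wav")
print(f"Wrote {dst} ({len(audio) / 1000:.1f}s of audio)")  # len() of an AudioSegment is its duration in ms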
25 changes: 19 additions & 6 deletions main.py
@@ -1,11 +1,13 @@
from typing import Union
from fastapi import FastAPI
from fastapi import FastAPI, UploadFile, File, Form
from routers import websocket
from fastapi.responses import FileResponse
import os
import shutil


app = FastAPI()
app.include_router(websocket.router)
#app.include_router(websocket.router)
path = './templates/index.html'
audio_path = './templates/audio.html'
audio_file_path = './templates/audio_file.html'
@@ -15,7 +17,18 @@
async def get():
    return FileResponse(text_to_audio_path)

@app.get("/items/{item_id}")
def read_item(item_id: int, q: Union[str, None] = None):
    return {"item_id": item_id, "q": q}

@app.post("/audio")
async def upload_audio(audio_file: UploadFile = File(...), user_id: str = Form(...)):
    # Build the save directory from the user ID
    save_directory = f"tortoise/tortoise/voices/{user_id}"

    # Create the directory if it does not exist
    os.makedirs(save_directory, exist_ok=True)

    # Save the uploaded file
    file_path = os.path.join(save_directory, audio_file.filename)
    with open(file_path, "wb") as buffer:
        shutil.copyfileobj(audio_file.file, buffer)

    return {"message": f"Audio file for user {user_id} uploaded successfully.", "file_name": audio_file.filename, "user_id": user_id}

5 changes: 5 additions & 0 deletions requirements2.txt
@@ -0,0 +1,5 @@
uvicorn
fastapi
python-multipart
websockets
spacy
282 changes: 69 additions & 213 deletions routers/websocket.py
@@ -1,217 +1,73 @@
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
import asyncio
import os
import uuid
import json
from queue import Queue
from datetime import datetime, timedelta
from time import sleep
import threading
from typing import List
import logging
import wave  # Temporary import; remove later.

router = APIRouter()

# Thread safe Queue for passing data from the threaded recording callback.
text_queue = Queue()

# Thread safe Queue for passing result from STT Thread
result_queue = Queue()

# EventObject for Stopping Thread
stop_event = threading.Event()

# ======================== new project 08/23 ======================== #
# For log inspection; remove later
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("websocket")

async def process_tts(text: str) -> bytes:
    # TTS processing (here we simply return dummy binary data as an example)
    # A real implementation would generate audio with a TTS engine
    await asyncio.sleep(1)  # Simulate TTS processing time
    response = b"dummy binary audio data"
    logger.info(f"Sending message: {response}")
    return response  # Replace with real audio data later

@router.websocket("/ws/new")
async def websocket_TTS(websocket: WebSocket):
await websocket.accept()
try:
# Worker 코루틴을 별도로 실행하여 Queue 처리
asyncio.create_task(worker(websocket))

while True:
data = await websocket.receive_text()
logger.info(f"Received message: {data}")
words = data.split()

chunk_size = 7 # 이 부분은 성능에 따라서 변경 가능
chunks = [words[i:i+chunk_size] for i in range(0,len(words), chunk_size)]
for chunk in chunks:
text_queue.put(chunk)

except WebSocketDisconnect:
print("WebSocket connection closed")

# Queue를 처리하는 작업자 함수
async def worker(websocket : WebSocket):
while True:
if not text_queue.empty():
text = text_queue.get()
audio_data = await process_tts(text)
logger.info("----input----")
await websocket.send_bytes(audio_data)
logger.info("----output-----")
await asyncio.sleep(0.1) # Queue가 비어있는지 확인하는 주기

# ================================================================================================ #
def read_wav(file_path):
    with wave.open(file_path, 'rb') as wav_file:
        frames = wav_file.readframes(wav_file.getnframes())
    return frames

def textToSpeech(stop_event):
    print("[TTSThread] TTS Thread Executed")

    while not stop_event.is_set():
        sleep(3)
        try:
            now = datetime.utcnow()
            # pop text from the queue.
            if not text_queue.empty():
                text_queue.get()
                audio_file_path = "./data/received_audio.wav"
                processed_result = read_wav(audio_file_path)
                if processed_result is not None:
                    result_queue.put(processed_result)

        except Exception as e:
            print(f"[TTSThread] Error processing text data: {e}")
            break

    print("[TTSThread] TTS Thread Destroyed")

async def process_thread(websocket: WebSocket):
    print("[ProcessTask] Process Task Initiated")
    while not stop_event.is_set():
        await asyncio.sleep(0.25)
        try:
            if not result_queue.empty():
                audio_result = result_queue.get()
                # read_wav returns raw frame bytes, so stream them in 1024-byte chunks
                for i in range(0, len(audio_result), 1024):
                    await websocket.send_bytes(audio_result[i:i + 1024])

        except Exception as e:
            print(f"[ProcessTask] Error : {e}")
            break
# from fastapi import APIRouter, WebSocket, WebSocketDisconnect
# from whisper_live import WhisperModel
# import asyncio
# import tempfile

# router = APIRouter()

# # Initialize Whisper Model (Specify the desired model name)
# model_name = "small" # You can use "tiny", "small", "medium", or "large"
# whisper_model = WhisperModel(model_name=model_name)

# @router.websocket("/ws")
# async def websocket_endpoint(websocket: WebSocket):
# print("Configuring BE Socket")
# await websocket.accept()
# print("BE Socket Accepted")

print("[ProcessTask] Process Task Destroyed")

async def websocket_task(websocket: WebSocket):
    # Load Model Here
    device = 'cuda'
    print("[WebSocketTask]-[****] Model Loaded Successfully with", device)

    # Accept WebSocket
    #connection_uuid = await websocket.receive_bytes()
    #print("[WebSocketTask] Connection [" + connection_uuid + "] Accepted")

    # Execute TTS Thread until WebSocket Disconnected

    ttsThread = threading.Thread(target=textToSpeech, args=(stop_event,))
    ttsThread.start()

    processTask = asyncio.create_task(process_thread(websocket))

    # Receive Text
    try:
        while True:
            text_data = await websocket.receive_text()
            text_queue.put(text_data)

            # Sleep for other async functions
            await asyncio.sleep(0)

    except Exception as e:
        print(f"[WebSocketTask] WebSocket error: {e}")
    finally:
        stop_event.set()
        ttsThread.join()
        processTask.cancel()

        # clear stop_event for next Socket Connection
        stop_event.clear()

        while not text_queue.empty():
            text_queue.get()

        while not result_queue.empty():
            result_queue.get()

        if websocket.client_state.name != "DISCONNECTED":
            await websocket.close()

        print("[WebSocketTask] Connection Closed")

#======= WebSocket ========#
@router.websocket("/tts/ws")
async def tts_endpoint(websocket: WebSocket):
    print("Configuring BE Socket")
    await websocket.accept()
    print("BE Socket Accepted")

    await asyncio.create_task(websocket_task(websocket))
    # while True:
    #     data = await websocket.receive_text()
    #     await websocket.send_text(f"Echo: {data}")

# @router.websocket("/audio/ws")
# async def websocket_audio_endpoint(websocket: WebSocket):
# await websocket.accept()
# print("WebSocket connection accepted.")

# try:
# # Prepare an async queue to hold incoming audio data
# audio_queue = asyncio.Queue()

# # Function to process audio data from the queue
# async def process_audio_data():
# # Use a temporary file to write the received audio
# with tempfile.NamedTemporaryFile(suffix=".wav") as audio_file:
# while True:
# # Retrieve audio data from the queue
# data = await audio_queue.get()
# if data is None:
# break

# # Write the data to the temporary file
# audio_file.write(data)
# audio_file.flush()

# # Perform the transcription
# transcription = whisper_model.transcribe(audio_file.name)

# # Send the transcription back to the client
# await websocket.send_text(transcription)
# print(f"Transcribed text: {transcription}")

# # Start the audio processing task
# audio_task = asyncio.create_task(process_audio_data())

# while True:
# # Receive audio data as bytes
# data = await websocket.receive_bytes()

# # Add the received audio data to the queue for processing
# await audio_queue.put(data)

@router.websocket("/ws")
async def websocket_test_endpoint(websocket: WebSocket):
print("Configuring BE Socket")
await websocket.accept()
print("BE Socket Accepted")

while True:
data = await websocket.receive_text()
await websocket.send_text(f"Echo: {data}")
# except WebSocketDisconnect:
# print("WebSocket connection closed by client.")

@router.websocket("/audio/ws")
async def websocket_audio_test_endpoint(websocket: WebSocket):
await websocket.accept()
print("WebSocket connection accepted.")

try:
while True:
# 음성 데이터를 바이너리로 수신
data = await websocket.receive_bytes()
# 데이터 처리 예: 파일로 저장
with open("received_audio.wav", "wb") as f:
f.write(data)
# 클라이언트에 데이터 전송 (예: 수신 완료 알림)
await websocket.send_text("Audio received and saved.")
except Exception as e:
print("Error:", e)
finally:
print("WebSocket connection closed.")
# except Exception as e:
# print(f"Error: {e}")

#======= WebSocket ========#
@router.websocket("/tts")
async def websocket_tts_test_endpoint(websocket: WebSocket):
    print("Configuring BE Socket")
    await websocket.accept()
    print("BE Socket Accepted")
    try:
        while True:
            data = await websocket.receive_text()
            print(f"Received text data: {data}")
            audio_file_path = "./data/received_audio.wav"
            if os.path.exists(audio_file_path):
                with open(audio_file_path, "rb") as audio_file:
                    while chunk := audio_file.read(1024):
                        await websocket.send_bytes(chunk)
                #processed_result = read_wav(audio_file_path)
                #await websocket.send_bytes(processed_result)
            else:
                await websocket.send_text("Audio file not found.")
    except Exception as e:
        print("Exception:", e)
    finally:
        print("WebSocket connection closed.")
    # finally:
    #     # Notify the audio processing task to exit
    #     await audio_queue.put(None)
    #     # Wait for the audio processing task to complete
    #     await audio_task
    #     print("WebSocket connection closed.")