Skip to content

Commit 8ba4a84

Browse files
committed
支持edge_tts
1 parent d682f61 commit 8ba4a84

File tree

8 files changed

+183
-103
lines changed

8 files changed

+183
-103
lines changed

Dockerfile

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# FROM docker.rainbond.cc/506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
2-
FROM 506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
2+
# FROM 506610466/cuda:12.2.0-runtime-ubuntu20.04-uv
3+
# 从基础镜像开始构建,加快构建速度
4+
FROM 506610466/gpt_server:base
35
COPY ./ /gpt_server
46
WORKDIR /gpt_server
5-
6-
RUN uv venv --seed && uv sync && uv cache clean && \
7-
echo '[[ -f .venv/bin/activate ]] && source .venv/bin/activate' >> ~/.bashrc
7+
RUN uv sync && uv cache clean
8+
# RUN uv venv --seed && uv sync && uv cache clean && \
9+
# echo '[[ -f .venv/bin/activate ]] && source .venv/bin/activate' >> ~/.bashrc
810

911
CMD ["/bin/bash"]

Dockerfile.copy

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM docker.rainbond.cc/506610466/gpt_server:latest
1+
FROM hub.geekery.cn/506610466/gpt_server:latest
22

33
COPY ./ /gpt_server
44

gpt_server/openai_api_protocol/custom_api_protocol.py

+20
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,26 @@
1414
from pydantic import Field, BaseModel
1515

1616

17+
class SpeechRequest(BaseModel):
    """Request body for the OpenAI-compatible ``/v1/audio/speech`` TTS endpoint.

    Mirrors the OpenAI Audio API schema; defaults target the edge-tts backend.
    """

    model: str = Field(
        default="edge_tts", description="One of the available TTS models:"
    )
    input: str = Field(
        # Enforce the limit the description promises instead of only stating it.
        max_length=4096,
        description="The text to generate audio for. The maximum length is 4096 characters.",
    )
    voice: str = Field(
        default="zh-CN-YunxiNeural",
        description="The voice to use when generating the audio",
    )
    response_format: Optional[str] = Field(
        default="mp3", description="The format of the audio"
    )
    speed: Optional[float] = Field(
        default=1.0,
        # Validate the documented range so out-of-range values are rejected
        # with a 422 instead of being silently passed through.
        ge=0.25,
        le=4.0,
        description="The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.",
    )
35+
36+
1737
class ModerationsRequest(BaseModel):
1838
input: Union[str, List[str]]
1939
model: str

gpt_server/serving/openai_api_server.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from fastapi import Depends, HTTPException
2121
from fastapi.exceptions import RequestValidationError
2222
from fastapi.middleware.cors import CORSMiddleware
23-
from fastapi.responses import StreamingResponse, JSONResponse
23+
from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
2424
from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer
2525
import httpx
2626

@@ -699,7 +699,31 @@ async def generate_completion(payload: Dict[str, Any], worker_addr: str):
699699
CustomEmbeddingsRequest,
700700
RerankRequest,
701701
ModerationsRequest,
702+
SpeechRequest,
702703
)
704+
import edge_tts
import uuid

# Directory where generated audio files are written before being returned.
# NOTE(review): files accumulate here and are never cleaned up — consider a
# BackgroundTask that deletes the file after the response is sent.
OUTPUT_DIR = "./edge_tts_cache"


@app.post("/v1/audio/speech", dependencies=[Depends(check_api_key)])
async def speech(request: SpeechRequest):
    """OpenAI-compatible text-to-speech endpoint backed by edge-tts.

    Validates the requested voice against edge-tts' live voice list, then
    synthesizes ``request.input`` to an MP3 file and returns it.
    Returns a 400 JSON error when the voice is unsupported.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)  # no error if it already exists
    voices = await edge_tts.list_voices()
    supported_voices = {v["ShortName"] for v in voices}
    if request.voice not in supported_voices:
        return JSONResponse(
            ErrorResponse(
                message=f"不支持voice:{request.voice}", code=ErrorCode.INVALID_MODEL
            ).dict(),
            status_code=400,
        )
    # Honor the request's speed field (it was previously accepted but ignored):
    # map the OpenAI-style multiplier (1.0 = normal) onto edge-tts' signed
    # percentage rate string, e.g. 1.5 -> "+50%", 0.8 -> "-20%".
    speed = request.speed if request.speed is not None else 1.0
    rate = f"{round(speed * 100) - 100:+d}%"
    filename = f"{uuid.uuid4()}.mp3"
    output_path = os.path.join(OUTPUT_DIR, filename)
    communicate = edge_tts.Communicate(
        text=request.input, voice=request.voice, rate=rate
    )
    await communicate.save(output_path)
    return FileResponse(output_path, media_type="audio/mpeg", filename=filename)
703727

704728

705729
@app.post("/v1/moderations", dependencies=[Depends(check_api_key)])

pyproject.toml

+5
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ dependencies = [
2525
"qwen_vl_utils",
2626
"evalscope[perf]==0.7.0",
2727
"modelscope==1.20.1",
28+
"edge-tts>=7.0.0",
2829
]
2930

3031
[tool.uv]
@@ -37,6 +38,10 @@ override-dependencies = [
3738

3839
]
3940

41+
[[tool.uv.index]]
42+
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
43+
default = true
44+
4045
[project.scripts]
4146
gpt_server = "gpt_server.cli:main"
4247

requirements.txt

+11-3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ aiohappyeyeballs==2.4.4
2121
aiohttp==3.11.11
2222
# via
2323
# datasets
24+
# edge-tts
2425
# evalscope
2526
# fschat
2627
# fsspec
@@ -75,6 +76,7 @@ cachetools==5.5.0
7576
# streamlit
7677
certifi==2024.12.14
7778
# via
79+
# edge-tts
7880
# httpcore
7981
# httpx
8082
# requests
@@ -84,7 +86,7 @@ cffi==1.17.1
8486
# soundfile
8587
charset-normalizer==3.4.0
8688
# via requests
87-
click==8.1.7
89+
click==8.1.8
8890
# via
8991
# nltk
9092
# ray
@@ -133,6 +135,8 @@ diskcache==5.6.3
133135
# outlines
134136
distro==1.9.0
135137
# via openai
138+
edge-tts==7.0.0
139+
# via gpt-server (pyproject.toml)
136140
editdistance==0.8.1
137141
# via evalscope
138142
einops==0.8.0
@@ -256,7 +260,7 @@ interegular==0.3.3
256260
# outlines-core
257261
jieba==0.42.1
258262
# via evalscope
259-
jinja2==3.1.4
263+
jinja2==3.1.5
260264
# via
261265
# altair
262266
# gradio
@@ -741,6 +745,8 @@ sortedcontainers==2.4.0
741745
# via modelscope
742746
soundfile==0.12.1
743747
# via infinity-emb
748+
srt==3.5.3
749+
# via edge-tts
744750
sse-starlette==2.1.3
745751
# via evalscope
746752
starlette==0.38.6
@@ -759,6 +765,7 @@ sympy==1.13.1
759765
# torch
760766
tabulate==0.9.0
761767
# via
768+
# edge-tts
762769
# evalscope
763770
# sacrebleu
764771
tenacity==9.0.0
@@ -865,6 +872,7 @@ typing-extensions==4.12.2
865872
# via
866873
# altair
867874
# anyio
875+
# edge-tts
868876
# fastapi
869877
# gradio
870878
# gradio-client
@@ -885,7 +893,7 @@ tzdata==2024.2
885893
# via pandas
886894
unicorn==2.1.1
887895
# via evalscope
888-
urllib3==2.2.3
896+
urllib3==2.3.0
889897
# via
890898
# modelscope
891899
# requests

tests/test_tts.py

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
"""Manual smoke test: synthesize speech through the local OpenAI-compatible API."""
from pathlib import Path

from openai import OpenAI

# Requires a recent openai client (>= 1.x).
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8082/v1")
speech_file_path = Path(__file__).parent / "speech.mp3"

# Stream the synthesized audio straight to disk; the non-streaming
# `response.write_to_file(...)` path is deprecated in openai >= 1.x.
with client.audio.speech.with_streaming_response.create(
    model="edge_tts",
    voice="zh-CN-YunxiNeural",
    input="你好啊,我是人工智能。",
) as response:
    response.stream_to_file(speech_file_path)

0 commit comments

Comments
 (0)