Skip to content

Commit

Permalink
feat: add more pre-commit hooks
Browse files Browse the repository at this point in the history
  • Loading branch information
Fedir Zadniprovskyi committed May 26, 2024
1 parent aa5390b commit d0feed8
Show file tree
Hide file tree
Showing 10 changed files with 83 additions and 34 deletions.
56 changes: 38 additions & 18 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,41 @@ repos:
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
# TODO: enable
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.10.0
# hooks:
# - id: mypy
# args: [--strict]
# TODO: enable
# - repo: https://github.com/RobertCraigie/pyright-python
# rev: v1.1.363
# hooks:
# - id: pyright
# Disabled because it doesn't work on NixOS
# - repo: https://github.com/astral-sh/ruff-pre-commit
# rev: v0.4.4
# hooks:
# - id: ruff # linter
# args: [--fix]
# - id: ruff-format
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.28.4
hooks:
- id: check-taskfile
- repo: https://github.com/rhysd/actionlint
rev: v1.7.0
hooks:
- id: actionlint
- repo: https://github.com/IamTheFij/docker-pre-commit
rev: v3.0.1
hooks:
- id: docker-compose-check
- repo: https://github.com/hadolint/hadolint
rev: v2.12.0
hooks:
- id: hadolint
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.10.0.1
hooks:
- id: shellcheck
# NOTE: not using https://github.com/RobertCraigie/pyright-python because it doesn't work with poetry virtual environments
# NOTE: not using https://github.com/astral-sh/ruff-pre-commit because it doesn't work on NixOS
# Project-local hooks. Each one runs a wrapper script from ./pre-commit-scripts/
# instead of a published pre-commit hook repository.
- repo: local
  hooks:
    # Type-check via the wrapper script, which activates the poetry virtualenv.
    - id: pyright
      name: pyright
      entry: ./pre-commit-scripts/pyright.sh
      language: script
      pass_filenames: false
    # Lint via the wrapper script (`ruff check --fix`).
    - id: ruff-lint
      name: ruff-lint
      entry: ./pre-commit-scripts/ruff-lint.sh
      language: script
      pass_filenames: false
    # Format via the wrapper script (`ruff format`).
    - id: ruff-format
      name: ruff-format
      entry: ./pre-commit-scripts/ruff-format.sh
      language: script
      # The wrapper script ignores its arguments and formats the whole project,
      # so do not pass the staged filenames. Otherwise pre-commit may split a
      # long file list into several invocations that each reformat everything.
      pass_filenames: false
7 changes: 5 additions & 2 deletions Dockerfile.cpu
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
FROM ubuntu:22.04
# hadolint ignore=DL3008,DL4006
RUN apt-get update && \
apt-get install -y curl software-properties-common && \
apt-get install -y --no-install-recommends curl software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
RUN pip install --no-cache-dir poetry==1.8.2
WORKDIR /root/speaches
Expand Down
7 changes: 5 additions & 2 deletions Dockerfile.cuda
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
# hadolint ignore=DL3008,DL4006
RUN apt-get update && \
apt-get install -y curl software-properties-common && \
apt-get install -y --no-install-recommends curl software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
RUN pip install --no-cache-dir poetry==1.8.2
WORKDIR /root/speaches
Expand Down
4 changes: 4 additions & 0 deletions pre-commit-scripts/pyright.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Run pyright from inside the project's poetry virtualenv.
#
# Exits non-zero without running pyright if the virtualenv cannot be located
# or activated, so the hook never silently type-checks against whatever
# interpreter or pyright happens to be on PATH.

# Resolve the virtualenv path first so a poetry failure is detected here
# rather than producing a bogus "/bin/activate" path below.
venv_path="$(poetry env info --path)" || {
    echo "pyright.sh: could not determine the poetry virtualenv path" >&2
    exit 1
}

# The activate script lives outside the repository, so shellcheck cannot follow it.
# shellcheck disable=SC1091
source "${venv_path}/bin/activate" || {
    echo "pyright.sh: could not activate virtualenv at ${venv_path}" >&2
    exit 1
}

pyright
2 changes: 2 additions & 0 deletions pre-commit-scripts/ruff-format.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Pre-commit wrapper: run the ruff formatter with its default target.
# `exec` replaces this shell with ruff, so ruff's exit status is the hook's.
exec ruff format
2 changes: 2 additions & 0 deletions pre-commit-scripts/ruff-lint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Pre-commit wrapper: run the ruff linter and apply its automatic fixes.
# `exec` replaces this shell with ruff, so ruff's exit status is the hook's.
exec ruff check --fix
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ youtube-dl = {git = "https://github.com/ytdl-org/youtube-dl.git"}
[tool.ruff]
target-version = "py311"

[tool.pyright]
# typeCheckingMode = "strict"
pythonVersion = "3.11"
pythonPlatform = "Linux"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
20 changes: 14 additions & 6 deletions speaches/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,15 @@
from io import BytesIO
from typing import Annotated, Literal, OrderedDict

from fastapi import (FastAPI, Form, Query, Response, UploadFile, WebSocket,
WebSocketDisconnect)
from fastapi import (
FastAPI,
Form,
Query,
Response,
UploadFile,
WebSocket,
WebSocketDisconnect,
)
from fastapi.responses import StreamingResponse
from fastapi.websockets import WebSocketState
from faster_whisper import WhisperModel
Expand All @@ -16,11 +23,12 @@
from speaches import utils
from speaches.asr import FasterWhisperASR
from speaches.audio import AudioStream, audio_samples_from_file
from speaches.config import (SAMPLES_PER_SECOND, Language, Model,
ResponseFormat, config)
from speaches.config import SAMPLES_PER_SECOND, Language, Model, ResponseFormat, config
from speaches.logger import logger
from speaches.server_models import (TranscriptionJsonResponse,
TranscriptionVerboseJsonResponse)
from speaches.server_models import (
TranscriptionJsonResponse,
TranscriptionVerboseJsonResponse,
)
from speaches.transcriber import audio_transcriber

models: OrderedDict[Model, WhisperModel] = OrderedDict()
Expand Down
2 changes: 1 addition & 1 deletion speaches/server_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def from_segment(
text=segment.text,
words=(
[WordObject.from_word(word) for word in segment.words]
if type(segment.words) == list
if isinstance(segment.words, list)
else []
),
segments=[SegmentObject.from_segment(segment)],
Expand Down
12 changes: 7 additions & 5 deletions tests/app_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from speaches.config import BYTES_PER_SECOND
from speaches.main import app
from speaches.server_models import TranscriptionVerboseResponse
from speaches.server_models import TranscriptionVerboseJsonResponse

SIMILARITY_THRESHOLD = 0.97
AUDIO_FILES_LIMIT = 5
Expand Down Expand Up @@ -54,13 +54,13 @@ def stream_audio_data(

def transcribe_audio_data(
client: TestClient, data: bytes
) -> TranscriptionVerboseResponse:
) -> TranscriptionVerboseJsonResponse:
response = client.post(
TRANSCRIBE_ENDPOINT,
files={"file": ("audio.raw", data, "audio/raw")},
)
data = json.loads(response.json()) # TODO: figure this out
return TranscriptionVerboseResponse(**data) # type: ignore
return TranscriptionVerboseJsonResponse(**data) # type: ignore


@pytest.mark.parametrize("file_path", file_paths)
Expand All @@ -70,14 +70,16 @@ def test_ws_audio_transcriptions(
with open(file_path, "rb") as file:
data = file.read()

streaming_transcription: TranscriptionVerboseResponse = None # type: ignore
streaming_transcription: TranscriptionVerboseJsonResponse = None # type: ignore
thread = threading.Thread(
target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
)
thread.start()
while True:
try:
streaming_transcription = TranscriptionVerboseResponse(**ws.receive_json())
streaming_transcription = TranscriptionVerboseJsonResponse(
**ws.receive_json()
)
except WebSocketDisconnect:
break
file_transcription = transcribe_audio_data(client, data)
Expand Down

0 comments on commit d0feed8

Please sign in to comment.