feat: add more pre-commit hooks

fedirz · May 26, 2024 · d0feed8 · d0feed8
1 parent aa5390b
commit d0feed8
Show file tree

Hide file tree

Showing 10 changed files with 83 additions and 34 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,21 +8,41 @@ repos:
       - id: end-of-file-fixer
       - id: check-yaml
       - id: check-added-large-files
-  # TODO: enable
-  # - repo: https://github.com/pre-commit/mirrors-mypy
-  #   rev: v1.10.0
-  #   hooks:
-  #     - id: mypy
-  #       args: [--strict]
-  # TODO: enable
-  # - repo: https://github.com/RobertCraigie/pyright-python
-  #   rev: v1.1.363
-  #   hooks:
-  #   - id: pyright
-  # Disabled because it doesn't work on NixOS
-  # - repo: https://github.com/astral-sh/ruff-pre-commit
-  #   rev: v0.4.4
-  #   hooks:
-  #     - id: ruff # linter
-  #       args: [--fix]
-  #     - id: ruff-format
+  - repo: https://github.com/python-jsonschema/check-jsonschema
+    rev: 0.28.4
+    hooks:
+      - id: check-taskfile
+  - repo: https://github.com/rhysd/actionlint
+    rev: v1.7.0
+    hooks:
+      - id: actionlint
+  - repo: https://github.com/IamTheFij/docker-pre-commit
+    rev: v3.0.1
+    hooks:
+      - id: docker-compose-check
+  - repo: https://github.com/hadolint/hadolint
+    rev: v2.12.0
+    hooks:
+      - id: hadolint
+  - repo: https://github.com/shellcheck-py/shellcheck-py
+    rev: v0.10.0.1
+    hooks:
+      - id: shellcheck
+  # NOTE: not using https://github.com/RobertCraigie/pyright-python because it doesn't work with poetry virtual environments
+  # NOTE: not using https://github.com/astral-sh/ruff-pre-commit because it doesn't work on NixOS
+  - repo: local
+    hooks:
+      - id: pyright
+        name: pyright
+        entry: ./pre-commit-scripts/pyright.sh
+        language: script
+        pass_filenames: false
+      - id: ruff-lint
+        name: ruff-lint
+        entry: ./pre-commit-scripts/ruff-lint.sh
+        pass_filenames: false
+        language: script
+      - id: ruff-format
+        name: ruff-format
+        entry: ./pre-commit-scripts/ruff-format.sh
+        language: script
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
@@ -1,9 +1,12 @@
 FROM ubuntu:22.04
+# hadolint ignore=DL3008,DL4006
 RUN apt-get update && \
-    apt-get install -y curl software-properties-common && \
+    apt-get install -y --no-install-recommends curl software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
     apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
 WORKDIR /root/speaches

diff --git a/Dockerfile.cuda b/Dockerfile.cuda
@@ -1,9 +1,12 @@
 FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
+# hadolint ignore=DL3008,DL4006
 RUN apt-get update && \
-    apt-get install -y curl software-properties-common && \
+    apt-get install -y --no-install-recommends curl software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
     apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
 WORKDIR /root/speaches

diff --git a/pre-commit-scripts/pyright.sh b/pre-commit-scripts/pyright.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+# shellcheck disable=SC1091
+source "$(poetry env info --path)"/bin/activate
+pyright
diff --git a/pre-commit-scripts/ruff-format.sh b/pre-commit-scripts/ruff-format.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+ruff format
diff --git a/pre-commit-scripts/ruff-lint.sh b/pre-commit-scripts/ruff-lint.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+ruff check --fix
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,6 +22,11 @@ youtube-dl = {git = "https://github.com/ytdl-org/youtube-dl.git"}
 [tool.ruff]
 target-version = "py311"
 
+[tool.pyright]
+# typeCheckingMode = "strict"
+pythonVersion = "3.11"
+pythonPlatform = "Linux"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
diff --git a/speaches/main.py b/speaches/main.py
@@ -6,8 +6,15 @@
 from io import BytesIO
 from typing import Annotated, Literal, OrderedDict
 
-from fastapi import (FastAPI, Form, Query, Response, UploadFile, WebSocket,
-                     WebSocketDisconnect)
+from fastapi import (
+    FastAPI,
+    Form,
+    Query,
+    Response,
+    UploadFile,
+    WebSocket,
+    WebSocketDisconnect,
+)
 from fastapi.responses import StreamingResponse
 from fastapi.websockets import WebSocketState
 from faster_whisper import WhisperModel
@@ -16,11 +23,12 @@
 from speaches import utils
 from speaches.asr import FasterWhisperASR
 from speaches.audio import AudioStream, audio_samples_from_file
-from speaches.config import (SAMPLES_PER_SECOND, Language, Model,
-                             ResponseFormat, config)
+from speaches.config import SAMPLES_PER_SECOND, Language, Model, ResponseFormat, config
 from speaches.logger import logger
-from speaches.server_models import (TranscriptionJsonResponse,
-                                    TranscriptionVerboseJsonResponse)
+from speaches.server_models import (
+    TranscriptionJsonResponse,
+    TranscriptionVerboseJsonResponse,
+)
 from speaches.transcriber import audio_transcriber
 
 models: OrderedDict[Model, WhisperModel] = OrderedDict()

diff --git a/speaches/server_models.py b/speaches/server_models.py
@@ -85,7 +85,7 @@ def from_segment(
             text=segment.text,
             words=(
                 [WordObject.from_word(word) for word in segment.words]
-                if type(segment.words) == list
+                if isinstance(segment.words, list)
                 else []
             ),
             segments=[SegmentObject.from_segment(segment)],

diff --git a/tests/app_test.py b/tests/app_test.py
@@ -12,7 +12,7 @@
 
 from speaches.config import BYTES_PER_SECOND
 from speaches.main import app
-from speaches.server_models import TranscriptionVerboseResponse
+from speaches.server_models import TranscriptionVerboseJsonResponse
 
 SIMILARITY_THRESHOLD = 0.97
 AUDIO_FILES_LIMIT = 5
@@ -54,13 +54,13 @@ def stream_audio_data(
 
 def transcribe_audio_data(
     client: TestClient, data: bytes
-) -> TranscriptionVerboseResponse:
+) -> TranscriptionVerboseJsonResponse:
     response = client.post(
         TRANSCRIBE_ENDPOINT,
         files={"file": ("audio.raw", data, "audio/raw")},
     )
     data = json.loads(response.json())  # TODO: why is the body double-encoded JSON?
-    return TranscriptionVerboseResponse(**data)  # type: ignore
+    return TranscriptionVerboseJsonResponse(**data)  # type: ignore
 
 
 @pytest.mark.parametrize("file_path", file_paths)
@@ -70,14 +70,16 @@ def test_ws_audio_transcriptions(
     with open(file_path, "rb") as file:
         data = file.read()
 
-    streaming_transcription: TranscriptionVerboseResponse = None  # type: ignore
+    streaming_transcription: TranscriptionVerboseJsonResponse = None  # type: ignore
     thread = threading.Thread(
         target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
     )
     thread.start()
     while True:
         try:
-            streaming_transcription = TranscriptionVerboseResponse(**ws.receive_json())
+            streaming_transcription = TranscriptionVerboseJsonResponse(
+                **ws.receive_json()
+            )
         except WebSocketDisconnect:
             break
     file_transcription = transcribe_audio_data(client, data)