From 6d8852be2f30d84c26be5d6c4891b107d34a7f8d Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Mon, 18 Nov 2024 13:05:20 +0100 Subject: [PATCH 1/8] chore: do not emit project --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index af4469f..2fdd52a 100644 --- a/Makefile +++ b/Makefile @@ -151,7 +151,7 @@ prepare-release: update-changelog tests/requirements-testing.lock # we use lock extension so that dependabot doesn't pick up changes in this file tests/requirements-testing.lock: pyproject.toml - uv export --dev --format requirements-txt --no-hashes --output-file $@ + uv export --dev --format requirements-txt --no-hashes --no-emit-project --output-file $@ .PHONY: update-changelog update-changelog: CHANGELOG.md From 9ea9fdcf4ef72012f0c28ac762453870cbbf247b Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Wed, 20 Nov 2024 13:33:45 +0100 Subject: [PATCH 2/8] ci: only cache uv, not .venv --- .github/workflows/check.yml | 24 ------------------------ .github/workflows/test.yml | 16 ---------------- 2 files changed, 40 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index c676a96..315a2c4 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -44,15 +44,7 @@ jobs: version: ${{ env.UV_VERSION }} enable-cache: true - - name: Load cached venv - id: cached-venv - uses: actions/cache@v4 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/uv.lock') }} - - name: Install dependencies - if: steps.cached-venv.outputs.cache-hit != 'true' run: make install-dev - name: check formatting @@ -80,15 +72,7 @@ jobs: version: ${{ env.UV_VERSION }} enable-cache: true - - name: Load cached venv - id: cached-venv - uses: actions/cache@v4 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/uv.lock') }} - - name: Install dependencies - if: steps.cached-venv.outputs.cache-hit != 'true' run: make install-dev - name: lint code @@ -116,15 +100,7 @@ jobs: version: ${{ env.UV_VERSION }} enable-cache: true - - name: Load cached venv - id: cached-venv - uses: actions/cache@v4 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/uv.lock') }} - - name: Install dependencies - if: steps.cached-venv.outputs.cache-hit != 'true' run: make install-dev - name: type-check code diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 840760e..fafff67 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -59,15 +59,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Load cached venv - id: cached-venv - uses: actions/cache@v4 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/uv.lock') }} - - name: Install dependencies - if: steps.cached-venv.outputs.cache-hit != 'true' run: uv sync --all-extras --dev - name: Run tests for coverage @@ -117,15 +109,7 @@ jobs: with: python-version: ${{ env.MINIMUM_PYTHON_VERSION }} - - name: Load cached venv - id: cached-venv - uses: actions/cache@v4 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/uv.lock') }} - - name: Install dependencies - if: steps.cached-venv.outputs.cache-hit != 'true' run: uv sync --all-extras --dev #---------------------------------------------- # Run tests and upload coverage From 3a2fff0cccbbf82d504e1c0029a38994add5a12b Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Wed, 20 Nov 2024 13:36:17 +0100 Subject: [PATCH 3/8] ci: add --all-packages to uv sync calls --- .github/workflows/scheduled.yml | 4 ++-- .github/workflows/test.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index ab75168..d1555b4 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -48,7 +48,7 @@ jobs: run: uv lock - name: uv sync --dev - run: uv sync --dev + run: uv sync --dev --all-packages - name: make test run: make test @@ -85,7 +85,7 @@ jobs: - name: uv sync --dev --upgrade if: hashFiles('uv.lock') != '' - run: uv sync --dev --upgrade + run: uv sync --dev --upgrade --all-packages - name: make test if: hashFiles('uv.lock') != '' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fafff67..8f3ef26 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -160,7 +160,7 @@ jobs: python-version: ${{ env.MINIMUM_PYTHON_VERSION }} - name: install lowest direct dependencies - run: uv sync --resolution lowest-direct --all-extras --dev + run: uv sync --resolution lowest-direct --all-extras --all-packages --dev - name: make test run: make test From 272aaf5eeaef6c038200339a71b50889b6225919 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 21 Nov 2024 12:10:06 +0100 Subject: [PATCH 4/8] chore: add --all-packages to uv sync calls --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 2fdd52a..1147c92 100644 --- a/Makefile +++ b/Makefile @@ -83,11 +83,11 @@ dev: install-dev # setup development environment install-dev: - uv sync --dev + uv sync --all-packages --dev # setup production environment install: - uv sync --no-dev + uv sync --all-packages --no-dev lock: uv.lock From cf7f0e57dbc15be7bbef517233795711fbdd0693 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 21 Nov 2024 12:46:58 +0100 Subject: [PATCH 5/8] ci: bump codecov-action to v5 --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8f3ef26..f91f3ca 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -118,7 +118,7 @@ jobs: run: make doc-tests cov_report=xml - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} # directory: ./coverage From 52dd8e0aaf6bc3a1dcdb1bbd81a497b85b2f1fa9 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 21 Nov 2024 13:52:14 +0100 Subject: [PATCH 6/8] feat: use gpu if available --- .../ocr_corrector.py | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ocr-correction-viklofg-sweocr/src/sbx_ocr_correction_viklofg_sweocr/ocr_corrector.py b/ocr-correction-viklofg-sweocr/src/sbx_ocr_correction_viklofg_sweocr/ocr_corrector.py index f0957ab..1acc675 100644 --- a/ocr-correction-viklofg-sweocr/src/sbx_ocr_correction_viklofg_sweocr/ocr_corrector.py +++ b/ocr-correction-viklofg-sweocr/src/sbx_ocr_correction_viklofg_sweocr/ocr_corrector.py @@ -3,6 +3,7 @@ import re from typing import Any, Optional +import torch from parallel_corpus import graph from parallel_corpus.text_token import Token from sparv import api as sparv_api # type: ignore [import-untyped] @@ -37,13 +38,30 @@ def __init__(self, *, tokenizer: Any, model: Any) -> None: """Construct an OcrCorrector.""" self.tokenizer = tokenizer self.model = model - self.pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer) + if torch.cuda.is_available(): + logger.info("Using GPU (cuda)") + dtype = torch.float16 + else: + logger.warning("Using CPU, is cuda available?") + dtype = torch.float32 + device_map = "auto" if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None + self.pipeline = pipeline( + "text2text-generation", model=model, tokenizer=tokenizer, device_map=device_map, torch_dtype=dtype + ) @classmethod def default(cls) -> "OcrCorrector": """Create a default OcrCorrector.""" + dtype = torch.float16 if torch.cuda.is_available() else torch.float32 tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, revision=TOKENIZER_REVISION) - model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME, revision=MODEL_REVISION) + model = T5ForConditionalGeneration.from_pretrained( + MODEL_NAME, + revision=MODEL_REVISION, + torch_dtype=dtype, + device_map=("auto" if torch.cuda.is_available() and torch.cuda.device_count() > 1 else None), + ) + if torch.cuda.is_available() and torch.cuda.device_count() == 1: + model = model.cuda() # type: ignore return cls(model=model, tokenizer=tokenizer) def calculate_corrections(self, text: list[str]) -> list[tuple[tuple[int, int], Optional[str]]]: From ca45aa5ddea9b5ded40f31bab5283d55434210b6 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 21 Nov 2024 14:03:27 +0100 Subject: [PATCH 7/8] ci: use make install-dev where possible --- .github/workflows/check.yml | 1 - .github/workflows/scheduled.yml | 4 ++-- .github/workflows/test.yml | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 315a2c4..10e47ef 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -120,4 +120,3 @@ jobs: uses: re-actors/alls-green@release/v1 with: jobs: ${{ toJSON(needs) }} - allowed-failures: upload-coverage diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index d1555b4..0cce419 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -48,7 +48,7 @@ jobs: run: uv lock - name: uv sync --dev - run: uv sync --dev --all-packages + run: uv sync --dev --all-packages --all-extras - name: make test run: make test @@ -85,7 +85,7 @@ jobs: - name: uv sync --dev --upgrade if: hashFiles('uv.lock') != '' - run: uv sync --dev --upgrade --all-packages + run: uv sync --dev --upgrade --all-packages --all-extras - name: make test if: hashFiles('uv.lock') != '' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f91f3ca..9791216 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -60,7 +60,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies - run: uv sync --all-extras --dev + run: make install-dev - name: Run tests for coverage run: make test-w-coverage cov_report=xml @@ -110,7 +110,7 @@ jobs: python-version: ${{ env.MINIMUM_PYTHON_VERSION }} - name: Install dependencies - run: uv sync --all-extras --dev + run: make install-dev #---------------------------------------------- # Run tests and upload coverage #---------------------------------------------- From cb24252c2fef03d7f2a26b96cd9ee52477ca4535 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 21 Nov 2024 14:34:56 +0100 Subject: [PATCH 8/8] ci: remove nightly job --- .github/workflows/scheduled.yml | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 0cce419..6a88bb0 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -21,38 +21,6 @@ env: UV_VERSION: "0.5.1" jobs: - # https://twitter.com/mycoliza/status/1571295690063753218 - nightly: - runs-on: ubuntu-latest - name: ubuntu / 3.14-dev - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - - name: Set up uv - uses: astral-sh/setup-uv@v3 - with: - version: ${{ env.UV_VERSION }} - enable-cache: true - - - name: Install python - uses: actions/setup-python@v5 - with: - python-version: "3.14-dev" - - - run: python --version - - - name: uv lock - if: hashFiles('uv.lock') == '' - run: uv lock - - - name: uv sync --dev - run: uv sync --dev --all-packages --all-extras - - - name: make test - run: make test - # https://twitter.com/alcuadrado/status/1571291687837732873 update: # This action checks that updating the dependencies of this crate to the latest available that