From 1ee4821d952ed9e8b2c9f1d5e77e051711d2c157 Mon Sep 17 00:00:00 2001 From: Mart Ratas Date: Tue, 7 Jan 2025 14:59:50 +0000 Subject: [PATCH] CU-8693bc9kc: Add python 3.12 support (#511) * CU-8693bc9kc: Add python 3.12 support * CU-8693bc9kc: Amend dependencies so as to be compatible with python 3.12 * Bump default spacy model version (to 3.8) * CU-8693bc9kc: Fix some typing issues due to numpy2 * CU-8693bc9kc: Fix some typing issues due to numpy2 (try 2) * CU-8693bc9kc: Change spacy models to 3.7.2 * CU-8693bc9kc: Pin numpy to v1 * CU-8693bc9kc: Fix numpy requirement comment * CU-8693bc9kc: Fix usage of old/deprecated assert methods in tests * CU-8693bc9kc: Update some requirement comments --- .github/workflows/main.yml | 2 +- install_requires.txt | 8 ++++---- medcat/cdb.py | 8 +++++--- medcat/vocab.py | 5 +++-- requirements-dev.txt | 2 +- requirements.txt | 2 +- setup.py | 1 + tests/utils/test_memory_optimiser.py | 8 ++++---- 8 files changed, 20 insertions(+), 16 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1b7232bb6..b620a0aab 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.9', '3.10', '3.11' ] + python-version: [ '3.9', '3.10', '3.11', '3.12' ] max-parallel: 4 steps: diff --git a/install_requires.txt b/install_requires.txt index 136728d89..623628804 100644 --- a/install_requires.txt +++ b/install_requires.txt @@ -1,11 +1,11 @@ -'numpy>=1.22.0,<1.26.0' # 1.22.0 is first to support python 3.11; post 1.26.0 there's issues with scipy +'numpy>=1.26.0,<2.0.0' # 1.26 is first to support 3.12; cannot support numpy2 due to spacy 'pandas>=1.4.2' # first to support 3.11 'gensim>=4.3.0,<5.0.0' # 5.3.0 is first to support 3.11; avoid major version bump -'spacy>=3.6.0,<3.8.0' # 3.8 only supports numpy2 which we can't use due to other dependencies -'scipy~=1.9.2' # 1.9.2 is first to support 3.11 +'spacy>=3.6.0,<4.0.0' # 
avoid major bump +'scipy>=1.9.2,<1.14.0' # 1.9.2 is first to support 3.11; 1.14.0 does not support 3.9 'transformers>=4.34.0,<5.0.0' # avoid major version bump 'accelerate>=0.23.0' # required by Trainer class in de-id -'torch>=1.13.0,<3.0.0' # 1.13 is first to support 3.11; 2.1.2 has been compatible, but avoid major 3.0.0 for now +'torch>=2.4.0,<3.0.0' # 2.4.0 is first to support 3.12; avoid major 3.0.0 for now 'tqdm>=4.27' 'scikit-learn>=1.1.3,<2.0.0' # 1.1.3 is first to supporrt 3.11; avoid major version bump 'dill>=0.3.6,<1.0.0' # stuff saved in 0.3.6/0.3.7 is not always compatible with 0.3.4/0.3.5; avoid major bump diff --git a/medcat/cdb.py b/medcat/cdb.py index 3961fc921..507e7d3b9 100644 --- a/medcat/cdb.py +++ b/medcat/cdb.py @@ -818,15 +818,17 @@ def most_similar(self, sim_data['sim_vectors_cuis'] = np.array(sim_vectors_cuis) # Select appropriate concepts - type_id_inds = np.arange(0, len(sim_data['sim_vectors_type_ids'])) + type_id_inds = np.arange(0, len(sim_data['sim_vectors_type_ids']), dtype=np.int32) if len(type_id_filter) > 0: - type_id_inds = np.array([], dtype=np.int32) + # NOTE: change in numpy 2 + type_id_inds = np.array([], dtype=np.int32) # type: ignore for type_id in type_id_filter: type_id_inds = np.union1d(np.array([ind for ind, type_ids in enumerate(sim_data['sim_vectors_type_ids']) if type_id in type_ids]), type_id_inds) cnt_inds = np.arange(0, len(sim_data['sim_vectors_counts'])) if min_cnt > 0: - cnt_inds = np.where(sim_data['sim_vectors_counts'] >= min_cnt)[0] + # NOTE: change in numpy 2 + cnt_inds = np.where(sim_data['sim_vectors_counts'] >= min_cnt)[0] # type: ignore # Intersect cnt and type_id inds = np.intersect1d(type_id_inds, cnt_inds) diff --git a/medcat/vocab.py b/medcat/vocab.py index 88350c945..b23b24190 100644 --- a/medcat/vocab.py +++ b/medcat/vocab.py @@ -1,6 +1,6 @@ import numpy as np import pickle -from typing import Optional, List, Dict +from typing import Optional, List, Dict, cast import logging @@ -216,7 +216,8 @@ 
def get_negative_samples(self, n: int = 6, ignore_punct_and_num: bool = False) - if len(self.cum_probs) == 0: self.make_unigram_table() random_vals = np.random.rand(n) - inds = np.searchsorted(self.cum_probs, random_vals).tolist() + # NOTE: there's a change in numpy + inds = cast(List[int], np.searchsorted(self.cum_probs, random_vals).tolist()) if ignore_punct_and_num: # Do not return anything that does not have letters in it diff --git a/requirements-dev.txt b/requirements-dev.txt index 6b954afc9..fe487b560 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,5 @@ . -https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.6.0/en_core_web_md-3.6.0-py3-none-any.whl +https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl flake8~=7.0.0 darglint~=1.8.1 mypy>=1.7.0,<1.12.0 diff --git a/requirements.txt b/requirements.txt index 45842566e..1e2b5efc0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ . 
-https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.6.0/en_core_web_md-3.6.0-py3-none-any.whl +https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.2/en_core_web_md-3.7.2-py3-none-any.whl diff --git a/setup.py b/setup.py index 08440b9ec..f46ce23ea 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], diff --git a/tests/utils/test_memory_optimiser.py b/tests/utils/test_memory_optimiser.py index 5f59f5274..472029d45 100644 --- a/tests/utils/test_memory_optimiser.py +++ b/tests/utils/test_memory_optimiser.py @@ -254,8 +254,8 @@ def test_optimisation_round_trip_cuis(self): with self.subTest(f'{name}'): self.assertIsInstance(before, dict) self.assertIsInstance(after, dict) - self.assertEquals(len(before), len(after)) - self.assertEquals(before, after) + self.assertEqual(len(before), len(after)) + self.assertEqual(before, after) def test_optimisation_round_trip_snames(self): snames_before = self.cdb.snames @@ -264,8 +264,8 @@ def test_optimisation_round_trip_snames(self): snames_after = self.cdb.snames self.assertIsInstance(snames_before, set) self.assertIsInstance(snames_after, set) - self.assertEquals(len(snames_before), len(snames_after)) - self.assertEquals(snames_before, snames_after) + self.assertEqual(len(snames_before), len(snames_after)) + self.assertEqual(snames_before, snames_after) def test_optimisation_round_trip_dirty(self): memory_optimiser.perform_optimisation(self.cdb)