Skip to content

Commit

Permalink
CU-8693bc9kc: Add python 3.12 support (#511)
Browse files Browse the repository at this point in the history
* CU-8693bc9kc: Add python 3.12 support

* CU-8693bc9kc: Amend dependencies so as to be compatible with python 3.12

* Bump default spacy model version (to 3.8)

* CU-8693bc9kc: Fix some typing issues due to numpy2

* CU-8693bc9kc: Fix some typing issues due to numpy2 (try 2)

* CU-8693bc9kc: Change spacy models to 3.7.2

* CU-8693bc9kc: Pin numpy to v1

* CU-8693bc9kc: Fix numpy requirement comment

* CU-8693bc9kc: Fix usage of old/deprecated assert methods in tests

* CU-8693bc9kc: Update some requirement comments
  • Loading branch information
mart-r authored Jan 7, 2025
1 parent 00c0dd0 commit 1ee4821
Show file tree
Hide file tree
Showing 8 changed files with 20 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '3.9', '3.10', '3.11' ]
python-version: [ '3.9', '3.10', '3.11', '3.12' ]
max-parallel: 4

steps:
Expand Down
8 changes: 4 additions & 4 deletions install_requires.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
'numpy>=1.22.0,<1.26.0' # 1.22.0 is first to support python 3.11; post 1.26.0 there's issues with scipy
'numpy>=1.26.0,<2.0.0' # 1.26 is first to support 3.12; cannot support numpy2 due to spacy
'pandas>=1.4.2' # first to support 3.11
'gensim>=4.3.0,<5.0.0' # 4.3.0 is first to support 3.11; avoid major version bump
'spacy>=3.6.0,<3.8.0' # 3.8 only supports numpy2 which we can't use due to other dependencies
'scipy~=1.9.2' # 1.9.2 is first to support 3.11
'spacy>=3.6.0,<4.0.0' # avoid major bump
'scipy>=1.9.2,<1.14.0' # 1.9.2 is first to support 3.11; 1.14.0 does not support 3.9
'transformers>=4.34.0,<5.0.0' # avoid major version bump
'accelerate>=0.23.0' # required by Trainer class in de-id
'torch>=1.13.0,<3.0.0' # 1.13 is first to support 3.11; 2.1.2 has been compatible, but avoid major 3.0.0 for now
'torch>=2.4.0,<3.0.0' # 2.4.0 is first to support 3.12; avoid major 3.0.0 for now
'tqdm>=4.27'
'scikit-learn>=1.1.3,<2.0.0' # 1.1.3 is first to support 3.11; avoid major version bump
'dill>=0.3.6,<1.0.0' # stuff saved in 0.3.6/0.3.7 is not always compatible with 0.3.4/0.3.5; avoid major bump
Expand Down
8 changes: 5 additions & 3 deletions medcat/cdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,15 +818,17 @@ def most_similar(self,
sim_data['sim_vectors_cuis'] = np.array(sim_vectors_cuis)

# Select appropriate concepts
type_id_inds = np.arange(0, len(sim_data['sim_vectors_type_ids']))
type_id_inds = np.arange(0, len(sim_data['sim_vectors_type_ids']), dtype=np.int32)
if len(type_id_filter) > 0:
type_id_inds = np.array([], dtype=np.int32)
# NOTE: change in numpy 2
type_id_inds = np.array([], dtype=np.int32) # type: ignore
for type_id in type_id_filter:
type_id_inds = np.union1d(np.array([ind for ind, type_ids in enumerate(sim_data['sim_vectors_type_ids']) if type_id in type_ids]),
type_id_inds)
cnt_inds = np.arange(0, len(sim_data['sim_vectors_counts']))
if min_cnt > 0:
cnt_inds = np.where(sim_data['sim_vectors_counts'] >= min_cnt)[0]
# NOTE: change in numpy 2
cnt_inds = np.where(sim_data['sim_vectors_counts'] >= min_cnt)[0] # type: ignore
# Intersect cnt and type_id
inds = np.intersect1d(type_id_inds, cnt_inds)

Expand Down
5 changes: 3 additions & 2 deletions medcat/vocab.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
import pickle
from typing import Optional, List, Dict
from typing import Optional, List, Dict, cast
import logging


Expand Down Expand Up @@ -216,7 +216,8 @@ def get_negative_samples(self, n: int = 6, ignore_punct_and_num: bool = False) -
if len(self.cum_probs) == 0:
self.make_unigram_table()
random_vals = np.random.rand(n)
inds = np.searchsorted(self.cum_probs, random_vals).tolist()
# NOTE: explicit cast needed because of typing changes in numpy 2
inds = cast(List[int], np.searchsorted(self.cum_probs, random_vals).tolist())

if ignore_punct_and_num:
# Do not return anything that does not have letters in it
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.
https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.6.0/en_core_web_md-3.6.0-py3-none-any.whl
https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl
flake8~=7.0.0
darglint~=1.8.1
mypy>=1.7.0,<1.12.0
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
.
https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.6.0/en_core_web_md-3.6.0-py3-none-any.whl
https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.2/en_core_web_md-3.7.2-py3-none-any.whl
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
Expand Down
8 changes: 4 additions & 4 deletions tests/utils/test_memory_optimiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ def test_optimisation_round_trip_cuis(self):
with self.subTest(f'{name}'):
self.assertIsInstance(before, dict)
self.assertIsInstance(after, dict)
self.assertEquals(len(before), len(after))
self.assertEquals(before, after)
self.assertEqual(len(before), len(after))
self.assertEqual(before, after)

def test_optimisation_round_trip_snames(self):
snames_before = self.cdb.snames
Expand All @@ -264,8 +264,8 @@ def test_optimisation_round_trip_snames(self):
snames_after = self.cdb.snames
self.assertIsInstance(snames_before, set)
self.assertIsInstance(snames_after, set)
self.assertEquals(len(snames_before), len(snames_after))
self.assertEquals(snames_before, snames_after)
self.assertEqual(len(snames_before), len(snames_after))
self.assertEqual(snames_before, snames_after)

def test_optimisation_round_trip_dirty(self):
memory_optimiser.perform_optimisation(self.cdb)
Expand Down

0 comments on commit 1ee4821

Please sign in to comment.