diff --git a/docs/releases.md b/docs/releases.md
index af878f1..22660fe 100644
--- a/docs/releases.md
+++ b/docs/releases.md
@@ -1,4 +1,4 @@
-v0.3.1
+v0.3.2
 - Fix exploding memory usage when using `top_n`
 
 v0.3.0
diff --git a/polyfuzz/__init__.py b/polyfuzz/__init__.py
index 2d8dd79..884f6b1 100644
--- a/polyfuzz/__init__.py
+++ b/polyfuzz/__init__.py
@@ -1,2 +1,2 @@
 from .polyfuzz import PolyFuzz
-__version__ = "0.3.1"
+__version__ = "0.3.2"
diff --git a/polyfuzz/models/_utils.py b/polyfuzz/models/_utils.py
index de901a0..99f7d85 100644
--- a/polyfuzz/models/_utils.py
+++ b/polyfuzz/models/_utils.py
@@ -86,7 +86,7 @@ def cosine_similarity(from_vector: np.ndarray,
             similarity_matrix = similarity_matrix.tocsr()
 
         indices = _top_n_idx_sparse(similarity_matrix, top_n)
-        similarities = _top_n_similarities_sparse(similarity_matrix, top_n, indices)
+        similarities = _top_n_similarities_sparse(similarity_matrix, indices)
         indices = np.array(np.nan_to_num(np.array(indices, dtype=np.float), nan=0), dtype=np.int)
 
     # Faster than knn and slower than sparse but uses more memory
@@ -132,11 +132,11 @@ def _top_n_idx_sparse(matrix, n):
     return np.array(top_n_idx)
 
 
-def _top_n_similarities_sparse(matrix, n, indices):
+def _top_n_similarities_sparse(matrix, indices):
     """ Return similarity scores of top n values in each row of a sparse matrix """
     similarity_scores = []
     for row, values in enumerate(indices):
-        scores = [round(matrix[row, value], n) if value is not None else 0 for value in values]
+        scores = [round(matrix[row, value], 3) if value is not None else 0 for value in values]
         similarity_scores.append(scores)
     similarity_scores = np.array(similarity_scores).T
     return similarity_scores
diff --git a/setup.py b/setup.py
index a06ab0d..54b23c8 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,7 @@ setup(
 
     name="polyfuzz",
     packages=find_packages(exclude=["notebooks", "docs"]),
-    version="0.3.1",
+    version="0.3.2",
     author="Maarten Grootendorst",
     author_email="maartengrootendorst@gmail.com",
     description="PolyFuzz performs fuzzy string matching, grouping, and evaluation.",
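
Note on the `polyfuzz/models/_utils.py` change above: `_top_n_similarities_sparse` drops its `n` parameter and now rounds scores to a fixed 3 decimals, instead of reusing `top_n` as the rounding precision. Below is a minimal sketch of the updated call pattern, assuming only the private helpers and signatures shown in the diff; the toy matrix and its values are invented for illustration and are not part of the patch.

    import numpy as np
    from scipy.sparse import csr_matrix

    # Private helpers from the patched module; module path and signatures as in the diff above.
    from polyfuzz.models._utils import _top_n_idx_sparse, _top_n_similarities_sparse

    # Toy similarity matrix (illustrative values only).
    matrix = csr_matrix(np.array([[0.91234, 0.2, 0.0],
                                  [0.0,     0.55555, 0.3]]))

    # Column indices of the top-2 scores per row (padded with None where a row
    # has fewer than 2 stored entries, per the `is not None` check in the helper).
    indices = _top_n_idx_sparse(matrix, 2)

    # New signature: no `n` argument; scores are rounded to 3 decimals
    # (e.g. 0.91234 -> 0.912) rather than to `top_n` decimals as before.
    similarities = _top_n_similarities_sparse(matrix, indices)
    print(similarities)

Hard-coding the precision means the number of requested matches no longer silently changes how scores are rounded.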