From 176fd203ada1d5ce017922459153207f419567c9 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 19:02:46 +0100 Subject: [PATCH 01/12] Update installation instructions for module dependencies --- mltb2/data.py | 5 +++-- mltb2/fasttext.py | 6 ++++-- mltb2/files.py | 5 +++-- mltb2/md.py | 5 +++-- mltb2/openai.py | 5 +++-- mltb2/optuna.py | 6 ++++-- mltb2/plot.py | 6 ++++-- mltb2/somajo.py | 6 ++++-- mltb2/somajo_transformers.py | 6 ++++-- mltb2/transformers.py | 6 ++++-- 10 files changed, 36 insertions(+), 20 deletions(-) diff --git a/mltb2/data.py b/mltb2/data.py index de7a277..0be112e 100644 --- a/mltb2/data.py +++ b/mltb2/data.py @@ -6,8 +6,9 @@ """Data loading module. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[data]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[data]`` """ import os diff --git a/mltb2/fasttext.py b/mltb2/fasttext.py index 851ad8b..5fc3453 100644 --- a/mltb2/fasttext.py +++ b/mltb2/fasttext.py @@ -5,8 +5,10 @@ """fastText specific module. This module is based on `fastText `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[fasttext]`` + +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[fasttext]`` """ import os diff --git a/mltb2/files.py b/mltb2/files.py index 468fbac..2d9be8d 100644 --- a/mltb2/files.py +++ b/mltb2/files.py @@ -4,8 +4,9 @@ """File utils module. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[files]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[files]`` """ diff --git a/mltb2/md.py b/mltb2/md.py index 5d21e5b..62f2c6d 100644 --- a/mltb2/md.py +++ b/mltb2/md.py @@ -5,8 +5,9 @@ """Markdown specific module. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[md]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[md]`` """ import re diff --git a/mltb2/openai.py b/mltb2/openai.py index bdc3eb3..749f332 100644 --- a/mltb2/openai.py +++ b/mltb2/openai.py @@ -4,8 +4,9 @@ """OpenAI specific module. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[openai]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[openai]`` """ diff --git a/mltb2/optuna.py b/mltb2/optuna.py index 75689c3..125d7f8 100644 --- a/mltb2/optuna.py +++ b/mltb2/optuna.py @@ -5,8 +5,10 @@ """Optuna specific module. This module is based on `Optuna `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[optuna]`` + +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[optuna]`` """ diff --git a/mltb2/plot.py b/mltb2/plot.py index 875ace8..2ef3d09 100644 --- a/mltb2/plot.py +++ b/mltb2/plot.py @@ -5,8 +5,10 @@ """Plot tools module. This module is based on `Matplotlib `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[plot]`` + +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[plot]`` """ from typing import Optional diff --git a/mltb2/somajo.py b/mltb2/somajo.py index 5b97c3a..7143af2 100644 --- a/mltb2/somajo.py +++ b/mltb2/somajo.py @@ -5,8 +5,10 @@ """SoMaJo specific module. This module is based on `SoMaJo `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[somajo]`` + +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[somajo]`` """ diff --git a/mltb2/somajo_transformers.py b/mltb2/somajo_transformers.py index 18b76ef..9b8b78d 100644 --- a/mltb2/somajo_transformers.py +++ b/mltb2/somajo_transformers.py @@ -8,8 +8,10 @@ This module is based on `Hugging Face Transformers `_ and `SoMaJo `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[somajo_transformers]`` + +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[somajo_transformers]`` """ diff --git a/mltb2/transformers.py b/mltb2/transformers.py index fcea39f..356a3e1 100644 --- a/mltb2/transformers.py +++ b/mltb2/transformers.py @@ -6,8 +6,10 @@ This module is based on `Hugging Face Transformers `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[transformers]`` + +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[transformers]`` """ import os From 67cf90180dc2856cc3fb18d3f27cbf2e7904d974 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 19:24:02 +0100 Subject: [PATCH 02/12] Add documentation links for platformdirs package and update data loading module --- docs/source/conf.py | 1 + mltb2/data.py | 12 +++++++++++- mltb2/files.py | 8 ++++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 85e4d41..1404a2a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,6 +73,7 @@ "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None), "numpy": ("https://numpy.org/doc/stable/", None), "git": ("https://gitpython.readthedocs.io/en/stable/", None), + "platformdirs": ("https://platformdirs.readthedocs.io/en/latest/", None), } # html_logo = "imgs/xxx.png" diff --git a/mltb2/data.py b/mltb2/data.py index 0be112e..dbe3eab 100644 --- a/mltb2/data.py +++ b/mltb2/data.py @@ -4,7 +4,17 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Data loading module. +"""This module offers tools for loading data. + +The following tabular data sets are supported: + +- colon: ``_ +- prostate: ``_ +- leukemia_big: ``_ + +After loading the data from the internet it is parsed, converted and +cached in the mltb2 data directory. +This data directory is determined by :func:`mltb2.files.get_and_create_mltb2_data_dir`. Hint: Use pip to install the necessary dependencies for this module: diff --git a/mltb2/files.py b/mltb2/files.py index 2d9be8d..ba65cca 100644 --- a/mltb2/files.py +++ b/mltb2/files.py @@ -19,11 +19,15 @@ def get_and_create_mltb2_data_dir(mltb2_base_data_dir: Optional[str] = None) -> str: - """Return and create mltb data dir. + """Return and create a data dir for mltb2. + + The exact directory is given by the ``mltb2_base_data_dir`` as the base folder + and then the folder ``mltb2`` is appended. Args: mltb2_base_data_dir: The base data directory. If ``None`` the default - user data directory is used. + user data directory is used. The default user data directory is + determined by :func:`platformdirs.user_data_dir`. Returns: The directory path. From a64b91fa306ac6a43d95ecda69ddbc0cbd41b8bc Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 20:28:49 +0100 Subject: [PATCH 03/12] Add utility functions module doc --- mltb2/files.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mltb2/files.py b/mltb2/files.py index ba65cca..1eb0e57 100644 --- a/mltb2/files.py +++ b/mltb2/files.py @@ -4,6 +4,8 @@ """File utils module. +This module provides utility functions for other modules. + Hint: Use pip to install the necessary dependencies for this module: ``pip install mltb2[files]`` From 860f39e21ecfe50b23eb8c369dabd6ab75728f26 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 20:31:52 +0100 Subject: [PATCH 04/12] Update supported tabular data sets doc in data.py --- mltb2/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mltb2/data.py b/mltb2/data.py index dbe3eab..3613d38 100644 --- a/mltb2/data.py +++ b/mltb2/data.py @@ -6,7 +6,7 @@ """This module offers tools for loading data. -The following tabular data sets are supported: +The following tabular data sets from the biological and medical domain are supported: - colon: ``_ - prostate: ``_ From 488ab2aa9990ec491450002f1827cd2fd4f67430 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 20:34:16 +0100 Subject: [PATCH 05/12] Refactor fastText module documentation --- mltb2/fasttext.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mltb2/fasttext.py b/mltb2/fasttext.py index 5fc3453..8d2dda7 100644 --- a/mltb2/fasttext.py +++ b/mltb2/fasttext.py @@ -2,9 +2,7 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""fastText specific module. - -This module is based on `fastText `_. +"""This module offers tools for `fastText `_. Hint: Use pip to install the necessary dependencies for this module: From e8f6709ba1a26e444f07f520b5041b5b9cecc030 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 20:36:51 +0100 Subject: [PATCH 06/12] Refactor Optuna module documentation --- mltb2/optuna.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mltb2/optuna.py b/mltb2/optuna.py index 125d7f8..ec8e830 100644 --- a/mltb2/optuna.py +++ b/mltb2/optuna.py @@ -2,9 +2,7 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Optuna specific module. - -This module is based on `Optuna `_. +"""This module offers tools for `Optuna `_. Hint: Use pip to install the necessary dependencies for this module: From 3a136d69837ef2aea40a8868bbb9901b78e50973 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 20:46:22 +0100 Subject: [PATCH 07/12] Update documentation links and add hints in plot.py --- docs/source/conf.py | 3 ++- mltb2/plot.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 1404a2a..7e00184 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,10 +70,11 @@ # "transformers": ("https://huggingface.co/transformers/", None), "optuna": ("https://optuna.readthedocs.io/en/stable/", None), "mlflow": ("https://www.mlflow.org/docs/latest/", None), - "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/", None), "numpy": ("https://numpy.org/doc/stable/", None), "git": ("https://gitpython.readthedocs.io/en/stable/", None), "platformdirs": ("https://platformdirs.readthedocs.io/en/latest/", None), + #"matplotlib": ("https://matplotlib.org/stable/", None), } # html_logo = "imgs/xxx.png" diff --git a/mltb2/plot.py b/mltb2/plot.py index 2ef3d09..db7cf46 100644 --- a/mltb2/plot.py +++ b/mltb2/plot.py @@ -33,7 +33,9 @@ def twin_axes_timeseries_plot( """Create twin axes timeseries plot. Plots two different timeseries curves in one diagram but two different y-axes. - This function does not call `matplotlib.pyplot.plot()`. + + Hint: + This function does not use `matplotlib.pyplot.plot`. Args: values_1: (``array_like``) Values for the first timeseries curve. @@ -90,7 +92,8 @@ def boxplot( ): """Prints one or more boxplots in a single diagram. - This function does not call `matplotlib.pyplot.plot()`. + Hint: + This function does not use `matplotlib.pyplot.plot`. Args: values: Values for the boxplot(s). @@ -133,7 +136,8 @@ def boxplot_dict( ): """Create boxplot form dictionary. - This function does not call `matplotlib.pyplot.plot()`. + Hint: + This function does not use `matplotlib.pyplot.plot`. Args: values_dict: Dictionary with values for the boxplot(s). @@ -154,7 +158,7 @@ def boxplot_dict( def save_last_figure(filename): - """Saves the last plot. + """Saves the last plot made by Matplotlib. For jupyter notebooks this has to be called in the same cell that created the plot. """ From b56e679d9575016054fc6d83704ef733968f7a3c Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 20:51:34 +0100 Subject: [PATCH 08/12] Update module description in somajo_transformers.py --- mltb2/somajo_transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mltb2/somajo_transformers.py b/mltb2/somajo_transformers.py index 9b8b78d..82a4b3c 100644 --- a/mltb2/somajo_transformers.py +++ b/mltb2/somajo_transformers.py @@ -3,7 +3,7 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Hugging Face Transformers and SoMaJo specific module. +"""This module offers Hugging Face Transformers and SoMaJo specific tools. This module is based on `Hugging Face Transformers `_ and From 47f630fcf789fb836ac9917a399d988d52abacf3 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 20:51:38 +0100 Subject: [PATCH 09/12] Update SoMaJo module description --- mltb2/somajo.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mltb2/somajo.py b/mltb2/somajo.py index 7143af2..29556fb 100644 --- a/mltb2/somajo.py +++ b/mltb2/somajo.py @@ -2,9 +2,7 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""SoMaJo specific module. - -This module is based on `SoMaJo `_. +"""This module offers `SoMaJo `_ specific tools. Hint: Use pip to install the necessary dependencies for this module: From b793692e6cce65ba4154eb3e9e9977ab1260bc6f Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 20:51:57 +0100 Subject: [PATCH 10/12] Refactor text module doc --- mltb2/text.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mltb2/text.py b/mltb2/text.py index 2138347..d721e87 100644 --- a/mltb2/text.py +++ b/mltb2/text.py @@ -2,7 +2,14 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Text specific module.""" +"""This module offers text specific tools. + +It offers the following functionality: + +- detect or clean invisible characters +- detect or replace special whitespaces +- remove duplicate whitespaces +""" import re from typing import Dict, Final, Pattern, Tuple From 3ae38cf43c31b17b99341e545c48b44e4bb1d72c Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 20:53:40 +0100 Subject: [PATCH 11/12] Refactor Hugging Face Transformers module documentation --- mltb2/transformers.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mltb2/transformers.py b/mltb2/transformers.py index 356a3e1..882e37f 100644 --- a/mltb2/transformers.py +++ b/mltb2/transformers.py @@ -2,10 +2,7 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Hugging Face Transformers specific module. - -This module is based on -`Hugging Face Transformers `_. +"""This module offers `Hugging Face Transformers `_ specific tools. Hint: Use pip to install the necessary dependencies for this module: From 03ad606e2bb0ddca5f58ac869125c8d3994b36ed Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Thu, 21 Dec 2023 21:04:46 +0100 Subject: [PATCH 12/12] fix linting --- Makefile | 15 +++++++-------- docs/source/conf.py | 9 ++++++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 95d5ca1..b764be7 100644 --- a/Makefile +++ b/Makefile @@ -1,21 +1,20 @@ src := mltb2 -test-src := tests -other-src := setup.py docs +other-src := tests docs check: - poetry run black $(src) $(test-src) --check --diff - poetry run mypy --install-types --non-interactive $(src) $(test-src) - poetry run ruff $(src) $(test-src) + poetry run black $(src) $(other-src) --check --diff + poetry run mypy --install-types --non-interactive $(src) $(other-src) + poetry run ruff $(src) $(other-src) poetry run mdformat --check --number . poetry run make -C docs clean doctest format: - poetry run black $(src) $(test-src) - poetry run ruff $(src) $(test-src) --fix + poetry run black $(src) $(other-src) + poetry run ruff $(src) $(other-src) --fix poetry run mdformat --number . test: - poetry run pytest $(test-src) + poetry run pytest $(other-src) sphinx: poetry run make -C docs clean html diff --git a/docs/source/conf.py b/docs/source/conf.py index 7e00184..35a7530 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023 Philip May +# This software is distributed under the terms of the MIT license +# which is available at https://opensource.org/licenses/MIT + """Configuration for the Sphinx documentation builder.""" # Configuration file for the Sphinx documentation builder. @@ -17,11 +21,10 @@ # sys.path.insert(0, os.path.abspath('.')) from typing import List - # -- Project information ----------------------------------------------------- project = "MLTB2" -copyright = "2023, Philip May, Deutsche Telekom AG" +copyright = "2023, Philip May, Deutsche Telekom AG" # noqa: A001 author = "Philip May" @@ -74,7 +77,7 @@ "numpy": ("https://numpy.org/doc/stable/", None), "git": ("https://gitpython.readthedocs.io/en/stable/", None), "platformdirs": ("https://platformdirs.readthedocs.io/en/latest/", None), - #"matplotlib": ("https://matplotlib.org/stable/", None), + # "matplotlib": ("https://matplotlib.org/stable/", None), } # html_logo = "imgs/xxx.png"