diff --git a/Makefile b/Makefile index 95d5ca1..b764be7 100644 --- a/Makefile +++ b/Makefile @@ -1,21 +1,20 @@ src := mltb2 -test-src := tests -other-src := setup.py docs +other-src := tests docs check: - poetry run black $(src) $(test-src) --check --diff - poetry run mypy --install-types --non-interactive $(src) $(test-src) - poetry run ruff $(src) $(test-src) + poetry run black $(src) $(other-src) --check --diff + poetry run mypy --install-types --non-interactive $(src) $(other-src) + poetry run ruff $(src) $(other-src) poetry run mdformat --check --number . poetry run make -C docs clean doctest format: - poetry run black $(src) $(test-src) - poetry run ruff $(src) $(test-src) --fix + poetry run black $(src) $(other-src) + poetry run ruff $(src) $(other-src) --fix poetry run mdformat --number . test: - poetry run pytest $(test-src) + poetry run pytest $(other-src) sphinx: poetry run make -C docs clean html diff --git a/docs/source/conf.py b/docs/source/conf.py index 85e4d41..35a7530 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023 Philip May +# This software is distributed under the terms of the MIT license +# which is available at https://opensource.org/licenses/MIT + """Configuration for the Sphinx documentation builder.""" # Configuration file for the Sphinx documentation builder. 
@@ -17,11 +21,10 @@ # sys.path.insert(0, os.path.abspath('.')) from typing import List - # -- Project information ----------------------------------------------------- project = "MLTB2" -copyright = "2023, Philip May, Deutsche Telekom AG" +copyright = "2023, Philip May, Deutsche Telekom AG" # noqa: A001 author = "Philip May" @@ -70,9 +73,11 @@ # "transformers": ("https://huggingface.co/transformers/", None), "optuna": ("https://optuna.readthedocs.io/en/stable/", None), "mlflow": ("https://www.mlflow.org/docs/latest/", None), - "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/", None), "numpy": ("https://numpy.org/doc/stable/", None), "git": ("https://gitpython.readthedocs.io/en/stable/", None), + "platformdirs": ("https://platformdirs.readthedocs.io/en/latest/", None), + # "matplotlib": ("https://matplotlib.org/stable/", None), } # html_logo = "imgs/xxx.png" diff --git a/mltb2/data.py b/mltb2/data.py index de7a277..3613d38 100644 --- a/mltb2/data.py +++ b/mltb2/data.py @@ -4,10 +4,21 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Data loading module. +"""This module offers tools for loading data. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[data]`` +The following tabular data sets from the biological and medical domain are supported: + +- colon: ``_ +- prostate: ``_ +- leukemia_big: ``_ + +After loading the data from the internet it is parsed, converted and +cached in the mltb2 data directory. +This data directory is determined by :func:`mltb2.files.get_and_create_mltb2_data_dir`. 
+ +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[data]`` """ import os diff --git a/mltb2/fasttext.py b/mltb2/fasttext.py index 851ad8b..8d2dda7 100644 --- a/mltb2/fasttext.py +++ b/mltb2/fasttext.py @@ -2,11 +2,11 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""fastText specific module. +"""This module offers tools for `fastText `_. -This module is based on `fastText `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[fasttext]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[fasttext]`` """ import os diff --git a/mltb2/files.py b/mltb2/files.py index 468fbac..1eb0e57 100644 --- a/mltb2/files.py +++ b/mltb2/files.py @@ -4,8 +4,11 @@ """File utils module. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[files]`` +This module provides utility functions for other modules. + +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[files]`` """ @@ -18,11 +21,15 @@ def get_and_create_mltb2_data_dir(mltb2_base_data_dir: Optional[str] = None) -> str: - """Return and create mltb data dir. + """Return and create a data dir for mltb2. + + The exact directory is given by the ``mltb2_base_data_dir`` as the base folder + and then the folder ``mltb2`` is appended. Args: mltb2_base_data_dir: The base data directory. If ``None`` the default - user data directory is used. + user data directory is used. The default user data directory is + determined by :func:`platformdirs.user_data_dir`. Returns: The directory path. diff --git a/mltb2/md.py b/mltb2/md.py index 5d21e5b..62f2c6d 100644 --- a/mltb2/md.py +++ b/mltb2/md.py @@ -5,8 +5,9 @@ """Markdown specific module. 
-Use pip to install the necessary dependencies for this module: -``pip install mltb2[md]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[md]`` """ import re diff --git a/mltb2/openai.py b/mltb2/openai.py index bdc3eb3..749f332 100644 --- a/mltb2/openai.py +++ b/mltb2/openai.py @@ -4,8 +4,9 @@ """OpenAI specific module. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[openai]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[openai]`` """ diff --git a/mltb2/optuna.py b/mltb2/optuna.py index 75689c3..ec8e830 100644 --- a/mltb2/optuna.py +++ b/mltb2/optuna.py @@ -2,11 +2,11 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Optuna specific module. +"""This module offers tools for `Optuna `_. -This module is based on `Optuna `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[optuna]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[optuna]`` """ diff --git a/mltb2/plot.py b/mltb2/plot.py index 875ace8..db7cf46 100644 --- a/mltb2/plot.py +++ b/mltb2/plot.py @@ -5,8 +5,10 @@ """Plot tools module. This module is based on `Matplotlib `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[plot]`` + +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[plot]`` """ from typing import Optional @@ -31,7 +33,9 @@ def twin_axes_timeseries_plot( """Create twin axes timeseries plot. Plots two different timeseries curves in one diagram but two different y-axes. - This function does not call `matplotlib.pyplot.plot()`. + + Hint: + This function does not use `matplotlib.pyplot.plot`. Args: values_1: (``array_like``) Values for the first timeseries curve. 
@@ -88,7 +92,8 @@ def boxplot( ): """Prints one or more boxplots in a single diagram. - This function does not call `matplotlib.pyplot.plot()`. + Hint: + This function does not use `matplotlib.pyplot.plot`. Args: values: Values for the boxplot(s). @@ -131,7 +136,8 @@ def boxplot_dict( ): """Create boxplot from dictionary. - This function does not call `matplotlib.pyplot.plot()`. + Hint: + This function does not use `matplotlib.pyplot.plot`. Args: values_dict: Dictionary with values for the boxplot(s). @@ -152,7 +158,7 @@ def boxplot_dict( def save_last_figure(filename): - """Saves the last plot. + """Saves the last plot made by Matplotlib. For jupyter notebooks this has to be called in the same cell that created the plot. """ diff --git a/mltb2/somajo.py b/mltb2/somajo.py index 5b97c3a..29556fb 100644 --- a/mltb2/somajo.py +++ b/mltb2/somajo.py @@ -2,11 +2,11 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""SoMaJo specific module. +"""This module offers `SoMaJo `_ specific tools. -This module is based on `SoMaJo `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[somajo]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[somajo]`` """ diff --git a/mltb2/somajo_transformers.py b/mltb2/somajo_transformers.py index 18b76ef..82a4b3c 100644 --- a/mltb2/somajo_transformers.py +++ b/mltb2/somajo_transformers.py @@ -3,13 +3,15 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Hugging Face Transformers and SoMaJo specific module. +"""This module offers Hugging Face Transformers and SoMaJo specific tools. This module is based on `Hugging Face Transformers `_ and `SoMaJo `_.
-Use pip to install the necessary dependencies for this module: -``pip install mltb2[somajo_transformers]`` + +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[somajo_transformers]`` """ diff --git a/mltb2/text.py b/mltb2/text.py index 2138347..d721e87 100644 --- a/mltb2/text.py +++ b/mltb2/text.py @@ -2,7 +2,14 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Text specific module.""" +"""This module offers text specific tools. + +It offers the following functionality: + +- detect or clean invisible characters +- detect or replace special whitespaces +- remove duplicate whitespaces +""" import re from typing import Dict, Final, Pattern, Tuple diff --git a/mltb2/transformers.py b/mltb2/transformers.py index fcea39f..882e37f 100644 --- a/mltb2/transformers.py +++ b/mltb2/transformers.py @@ -2,12 +2,11 @@ # This software is distributed under the terms of the MIT license # which is available at https://opensource.org/licenses/MIT -"""Hugging Face Transformers specific module. +"""This module offers `Hugging Face Transformers `_ specific tools. -This module is based on -`Hugging Face Transformers `_. -Use pip to install the necessary dependencies for this module: -``pip install mltb2[transformers]`` +Hint: + Use pip to install the necessary dependencies for this module: + ``pip install mltb2[transformers]`` """ import os