diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 0000000..e355e12 --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,78 @@ +# Prerequisites + +The following software is currently required if you build from scratch: + +* git +* CMake >= 3.11 +* A reasonably new C++ compiler +* Python 3 +* OpenMP for shared memory parallelization (optional) + +# Building + +This sequence of commands lets you start from scratch: + +```bash +git clone https://gitlab.dune-project.org/dominic/covid-tda.git +cd covid-tda +mkdir build +cd build +cmake -DCMAKE_BUILD_TYPE=Release .. +make distance +make test +``` + +This should (successfully) build the executable `distance` in the `build` subdirectory, +and run the tests. +If `cmake` picks up the wrong compiler, it can be explicitly enforced by adding +`-DCMAKE_CXX_COMPILER=<path/to/compiler>` to the cmake call (in that case it is best +to remove the build directory and start from scratch). + +To enable OpenMP multi-threading, add `-DHAMMING_WITH_OPENMP=ON` to the cmake call. + +# Running + +The tool can be run from the command line: + +``` +./distance <path/to/fasta> n +``` + +Here, `<path/to/fasta>` must point to a fasta dataset, e.g. by putting `../data/example.fasta` +after putting a data file in the `data` directory. `n` is the maximum number of gene +sequences that the tool should read. This can be a smaller number than the number of +gene sequences in the dataset. + +The output is currently written to a file `distances.csv`. The output is a full +matrix, not only the triangular part of the symmetric matrix. + +If running in parallel with OpenMP, you can control the number of threads available +by changing the environment variable `OMP_NUM_THREADS`: + +``` +OMP_NUM_THREADS=8 ./distance <path/to/fasta> n +``` + +# Building the Python interface + +This sequence of commands should build the Python interface: + +``` +git clone --recursive https://gitlab.dune-project.org/dominic/covid-tda.git +cd covid-tda + +mkdir build +cd build +cmake -DCMAKE_BUILD_TYPE=Release .. 
+make hammingdist +``` + +If the wrong version of Python is found, it can be set by adding `-DPYTHON_EXECUTABLE=/path/to/python` +to the cmake command. + +# Deploying the Python interface + +The Python packages are currently built using GitHub Actions with the project +`cibuildwheel`. A wheel build and deploy can be triggered by creating a release +in GitHub. Of course, GitHub Actions should be enabled and the PyPI API access +token needs to be stored as a secret on the project. diff --git a/README.md b/README.md index 714da3f..d65fded 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,20 @@ A small C++ tool to calculate pairwise distances between gene sequences given in fasta format. [![DOI](https://zenodo.org/badge/308676358.svg)](https://zenodo.org/badge/latestdoi/308676358) +[![pypi releases](https://img.shields.io/pypi/v/hammingdist.svg)](https://pypi.org/project/hammingdist) +[![python versions](https://img.shields.io/pypi/pyversions/hammingdist)](https://pypi.org/project/hammingdist) # Python interface To use the Python interface, you should install it from PyPI: -``` +```bash python -m pip install hammingdist ``` Then, you can e.g. use it in the following way from Python: -``` +```python import hammingdist # To see the different optional arguments available: @@ -47,89 +49,7 @@ data.dump_sequence_indices("indices.txt") # Finally, we can pass the data as a list of strings in Python: data = hammingdist.from_stringlist(["ACGTACGT", "ACGTAGGT", "ATTTACGT"]) - -# When in doubt, the internal data structures of the DataSet object can be inspected: -print(data._distances) ``` The Python package is currently built without OpenMP support, but this can -be changed upon request. 
- -# Prerequisites - -The following software is currently required if you build from scratch: - -* git -* CMake >= 3.11 -* A reasonably new C++ compiler -* Python 3 -* OpenMP for shared memory parallelization (optional) - -# Building - -This sequence of commands lets you start from scratch: - -``` -git clone https://gitlab.dune-project.org/dominic/covid-tda.git -cd covid-tda -mkdir build -cd build -cmake -DCMAKE_BUILD_TYPE=Release .. -make distance -make test -``` - -This should (successfully) build the executable `distance` in the `build` subdirectory, -and run the tests. -If `cmake` picks up the wrong compiler, it can be explicitly enforced by adding -`-DCMAKE_CXX_COMPILER=` to the cmake call (in that case it is best -to remove the build directory and start from scratch). - -To enable OpenMP multi-threading, add `-DHAMMING_WITH_OPENMP=ON` to the cmake call. - -# Running - -The tool can be run from the command line: - -``` -./distance -``` - -Here, `` must point to a fasta dataset, e.g. by putting `../data/example.fasta` -after putting a data file in the `data` directory. `n` is the maximum number of gene -sequences that the tool should read. This can be a smaller number than the number of -gene sequences in the dataset. - -The output is currently written to a file `distances.csv`. The output is a full -matrix, not only the triangular part of the symmetric matrix. - -If running in parallel with OpenMP, you can control the number of threads available -by changing the environment variable `OMP_NUM_THREADS`: - -``` -OMP_NUM_THREADS=8 ./distance -``` - -# Building the Python interface - -This sequence of command should build the Python interface: - -``` -git clone --recursive https://gitlab.dune-project.org/dominic/covid-tda.git -cd covid-tda - -mkdir build -cd build -cmake -DCMAKE_BUILD_TYPE=Release .. -make hammingdist -``` - -If the wrong version of Python is found, it can be set by adding `-DPYTHON_EXECUTABLE=\path\to\python` -to the cmake command. 
- -# Deploying the Python interface - -The Python packages are currently built using Github Actions with the project -`ciwheelbuild`. A wheel build and deploy can be triggered by creating a release -in Github. Of course, Github Actions should be enabled and the PyPI API access -token needs to be stored a secret on the project. +be changed upon request. \ No newline at end of file diff --git a/setup.py b/setup.py index c0b7968..1f38495 100644 --- a/setup.py +++ b/setup.py @@ -71,20 +71,34 @@ def build_extension(self, ext): subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) subprocess.check_call(['cmake', '--build', '.', '--target', 'hammingdist'] + build_args, cwd=self.build_temp) +from os import path +with open(path.join(path.abspath(path.dirname(__file__)), "README.md")) as f: + long_description = f.read() setup( name='hammingdist', - version='0.10.0', + version='0.11.0', author='Dominic Kempf, Liam Keegan', author_email='ssc@iwr.uni-heidelberg.de', description='A fast tool to calculate Hamming distances', - long_description='', + url="https://github.com/ssciwr/hammingdist", + long_description=long_description, + long_description_content_type="text/markdown", ext_modules=[CMakeExtension('hammingdist')], cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, classifiers=[ - "Programming Language :: Python :: 3", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Programming Language :: C++", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", ], )