From 325b495a5ddcd27317da03d1fcc4578c57e2b102 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 7 Mar 2024 11:55:07 +0100 Subject: [PATCH] Add API documentation This PR adds the API documentation. To see the doc website locally, run `mkdocs serve` in the root folder of this repo. The online website will be set up when this PR is approved. --- docs/api/antismash.md | 1 + docs/api/arranger.md | 2 ++ docs/api/bigscape.md | 2 ++ docs/api/genomics.md | 1 + docs/api/genomics_abc.md | 1 + docs/api/genomics_utils.md | 1 + docs/api/gnps.md | 13 +++++++++++ docs/api/loader.md | 2 ++ docs/api/metabolomics.md | 1 + docs/api/metabolomics_abc.md | 1 + docs/api/metabolomics_utils.md | 1 + docs/api/mibig.md | 1 + docs/api/nplinker.md | 1 + docs/api/schema.md | 1 + docs/api/scoring.md | 7 ++++++ docs/api/strain.md | 1 + docs/api/strain_utils.md | 1 + docs/api/utils.md | 3 +++ mkdocs.yml | 23 ++++++++++++++++++- src/nplinker/genomics/utils.py | 6 ++--- .../metabolomics/gnps/gnps_extractor.py | 2 ++ .../gnps/gnps_file_mapping_loader.py | 9 +++++++- src/nplinker/metabolomics/gnps/gnps_format.py | 1 + .../metabolomics/gnps/gnps_spectrum_loader.py | 1 + src/nplinker/strain/utils.py | 21 ++++++++++------- 25 files changed, 91 insertions(+), 13 deletions(-) create mode 100644 docs/api/antismash.md create mode 100644 docs/api/arranger.md create mode 100644 docs/api/bigscape.md create mode 100644 docs/api/genomics.md create mode 100644 docs/api/genomics_abc.md create mode 100644 docs/api/genomics_utils.md create mode 100644 docs/api/gnps.md create mode 100644 docs/api/loader.md create mode 100644 docs/api/metabolomics.md create mode 100644 docs/api/metabolomics_abc.md create mode 100644 docs/api/metabolomics_utils.md create mode 100644 docs/api/mibig.md create mode 100644 docs/api/nplinker.md create mode 100644 docs/api/schema.md create mode 100644 docs/api/scoring.md create mode 100644 docs/api/strain.md create mode 100644 docs/api/strain_utils.md create mode 100644 
docs/api/utils.md diff --git a/docs/api/antismash.md b/docs/api/antismash.md new file mode 100644 index 00000000..a0868d12 --- /dev/null +++ b/docs/api/antismash.md @@ -0,0 +1 @@ +::: nplinker.genomics.antismash diff --git a/docs/api/arranger.md b/docs/api/arranger.md new file mode 100644 index 00000000..0b687411 --- /dev/null +++ b/docs/api/arranger.md @@ -0,0 +1,2 @@ + +::: nplinker.arranger diff --git a/docs/api/bigscape.md b/docs/api/bigscape.md new file mode 100644 index 00000000..04689ad8 --- /dev/null +++ b/docs/api/bigscape.md @@ -0,0 +1,2 @@ +::: nplinker.genomics.bigscape +::: nplinker.genomics.bigscape.run_bigscape diff --git a/docs/api/genomics.md b/docs/api/genomics.md new file mode 100644 index 00000000..d19e4367 --- /dev/null +++ b/docs/api/genomics.md @@ -0,0 +1 @@ +::: nplinker.genomics diff --git a/docs/api/genomics_abc.md b/docs/api/genomics_abc.md new file mode 100644 index 00000000..dc037f9e --- /dev/null +++ b/docs/api/genomics_abc.md @@ -0,0 +1 @@ +::: nplinker.genomics.abc diff --git a/docs/api/genomics_utils.md b/docs/api/genomics_utils.md new file mode 100644 index 00000000..d064562a --- /dev/null +++ b/docs/api/genomics_utils.md @@ -0,0 +1 @@ +::: nplinker.genomics.utils diff --git a/docs/api/gnps.md b/docs/api/gnps.md new file mode 100644 index 00000000..f8e4e401 --- /dev/null +++ b/docs/api/gnps.md @@ -0,0 +1,13 @@ +::: nplinker.metabolomics.gnps + options: + members: + - GNPSFormat + - GNPSDownloader + - GNPSExtractor + - GNPSSpectrumLoader + - GNPSMolecularFamilyLoader + - GNPSAnnotationLoader + - GNPSFileMappingLoader + - gnps_format_from_archive + - gnps_format_from_file_mapping + - gnps_format_from_task_id diff --git a/docs/api/loader.md b/docs/api/loader.md new file mode 100644 index 00000000..fb09c827 --- /dev/null +++ b/docs/api/loader.md @@ -0,0 +1,2 @@ + +::: nplinker.loader diff --git a/docs/api/metabolomics.md b/docs/api/metabolomics.md new file mode 100644 index 00000000..19614803 --- /dev/null +++ 
b/docs/api/metabolomics.md @@ -0,0 +1 @@ +::: nplinker.metabolomics diff --git a/docs/api/metabolomics_abc.md b/docs/api/metabolomics_abc.md new file mode 100644 index 00000000..e0bcfa38 --- /dev/null +++ b/docs/api/metabolomics_abc.md @@ -0,0 +1 @@ +::: nplinker.metabolomics.abc diff --git a/docs/api/metabolomics_utils.md b/docs/api/metabolomics_utils.md new file mode 100644 index 00000000..4d5420a4 --- /dev/null +++ b/docs/api/metabolomics_utils.md @@ -0,0 +1 @@ +::: nplinker.metabolomics.utils diff --git a/docs/api/mibig.md b/docs/api/mibig.md new file mode 100644 index 00000000..7c548faf --- /dev/null +++ b/docs/api/mibig.md @@ -0,0 +1 @@ +::: nplinker.genomics.mibig diff --git a/docs/api/nplinker.md b/docs/api/nplinker.md new file mode 100644 index 00000000..1030f850 --- /dev/null +++ b/docs/api/nplinker.md @@ -0,0 +1 @@ +::: nplinker.nplinker diff --git a/docs/api/schema.md b/docs/api/schema.md new file mode 100644 index 00000000..e2862ef2 --- /dev/null +++ b/docs/api/schema.md @@ -0,0 +1 @@ +::: nplinker.schemas diff --git a/docs/api/scoring.md b/docs/api/scoring.md new file mode 100644 index 00000000..efbe30c9 --- /dev/null +++ b/docs/api/scoring.md @@ -0,0 +1,7 @@ +::: nplinker.scoring + options: + members: + - ScoringMethod + - MetcalfScoring + - LinkCollection + - ObjectLink diff --git a/docs/api/strain.md b/docs/api/strain.md new file mode 100644 index 00000000..34f15148 --- /dev/null +++ b/docs/api/strain.md @@ -0,0 +1 @@ +::: nplinker.strain diff --git a/docs/api/strain_utils.md b/docs/api/strain_utils.md new file mode 100644 index 00000000..ce5db2c1 --- /dev/null +++ b/docs/api/strain_utils.md @@ -0,0 +1 @@ +::: nplinker.strain.utils diff --git a/docs/api/utils.md b/docs/api/utils.md new file mode 100644 index 00000000..cf69ea60 --- /dev/null +++ b/docs/api/utils.md @@ -0,0 +1,3 @@ +::: nplinker.utils + options: + members_order: alphabetical diff --git a/mkdocs.yml b/mkdocs.yml index 65785de4..65a4e059 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ 
-56,7 +56,28 @@ extra_javascript: nav: - Get Started: - Welcome to NPLinker: index.md -- API: +- API Documentation: + - NPLinker: api/nplinker.md + - Dataset Arranger: api/arranger.md + - Dataset Loader: api/loader.md + - Genomics Data: + - Data Models: api/genomics.md + - Base Classes: api/genomics_abc.md + - MiBIG: api/mibig.md + - AntiSMASH: api/antismash.md + - BigScape: api/bigscape.md + - Utilities: api/genomics_utils.md + - Metabolomics Data: + - Data Models: api/metabolomics.md + - Base Classes: api/metabolomics_abc.md + - GNPS: api/gnps.md + - Utilities: api/metabolomics_utils.md + - Strain Data: + - Data Models: api/strain.md + - Utilities: api/strain_utils.md + - Scoring: api/scoring.md + - Schemas: api/schema.md + - General Utilities: api/utils.md markdown_extensions: - tables diff --git a/src/nplinker/genomics/utils.py b/src/nplinker/genomics/utils.py index 93cf1efa..a91ee4dd 100644 --- a/src/nplinker/genomics/utils.py +++ b/src/nplinker/genomics/utils.py @@ -290,11 +290,11 @@ def get_mappings_strain_id_bgc_id( Key is strain id and value is a set of BGC ids. See Also: - `extract_mappings_strain_id_original_genome_id`: Extract mappings + - `extract_mappings_strain_id_original_genome_id`: Extract mappings "strain_id <-> original_genome_id". - `extract_mappings_original_genome_id_resolved_genome_id`: Extract mappings + - `extract_mappings_original_genome_id_resolved_genome_id`: Extract mappings "original_genome_id <-> resolved_genome_id". - `extract_mappings_resolved_genome_id_bgc_id`: Extract mappings + - `extract_mappings_resolved_genome_id_bgc_id`: Extract mappings "resolved_genome_id <-> bgc_id". 
""" mappings_dict = {} diff --git a/src/nplinker/metabolomics/gnps/gnps_extractor.py b/src/nplinker/metabolomics/gnps/gnps_extractor.py index 08220b55..4a7dd28e 100644 --- a/src/nplinker/metabolomics/gnps/gnps_extractor.py +++ b/src/nplinker/metabolomics/gnps/gnps_extractor.py @@ -12,6 +12,7 @@ def __init__(self, file: str | PathLike, extract_dir: str | PathLike): """Class to extract files from a GNPS molecular networking archive(.zip). Four files are extracted and renamed to the following names: + - file_mappings(.tsv/.csv) - spectra.mgf - molecular_families.tsv @@ -19,6 +20,7 @@ def __init__(self, file: str | PathLike, extract_dir: str | PathLike): The files to be extracted are selected based on the GNPS workflow type, as desribed below (in the order of the files above): + 1. METABOLOMICS-SNETS - clusterinfosummarygroup_attributes_withIDs_withcomponentID/*.tsv - METABOLOMICS-SNETS*.mgf diff --git a/src/nplinker/metabolomics/gnps/gnps_file_mapping_loader.py b/src/nplinker/metabolomics/gnps/gnps_file_mapping_loader.py index 58d5c392..48d68e7c 100644 --- a/src/nplinker/metabolomics/gnps/gnps_file_mapping_loader.py +++ b/src/nplinker/metabolomics/gnps/gnps_file_mapping_loader.py @@ -16,6 +16,7 @@ def __init__(self, file: str | PathLike): The file mappings file is from GNPS output archive, as described below for each GNPS workflow type: + 1. METABOLOMICS-SNETS - clusterinfosummarygroup_attributes_withIDs_withcomponentID/*.tsv 2. METABOLOMICS-SNETS-V2 @@ -29,7 +30,7 @@ def __init__(self, file: str | PathLike): Raises: ValueError: Raises ValueError if the file is not valid. - Example: + Examples: >>> loader = GNPSFileMappingLoader("gnps_file_mappings.tsv") >>> print(loader.mappings["1"]) ['26c.mzXML'] @@ -137,6 +138,7 @@ def _load_snets(self) -> None: """Load file mapping from output of GNPS SNETS workflow. The following columns are loaded: + - "cluster index": loaded as spectrum id - "AllFiles": a list of files in which the spectrum occurs, separated by '###'. 
@@ -157,6 +159,7 @@ def _load_snetsv2(self) -> None: """Load file mapping from output of GNPS SNETS-V2 workflow. The following columns are loaded: + - "cluster index": loaded as spectrum id - "UniqueFileSources": a list of files in which the spectrum occurs, separated by '|'. @@ -174,13 +177,17 @@ def _load_fbmn(self): """Load file mapping from output of GNPS FBMN workflow. The column "row ID" is loaded as spectrum id. + The column names containing " Peak area" are used to extract the file names, and the values of these columns are used to determine whether the spectrum occurs in the file. The file name is taken only if the value is greater than 0. + An example data of the file is as follows: + ``` row ID,5434_5433_mod.mzXML Peak area,5425_5426_mod.mzXML Peak area 1,1764067.8434999974,0.0 + ``` """ pattern = " Peak area" with open(self._file, mode="rt", encoding="utf-8") as f: diff --git a/src/nplinker/metabolomics/gnps/gnps_format.py b/src/nplinker/metabolomics/gnps/gnps_format.py index 61f40619..8df05450 100644 --- a/src/nplinker/metabolomics/gnps/gnps_format.py +++ b/src/nplinker/metabolomics/gnps/gnps_format.py @@ -105,6 +105,7 @@ def gnps_format_from_file_mapping(file: str | PathLike) -> GNPSFormat: The GNSP file mapping file is located in different folders depending on the GNPS workflow. 
Here are the locations in corresponding GNPS zip archives: + - METABOLOMICS-SNETS workflow: the .tsv file under folder "clusterinfosummarygroup_attributes_withIDs_withcomponentID" - METABOLOMICS-SNETS-V2 workflow: the .clustersummary file (tsv) under folder "clusterinfosummarygroup_attributes_withIDs_withcomponentID" - FEATURE-BASED-MOLECULAR-NETWORKING workflow: the .csv file under folder "quantification_table" diff --git a/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py b/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py index 09c37bcb..83e96011 100644 --- a/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py +++ b/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py @@ -14,6 +14,7 @@ def __init__(self, file: str | PathLike): The file mappings file is from GNPS output archive, as described below for each GNPS workflow type: + 1. METABOLOMICS-SNETS - METABOLOMICS-SNETS*.mgf 2. METABOLOMICS-SNETS-V2 diff --git a/src/nplinker/strain/utils.py b/src/nplinker/strain/utils.py index 282441e7..c424d6f2 100644 --- a/src/nplinker/strain/utils.py +++ b/src/nplinker/strain/utils.py @@ -20,9 +20,12 @@ def load_user_strains(json_file: str | PathLike) -> set[Strain]: """Load user specified strains from a JSON file. - The JSON file must follow the schema defined in "nplinker/schemas/user_strains.json". + The JSON file must follow the schema defined in `schemas/user_strains.json`. + An example content of the JSON file: + ``` {"strain_ids": ["strain1", "strain2"]} + ``` Args: json_file: Path to the JSON file containing user specified strains. @@ -53,10 +56,12 @@ def podp_generate_strain_mappings( """Generate strain mappings JSON file for PODP pipeline. 
To get the strain mappings, we need to combine the following mappings: + - strain_id <-> original_genome_id <-> resolved_genome_id <-> bgc_id - strain_id <-> MS_filename <-> spectrum_id These mappings are extracted from the following files: + - "strain_id <-> original_genome_id" is extracted from `podp_project_json_file`. - "original_genome_id <-> resolved_genome_id" is extracted from `genome_status_json_file`. - "resolved_genome_id <-> bgc_id" is extracted from `genome_bgc_mappings_file`. @@ -78,18 +83,18 @@ def podp_generate_strain_mappings( The strain mappings stored in a StrainCollection object. See Also: - `extract_mappings_strain_id_original_genome_id`: Extract mappings + - `extract_mappings_strain_id_original_genome_id`: Extract mappings "strain_id <-> original_genome_id". - `extract_mappings_original_genome_id_resolved_genome_id`: Extract mappings + - `extract_mappings_original_genome_id_resolved_genome_id`: Extract mappings "original_genome_id <-> resolved_genome_id". - `extract_mappings_resolved_genome_id_bgc_id`: Extract mappings + - `extract_mappings_resolved_genome_id_bgc_id`: Extract mappings "resolved_genome_id <-> bgc_id". - `get_mappings_strain_id_bgc_id`: Get mappings "strain_id <-> bgc_id". - `extract_mappings_strain_id_ms_filename`: Extract mappings + - `get_mappings_strain_id_bgc_id`: Get mappings "strain_id <-> bgc_id". + - `extract_mappings_strain_id_ms_filename`: Extract mappings "strain_id <-> MS_filename". - `extract_mappings_ms_filename_spectrum_id`: Extract mappings + - `extract_mappings_ms_filename_spectrum_id`: Extract mappings "MS_filename <-> spectrum_id". - `get_mappings_strain_id_spectrum_id`: Get mappings "strain_id <-> spectrum_id". + - `get_mappings_strain_id_spectrum_id`: Get mappings "strain_id <-> spectrum_id". """ # Get mappings strain_id <-> original_geonme_id <-> resolved_genome_id <-> bgc_id mappings_strain_id_bgc_id = get_mappings_strain_id_bgc_id(