update docs, fix formatting

uio-bmi · Nov 1, 2021 · 5de9c51 · 5de9c51
1 parent fba4769
commit 5de9c51
Show file tree

Hide file tree

Showing 9 changed files with 80 additions and 24 deletions.
diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
@@ -0,0 +1,32 @@
+{% extends "!layout.html" %}
+
+{%- block extrahead %}
+{{ super() }}
+
+<script async defer data-domain="docs.immuneml.uio.no" src="https://plausible.io/js/plausible.js"></script>
+
+<link rel="apple-touch-icon" sizes="180x180" href="../_static/meta/apple-touch-icon.png">
+<link rel="icon" type="image/png" sizes="32x32" href="../_static/meta/favicon-32x32.png">
+<link rel="icon" type="image/png" sizes="16x16" href="../_static/meta/favicon-16x16.png">
+<link rel="manifest" href="../_static/meta/site.webmanifest">
+<link rel="mask-icon" href="../_static/meta/safari-pinned-tab.svg" color="#5bbad5">
+<meta name="msapplication-TileColor" content="#da532c">
+<meta name="theme-color" content="#ffffff">
+
+<meta name="description" content="immuneML is an open-source software platform for machine learning analysis of adaptive immune receptor repertoires,
+available as a Python library, through Galaxy and as a Docker image. On this website, you can browse the platform's documentation and tutorials.">
+
+<!--<meta name="twitter:card" content="summary" />-->
+<!--<meta name="twitter:site" content="@immuneml" />-->
+<!--<meta name="twitter:title" content="immuneML documentation and tutorials" />-->
+<!--<meta name="twitter:description" content="immuneML is an open-source software platform for machine learning analysis of adaptive immune receptor repertoires,-->
+<!--available as a Python library, through Galaxy and as a Docker image. On this website, you can browse the platform's documentation and tutorials." />-->
+<!--<meta name="twitter:image" content="https://docs.immuneml.uio.no/_images/receptor_classification_overview.png" />-->
+
+<!--<meta name="og:image" content="https://docs.immuneml.uio.no/_images/receptor_classification_overview.png">-->
+<!--<meta name="og:type" content="website">-->
+<!--<meta name="og:title" content="immuneML documentation and tutorials">-->
+<!--<meta name="og:description" content="immuneML is an open-source software platform for machine learning analysis of adaptive immune receptor repertoires,-->
+<!--available as a Python library, through Galaxy and as a Docker image. On this website, you can browse the platform's documentation and tutorials.">-->
+
+{% endblock %}
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -60,7 +60,7 @@
 html_baseurl = 'https://docs.immuneml.uio.no'
 
 # mathjax configuration
-mathjax_config = {
+mathjax3_config = {
     'extensions': ['tex2jax.js'],
     'jax': ['input/TeX', 'output/HTML-CSS'],
 }

diff --git a/docs/source/developer_docs/how_to_add_new_encoding.rst b/docs/source/developer_docs/how_to_add_new_encoding.rst
@@ -24,7 +24,7 @@ To add a new encoder:
 4. Implement the abstract methods :code:`encode()` and :code:`build_object()`.
 5. Implement methods to import and export an encoder: :code:`get_additional_files()`, :code:`export_encoder()` and :code:`load_encoder()`, mostly relying on functionality already available in :py:obj:`~immuneML.encodings.DatasetEncoder.DatasetEncoder`.
 6. Add class documentation including: what the encoder does, what the arguments are and an example on how to use it from YAML specification.
-7. Add the new encoder class to the list of compatible encoders returned by the :code:`get_compatible_encoders()` method of the :ref:`MLMethod` of interest.
+7. Add the new encoder class to the list of compatible encoders returned by the :code:`get_compatible_encoders()` method of the :py:obj:`~immuneML.ml_methods.MLMethod.MLMethod` of interest.
 
 An example of the implementation of :code:`NewKmerFrequencyEncoder` for the :py:obj:`~immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset` is shown.
 

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -58,6 +58,14 @@ documentation under :ref:`YAML specification`.
 
 Our open-source code can be found on `GitHub <https://github.com/uio-bmi/ImmuneML>`_ :)
 
+Previous versions
+-------------------
+
+Documentation for previous immuneML versions can be found here:
+
+- `v2.0.4 <https://docs.immuneml.uio.no/v2.0.4/>`_
+- `v1.2.5 <https://docs.immuneml.uio.no/v1.2.5/>`_
+
 
 .. toctree::
    :maxdepth: 1

diff --git a/docs/source/installation/install_with_package_manager.rst b/docs/source/installation/install_with_package_manager.rst
@@ -61,7 +61,7 @@ To install the DeepRC dependencies, run:
 
   pip install -r requirements_DeepRC.txt --no-dependencies
 
-6. Optionally, if you want to use the :ref:`CompAIRRDistanceEncoder` or :ref:`CompAIRRSequenceAbundanceEncoder`, you have to install the C++ tool `CompAIRR <https://github.com/uio-bmi/compairr>`_.
+6. Optionally, if you want to use the :ref:`CompAIRRDistance` or :ref:`CompAIRRSequenceAbundance` encoder, you have to install the C++ tool `CompAIRR <https://github.com/uio-bmi/compairr>`_.
 The easiest way to do this is by cloning CompAIRR from GitHub and installing it using :code:`make` in the main folder:
 
 .. code-block:: console
@@ -72,7 +72,7 @@ The easiest way to do this is by cloning CompAIRR from GitHub and installing it
 
 If such installation is unsuccessful (for example if you do not have the rights to install CompAIRR via make),
 it is also possible to directly provide the path to a CompAIRR executable as a parameter
-to :ref:`CompAIRRDistanceEncoder` or :ref:`CompAIRRSequenceAbundanceEncoder`.
+to :ref:`CompAIRRDistance` or :ref:`CompAIRRSequenceAbundance` encoder.
 
 
 

diff --git a/immuneML/dsl/symbol_table/SymbolTable.py b/immuneML/dsl/symbol_table/SymbolTable.py
@@ -10,11 +10,24 @@ class SymbolTable:
     """
     Symbol table contains all objects parsed from the specification in the following format:
 
-    --------------------------------------------------------------------------------------------------
-    symbol | symbol_type   | item                                                      | config      |
-    --------------------------------------------------------------------------------------------------
-    e1     | encoding      | EncodingObject                                            | {...}       | -> SymbolTableEntry object
-    seq1   | preprocessing | [ClonesPerRepertoireFilter(), MetadataRepertoireFilter()] | {...}       | -> SymbolTableEntry object
+    .. list-table::
+        :header-rows: 1
+
+        * - symbol
+          - symbol_type
+          - item
+          - config
+          -
+        * - e1
+          - encoding
+          - EncodingObject
+          - {...}
+          - -> SymbolTableEntry object
+        * - seq1
+          - preprocessing
+          - [ClonesPerRepertoireFilter(), MetadataRepertoireFilter()]
+          - {...}
+          - -> SymbolTableEntry object
 
     """
 

diff --git a/immuneML/reports/ml_reports/DeepRCMotifDiscovery.py b/immuneML/reports/ml_reports/DeepRCMotifDiscovery.py
@@ -19,8 +19,9 @@ class DeepRCMotifDiscovery(MLReport):
     This report plots the contributions of (i) input sequences and (ii) kernels to trained :ref:`DeepRC` model with respect to
     the test dataset. Contributions are computed using integrated gradients (IG).
     This report produces two figures:
-        - inputs_integrated_gradients: Shows the contributions of the characters within the input sequences (test dataset) that was most important for immune status prediction of the repertoire. IG is only applied to sequences of positive class repertoires.
-        - kernel_integrated_gradients: Shows the 1D CNN kernels with the highest contribution over all positions and amino acids.
+
+    - inputs_integrated_gradients: Shows the contributions of the characters within the input sequences (test dataset) that were most important for immune status prediction of the repertoire. IG is only applied to sequences of positive class repertoires.
+    - kernel_integrated_gradients: Shows the 1D CNN kernels with the highest contribution over all positions and amino acids.
 
     For both inputs and kernels: Larger characters in the extracted motifs indicate higher contribution, with blue
     indicating positive contribution and red indicating negative contribution towards the prediction of the immune status.

diff --git a/immuneML/reports/ml_reports/MotifSeedRecovery.py b/immuneML/reports/ml_reports/MotifSeedRecovery.py
@@ -27,6 +27,7 @@ class MotifSeedRecovery(MLReport):
     size is expected to be largest for the kmer features with high overlap to the motif seeds.
 
     Note that to use this report, the following criteria must be met:
+
     - KmerFrequencyEncoder must be used.
     - One of the following classifiers must be used: RandomForestClassifier, LogisticRegression, SVM
     - For each label, the implanted motif seeds relevant to that label must be specified
@@ -67,14 +68,14 @@ class MotifSeedRecovery(MLReport):
 
     Arguments:
 
-        implanted_motifs_per_label (dict): a nested dictionary that specifies the motif seeds that were implanted in
-            the given dataset. The first level of keys in this dictionary represents the different labels. In the
-            inner dictionary there should be two keys: "seeds" and "hamming_distance"
-                seeds: a list of motif seeds. The seeds may contain gaps, specified by a '/' symbol.
-                hamming_distance: A boolean value that specifies whether hamming distance was allowed when implanting the
-                    motif seeds for a given label. Note that this applies to all seeds for this label.
-                gap_sizes: a list of all the possible gap sizes that were used when implanting a gapped motif seed.
-                    When no gapped seeds are used, this value has no effect.
+        implanted_motifs_per_label (dict): a nested dictionary that specifies the motif seeds that were implanted in the given dataset. The first
+        level of keys in this dictionary represents the different labels. In the inner dictionary there should be the following keys: "seeds",
+        "hamming_distance" and "gap_sizes":
+                - seeds: a list of motif seeds. The seeds may contain gaps, specified by a '/' symbol.
+                - hamming_distance: A boolean value that specifies whether hamming distance was allowed when implanting the motif seeds for a given
+                label. Note that this applies to all seeds for this label.
+                - gap_sizes: a list of all the possible gap sizes that were used when implanting a gapped motif seed.
+                When no gapped seeds are used, this value has no effect.
 
 
     YAML specification:
@@ -94,7 +95,7 @@ class MotifSeedRecovery(MLReport):
                         - 0
                         - 1
                         - 2
-                    T1D
+                    T1D:
                         seeds:
                         - CC/C
                         - CCC

diff --git a/immuneML/util/ImportHelper.py b/immuneML/util/ImportHelper.py
@@ -108,12 +108,13 @@ def import_repertoire_dataset(import_class, params: DatasetImportParams, dataset
     def update_gene_info(df: pd.DataFrame):
         """
         Updates gene info in 2 steps:
+
         - First, columns are added if they were not present. This is done by going from the highest level of information (alleles)
-          towards the lowest level of information (subgroups) by stripping away suffixes. If gene and subgroup columns were already
-          present, suffixes are still stripped away just in case.
+        towards the lowest level of information (subgroups) by stripping away suffixes. If gene and subgroup columns were already
+        present, suffixes are still stripped away just in case.
         - Next, if there are None values present, the highest possible level of information is copied in from the lower level information fields.
-          This is done by moving from subgroups towards alleles. So if for one particular receptor only the subgroup was present, the subgroup
-          will be copied into the genes and alleles column.
+        This is done by moving from subgroups towards alleles. So if for one particular receptor only the subgroup was present, the subgroup
+        will be copied into the genes and alleles column.
         """
         for gene in ['v', 'j']:
             # step 1: create all columns