Adding robustica option to ICA decomposition to achieve consistent results #1013

Merged
merged 47 commits
Sep 23, 2024
f4eaa3e
Add robustica method
BahmanTahayori Jul 31, 2023
b0cac3a
Incorporation of major comments regarding robustica addition
BahmanTahayori Dec 5, 2023
55c2ae4
Add robustica 0.1.3 to dependency list
BahmanTahayori Nov 1, 2023
cd55a3f
Multiple fixes to RobustICA addition from code review
BahmanTahayori Dec 5, 2023
2d9b007
Specify magic number fixed seed of 42 as a constant
BahmanTahayori Nov 29, 2023
09e565e
Merge remote-tracking branch 'upstream/main' into add_robustica_rsclean
BahmanTahayori Dec 5, 2023
fc5f9ea
Updated
BahmanTahayori Dec 5, 2023
4fc3043
Robustica Updates
BahmanTahayori Dec 6, 2023
a20ff57
Incorporating the third round of Robert E. Smith's comments
BahmanTahayori Dec 20, 2023
cc5e05d
Merge pull request #3 from BahmanTahayori/add_robustica_rsclean
BahmanTahayori Dec 20, 2023
a449fec
Merge branch 'ME-ICA:main' into main
BahmanTahayori Feb 9, 2024
78c8140
Enhance the "ica_method" description suggested by D. Handwerker
BahmanTahayori Feb 9, 2024
ac85e6a
Enhancing the "n_robust_runs" description suggested by D. Handwerkerd
BahmanTahayori Feb 9, 2024
979d026
RobustICA: Restructure code loop over robust methods (#4)
Lestropie Feb 11, 2024
71d8d4a
merging recent changes
BahmanTahayori Feb 21, 2024
cac38cd
Applied suggested changes
BahmanTahayori Feb 29, 2024
5fcf148
Fixing the conflict
BahmanTahayori Feb 29, 2024
b7d08e9
Merge branch 'ME-ICA:main' into main
BahmanTahayori Feb 29, 2024
a113423
Incorporating more comments
BahmanTahayori Mar 4, 2024
b60e9a6
Merge remote-tracking branch 'upstream/main'
Lestropie Apr 12, 2024
45c95ce
aligning adding-robustica with Main
handwerkerd Aug 13, 2024
8e6878f
Adding already requested changes
handwerkerd Aug 13, 2024
88fd148
fixed failing tests
handwerkerd Aug 16, 2024
a221e72
updated documentation in faq.rst
handwerkerd Aug 27, 2024
8622a9b
more documentation changes
handwerkerd Aug 29, 2024
419e9d4
Update docs/faq.rst
handwerkerd Aug 30, 2024
d29a91b
Update docs/faq.rst
handwerkerd Aug 30, 2024
efb712e
Aligning robustICA with current Main + (#5)
handwerkerd Aug 30, 2024
dee68ec
align with main
handwerkerd Sep 3, 2024
171a835
Merge branch 'ME-ICA:main' into adding-robustica
handwerkerd Sep 3, 2024
4b86fe2
fixed ica.py docstring error
handwerkerd Sep 3, 2024
c01ec51
added scikit-learn-extra to pyproject and changed ref name
handwerkerd Sep 3, 2024
cd50037
increment circleci version keys
handwerkerd Sep 3, 2024
54a4ac3
Merge branch 'main' into adding-robustica
BahmanTahayori Sep 10, 2024
9bb021a
Merge pull request #6 from handwerkerd/adding-robustica
BahmanTahayori Sep 10, 2024
cd29060
Merge branch 'ME-ICA:main' into main
BahmanTahayori Sep 10, 2024
2a868e3
Removing the scikit-learn-extra dependency
BahmanTahayori Sep 11, 2024
ed10ade
Updating pyproject.toml file
BahmanTahayori Sep 11, 2024
8a14277
Minor changes to make the help more readable
BahmanTahayori Sep 12, 2024
b793d83
Minor changes
BahmanTahayori Sep 12, 2024
1b1eb38
upgrading to robustica 0.1.4
BahmanTahayori Sep 17, 2024
87965f4
Update docs
BahmanTahayori Sep 17, 2024
8743aca
updating utils.py, toml file and the docs
BahmanTahayori Sep 18, 2024
42372e3
minor change to utils.py
BahmanTahayori Sep 18, 2024
3d88eb4
Merge branch 'main' into main
handwerkerd Sep 18, 2024
1aac94a
Incorporating Eneko's comments
BahmanTahayori Sep 20, 2024
2b0ef67
Added a warning when the clustering method is changed
BahmanTahayori Sep 20, 2024
36 changes: 18 additions & 18 deletions .circleci/config.yml
@@ -13,7 +13,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
- run:
name: Generate environment
command: |
@@ -23,7 +23,7 @@ jobs:
pip install -e .[tests]
fi
- save_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
paths:
- /opt/conda/envs/tedana_py38

@@ -34,7 +34,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
- run:
name: Running unit tests
command: |
@@ -56,7 +56,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py39-v2-{{ checksum "pyproject.toml" }}
key: conda-py39-v3-{{ checksum "pyproject.toml" }}
- run:
name: Generate environment
command: |
@@ -75,7 +75,7 @@
mkdir /tmp/src/coverage
mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py39
- save_cache:
key: conda-py39-v2-{{ checksum "pyproject.toml" }}
key: conda-py39-v3-{{ checksum "pyproject.toml" }}
paths:
- /opt/conda/envs/tedana_py39
- persist_to_workspace:
@@ -90,7 +90,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py310-v1-{{ checksum "pyproject.toml" }}
key: conda-py310-v3-{{ checksum "pyproject.toml" }}
- run:
name: Generate environment
command: |
@@ -109,7 +109,7 @@
mkdir /tmp/src/coverage
mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py310
- save_cache:
key: conda-py310-v1-{{ checksum "pyproject.toml" }}
key: conda-py310-v3-{{ checksum "pyproject.toml" }}
paths:
- /opt/conda/envs/tedana_py310
- persist_to_workspace:
@@ -124,7 +124,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py311-v1-{{ checksum "pyproject.toml" }}
key: conda-py311-v3-{{ checksum "pyproject.toml" }}
- run:
name: Generate environment
command: |
@@ -143,7 +143,7 @@
mkdir /tmp/src/coverage
mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py311
- save_cache:
key: conda-py311-v1-{{ checksum "pyproject.toml" }}
key: conda-py311-v3-{{ checksum "pyproject.toml" }}
paths:
- /opt/conda/envs/tedana_py311
- persist_to_workspace:
@@ -158,7 +158,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py312-v1-{{ checksum "pyproject.toml" }}
key: conda-py312-v3-{{ checksum "pyproject.toml" }}
- run:
name: Generate environment
command: |
@@ -177,7 +177,7 @@
mkdir /tmp/src/coverage
mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py312
- save_cache:
key: conda-py312-v1-{{ checksum "pyproject.toml" }}
key: conda-py312-v3-{{ checksum "pyproject.toml" }}
paths:
- /opt/conda/envs/tedana_py312
- persist_to_workspace:
@@ -192,7 +192,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
- run:
name: Style check
command: |
@@ -208,7 +208,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
- run:
name: Run integration tests
no_output_timeout: 40m
@@ -233,7 +233,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
- run:
name: Run integration tests
no_output_timeout: 40m
@@ -258,7 +258,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
- run:
name: Run integration tests
no_output_timeout: 40m
@@ -283,7 +283,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
- run:
name: Run integration tests
no_output_timeout: 40m
@@ -308,7 +308,7 @@ jobs:
steps:
- checkout
- restore_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
- run:
name: Run integration tests
no_output_timeout: 40m
@@ -335,7 +335,7 @@
at: /tmp
- checkout
- restore_cache:
key: conda-py38-v2-{{ checksum "pyproject.toml" }}
key: conda-py38-v3-{{ checksum "pyproject.toml" }}
- run:
name: Merge coverage files
command: |
4 changes: 4 additions & 0 deletions docs/approach.rst
@@ -334,6 +334,8 @@ Next, ``tedana`` applies TE-dependent independent component analysis (ICA) in
order to identify and remove TE-independent (i.e., non-BOLD noise) components.
The dimensionally reduced optimally combined data are first subjected to ICA in
order to fit a mixing matrix to the whitened data.
``tedana`` can use a single iteration of FastICA or multiple iterations of RobustICA,
with an explanation of those approaches `in our FAQ`_.
This generates a number of independent timeseries (saved as **desc-ICA_mixing.tsv**),
as well as parameter estimate maps which show the spatial loading of these components on the
brain (**desc-ICA_components.nii.gz**).
@@ -380,6 +382,8 @@ yielding a denoised timeseries, which is saved as **desc-denoised_bold.nii.gz**.

.. image:: /_static/a15_denoised_data_timeseries.png

.. _in our FAQ: faq.html#tedana-what-is-the-right-number-of-ica-components-what-options-let-me-get-it
.. _These decision trees are detailed here: included_decision_trees.html

*******************************
Manual classification with RICA
67 changes: 67 additions & 0 deletions docs/faq.rst
@@ -93,11 +93,78 @@ The TEDICA step may fail to converge if TEDPCA is either too strict
With updates to the ``tedana`` code, this issue is now rare, but it may happen
when preprocessing has not been applied to the data, or when improper steps have
been applied to the data (e.g. rescaling, nuisance regression).
It can also still happen when everything is seemingly correct
(see the answer to the next question).
If you are confident that your data have been preprocessed correctly prior to
applying tedana, and you encounter this problem, please submit a question to `NeuroStars`_.

.. _NeuroStars: https://neurostars.org

*********************************************************************************
[tedana] What is the right number of ICA components & what options let me get it?
*********************************************************************************

Part of the PCA step in ``tedana`` processing involves identifying the number of
components that contain meaningful signal.
The PCA components are then used to calculate the same number of ICA components.
The ``--tedpca`` option includes several options to identify the "correct" number
of PCA components.
``kundu`` and ``kundu-stabilize`` use several echo-based criteria to exclude PCA
components that are unlikely to contain T2* or S0 signal.
``mdl`` (conservative & fewest components), ``kic``,
& ``aic`` (liberal & more components) use `MAPCA`_.
Within the same general method, each uses a cost function to find a minimum
where more components no longer model meaningful variance.
For some datasets we see all methods fail, resulting in too few or too many components.
There is no consistent number of components or % variance explained that defines the correct number.
The correct number of components will depend on the noise levels of the data.
For example, smaller voxels will result in more thermal noise and less total variance explained.
A dataset with more head motion artifacts will have more variance explained,
since more structured signal is within the head motion artifacts.
The clear failure cases are extreme: fewer than one fifth as many components as time points,
or nearly as many components as time points.
We are working on identifying why this happens and adding better solutions.
Our current guess is that most of the above methods assume data are
independent and identically distributed (IID),
and signal leakage from in-slice and multi-slice acceleration may violate this assumption.

We have one option that is generally useful and is also a partial solution.
``--ica_method robustica`` will run `robustica`_.
This method, for a given number of PCA components,
repeatedly runs ICA and identifies components that are stable across iterations.
While running ICA multiple times slows processing, it has a general benefit:
the ICA results become less sensitive to the initialization parameters,
computer hardware, and software versions.
This results in better stability and replicability of ICA results.
Additionally, `robustica`_ almost always results in fewer components than initially prescribed,
since there are fewer stable components across iterations than the total number of components.
This means that, even if the initial PCA component estimate is a bit off,
the number of resulting robust ICA components will represent stable information in the data.
For a dataset where the PCA component estimation methods are failing,
one could use ``--tedpca`` with a fixed integer for a constant number of components
that is on the high end of the typical number of components for a study,
and then `robustica`_ will reduce the number of components to only the stable information.
That said, if the fixed PCA component number is too high,
then the method will have too many unstable components,
and if the fixed PCA component number is too low, then there will be even fewer ICA components.
With this approach, the number of ICA components is more consistent,
but is still sensitive to the initial number of PCA components.
For example, for a single dataset, 60 PCA components might result in 46 stable ICA components,
while 55 PCA components might result in 43 stable ICA components.
We are still testing how these interact to give better recommendations for even more stable results.
While the ``tedana`` developers expect that ``--ica_method robustica`` may become
the default configuration in future ``tedana`` versions,
it is first being released as a non-default option
in the hope of gaining insight into its behavior
across a broader range of multi-echo fMRI data.
If PCA component estimation is failing on a dataset,
we recommend trying RobustICA,
and we invite users to send us feedback on its behavior and efficacy.
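The repeated-ICA-plus-clustering idea can be sketched with scikit-learn alone. This is a conceptual illustration, not ``tedana``'s or `robustica`_'s actual implementation: FastICA is run with several seeds on synthetic data, and a component from the first run is treated as "stable" only if every other run contains a closely matching component (the 0.9 correlation threshold and 10 runs are arbitrary choices for the demo):

```python
import numpy as np
from sklearn.decomposition import FastICA

rng = np.random.default_rng(0)

# Synthetic data: 3 non-Gaussian latent sources mixed into 10 observed channels.
n_samples, n_sources, n_channels = 500, 3, 10
t = np.linspace(0, 8, n_samples)
sources = np.column_stack(
    [np.sin(2 * t), np.sign(np.sin(3 * t)), 2 * (t % 1) - 1]
)
data = sources @ rng.normal(size=(n_sources, n_channels))

# Run FastICA several times with different random initializations.
runs = []
for seed in range(10):
    ica = FastICA(n_components=n_sources, random_state=seed, max_iter=1000)
    runs.append(ica.fit_transform(data).T)  # shape (n_sources, n_samples)

# Keep components from the first run that recur in every other run
# (matched by absolute correlation, since ICA sign and order are arbitrary).
stable = []
for comp in runs[0]:
    matched_in_all = all(
        max(abs(np.corrcoef(comp, other)[0, 1]) for other in run) > 0.9
        for run in runs[1:]
    )
    if matched_in_all:
        stable.append(comp)

print(f"{len(stable)} of {n_sources} components are stable across runs")
```

The actual `robustica`_ package clusters components across runs (e.g. with DBSCAN) rather than matching against a single reference run, which is why it can also return fewer components than were requested.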


.. _MAPCA: https://github.com/ME-ICA/mapca
.. _robustica: https://github.com/CRG-CNAG/robustica

.. _manual classification:

********************************************************************************
1 change: 1 addition & 0 deletions pyproject.toml
@@ -30,6 +30,7 @@ dependencies = [
"pandas>=2.0,<=2.2.2",
"pybtex",
"pybtex-apa-style",
"robustica>=0.1.4,<=0.1.4",
"scikit-learn>=0.21, <=1.5.2",
"scipy>=1.2.0, <=1.14.1",
"threadpoolctl",
19 changes: 19 additions & 0 deletions tedana/config.py
Collaborator: We probably want to open an issue so we can discuss where we move these defaults.

Contributor Author: I agree.

Collaborator: I opened #1132.
@@ -0,0 +1,19 @@
"""Setting default values for ICA decomposition."""

DEFAULT_ICA_METHOD = "fastica"
DEFAULT_N_MAX_ITER = 500
DEFAULT_N_MAX_RESTART = 10
DEFAULT_SEED = 42


"""Setting values for number of robust runs."""

DEFAULT_N_ROBUST_RUNS = 30
MIN_N_ROBUST_RUNS = 5
MAX_N_ROBUST_RUNS = 500
WARN_N_ROBUST_RUNS = 200


"""Setting the warning threshold for the index quality."""

WARN_IQ = 0.6
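One way these bounds could be consumed is an argument-validation helper. The sketch below is hypothetical — the function name and messages are illustrative, not code from this PR — but it shows the intended roles of the min/max/warn constants:

```python
import warnings

# Constants mirrored from the new tedana/config.py above.
DEFAULT_N_ROBUST_RUNS = 30
MIN_N_ROBUST_RUNS = 5
MAX_N_ROBUST_RUNS = 500
WARN_N_ROBUST_RUNS = 200


def validate_n_robust_runs(n_robust_runs: int = DEFAULT_N_ROBUST_RUNS) -> int:
    """Reject out-of-range values and warn on slow-but-allowed ones."""
    if not MIN_N_ROBUST_RUNS <= n_robust_runs <= MAX_N_ROBUST_RUNS:
        raise ValueError(
            f"n_robust_runs must be between {MIN_N_ROBUST_RUNS} and "
            f"{MAX_N_ROBUST_RUNS}, got {n_robust_runs}"
        )
    if n_robust_runs > WARN_N_ROBUST_RUNS:
        warnings.warn(
            f"n_robust_runs={n_robust_runs} is large; "
            "running ICA this many times may be very slow"
        )
    return n_robust_runs
```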