Merge pull request #14 from OnnoKampman/refactor
Further updates
OnnoKampman authored May 25, 2024
2 parents a0711d0 + c2ed305 commit bde162b
Showing 2,282 changed files with 20,357 additions and 9,748 deletions.
(changed file; path not shown in this view)
@@ -1,4 +1,4 @@
-name: Pylint
+name: Ruff linting

on: [push]

@@ -19,13 +19,13 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install git+https://github.com/OnnoKampman/[email protected]
-pip install matplotlib nilearn pandas==1.5.3 ptitprince pylint seaborn
-- name: Analyze code with pylint
+pip install matplotlib nilearn pandas==1.5.3 ptitprince ruff seaborn
+- name: Analyze code with Ruff
run: |
echo $PYTHONPATH
pwd
ls -al
export PYTHONPATH=$PWD
echo $PYTHONPATH
-pylint $(git ls-files '*.py') --disable=line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring --fail-under=7
+ruff check . --exit-zero
continue-on-error: true
26 changes: 7 additions & 19 deletions .gitignore
@@ -6,6 +6,13 @@ __pycache__
# Data.
opk20_hivemind_paper_1/

**.tar.gz

datasets/fmri/rs/HCP/hcp-openaccess/
# HCP_PTN1200_recon2/

# Results.

results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d15_ts2/brain_states/k01/
results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d15_ts2/brain_states/k02/
results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d15_ts2/brain_states/k04/
@@ -32,22 +39,3 @@ results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d50_ts2/brain_states/k1
results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d50_ts2/brain_states/k15/
results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d50_ts2/brain_states/k18/
results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d50_ts2/brain_states/k21/

-# Ignore simulation benchmarks where N is not 400.
-results/fmri/sim/d2/N0120_T0200/
-results/fmri/sim/d2/N0200_T0200/
-results/fmri/sim/d2/N1200_T0200/
-
-results/fmri/sim/d3d/N0120_T0200/
-results/fmri/sim/d3d/N0200_T0200/
-
-results/fmri/sim/d3s/N0120_T0200/
-results/fmri/sim/d3s/N0200_T0200/
-
-results/fmri/sim/d4s/N0200_T0010/
-
-results/fmri/sim/d6s/N0200_T0010/
-
-results/fmri/sim/d9s/N0200_T0010/
-
-results/fmri/sim/d15s/N0200_T0010/
3 changes: 3 additions & 0 deletions benchmarks/fmri/rs/ABCD/README.md
@@ -0,0 +1,3 @@
# ABCD dataset

https://nda.nih.gov/abcd
(changed file; path not shown in this view)
@@ -98,7 +98,7 @@
reconstruct_symmetric_summary_measure_matrix_from_tril(
mean_over_subjects_edgewise_summarized_tvfc_df.values,
tvfc_summary_measure=tvfc_summary_measure,
-n_time_series=n_time_series
+num_time_series=n_time_series
)
) # (D, D)

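The only change in this hunk is the renamed keyword: `num_time_series` fixes the output dimension D of the reconstructed matrix. As a rough, self-contained illustration of what a tril-to-symmetric reconstruction like this typically does (not the repository's actual helper; the unit diagonal and the tril ordering are assumptions), consider:

```python
import numpy as np

def reconstruct_symmetric_from_tril(tril_values: np.ndarray, num_time_series: int) -> np.ndarray:
    """Rebuild a symmetric (D, D) matrix from its strictly lower-triangular entries."""
    mat = np.zeros((num_time_series, num_time_series))
    rows, cols = np.tril_indices(num_time_series, k=-1)  # strictly lower triangle
    mat[rows, cols] = tril_values
    mat = mat + mat.T            # mirror into the upper triangle
    np.fill_diagonal(mat, 1.0)   # assumed unit diagonal, as for correlation matrices
    return mat

# D = 4 time series gives D*(D-1)/2 = 6 edgewise values.
print(reconstruct_symmetric_from_tril(np.arange(6, dtype=float), num_time_series=4))
```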
(changed file; path not shown in this view)
@@ -11,7 +11,7 @@

def _compute_brain_state_switch_count_icc(
config_dict: dict,
-n_basis_states: int,
+num_basis_states: int,
connectivity_metric: str = 'correlation',
) -> None:
"""
@@ -20,22 +20,22 @@ def _compute_brain_state_switch_count_icc(
Parameters
----------
:param config_dict:
-:param n_basis_states:
+:param num_basis_states:
:param connectivity_metric:
'correlation', 'covariance'
:return:
"""
all_brain_state_switch_counts_iccs_df = pd.DataFrame()
for model_name in config_dict['models-brain-state-analysis']:
# Load number of switches in brain state file.
-n_brain_state_switches_savedir = os.path.join(
-config_dict['git-results-basedir'], 'brain_states', f'k{n_basis_states:02d}'
+num_brain_state_switches_savedir = os.path.join(
+config_dict['git-results-basedir'], 'brain_states', f'k{num_basis_states:02d}'
)
-n_brain_state_switches_filename = f'number_of_brain_state_switches_{model_name:s}.csv'
+num_brain_state_switches_filename = f'number_of_brain_state_switches_{model_name:s}.csv'
brain_state_switch_counts_df = pd.read_csv(
-os.path.join(n_brain_state_switches_savedir, n_brain_state_switches_filename),
+os.path.join(num_brain_state_switches_savedir, num_brain_state_switches_filename),
index_col=0
-) # (n_subjects, n_scans)
+) # (num_subjects, num_scans)

brain_state_switch_counts_icc = compute_icc_scores_pingouin(
brain_state_switch_counts_df.values,
@@ -51,10 +51,10 @@ def _compute_brain_state_switch_count_icc(
all_brain_state_switch_counts_iccs_df.loc[model_name, 'ICC'] = brain_state_switch_counts_icc

all_brain_state_switch_counts_iccs_df.to_csv(
-os.path.join(n_brain_state_switches_savedir, 'number_of_brain_state_switches_ICC.csv'),
+os.path.join(num_brain_state_switches_savedir, 'number_of_brain_state_switches_ICC.csv'),
float_format="%.2f"
)
-logging.info(f"Saved ICC scores in '{n_brain_state_switches_savedir:s}'.")
+logging.info(f"Saved ICC scores in '{num_brain_state_switches_savedir:s}'.")


if __name__ == "__main__":
@@ -66,10 +66,10 @@ def _compute_brain_state_switch_count_icc(
subset_dimensionality=data_dimensionality,
hostname=socket.gethostname()
)
-n_brain_states_list = cfg['n-brain-states-list']
+num_brain_states_list = cfg['n-brain-states-list']

-for n_brain_states in n_brain_states_list:
+for n_brain_states in num_brain_states_list:
_compute_brain_state_switch_count_icc(
config_dict=cfg,
-n_basis_states=n_brain_states
+num_basis_states=n_brain_states
)
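For orientation, each `number_of_brain_state_switches_<model_name>.csv` is a (num_subjects, num_scans) table, and `compute_icc_scores_pingouin` reduces it to a single test-retest ICC per model. A minimal sketch of that step, assuming the helper wraps `pingouin.intraclass_corr` (the ICC variant it actually selects is not visible in this diff, so ICC2 is used here purely as an example):

```python
import pandas as pd
import pingouin as pg

def icc_from_wide_counts(counts_df: pd.DataFrame) -> float:
    """ICC of switch counts stored as a (num_subjects, num_scans) wide table."""
    long_df = (
        counts_df
        .rename_axis("subject")   # rows are subjects, columns are scan sessions
        .reset_index()
        .melt(id_vars="subject", var_name="scan", value_name="n_switches")
    )
    icc_table = pg.intraclass_corr(
        data=long_df, targets="subject", raters="scan", ratings="n_switches"
    )
    # ICC2: two-way random effects, absolute agreement, single measurement.
    return float(icc_table.set_index("Type").loc["ICC2", "ICC"])
```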
18 changes: 9 additions & 9 deletions benchmarks/fmri/rs/HCP/brain_states/extract_brain_states.py
@@ -59,25 +59,25 @@
) # (N, D*(D-1)/2)
all_subjects_tril_tvfc_per_time_step.append(subject_tril_tvfc_per_time_step)

-n_time_steps = single_subject_tvfc_estimates.shape[0]
-n_time_series = single_subject_tvfc_estimates.shape[1]
+num_time_steps = single_subject_tvfc_estimates.shape[0]
+num_time_series = single_subject_tvfc_estimates.shape[1]

# Aggregates all observed 'states' over time and over subjects.
all_subjects_tril_tvfc_per_time_step = np.array(all_subjects_tril_tvfc_per_time_step) # (n_subjects, N, D*(D-1)/2)
all_subjects_tril_tvfc_per_time_step = all_subjects_tril_tvfc_per_time_step.reshape(-1, all_subjects_tril_tvfc_per_time_step.shape[-1]) # (n_subjects*N, D*(D-1)/2)
-assert all_subjects_tril_tvfc_per_time_step.shape == (num_subjects * n_time_steps, int(n_time_series * (n_time_series-1) / 2))
+assert all_subjects_tril_tvfc_per_time_step.shape == (num_subjects * num_time_steps, int(num_time_series * (num_time_series-1) / 2))

-for n_brain_states in num_brain_states_list:
-n_brain_states_inertia, _, _ = compute_basis_state(
+for num_brain_states in num_brain_states_list:
+num_brain_states_inertia, _, _ = compute_basis_state(
config_dict=cfg,
all_subjects_tril_tvfc=all_subjects_tril_tvfc_per_time_step,
scan_session_id=scan_id,
model_name=model_name,
-n_basis_states=n_brain_states,
-n_time_series=n_time_series,
-n_time_steps=n_time_steps,
+n_basis_states=num_brain_states,
+n_time_series=num_time_series,
+n_time_steps=num_time_steps,
)
-distortions_df.loc[n_brain_states, scan_id] = n_brain_states_inertia / num_subjects
+distortions_df.loc[num_brain_states, scan_id] = num_brain_states_inertia / num_subjects

distortions_df.astype(float).round(2).to_csv(
os.path.join(
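The loop collects one lower-triangular TVFC vector per time step and subject, then asks `compute_basis_state` for `num_brain_states` recurring states, keeping the inertia (normalised by the number of subjects) for the distortion curve stored in `distortions_df`. The helper itself is not part of this diff; a k-means-based version of that step might look roughly like the sketch below (names and defaults are illustrative, not the repository's):

```python
import numpy as np
from sklearn.cluster import KMeans

def kmeans_basis_states(
    all_subjects_tril_tvfc: np.ndarray,  # (num_subjects * num_time_steps, D*(D-1)/2)
    num_basis_states: int,
    random_state: int = 42,
):
    """Cluster per-time-step connectivity vectors into recurring 'brain states'.

    Returns the k-means inertia (for the distortion/elbow analysis), the cluster
    centroids (one flattened connectivity pattern per basis state), and the
    per-time-step state assignments.
    """
    kmeans = KMeans(n_clusters=num_basis_states, n_init=10, random_state=random_state)
    labels = kmeans.fit_predict(all_subjects_tril_tvfc)
    return kmeans.inertia_, kmeans.cluster_centers_, labels
```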
(changed file; path not shown in this view)
@@ -14,7 +14,7 @@

def plot_brain_state_switch_count(
config_dict: dict,
-n_basis_states: int,
+num_basis_states: int,
connectivity_metric: str = 'correlation',
figure_savedir: str = None,
) -> None:
@@ -24,25 +24,25 @@ def plot_brain_state_switch_count(
Parameters
----------
:param config_dict:
-:param n_basis_states:
+:param num_basis_states:
:param connectivity_metric:
'correlation', 'covariance'
:param figure_savedir:
"""
sns.set(style="whitegrid")
plt.style.use(os.path.join(config_dict['git-basedir'], 'configs', 'fig.mplstyle'))

-n_brain_state_switches_savedir = os.path.join(
-config_dict['git-results-basedir'], 'brain_states', f'k{n_basis_states:02d}'
+num_brain_state_switches_savedir = os.path.join(
+config_dict['git-results-basedir'], 'brain_states', f'k{num_basis_states:02d}'
)

all_brain_state_switch_counts_df = pd.DataFrame()
for model_name in config_dict['models-brain-state-analysis']:
-n_brain_state_switches_filename = f'number_of_brain_state_switches_{model_name:s}.csv'
+num_brain_state_switches_filename = f'number_of_brain_state_switches_{model_name:s}.csv'
brain_state_switch_counts_df = pd.read_csv(
-os.path.join(n_brain_state_switches_savedir, n_brain_state_switches_filename),
+os.path.join(num_brain_state_switches_savedir, num_brain_state_switches_filename),
index_col=0
-) # (n_subjects, n_scans)
+) # (num_subjects, num_scans)

# Shorten model names for plot.
model_name = model_name.replace('SVWP_joint', 'WP')
@@ -101,12 +101,12 @@ def plot_brain_state_switch_count(
subset_dimensionality=data_dimensionality,
hostname=socket.gethostname()
)
-n_brain_states_list = cfg['n-brain-states-list']
+num_brain_states_list = cfg['n-brain-states-list']

-for n_brain_states in n_brain_states_list:
+for n_brain_states in num_brain_states_list:
plot_brain_state_switch_count(
config_dict=cfg,
-n_basis_states=n_brain_states,
+num_basis_states=n_brain_states,
figure_savedir=os.path.join(
cfg['figures-basedir'], 'brain_states', f'k{n_brain_states:02d}'
)
(changed file; path not shown in this view)
@@ -48,14 +48,7 @@
scan_id=scan_id,
verbose=False,
)
-n_time_series = y.shape[1]
-if experiment_dimensionality == 'bivariate':
-chosen_indices = [0, 1]
-# chosen_indices_df = cfg['chosen-indices']
-# chosen_indices = chosen_indices_df.loc[subject, scan_id]
-y = y[:, chosen_indices]
-n_time_series = y.shape[1]
-print('y', y.shape)
+num_time_series = y.shape[1]

x_train, x_test = leave_every_other_out_split(x) # (N/2, 1), (N/2, 1)
y_train, y_test = leave_every_other_out_split(y) # (N/2, D), (N/2, D)
@@ -64,7 +57,7 @@
test_locations_predicted_covariance_structure = get_test_location_estimated_covariance_structure(
config_dict=cfg,
model_name=model_name,
-n_time_series=n_time_series,
+num_time_series=num_time_series,
x_train_locations=x_train,
x_test_locations=x_test,
scan_id=scan_id,
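For context, this imputation benchmark trains on half of the time points and scores the model's predicted covariance structure at the held-out, interleaved locations. The repository's `leave_every_other_out_split` is not shown here; a plausible minimal version (an assumption about its behaviour, not the actual code) is:

```python
import numpy as np

def leave_every_other_out_split(arr: np.ndarray):
    """Assign alternating time points to the train and test sets."""
    return arr[0::2], arr[1::2]  # (N/2, ...), (N/2, ...)

# x: (N, 1) scan times, y: (N, D) time series
x = np.linspace(0.0, 1.0, 10)[:, None]
x_train, x_test = leave_every_other_out_split(x)
print(x_train.shape, x_test.shape)  # (5, 1) (5, 1)
```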
(changed file; path not shown in this view)
@@ -58,7 +58,7 @@
test_locations_predicted_covariance_structure = get_test_location_estimated_covariance_structure(
config_dict=cfg,
model_name=model_name,
-n_time_series=num_time_series,
+num_time_series=num_time_series,
x_train_locations=x_train,
x_test_locations=x_test,
scan_id=scan_id,
(changed file; path not shown in this view)
@@ -54,18 +54,18 @@ def plot_edgewise_imputation_benchmark_scores_joint(
index_col=0
) # (D, D)

-n_time_series = edgewise_likelihoods.shape[0]
+num_time_series = edgewise_likelihoods.shape[0]
if data_dimensionality == 'd15':
edgewise_likelihoods, new_rsn_names = reorder_ica_components(
config_dict=config_dict,
original_matrix=edgewise_likelihoods.values,
-n_time_series=n_time_series,
+n_time_series=num_time_series,
# lower_triangular=True
)
else:
# TODO: add RSN names map for d50
edgewise_likelihoods = edgewise_likelihoods.values
-new_rsn_names = np.arange(n_time_series)
+new_rsn_names = np.arange(num_time_series)

# Define mask for upper triangular values.
mask = np.zeros_like(edgewise_likelihoods)
@@ -126,18 +126,18 @@ def plot_edgewise_imputation_benchmark_scores(

vmin, vmax = -4.0, -2.0

-n_time_series = edgewise_likelihoods.shape[0]
+num_time_series = edgewise_likelihoods.shape[0]
if data_dimensionality == 'd15':
edgewise_likelihoods, new_rsn_names = reorder_ica_components(
config_dict=config_dict,
original_matrix=edgewise_likelihoods.values,
-n_time_series=n_time_series,
+n_time_series=num_time_series,
# lower_triangular=True
)
else:
# TODO: add RSN names map for d50
edgewise_likelihoods = edgewise_likelihoods.values
-new_rsn_names = np.arange(n_time_series)
+new_rsn_names = np.arange(num_time_series)

# Define mask for upper triangular values.
mask = np.zeros_like(edgewise_likelihoods)
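Both plotting functions break off in this view right after `mask = np.zeros_like(edgewise_likelihoods)`. The usual continuation, hiding the diagonal and upper triangle before passing the matrix to seaborn, looks roughly like this (a sketch with assumed arguments such as the colormap, not the file's actual code):

```python
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def plot_lower_triangular_heatmap(edgewise_scores: np.ndarray, labels, vmin=-4.0, vmax=-2.0):
    """Plot only the lower triangle of a symmetric edgewise score matrix."""
    mask = np.zeros_like(edgewise_scores, dtype=bool)
    mask[np.triu_indices_from(mask)] = True  # True entries are hidden by seaborn
    ax = sns.heatmap(
        edgewise_scores,
        mask=mask,
        vmin=vmin,
        vmax=vmax,
        cmap="viridis",
        square=True,
        xticklabels=labels,
        yticklabels=labels,
    )
    plt.tight_layout()
    return ax
```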
(changed file; path not shown in this view)
@@ -18,7 +18,6 @@
data_set_name='HCP_PTN1200_recon2',
subset_dimensionality=data_dimensionality
)
-n_time_series = int(data_dimensionality[1:])

# The kernel parameters were saved with the experiments in the git repo.
kernel_params_savedir = os.path.join(cfg['git-results-basedir'], 'kernel_analysis')
(remaining changed files not loaded in this view)
