diff --git a/README.md b/README.md
index 3a460e2b..50c16e34 100644
--- a/README.md
+++ b/README.md
@@ -91,8 +91,7 @@ flowchart TB
 
 A subset of the common dataset.
 
-Example file:
-`resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad`
+Example file: `resources_test/common/cxg_immune_cell_atlas/dataset.h5ad`
 
 Format:
 
@@ -158,7 +157,7 @@ Arguments:
 Unintegrated AnnData HDF5 file.
 
 Example file:
-`resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad`
+`resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad`
 
 Format:
 
@@ -202,7 +201,7 @@ Data structure:
 Uncensored dataset containing the true labels.
 
 Example file:
-`resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad`
+`resources_test/task_batch_integration/cxg_immune_cell_atlas/solution.h5ad`
 
 Format:
 
@@ -317,7 +316,7 @@ Arguments:
 An integrated AnnData dataset.
 
 Example file:
-`resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated.h5ad`
+`resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated.h5ad`
 
 Description:
 
@@ -362,7 +361,7 @@ Data structure:
 An integrated AnnData dataset with additional outputs.
 
 Example file:
-`resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad`
+`resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad`
 
 Description:
 
diff --git a/_viash.yaml b/_viash.yaml
index 8a0d18ea..1598a220 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -31,21 +31,21 @@ description: |
 
 references:
   doi:
-    # Luecken, M.D., Büttner, M., Chaichoompu, K. et al. 
-    # Benchmarking atlas-level data integration in single-cell genomics. Nat Methods 19, 41–50 (2022). 
+    # Luecken, M.D., Büttner, M., Chaichoompu, K. et al.
+    # Benchmarking atlas-level data integration in single-cell genomics. Nat Methods 19, 41–50 (2022).
     - 10.1038/s41592-021-01336-8
-  
+
 info:
   image: thumbnail.svg
   test_resources:
     - type: s3
-      path: s3://openproblems-data/resources_test/common/cxg_mouse_pancreas_atlas/
-      dest: resources_test/common/cxg_mouse_pancreas_atlas
+      path: s3://openproblems-data/resources_test/common/cxg_immune_cell_atlas/
+      dest: resources_test/common/cxg_immune_cell_atlas
     - type: s3
       path: s3://openproblems-data/resources_test/task_batch_integration/
       dest: resources_test/task_batch_integration
 
-authors: 
+authors:
   - name: Michaela Mueller
     roles: [ maintainer, author ]
     info:
diff --git a/scripts/create_resources/resources.sh b/scripts/create_resources/resources.sh
index 58ac28a1..66b4eefb 100755
--- a/scripts/create_resources/resources.sh
+++ b/scripts/create_resources/resources.sh
@@ -19,7 +19,7 @@ tw launch https://github.com/openproblems-bio/task_batch_integration.git \
   --pull-latest \
   --main-script target/nextflow/workflows/process_datasets/main.nf \
   --workspace 53907369739130 \
-  --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
+  --compute-env 6UWsS5iw7TI37saKo2wcMi \
   --params-file /tmp/params.yaml \
   --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
diff --git a/scripts/create_resources/test_resources.sh b/scripts/create_resources/test_resources.sh
index 92694692..49d2fd93 100755
--- a/scripts/create_resources/test_resources.sh
+++ b/scripts/create_resources/test_resources.sh
@@ -15,36 +15,36 @@ mkdir -p $DATASET_DIR
 
 # process dataset
 viash run src/data_processors/process_dataset/config.vsh.yaml -- \
-  --input "$RAW_DATA/cxg_mouse_pancreas_atlas/dataset.h5ad" \
-  --output_dataset "$DATASET_DIR/cxg_mouse_pancreas_atlas/dataset.h5ad" \
-  --output_solution "$DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad"
+  --input "$RAW_DATA/cxg_immune_cell_atlas/dataset.h5ad" \
+  --output_dataset "$DATASET_DIR/cxg_immune_cell_atlas/dataset.h5ad" \
+  --output_solution "$DATASET_DIR/cxg_immune_cell_atlas/solution.h5ad"
 
 # run one method
 viash run src/methods/combat/config.vsh.yaml -- \
-  --input $DATASET_DIR/cxg_mouse_pancreas_atlas/dataset.h5ad \
-  --output $DATASET_DIR/cxg_mouse_pancreas_atlas/integrated.h5ad
+  --input $DATASET_DIR/cxg_immune_cell_atlas/dataset.h5ad \
+  --output $DATASET_DIR/cxg_immune_cell_atlas/integrated.h5ad
 
 # run transformer
 viash run src/data_processors/transform/config.vsh.yaml -- \
-    --input_integrated $DATASET_DIR/cxg_mouse_pancreas_atlas/integrated.h5ad \
-    --input_dataset $DATASET_DIR/cxg_mouse_pancreas_atlas/dataset.h5ad \
+    --input_integrated $DATASET_DIR/cxg_immune_cell_atlas/integrated.h5ad \
+    --input_dataset $DATASET_DIR/cxg_immune_cell_atlas/dataset.h5ad \
     --expected_method_types feature \
-    --output $DATASET_DIR/cxg_mouse_pancreas_atlas/integrated_full.h5ad
+    --output $DATASET_DIR/cxg_immune_cell_atlas/integrated_full.h5ad
 
 # run one metric
 viash run src/metrics/graph_connectivity/config.vsh.yaml -- \
-    --input_integrated $DATASET_DIR/cxg_mouse_pancreas_atlas/integrated_full.h5ad \
-    --input_solution $DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad \
-    --output $DATASET_DIR/cxg_mouse_pancreas_atlas/score.h5ad
+    --input_integrated $DATASET_DIR/cxg_immune_cell_atlas/integrated_full.h5ad \
+    --input_solution $DATASET_DIR/cxg_immune_cell_atlas/solution.h5ad \
+    --output $DATASET_DIR/cxg_immune_cell_atlas/score.h5ad
 
 # write the state file
-cat > $DATASET_DIR/state.yaml << HERE
-id: cxg_mouse_pancreas_atlas
+cat > $DATASET_DIR/cxg_immune_cell_atlas/state.yaml << HERE
+id: cxg_immune_cell_atlas
 output_dataset: !file dataset.h5ad
 output_solution: !file solution.h5ad
 output_integrated: !file integrated.h5ad
 output_integrated_full: !file integrated_full.h5ad
 output_score: !file score.h5ad
 HERE
 
 # only run this if you have access to the openproblems-data bucket
diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh
index 8f4bc92a..1e980239 100755
--- a/scripts/run_benchmark/run_full_seqeracloud.sh
+++ b/scripts/run_benchmark/run_full_seqeracloud.sh
@@ -25,7 +25,7 @@ tw launch https://github.com/openproblems-bio/task_batch_integration.git \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
   --workspace 53907369739130 \
-  --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
+  --compute-env 6UWsS5iw7TI37saKo2wcMi \
   --params-file /tmp/params.yaml \
   --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh
index 64056313..3645ad0f 100755
--- a/scripts/run_benchmark/run_test_seqeracloud.sh
+++ b/scripts/run_benchmark/run_test_seqeracloud.sh
@@ -21,7 +21,7 @@ tw launch https://github.com/openproblems-bio/task_batch_integration.git \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
   --workspace 53907369739130 \
-  --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
+  --compute-env 6UWsS5iw7TI37saKo2wcMi \
   --params-file /tmp/params.yaml \
   --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
diff --git a/src/api/base_method.yaml b/src/api/base_method.yaml
new file mode 100644
index 00000000..ed3d5938
--- /dev/null
+++ b/src/api/base_method.yaml
@@ -0,0 +1,20 @@
+namespace: methods  # shared method API; merged by comp_method.yaml and the scimilarity config
+info:
+  type: method
+  type_info:
+    label: Method
+    summary: A method for the batch integration task.
+    description: |
+      A batch integration method which integrates multiple datasets.
+arguments:  # one unintegrated dataset in, one integrated dataset out
+  - name: --input
+    __merge__: file_dataset.yaml
+    direction: input
+    required: true
+  - name: --output
+    __merge__: file_integrated.yaml
+    direction: output
+    required: true
+test_resources:  # config check only; comp_method.yaml lists run_and_check_output.py and the test dataset itself
+  - type: python_script
+    path: /common/component_tests/check_config.py
diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml
index 0ca176f6..b8e1ebd3 100644
--- a/src/api/comp_control_method.yaml
+++ b/src/api/comp_control_method.yaml
@@ -24,5 +24,5 @@ test_resources:
     path: /common/component_tests/check_config.py
   - type: python_script
     path: /common/component_tests/run_and_check_output.py
-  - path: /resources_test/task_batch_integration/cxg_mouse_pancreas_atlas
-    dest: resources_test/task_batch_integration/cxg_mouse_pancreas_atlas
+  - path: /resources_test/task_batch_integration/cxg_immune_cell_atlas
+    dest: resources_test/task_batch_integration/cxg_immune_cell_atlas
diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml
index dda52ce0..571c9565 100644
--- a/src/api/comp_method.yaml
+++ b/src/api/comp_method.yaml
@@ -1,24 +1,6 @@
-namespace: methods
-info:
-  type: method
-  type_info:
-    label: Method
-    summary: A method for the batch integration task.
-    description: |
-      A batch integration method which integrates multiple datasets.
-arguments:
-  - name: --input
-    __merge__: file_dataset.yaml
-    direction: input
-    required: true
-  - name: --output
-    __merge__: file_integrated.yaml
-    direction: output
-    required: true
+__merge__: base_method.yaml
 test_resources:
-  - type: python_script
-    path: /common/component_tests/check_config.py
   - type: python_script
     path: /common/component_tests/run_and_check_output.py
-  - path: /resources_test/task_batch_integration/cxg_mouse_pancreas_atlas
-    dest: resources_test/task_batch_integration/cxg_mouse_pancreas_atlas
+  - path: /resources_test/task_batch_integration/cxg_immune_cell_atlas
+    dest: resources_test/task_batch_integration/cxg_immune_cell_atlas
diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml
index 73eee377..bc57056a 100644
--- a/src/api/comp_metric.yaml
+++ b/src/api/comp_metric.yaml
@@ -24,5 +24,5 @@ test_resources:
     path: /common/component_tests/check_config.py
   - type: python_script
     path: /common/component_tests/run_and_check_output.py
-  - path: /resources_test/task_batch_integration/cxg_mouse_pancreas_atlas
-    dest: resources_test/task_batch_integration/cxg_mouse_pancreas_atlas
+  - path: /resources_test/task_batch_integration/cxg_immune_cell_atlas
+    dest: resources_test/task_batch_integration/cxg_immune_cell_atlas
diff --git a/src/api/comp_process_dataset.yaml b/src/api/comp_process_dataset.yaml
index b2b449aa..067a5c3d 100644
--- a/src/api/comp_process_dataset.yaml
+++ b/src/api/comp_process_dataset.yaml
@@ -25,7 +25,7 @@ arguments:
     default: 2000
     required: false
 test_resources:
-  - path: /resources_test/common/cxg_mouse_pancreas_atlas/
-    dest: resources_test/common/cxg_mouse_pancreas_atlas/
+  - path: /resources_test/common/cxg_immune_cell_atlas/
+    dest: resources_test/common/cxg_immune_cell_atlas/
   - type: python_script
-    path: /common/component_tests/run_and_check_output.py
\ No newline at end of file
+    path: /common/component_tests/run_and_check_output.py
diff --git a/src/api/comp_transformer.yaml b/src/api/comp_transformer.yaml
index eb347298..b68a9c37 100644
--- a/src/api/comp_transformer.yaml
+++ b/src/api/comp_transformer.yaml
@@ -6,7 +6,7 @@ info:
     summary: Check the output and transform to create additional output types
     description: |
       This component will:
-      
+
         - Assert whether the input dataset and integrated dataset have the same shape.
         - Reorder the integrated dataset to match the input dataset if needed.
         - Transform the corrected feature output to an embedding.
@@ -26,7 +26,7 @@ arguments:
     required: true
     multiple: true
     description: |
-      The expected output types of the batch integration method. 
+      The expected output types of the batch integration method.
     choices: [ feature, embedding, graph ]
   - name: --output
     __merge__: file_integrated_full.yaml
@@ -35,5 +35,5 @@ arguments:
 test_resources:
   - type: python_script
     path: /common/component_tests/run_and_check_output.py
-  - path: /resources_test/task_batch_integration/cxg_mouse_pancreas_atlas
-    dest: resources_test/task_batch_integration/cxg_mouse_pancreas_atlas
+  - path: /resources_test/task_batch_integration/cxg_immune_cell_atlas
+    dest: resources_test/task_batch_integration/cxg_immune_cell_atlas
diff --git a/src/api/file_common_dataset.yaml b/src/api/file_common_dataset.yaml
index 1399f0b2..171fdeb6 100644
--- a/src/api/file_common_dataset.yaml
+++ b/src/api/file_common_dataset.yaml
@@ -2,7 +2,7 @@
 # `src/datasets/api/file_common_dataset.yaml`. However, some fields
 # such as obs.cell_type and obs.batch are now required
 type: file
-example: "resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad"
+example: "resources_test/common/cxg_immune_cell_atlas/dataset.h5ad"
 label: "Common Dataset"
 summary: A subset of the common dataset.
 info:
diff --git a/src/api/file_dataset.yaml b/src/api/file_dataset.yaml
index 8f60192b..a76ae203 100644
--- a/src/api/file_dataset.yaml
+++ b/src/api/file_dataset.yaml
@@ -1,5 +1,5 @@
 type: file
-example: "resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad"
+example: "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad"
 label: "Dataset"
 summary: Unintegrated AnnData HDF5 file.
 info:
diff --git a/src/api/file_integrated.yaml b/src/api/file_integrated.yaml
index abd6df29..7920fcd0 100644
--- a/src/api/file_integrated.yaml
+++ b/src/api/file_integrated.yaml
@@ -1,5 +1,5 @@
 type: file
-example: "resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated.h5ad"
+example: "resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated.h5ad"
 label: Integration
 summary: An integrated AnnData dataset.
 description: |
diff --git a/src/api/file_integrated_full.yaml b/src/api/file_integrated_full.yaml
index 4d02f596..cdedb854 100644
--- a/src/api/file_integrated_full.yaml
+++ b/src/api/file_integrated_full.yaml
@@ -1,5 +1,5 @@
 type: file
-example: "resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad"
+example: "resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad"
 label: Transformed integration
 summary: An integrated AnnData dataset with additional outputs.
 description: |
@@ -8,7 +8,7 @@ description: |
     - Feature: the corrected_counts layer
     - Embedding: the X_emb obsm
     - Graph: the connectivities and distances obsp
-  
+
   The Graph should always be present, but the Feature and Embedding are optional.
 info:
   format:
diff --git a/src/api/file_solution.yaml b/src/api/file_solution.yaml
index 35e0c7ea..562bfa22 100644
--- a/src/api/file_solution.yaml
+++ b/src/api/file_solution.yaml
@@ -1,5 +1,5 @@
 type: file
-example: "resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad"
+example: "resources_test/task_batch_integration/cxg_immune_cell_atlas/solution.h5ad"
 label: "Solution"
 summary: Uncensored dataset containing the true labels.
 info:
diff --git a/src/control_methods/embed_cell_types/script.py b/src/control_methods/embed_cell_types/script.py
index 5482d301..f6f1961b 100644
--- a/src/control_methods/embed_cell_types/script.py
+++ b/src/control_methods/embed_cell_types/script.py
@@ -2,11 +2,11 @@
 
 ## VIASH START
 par = {
-    'input_dataset': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
-    'input_solution': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad',
+    'input_dataset': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
+    'input_solution': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/solution.h5ad',
     'output': 'output.h5ad',
 }
-meta = { 
+meta = {
     'functionality': 'foo',
     'config': 'bar'
 }
diff --git a/src/control_methods/embed_cell_types_jittered/script.py b/src/control_methods/embed_cell_types_jittered/script.py
index 9ad3e743..06180464 100644
--- a/src/control_methods/embed_cell_types_jittered/script.py
+++ b/src/control_methods/embed_cell_types_jittered/script.py
@@ -4,13 +4,13 @@
 ## VIASH START
 
 par = {
-    'input_dataset': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
-    'input_solution': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad',
+    'input_dataset': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
+    'input_solution': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/solution.h5ad',
     'output': 'output.h5ad',
     'jitter': 0.01,
 }
 
-meta = { 
+meta = {
     'functionality': 'foo',
     'config': 'bar'
 }
diff --git a/src/control_methods/no_integration/script.py b/src/control_methods/no_integration/script.py
index 0c1581be..df7b280d 100644
--- a/src/control_methods/no_integration/script.py
+++ b/src/control_methods/no_integration/script.py
@@ -2,7 +2,7 @@
 
 ## VIASH START
 par = {
-    'input_dataset': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input_dataset': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
 }
 ## VIASH END
diff --git a/src/control_methods/no_integration_batch/script.py b/src/control_methods/no_integration_batch/script.py
index 8324acf9..1f62763c 100644
--- a/src/control_methods/no_integration_batch/script.py
+++ b/src/control_methods/no_integration_batch/script.py
@@ -5,11 +5,11 @@
 ## VIASH START
 
 par = {
-    'input_dataset': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input_dataset': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
 }
 
-meta = { 
+meta = {
     'functionality': 'foo',
     'config': 'bar'
 }
@@ -46,4 +46,4 @@
 
 print("Store outputs", flush=True)
 adata.uns['method_id'] = meta['name']
-adata.write_h5ad(par['output'], compression='gzip')
\ No newline at end of file
+adata.write_h5ad(par['output'], compression='gzip')
diff --git a/src/control_methods/shuffle_integration/script.py b/src/control_methods/shuffle_integration/script.py
index 91a542af..e1f29318 100644
--- a/src/control_methods/shuffle_integration/script.py
+++ b/src/control_methods/shuffle_integration/script.py
@@ -3,10 +3,10 @@
 
 ## VIASH START
 par = {
-    'input_dataset': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input_dataset': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
 }
-meta = { 
+meta = {
     "resources_dir": "src/tasks/batch_integration/control_methods/"
 }
 ## VIASH END
diff --git a/src/control_methods/shuffle_integration_by_batch/script.py b/src/control_methods/shuffle_integration_by_batch/script.py
index c7d35171..a9b63edc 100644
--- a/src/control_methods/shuffle_integration_by_batch/script.py
+++ b/src/control_methods/shuffle_integration_by_batch/script.py
@@ -3,10 +3,10 @@
 
 ## VIASH START
 par = {
-    'input_dataset': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input_dataset': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
 }
-meta = { 
+meta = {
     "resources_dir": "src/tasks/batch_integration/control_methods/"
 }
 ## VIASH END
diff --git a/src/control_methods/shuffle_integration_by_cell_type/script.py b/src/control_methods/shuffle_integration_by_cell_type/script.py
index 762bd07b..0df2ba46 100644
--- a/src/control_methods/shuffle_integration_by_cell_type/script.py
+++ b/src/control_methods/shuffle_integration_by_cell_type/script.py
@@ -3,10 +3,10 @@
 
 ## VIASH START
 par = {
-    'input_dataset': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input_dataset': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
 }
-meta = { 
+meta = {
     "resources_dir": "src/tasks/batch_integration/control_methods/"
 }
 ## VIASH END
diff --git a/src/data_processors/transform/script.py b/src/data_processors/transform/script.py
index dc01584a..226edca8 100644
--- a/src/data_processors/transform/script.py
+++ b/src/data_processors/transform/script.py
@@ -3,8 +3,8 @@
 
 ## VIASH START
 par = {
-    "input_integrated": "resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated.h5ad",
-    "input_dataset": "resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad",
+    "input_integrated": "resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated.h5ad",
+    "input_dataset": "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
     "expected_method_types": ["feature"],
     "ouput": "output.h5ad"
 }
@@ -28,7 +28,7 @@
 
 if "corrected_counts" in integrated.layers.keys():
     assert integrated.shape[1] == dataset.shape[1], "Number of genes do not match"
-    
+
     if not integrated.var.index.equals(dataset.var.index):
         assert integrated.var.index.sort_values().equals(dataset.var.index.sort_values()), "Gene names do not match"
         print("Reordering genes", flush=True)
diff --git a/src/methods/batchelor_fastmnn/script.R b/src/methods/batchelor_fastmnn/script.R
index 76791bea..879aad68 100644
--- a/src/methods/batchelor_fastmnn/script.R
+++ b/src/methods/batchelor_fastmnn/script.R
@@ -8,7 +8,7 @@ suppressPackageStartupMessages({
 
 ## VIASH START
 par <- list(
-  input = 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+  input = 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
   output = 'output.h5ad'
 )
 meta <- list(
diff --git a/src/methods/batchelor_mnn_correct/script.R b/src/methods/batchelor_mnn_correct/script.R
index cadbcc82..4a8802af 100644
--- a/src/methods/batchelor_mnn_correct/script.R
+++ b/src/methods/batchelor_mnn_correct/script.R
@@ -7,7 +7,7 @@ suppressPackageStartupMessages({
 })
 ## VIASH START
 par <- list(
-  input = 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+  input = 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
   output = 'output.h5ad'
 )
 meta <- list(
diff --git a/src/methods/bbknn/script.py b/src/methods/bbknn/script.py
index 86c807ed..9c121ccb 100644
--- a/src/methods/bbknn/script.py
+++ b/src/methods/bbknn/script.py
@@ -5,7 +5,7 @@
 
 ## VIASH START
 par = {
-    'input': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
     'annoy_n_trees': 10,
     'neighbors_within_batch': 3,
diff --git a/src/methods/combat/script.py b/src/methods/combat/script.py
index 155c1621..ab251363 100644
--- a/src/methods/combat/script.py
+++ b/src/methods/combat/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
 }
 meta = {
diff --git a/src/methods/harmony/script.R b/src/methods/harmony/script.R
index e5cb2c5b..595e3f19 100644
--- a/src/methods/harmony/script.R
+++ b/src/methods/harmony/script.R
@@ -5,7 +5,7 @@ requireNamespace("harmony", quietly = TRUE)
 
 ## VIASH START
 par <- list(
-  input = 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+  input = 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
   output = 'output.h5ad'
 )
 meta <- list(
diff --git a/src/methods/harmonypy/script.py b/src/methods/harmonypy/script.py
index 79b32537..ec851953 100644
--- a/src/methods/harmonypy/script.py
+++ b/src/methods/harmonypy/script.py
@@ -5,7 +5,7 @@
 
 ## VIASH START
 par = {
-    "input": "resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad",
+    "input": "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
     "output": "output.h5ad"
 }
 meta = {
diff --git a/src/methods/liger/script.R b/src/methods/liger/script.R
index 62dec598..e5b7e451 100644
--- a/src/methods/liger/script.R
+++ b/src/methods/liger/script.R
@@ -4,7 +4,7 @@ requireNamespace("rliger", quietly = TRUE)
 
 ## VIASH START
 par <- list(
-  input = "resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad",
+  input = "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
   output = "output.h5ad"
 )
 meta <- list(
diff --git a/src/methods/mnnpy/script.py b/src/methods/mnnpy/script.py
index a9dfd8a8..7100da10 100644
--- a/src/methods/mnnpy/script.py
+++ b/src/methods/mnnpy/script.py
@@ -3,7 +3,7 @@
 
 ## VIASH START
 par = {
-    'input': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
 }
 meta = {
diff --git a/src/methods/pyliger/script.py b/src/methods/pyliger/script.py
index 603b6d04..c6bd5f0e 100644
--- a/src/methods/pyliger/script.py
+++ b/src/methods/pyliger/script.py
@@ -5,7 +5,7 @@
 
 ## VIASH START
 par = {
-    'input': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad'
 }
 meta = {
@@ -31,7 +31,7 @@
 adata_per_batch = []
 for batch in adata.obs['batch'].unique():
   adb = adata[adata.obs['batch'] == batch].copy()
-  
+
   # save row sum and sum of squares for further use
   norm_sum = np.ravel(np.sum(adb.layers["norm_data"], axis=0))
   norm_sum_sq = np.ravel(np.sum(adb.layers["norm_data"].power(2), axis=0))
diff --git a/src/methods/scalex/script.py b/src/methods/scalex/script.py
index 887a989d..7d09f02f 100644
--- a/src/methods/scalex/script.py
+++ b/src/methods/scalex/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
 }
 meta = {
diff --git a/src/methods/scanorama/script.py b/src/methods/scanorama/script.py
index 8f99418c..2ddb91df 100644
--- a/src/methods/scanorama/script.py
+++ b/src/methods/scanorama/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
 }
 meta = {
diff --git a/src/methods/scanvi/script.py b/src/methods/scanvi/script.py
index 882d7ff6..5a17d2e9 100644
--- a/src/methods/scanvi/script.py
+++ b/src/methods/scanvi/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
     'n_hvg': 2000,
     'n_latent': 30,
diff --git a/src/methods/scimilarity/config.vsh.yaml b/src/methods/scimilarity/config.vsh.yaml
new file mode 100644
index 00000000..02b6527c
--- /dev/null
+++ b/src/methods/scimilarity/config.vsh.yaml
@@ -0,0 +1,34 @@
+__merge__: /src/api/base_method.yaml
+name: scimilarity
+label: SCimilarity
+summary: SCimilarity provides a unifying representation of single cell expression profiles.
+description: |
+  SCimilarity is a unifying representation of single cell expression profiles that quantifies similarity between expression states and generalizes to represent new studies without additional training.
+references:
+  doi: 10.1101/2023.07.18.549537
+links:
+  repository: https://github.com/Genentech/scimilarity
+  documentation: https://genentech.github.io/scimilarity/index.html
+info:
+  method_types: [embedding]
+  preferred_normalization: counts
+arguments:
+  - name: --model
+    type: file
+    description: Path to the directory containing SCimilarity models or a .zip/.tar.gz archive
+    required: true
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/read_anndata_partial.py
+engines:
+  - type: docker
+    image: openproblems/base_pytorch_nvidia:1.0.0
+    setup:
+      - type: python
+        github: Genentech/scimilarity
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, midmem, lowcpu]
diff --git a/src/methods/scimilarity/script.py b/src/methods/scimilarity/script.py
new file mode 100644
index 00000000..2da1790e
--- /dev/null
+++ b/src/methods/scimilarity/script.py
@@ -0,0 +1,120 @@
+import os
+import sys
+import tarfile
+import tempfile
+import warnings
+import zipfile
+
+import anndata as ad
+import scimilarity
+
+## VIASH START
+# Development defaults; meta needs "resources_dir" because it is read right
+# after this block to locate read_anndata_partial.py.
+par = {
+    "input": "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
+    "output": "output.h5ad",
+    "model": "model_v1.1",
+}
+meta = {
+    "name": "scimilarity",
+    "resources_dir": "src/utils",
+}
+## VIASH END
+
+sys.path.append(meta["resources_dir"])
+from read_anndata_partial import read_anndata
+
+print("Read input", flush=True)
+adata = read_anndata(par["input"], X="layers/counts", obs="obs", var="var", uns="uns")
+
+# Fail early: only datasets annotated as human are accepted
+if adata.uns["dataset_organism"] != "homo_sapiens":
+    raise ValueError(
+        "SCimilarity can only be used with human data "
+        f"(dataset_organism == \"{adata.uns['dataset_organism']}\")"
+    )
+
+# Resolve the model directory: use a directory as-is, unpack archives into a
+# temporary directory that is removed once the output has been written
+if os.path.isdir(par["model"]):
+    model_temp = None
+    model_dir = par["model"]
+else:
+    model_temp = tempfile.TemporaryDirectory()
+    model_dir = model_temp.name
+
+    if zipfile.is_zipfile(par["model"]):
+        print("Extract SCimilarity model from .zip", flush=True)
+        with zipfile.ZipFile(par["model"], "r") as zip_file:
+            zip_file.extractall(model_dir)
+    elif tarfile.is_tarfile(par["model"]) and par["model"].endswith(".tar.gz"):
+        print("Extract SCimilarity model from .tar.gz", flush=True)
+        with tarfile.open(par["model"], "r:gz") as tar_file:
+            tar_file.extractall(model_dir)
+            # The model is read from the first entry of the extracted archive
+            model_dir = os.path.join(model_dir, os.listdir(model_dir)[0])
+    else:
+        # Do not leave the unused temporary directory behind
+        model_temp.cleanup()
+        raise ValueError(
+            "The 'model' argument should be a directory, a .zip file or a .tar.gz file"
+        )
+
+print("Load SCimilarity model", flush=True)
+scimilarity_embedding = scimilarity.cell_embedding.CellEmbedding(model_path=model_dir)
+print("SCimilarity version:", scimilarity.__version__, flush=True)
+
+print("Create input data", flush=True)
+# Some of the functions modify the adata so make sure we have a copy
+model_input = ad.AnnData(X=adata.X.copy(), layers={"counts": adata.X.copy()})
+# Set var_names to gene symbols
+model_input.var_names = adata.var["feature_name"]
+
+print("Align datasets", flush=True)
+
+# Check the number of genes in the dataset and reduce the overlap threshold if
+# necessary (mostly for subsampled test datasets)
+gene_overlap_threshold = 5000
+if 0.8 * model_input.n_vars < gene_overlap_threshold:
+    warnings.warn(
+        f"The number of genes in the dataset ({model_input.n_vars}) "
+        f"is less than or close to {gene_overlap_threshold}. "
+        f"Setting gene_overlap_threshold to 0.8 * n_var ({int(0.8 * model_input.n_vars)})."
+    )
+    gene_overlap_threshold = int(0.8 * model_input.n_vars)
+
+model_input = scimilarity.utils.align_dataset(
+    model_input,
+    scimilarity_embedding.gene_order,
+    gene_overlap_threshold=gene_overlap_threshold,
+)
+model_input = scimilarity.utils.consolidate_duplicate_symbols(model_input)
+
+print("Normalizing dataset", flush=True)
+model_input = scimilarity.utils.lognorm_counts(model_input)
+
+print("Get cell embeddings", flush=True)
+cell_embeddings = scimilarity_embedding.get_embeddings(model_input.X)
+
+print("Store outputs", flush=True)
+output = ad.AnnData(
+    obs=adata.obs[[]],
+    var=adata.var[[]],
+    obsm={
+        "X_emb": cell_embeddings,
+    },
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "normalization_id": adata.uns["normalization_id"],
+        "method_id": meta["name"],
+    },
+)
+print(output)
+
+print("Write output to file", flush=True)
+output.write_h5ad(par["output"], compression="gzip")
+
+if model_temp is not None:
+    print("Cleanup model directory", flush=True)
+    model_temp.cleanup()
diff --git a/src/methods/scvi/script.py b/src/methods/scvi/script.py
index b6836b49..20f1cf32 100644
--- a/src/methods/scvi/script.py
+++ b/src/methods/scvi/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/dataset.h5ad',
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
     'output': 'output.h5ad',
     'n_hvg': 2000,
     'n_latent': 30,
diff --git a/src/metrics/asw_batch/script.py b/src/metrics/asw_batch/script.py
index d6dafcfe..4a7269da 100644
--- a/src/metrics/asw_batch/script.py
+++ b/src/metrics/asw_batch/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad',
 }
 meta = {
diff --git a/src/metrics/asw_label/script.py b/src/metrics/asw_label/script.py
index 499a06f9..e307aaac 100644
--- a/src/metrics/asw_label/script.py
+++ b/src/metrics/asw_label/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad',
 }
 
diff --git a/src/metrics/cell_cycle_conservation/script.py b/src/metrics/cell_cycle_conservation/script.py
index 9ad38422..b254f4f8 100644
--- a/src/metrics/cell_cycle_conservation/script.py
+++ b/src/metrics/cell_cycle_conservation/script.py
@@ -5,7 +5,7 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad'
 }
 
diff --git a/src/metrics/clustering_overlap/script.py b/src/metrics/clustering_overlap/script.py
index 30fe1704..2254acb0 100644
--- a/src/metrics/clustering_overlap/script.py
+++ b/src/metrics/clustering_overlap/script.py
@@ -6,7 +6,7 @@
 
 ## VIASH START
 par = {
-    'adata_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'adata_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad',
 }
 
@@ -50,4 +50,4 @@
 )
 
 print("Write data to file", flush=True)
-output.write_h5ad(par["output"], compression="gzip")
\ No newline at end of file
+output.write_h5ad(par["output"], compression="gzip")
diff --git a/src/metrics/graph_connectivity/script.py b/src/metrics/graph_connectivity/script.py
index 0c92a35a..6148884e 100644
--- a/src/metrics/graph_connectivity/script.py
+++ b/src/metrics/graph_connectivity/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad',
 }
 meta = {
diff --git a/src/metrics/hvg_overlap/script.py b/src/metrics/hvg_overlap/script.py
index 8ecda9bc..b902fe08 100644
--- a/src/metrics/hvg_overlap/script.py
+++ b/src/metrics/hvg_overlap/script.py
@@ -4,8 +4,8 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
-    'input_solution': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
+    'input_solution': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/solution.h5ad',
     'output': 'output.h5ad',
 }
 meta = {
diff --git a/src/metrics/isolated_label_asw/script.py b/src/metrics/isolated_label_asw/script.py
index 39d23568..602e8d16 100644
--- a/src/metrics/isolated_label_asw/script.py
+++ b/src/metrics/isolated_label_asw/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad',
 }
 
@@ -46,4 +46,4 @@
 )
 
 print('Write data to file', flush=True)
-output.write_h5ad(par['output'], compression='gzip')
\ No newline at end of file
+output.write_h5ad(par['output'], compression='gzip')
diff --git a/src/metrics/isolated_label_f1/script.py b/src/metrics/isolated_label_f1/script.py
index a6529adb..2737f244 100644
--- a/src/metrics/isolated_label_f1/script.py
+++ b/src/metrics/isolated_label_f1/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad',
 }
 
@@ -45,4 +45,4 @@
 )
 
 print('Write data to file', flush=True)
-output.write_h5ad(par['output'], compression='gzip')
\ No newline at end of file
+output.write_h5ad(par['output'], compression='gzip')
diff --git a/src/metrics/kbet/script.py b/src/metrics/kbet/script.py
index 6c74c261..89bd799e 100644
--- a/src/metrics/kbet/script.py
+++ b/src/metrics/kbet/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad',
 }
 
@@ -46,4 +46,4 @@
 )
 
 print('Write data to file', flush=True)
-output.write_h5ad(par['output'], compression='gzip')
\ No newline at end of file
+output.write_h5ad(par['output'], compression='gzip')
diff --git a/src/metrics/lisi/script.py b/src/metrics/lisi/script.py
index b50f6e62..c0c564cd 100644
--- a/src/metrics/lisi/script.py
+++ b/src/metrics/lisi/script.py
@@ -5,7 +5,7 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad',
 }
 meta = {
diff --git a/src/metrics/pcr/script.py b/src/metrics/pcr/script.py
index 265ad430..0ae18ddb 100644
--- a/src/metrics/pcr/script.py
+++ b/src/metrics/pcr/script.py
@@ -4,7 +4,7 @@
 
 ## VIASH START
 par = {
-    'input_integrated': 'resources_test/task_batch_integration/cxg_mouse_pancreas_atlas/integrated_full.h5ad',
+    'input_integrated': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_full.h5ad',
     'output': 'output.h5ad',
 }
 
@@ -59,4 +59,4 @@
 
 
 print('Write data to file', flush=True)
-output.write_h5ad(par['output'], compression='gzip')
\ No newline at end of file
+output.write_h5ad(par['output'], compression='gzip')
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index f70f9b43..3ed43a1e 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -80,6 +80,7 @@ dependencies:
   - name: methods/scalex
   - name: methods/scanorama
   - name: methods/scanvi
+  - name: methods/scimilarity
   - name: methods/scvi
   # metrics
   - name: metrics/asw_batch
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index ff77ad8d..2eff6d8d 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -26,6 +26,9 @@ methods = [
   scalex,
   scanorama,
   scanvi,
+  scimilarity.run(
+    args: [model: file("s3://openproblems-work/cache/scimilarity-model_v1.1.tar.gz")]
+  ),
   scvi
 ]
 
@@ -55,7 +58,7 @@ workflow run_wf {
    ****************************/
   dataset_ch = input_ch
     // store join id
-    | map{ id, state -> 
+    | map{ id, state ->
       [id, state + ["_meta": [join_id: id]]]
     }
 
@@ -153,7 +156,7 @@ workflow run_wf {
       },
       // use 'fromState' to fetch the arguments the component requires from the overall state
       fromState: [
-        input_solution: "input_solution", 
+        input_solution: "input_solution",
         input_integrated: "method_output_cleaned"
       ],
       // use 'toState' to publish that component's outputs to the overall state