MHubAI · LennyN95 · Mar 15, 2024 · Feb 16, 2024 · Mar 6, 2024 · Mar 6, 2024
diff --git a/models/fmcib_radiomics/config/default.yml b/models/fmcib_radiomics/config/default.yml
@@ -0,0 +1,22 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: FMCIB pipeline starting from DICOM files and centroids in json files or slicer exports named by their SeriesInstanceUID
+
+execute:
+- DicomImporter
+- FileImporter
+- NiftiConverter
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+
+  FileImporter:
+    instance_id: sid
+    meta: type=fmcibcoordinates
+    type: json
+
+  DataOrganizer:
+    targets:
+      - json:type=fmcibfeatures-->[i:sid]/features.json
diff --git a/models/fmcib_radiomics/config/from_centroids.yml b/models/fmcib_radiomics/config/from_centroids.yml
@@ -0,0 +1,20 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline starting from a nrrd image and a centroid coordinate json file"
+
+execute:
+- FileStructureImporter
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    structures:
+      - $patientID/CT.nrrd@instance@nrrd:mod=ct
+      - $patientID/centroids.json@json:type=fmcibcoordinates
+    import_id: patientID
+
+  DataOrganizer:
+    targets:
+      - json:type=fmcibfeatures-->[i:patientID]/features.json
diff --git a/models/fmcib_radiomics/config/from_nrrd_mask.yml b/models/fmcib_radiomics/config/from_nrrd_mask.yml
@@ -0,0 +1,21 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline starting from an NRRD image file and an NRRD binary mask of the GTV."
+
+execute:
+- FileStructureImporter
+- CentroidExtractor
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    structures:
+      - $patientID/CT.nrrd@instance@nrrd:mod=ct
+      - $patientID/masks/GTV-1.nrrd@nrrd:mod=seg
+    import_id: patientID
+
+  DataOrganizer:
+    targets:
+      - json:type=fmcibfeatures-->[i:patientID]/features.json
diff --git a/models/fmcib_radiomics/config/from_slicer.yml b/models/fmcib_radiomics/config/from_slicer.yml
@@ -0,0 +1,20 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline starting from slicer exports (a nrrd image and a json coordinate file per patient folder)"
+
+execute:
+- FileStructureImporter
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    structures:
+      - $patientID@instance/re:^.*\.nrrd$::@nrrd:mod=ct
+      - $patientID/re:^.*\.json$::@json:type=fmcibcoordinates
+    import_id: patientID
+
+  DataOrganizer:
+    targets:
+      - json:type=fmcibfeatures-->[i:patientID]/features.json
diff --git a/models/fmcib_radiomics/dockerfiles/Dockerfile b/models/fmcib_radiomics/dockerfiles/Dockerfile
@@ -0,0 +1,21 @@
+FROM mhubai/base:latest
+
+LABEL authors="[email protected],[email protected]"
+
+# download model weights (URL is quoted so the shell never treats '?' as a glob character)
+RUN wget "https://zenodo.org/records/10528450/files/model_weights.torch?download=1" -O /app/model_weights.torch
+
+# clone mhub implementation (source is passed in through the MHUB_MODELS_REPO build argument)
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh fmcib_radiomics ${MHUB_MODELS_REPO}
+
+
+# Install additional pip packages
+RUN pip3 install --upgrade pip && pip3 install --no-cache-dir \
+  jsonschema==4.21.1
+
+# Install FMCIB package (pre-release allowed), should install everything else ...
+RUN pip3 install --no-cache-dir --pre foundation-cancer-image-biomarker
+
+ENTRYPOINT ["mhub.run"]
+CMD ["--workflow", "default"]
diff --git a/models/fmcib_radiomics/meta.json b/models/fmcib_radiomics/meta.json
@@ -0,0 +1,137 @@
+{
+  "id": "26e98e14-b605-4007-bd8b-79d517c935b5",
+  "name": "fmcib_radiomics",
+  "title": "Foundation Model for Cancer Imaging Biomarkers",
+  "summary": {
+    "description": "A foundation model for cancer imaging biomarker discovery trained through self-supervised learning using a dataset of 11,467 radiographic lesions. The model features can be used as a data-driven substitute for classical radiomic features",
+    "inputs": [
+      {
+        "label": "Input CT Image",
+        "description": "CT imaging data containing lesions of interest, such as nodules or tumors",
+        "format": "DICOM",
+        "modality": "CT",
+        "slicethickness": "5mm",
+        "bodypartexamined": "Whole",
+        "non-contrast": true,
+        "contrast": true
+      },
+      {
+        "label": "Center of mass",        
+        "description": "Center of mass of the lesion in the CT image",
+        "format": "JSON",
+        "modality": "JSON",
+        "slicethickness": "5mm",
+        "bodypartexamined": "Whole",
+        "non-contrast": true,
+        "contrast": true
+      }
+    ],
+    "outputs": [
+      {
+        "type": "Prediction",
+        "valueType": "Feature vector",
+        "description": "A set of features extracted from the input CT image",
+        "label": "Features"
+      }
+    ],
+    "model": {
+      "architecture": "3D ResNet50",
+      "training": "other",
+      "cmpapproach": "3D"
+    },
+    "data": {
+      "training": {
+        "vol_samples": 11467
+      },
+      "evaluation": {
+        "vol_samples": 1944
+      },
+      "public": true,
+      "external": true
+    }
+  },
+  "details": {
+    "name": "Foundation Model for Cancer Imaging Biomarkers",
+    "version": "0.0.1",
+    "type": "Feature extractor",
+    "devteam": "Researchers from the Artificial Intelligence in Medicine (AIM) Program, Mass General Brigham, Harvard Medical School and other institutions",
+    "date": {
+      "pub": "2023 (preprint)",
+      "code": "n/a",
+      "weights": "18.01.2024"
+    },
+    "cite": "Pai, S., Bontempi, D., Hadzic, I., Prudente, V., et al. Foundation Model for Cancer Imaging Biomarkers. 2023.",
+    "license": {
+      "code": "MIT",
+      "weights": "CC BY-NC 4.0"
+    },
+    "publications": [
+      {
+        "title": "Foundation Model for Cancer Imaging Biomarkers",
+        "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1"
+      }
+    ],
+    "github": "https://github.com/AIM-Harvard/foundation-cancer-image-biomarker",
+    "zenodo": "https://zenodo.org/records/10528450",
+    "colab": "https://colab.research.google.com/drive/1JMtj_4W0uNPzrVnM9EpN1_xpaB-5KC1H?usp=sharing",
+    "slicer": false    
+  },
+  "info": {
+    "use": {
+      "title": "Intended Use",
+      "text": "The foundation model is intended to extract features from several different types of lesions (lung, liver, kidney, mediastinal, abdominal, pelvic, bone and soft tissue). These features can be used for a variety of predictive and clustering tasks as a data-driven substitute for classical radiomic features."
+    },
+    "analyses": {
+      "title": "Quantitative Analyses",
+      "text": "The model's performance was assessed using three different downstream tasks, including malignancy prediction and lung cancer risk prediction. Refer to the publication for more details [1].",
+      "references": [
+          {
+              "label": "Foundation Model for Cancer Imaging Biomarkers",
+              "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1"
+          }
+        ]
+    },
+    "evaluation": {
+        "title": "Evaluation Data",
+        "text": "The evaluation dataset consists of 1,944 lesions, including 1,221 lesions for anatomical site classification, 170 nodules for malignancy prediction, and 553 tumors (420 LUNG1 + 133 RADIO) for prognostication. The dataset was held out from the training data and gathered from several different sources [1, 2, 3, 4].",
+        "tables": [
+            {
+                "label": "Evaluation Tasks & Datasets",
+                "entries": {
+                    "Lesion Anatomical Site Prediction": "DeepLesion (n=1221)",
+                    "Nodule Malignancy Prediction": "LUNA16 (n=170)",
+                    "Tumor Prognostication": "NSCLC-Radiomics (n=420) + NSCLC-Radiogenomics (n=133)"
+                }
+            }
+        ],
+        "references": [
+                {
+                "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.",
+                "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/"
+            },
+            {
+                "label": "LUNA16",
+                "uri": "https://www.cancerimagingarchive.net/collection/lidc-idri/"
+            },
+            {
+              "label": "NSCLC-Radiomics",
+              "uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/"
+          },
+          {
+            "label": "NSCLC-Radiogenomics",
+            "uri": "https://www.cancerimagingarchive.net/analysis-result/nsclc-radiogenomics-stanford/"
+          }
+        ]
+    },
+    "training": {
+    "title": "Training Data",
+    "text": "The training dataset consists of 11,467 lesions sourced from 5,513 unique CT scans across 2,312 different patients. This was curated from the DeepLesion dataset [1] following two steps - 1) Lesions that did not contain anatomical labels were selected, 2) Scans with spacing 5mm or more were removed.",
+    "references": [
+      {
+        "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.",
+        "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/"
+    }
+    ]
+    }
+  }
+}
diff --git a/models/fmcib_radiomics/utils/CentroidExtractor.py b/models/fmcib_radiomics/utils/CentroidExtractor.py
@@ -0,0 +1,43 @@
+"""
+---------------------------------------------------------
+Author: Leonard Nürnberg
+Email:  [email protected]
+Date:   06.03.2024
+---------------------------------------------------------
+"""
+
+import json, jsonschema
+from mhubio.core import Instance, InstanceData, IO, Module
+import SimpleITK as sitk
+
+class CentroidExtractor(Module):
+
+    @IO.Instance()
+    @IO.Input('in_mask', 'nrrd:mod=seg', the='Tumor segmentation mask for the input NRRD file.')
+    @IO.Output('centroids_json', 'centroids.json', "json:type=fmcibcoordinates", the='JSON file containing 3D coordinates of the centroid of the input mask.')
+    def task(self, instance: Instance, in_mask: InstanceData, centroids_json: InstanceData) -> None:
+
+        # read the input mask 
+        mask = sitk.ReadImage(in_mask.abspath)
+
+        # get the center of massk from the mask via ITK
+        label_shape_filter = sitk.LabelShapeStatisticsImageFilter()
+        label_shape_filter.Execute(mask)
+        try:
+            centroid = label_shape_filter.GetCentroid(255)
+        except:
+            centroid = label_shape_filter.GetCentroid(1)
+
+        # extract x, y, and z coordinates from the centroid
+        x, y, z = centroid
+
+        # set up the coordinate dictionary
+        coordinate_dict = {
+            "coordX": x,
+            "coordY": y,
+            "coordZ": z,
+        }
+
+        # write the coordinate dictionary to a json file
+        with open(centroids_json.abspath, "w") as f:
+            json.dump(coordinate_dict, f)