Skip to content

Commit

Permalink
LOT use case (#145)
Browse files Browse the repository at this point in the history
* ReadMe file for creating dataset from MIMICIII, and training LOS>3 classifier

* adding links to readMe file

* fixing the LR model

* Mimic_Handler, data_processing, model and the main file

* all attacks are running

* add relevant files from MIMIC_Extract and mimic_code repos

* fix some bugs regarding nivduration

* revert conflicting tabular_mia files

* adding subset to the mimicDataset

* fixing loading the database and indices

* fixing physionet username, breaking the makefile in two commands.

* fixing data and output folder in gitignore

* adding gitkeep

* adding gitkeep

* adding gitignore for data

* adding gitkeep to output

* adding gitkeep to output

* adding gitignore for output

* removing extra readme file

* update the readme file

* fixing copying output to the correct directory

* Add .gitignore to ignore contents of the data folder

* removing redundant files, fixing double sigmoid bug

* adding comments in the readmefile
  • Loading branch information
fazelehh authored Nov 22, 2024
1 parent 3cd278c commit 9f4d953
Show file tree
Hide file tree
Showing 133 changed files with 49,935 additions and 1 deletion.
5 changes: 5 additions & 0 deletions examples/mia/LOS/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Ignore everything inside the data folder
data/*

# But do not ignore the .gitkeep file
!data/.gitkeep
43 changes: 43 additions & 0 deletions examples/mia/LOS/audit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Audit configuration for the MIMIC-III length-of-stay (LOS) membership-inference example.
# Only the LiRA attack is enabled below; the other attacks are kept as commented-out templates.
audit: # Configurations for auditing
random_seed: 1234 # Integer specifying the random seed
attack_list:
# rmia:
# training_data_fraction: 0.5 # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
# attack_data_fraction: 0.5 # Fraction of auxiliary dataset to sample from during attack
# num_shadow_models: 8 # Number of shadow models to train
# online: True # perform online or offline attack
# temperature: 2
# gamma: 1.0
# offline_a: 0.33 # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b.
# offline_b: 0.66
# qmia:
# training_data_fraction: 1.0 # Fraction of the auxiliary dataset (data without train and test indices) to use for training the quantile regressor
# epochs: 5 # Number of training epochs for quantile regression
# population:
# attack_data_fraction: 1.0 # Fraction of the auxiliary dataset to use for this attack
lira:
training_data_fraction: 0.5 # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
num_shadow_models: 8 # Number of shadow models to train
online: True # perform online or offline attack
fixed_variance: True # Use a fixed variance for the whole audit
boosting: True # NOTE(review): meaning not documented in this file - confirm against the LiRA attack options
# loss_traj:
# training_distill_data_fraction : 0.7 # Fraction of the auxiliary dataset to use for training the distillation models D_s = (1-D_KD)/2
# number_of_traj: 10 # Number of epochs (number of points in the loss trajectory)
# label_only: False # True or False
# mia_classifier_epochs: 100

output_dir: "./leakpro_output"
attack_type: "mia" #mia, gia

target:
# Target model path
module_path: "utils/model.py"
model_class: "MimicLR"
# Data paths
target_folder: "./target"
data_path: "./data/dataset.pkl"

# NOTE(review): left empty - presumably the shadow models reuse the target model setup; confirm
shadow_model:

# NOTE(review): left empty - presumably only needed by attacks that train distillation models; confirm
distillation_model:
Empty file added examples/mia/LOS/data/.gitkeep
Empty file.
65 changes: 65 additions & 0 deletions examples/mia/LOS/mimic_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@

import torch
from torch import cuda, device, optim, sigmoid
from torch.nn import BCELoss
from torch.utils.data import DataLoader
from tqdm import tqdm

from leakpro import AbstractInputHandler

class MimicInputHandler(AbstractInputHandler):
    """Input handler for the MIMIC-III length-of-stay (LOS) example.

    Provides the loss function, the optimizer and the training loop used
    for the binary classifier audited in this example.
    """

    def __init__(self, configs: dict) -> None:
        """Forward the audit configuration to the base handler."""
        super().__init__(configs = configs)

    def get_criterion(self) -> BCELoss:
        """Return the binary cross-entropy loss used for training.

        ``BCELoss`` operates on probabilities in ``[0, 1]``, so the model is
        expected to apply the sigmoid itself in its forward pass.
        """
        return BCELoss()

    def get_optimizer(self, model: torch.nn.Module) -> optim.Optimizer:
        """Return an SGD optimizer over the parameters of ``model``."""
        learning_rate = 0.1
        momentum = 0.8
        return optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    def train(
        self,
        dataloader: DataLoader,
        model: torch.nn.Module = None,
        criterion: torch.nn.Module = None,
        optimizer: optim.Optimizer = None,
        epochs: int = None,
    ) -> dict:
        """Train ``model`` on ``dataloader`` for ``epochs`` epochs.

        Args:
            dataloader: Yields ``(data, target)`` batches; ``target`` is
                converted to float and given a trailing dimension of size 1.
            model: The model to train; moved to CUDA when available.
            criterion: Ignored, see the note in the body.
            optimizer: Ignored, see the note in the body.
            epochs: Number of passes over ``dataloader``.

        Returns:
            A dict with the trained ``"model"`` and the ``"metrics"``
            (``"accuracy"`` and mean batch ``"loss"``) of the last epoch.
            Both metrics are ``0.0`` when ``epochs`` is 0.

        Raises:
            ValueError: If ``epochs`` is not provided.
        """
        if epochs is None:
            # Fail with a clear message instead of the TypeError from range(None).
            raise ValueError("epochs must be provided for training")

        compute_device = device("cuda" if cuda.is_available() else "cpu")
        model.to(compute_device)
        model.train()

        # NOTE(review): as in the original implementation, the criterion and
        # optimizer passed by the caller are replaced by the handler's own.
        criterion = self.get_criterion()
        optimizer = self.get_optimizer(model)

        # Bound up front so the return statement is valid even with epochs == 0.
        train_acc, train_loss = 0.0, 0.0

        for _ in tqdm(range(epochs), desc="Training Progress"):
            model.train()
            train_acc, train_loss = 0.0, 0.0

            for data, target in dataloader:
                target = target.float().unsqueeze(1)
                data = data.to(compute_device, non_blocking=True)
                target = target.to(compute_device, non_blocking=True)
                optimizer.zero_grad()
                output = model(data)

                loss = criterion(output, target)
                # BCELoss requires `output` to already be a probability, so it is
                # thresholded directly. Applying sigmoid again maps every value in
                # [0, 1] to >= 0.5 and would label every sample as positive.
                pred = (output >= 0.5).float()
                train_acc += pred.eq(target).sum().item()

                loss.backward()
                optimizer.step()
                train_loss += loss.item()

            train_acc = train_acc / len(dataloader.dataset)
            train_loss = train_loss / len(dataloader)

        return {"model": model, "metrics": {"accuracy": train_acc, "loss": train_loss}}
278 changes: 278 additions & 0 deletions examples/mia/LOS/mimic_main.ipynb

Large diffs are not rendered by default.

118 changes: 118 additions & 0 deletions examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Ignore all contents of output
/output/*

# Keep the .gitkeep file so that the otherwise-ignored output folder stays in the repository
!/output/.gitkeep


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/


*.ipynb_checkpoints*
makejob

# tags
tags
21 changes: 21 additions & 0 deletions examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 MLforHealth

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Collect the ICD-9 diagnosis codes of each ICU stay for a selected set of admissions.
-- Output: one row per (icustay_id, subject_id, hadm_id) with icd9_codes, an array of
-- the matching ICD-9 codes ordered by seq_num.
-- The hadm_id placeholder in the WHERE clause must be filled in before the query is run
-- (NOTE(review): presumably by the calling Python code through string formatting - confirm).
SET SEARCH_PATH TO public,mimiciii;
SELECT
i.icustay_id, d.subject_id, d.hadm_id,
array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes
FROM diagnoses_icd d
-- NOTE(review): no column of c is selected in this query, so this join looks unused here - confirm
LEFT OUTER JOIN (SELECT ccs_matched_id, icd9_code from ccs_dx) c
ON c.icd9_code = d.icd9_code
-- The join is on admission and subject only, so ICU stays that share an admission receive the same diagnosis rows
INNER JOIN icustays i
ON i.hadm_id = d.hadm_id AND i.subject_id = d.subject_id
-- Diagnosis rows without a sequence number are skipped
WHERE d.hadm_id IN ('{hadm_id}') AND seq_num IS NOT NULL
GROUP BY i.icustay_id, d.subject_id, d.hadm_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Debug variant of the diagnosis-code query: it has no admission filter and therefore
-- covers every ICU stay that has diagnosis rows with a sequence number.
-- Output: one row per (icustay_id, subject_id, hadm_id) with two arrays ordered by seq_num,
-- icd9_codes and ccs_codes.
\echo "DEBUG ONLY"
SET search_path TO mimiciii;
SELECT
i.icustay_id, d.subject_id, d.hadm_id,
array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes,
array_agg(c.ccs_matched_id ORDER BY seq_num ASC) AS ccs_codes
FROM mimiciii.diagnoses_icd d
-- Left join: ICD-9 codes without a row in ccs_dx are kept and contribute a NULL to ccs_codes
LEFT OUTER JOIN (SELECT ccs_matched_id, icd9_code from mimiciii.ccs_dx) c
ON c.icd9_code = d.icd9_code
INNER JOIN icustays i
ON i.hadm_id = d.hadm_id AND i.subject_id = d.subject_id

WHERE seq_num IS NOT NULL
GROUP BY i.icustay_id, d.subject_id, d.hadm_id

Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
-- Debug query that builds the static (per ICU stay) table of the cohort:
-- demographics, admission details, mortality flags, code-status fields,
-- severity scores (SOFA, SAPS II, OASIS) and a 30-day readmission flag.
-- Rows are restricted by the cohort filters in the WHERE clause at the bottom.
\echo "This file is just for debugging"
SET search_path TO public,mimiciii;
select distinct
i.subject_id,
i.hadm_id,
i.icustay_id,
i.gender,
i.age as age,
i.ethnicity,
i.admission_type,
i.hospital_expire_flag,
i.hospstay_seq,
i.los_icu,
i.admittime,
i.dischtime,
i.intime,
i.outtime,
a.diagnosis AS diagnosis_at_admission,
a.insurance,
a.deathtime,
a.discharge_location,
-- Mortality flags: 1 when the recorded death time falls inside the ICU stay / the hospital admission window
CASE when a.deathtime between i.intime and i.outtime THEN 1 ELSE 0 END AS mort_icu,
CASE when a.deathtime between i.admittime and i.dischtime THEN 1 ELSE 0 END AS mort_hosp,
s.first_careunit,
c.fullcode_first,
c.dnr_first,
c.fullcode,
c.dnr,
-- c.timednr_chart,
c.dnr_first_charttime,
c.cmo_first,
c.cmo_last,
c.cmo,
c.cmo_ds,
-- c.timecmo_chart,
c.cmo_first_charttime,
-- c.timecmo_nursingnote,
c.cmo_nursingnote_charttime,
sofa.sofa,
-- NOTE(review): the six SOFA component columns below all share the alias sofa_, so the
-- result has six identically named columns - confirm whether distinct aliases were intended
sofa.respiration as sofa_,
sofa.coagulation as sofa_,
sofa.liver as sofa_,
sofa.cardiovascular as sofa_,
sofa.cns as sofa_,
sofa.renal as sofa_,
sapsii.sapsii,
sapsii.sapsii_prob,
oasis.oasis,
oasis.oasis_prob,
-- Stays without a match in the readmission subquery get 0
COALESCE(f.readmission_30, 0) AS readmission_30
FROM icustay_detail i
INNER JOIN admissions a ON i.hadm_id = a.hadm_id
INNER JOIN icustays s ON i.icustay_id = s.icustay_id
-- Inner join: ICU stays without a code_status row are dropped from the result
INNER JOIN code_status c ON i.icustay_id = c.icustay_id
-- Readmission subquery: flags a stay d when the same subject has a stay c with a larger
-- icustay_id that starts at most 30 days after d ended, where c is the stay with the
-- earliest outtime among the stays of that subject starting after d ended
LEFT OUTER JOIN (SELECT d.icustay_id, 1 as readmission_30
FROM icustays c, icustays d
WHERE c.subject_id=d.subject_id
AND c.icustay_id > d.icustay_id
AND c.intime - d.outtime <= interval '30 days'
AND c.outtime = (SELECT MIN(e.outtime) from icustays e
WHERE e.subject_id=c.subject_id
AND e.intime>d.outtime)) f
ON i.icustay_id=f.icustay_id
-- Severity scores are left-joined, so stays without a score row keep NULL score columns
LEFT OUTER JOIN (SELECT icustay_id, sofa, respiration, coagulation, liver, cardiovascular, cns, renal
FROM sofa) sofa
ON i.icustay_id=sofa.icustay_id
LEFT OUTER JOIN (SELECT icustay_id, sapsii, sapsii_prob
FROM sapsii) sapsii
ON sapsii.icustay_id=i.icustay_id
LEFT OUTER JOIN (SELECT icustay_id, oasis, oasis_prob
FROM oasis) oasis
ON oasis.icustay_id=i.icustay_id
-- Cohort filters. The LIKE pattern has no wildcard, so it only excludes the exact value NICU
WHERE s.first_careunit NOT like 'NICU'
and i.hadm_id is not null and i.icustay_id is not null
-- Keep rows with sequence number 1 for both the hospital stay and the ICU stay
and i.hospstay_seq = 1
and i.icustay_seq = 1
and i.age >= 16
-- NOTE(review): los_icu is presumably measured in days - confirm against icustay_detail
and i.los_icu >= 1
-- ICU stay duration must be between 12 and 250 hours
and (i.outtime >= (i.intime + interval '12 hours'))
and (i.outtime <= (i.intime + interval '250 hours'))
ORDER BY subject_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Fetch the clinical notes of the selected admissions and subjects, paired with every ICU stay of the same admission.
-- The hadm_id and subject_id placeholders must be filled in before the query is run
-- (NOTE(review): presumably by the calling Python code through string formatting - confirm).
SELECT n.subject_id, n.hadm_id, i.icustay_id, n.chartdate, n.charttime, n.category, n.description, n.text
FROM noteevents n INNER JOIN icustays i on i.hadm_id = n.hadm_id
WHERE
-- Keep only notes whose iserror field is NULL
iserror IS NULL
-- Keep notes whose chart date or chart time is not later than the end of the ICU stay
AND (n.chartdate <= i.outtime OR n.charttime <= i.outtime)
AND n.hadm_id IN ('{hadm_id}')
AND n.subject_id IN ('{subject_id}')
Loading

0 comments on commit 9f4d953

Please sign in to comment.