-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* ReadMe file for creating dataset from MIMICIII, and training LOS>3 classifier * adding links to readMe file * fixing the LR model * Mimic_Hnadler, data_processeing, model and the main file * all attacks are running * add relevant files from MIMIC_Extract and mimic_code repos * fix some bugs regarding nivduration * revert conflicting tabular_mia files * adding subset to the mimicDataset * fixing loading the database and indices * fixing physionet username, breaking the makefile in two commands. * fixing data and output folder in gitingnore * adding gitkeep * adding gitkeep * adding gitignore for data * adding gitkeep to output * adding gitkeep to output * adding gitignore for output * removing extra readme file * update the readme file * fixing copying output to the correct directory * Add .gitignore to ignore contents of the data folder * removing redundant files, fixind double sigmoid bug * adding comments in the readmefile
- Loading branch information
Showing
133 changed files
with
49,935 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Ignore everything inside the data folder | ||
data/* | ||
|
||
# But do not ignore the .gitkeep file | ||
!data/.gitkeep |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
audit: # Configurations for auditing | ||
random_seed: 1234 # Integer specifying the random seed | ||
attack_list: | ||
# rmia: | ||
# training_data_fraction: 0.5 # Fraction of the auxiliary dataset to use for this attack (in each shadow model training) | |
# attack_data_fraction: 0.5 # Fraction of auxiliary dataset to sample from during attack | ||
# num_shadow_models: 8 # Number of shadow models to train | ||
# online: True # perform online or offline attack | ||
# temperature: 2 | ||
# gamma: 1.0 | ||
# offline_a: 0.33 # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b. | ||
# offline_b: 0.66 | ||
# qmia: | ||
# training_data_fraction: 1.0 # Fraction of the auxiliary dataset (data without train and test indices) to use for training the quantile regressor | |
# epochs: 5 # Number of training epochs for quantile regression | ||
# population: | ||
# attack_data_fraction: 1.0 # Fraction of the auxiliary dataset to use for this attack | |
lira: | ||
training_data_fraction: 0.5 # Fraction of the auxiliary dataset to use for this attack (in each shadow model training) | |
num_shadow_models: 8 # Number of shadow models to train | ||
online: True # perform online or offline attack | ||
fixed_variance: True # Use a fixed variance for the whole audit | ||
boosting: True | ||
# loss_traj: | ||
# training_distill_data_fraction : 0.7 # Fraction of the auxiliary dataset to use for training the distillation models D_s = (1-D_KD)/2 | |
# number_of_traj: 10 # Number of epochs (number of points in the loss trajectory) | ||
# label_only: False # True or False | ||
# mia_classifier_epochs: 100 | ||
|
||
output_dir: "./leakpro_output" | ||
attack_type: "mia" #mia, gia | ||
|
||
target: | ||
# Target model path | ||
module_path: "utils/model.py" | ||
model_class: "MimicLR" | ||
# Data paths | ||
target_folder: "./target" | ||
data_path: "./data/dataset.pkl" | ||
|
||
shadow_model: | ||
|
||
distillation_model: |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
|
||
import torch | ||
from torch import cuda, device, optim, sigmoid | ||
from torch.nn import BCELoss | ||
from torch.utils.data import DataLoader | ||
from tqdm import tqdm | ||
|
||
from leakpro import AbstractInputHandler | ||
|
||
class MimicInputHandler(AbstractInputHandler):
    """Input handler for the MIMIC binary classifier.

    Provides the loss function, the optimizer and the training loop that are
    used to fit the target model on the MIMIC dataset.
    """

    def __init__(self, configs: dict) -> None:
        """Pass the configuration dictionary on to the base handler."""
        super().__init__(configs = configs)

    def get_criterion(self) -> BCELoss:
        """Return the binary cross-entropy loss used for training.

        ``BCELoss`` operates on probabilities in [0, 1], so the model is
        expected to apply the sigmoid itself.
        """
        return BCELoss()

    def get_optimizer(self, model: torch.nn.Module) -> optim.Optimizer:
        """Return an SGD optimizer (lr=0.1, momentum=0.8) over ``model``'s parameters."""
        learning_rate = 0.1
        momentum = 0.8
        return optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    def train(
        self,
        dataloader: DataLoader,
        model: torch.nn.Module = None,
        criterion: torch.nn.Module = None,
        optimizer: optim.Optimizer = None,
        epochs: int = None,
    ) -> dict:
        """Train ``model`` on ``dataloader`` for ``epochs`` epochs.

        Args:
            dataloader: Yields ``(data, target)`` batches; ``target`` is cast
                to float and given a trailing dimension of size 1.
            model: Model to train. It is moved to CUDA when available,
                otherwise to the CPU.
            criterion: Loss function. Defaults to ``get_criterion()`` when
                ``None``.
            optimizer: Optimizer. Defaults to ``get_optimizer(model)`` when
                ``None``.
            epochs: Number of passes over ``dataloader``.

        Returns:
            ``{"model": model, "metrics": {"accuracy": ..., "loss": ...}}``
            where the metrics are those of the last epoch (0.0 if no epoch
            was run).

        Raises:
            ValueError: If ``model`` or ``epochs`` is not provided.
        """
        if model is None or epochs is None:
            raise ValueError("train() requires both `model` and `epochs`.")

        compute_device = device("cuda" if cuda.is_available() else "cpu")
        model.to(compute_device)
        model.train()

        # Honour what the caller supplied; only fall back to the defaults.
        if criterion is None:
            criterion = self.get_criterion()
        if optimizer is None:
            optimizer = self.get_optimizer(model)

        num_samples = len(dataloader.dataset)
        num_batches = len(dataloader)

        # Initialised up front so the return value is defined when epochs == 0.
        train_acc, train_loss = 0.0, 0.0

        for _ in tqdm(range(epochs), desc="Training Progress"):
            correct, running_loss = 0, 0.0

            for data, target in dataloader:
                target = target.float().unsqueeze(1)
                data = data.to(compute_device, non_blocking=True)
                target = target.to(compute_device, non_blocking=True)

                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)

                # BCELoss requires `output` to already be a probability, so it
                # is thresholded directly. Applying sigmoid again would map
                # every value in [0, 1] to >= 0.5 and predict the positive
                # class for every sample.
                # NOTE(review): assumes `output` has the same shape as
                # `target` (batch, 1) - confirm against the model.
                pred = output >= 0.5
                correct += pred.eq(target).sum().item()

                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            # Guard against an empty dataset / loader.
            train_acc = correct / num_samples if num_samples else 0.0
            train_loss = running_loss / num_batches if num_batches else 0.0

        return {"model": model, "metrics": {"accuracy": train_acc, "loss": train_loss}}
Large diffs are not rendered by default.
Oops, something went wrong.
118 changes: 118 additions & 0 deletions
118
examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/.gitignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
# Ignore all contents of output | ||
/output/* | ||
|
||
# But do not ignore the .gitkeep file, so the folder itself stays in the repository | |
!/output/.gitkeep | ||
|
||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# celery beat schedule file | ||
celerybeat-schedule | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
|
||
|
||
*.ipynb_checkpoints* | ||
makejob | ||
|
||
# tags | ||
tags |
21 changes: 21 additions & 0 deletions
21
examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/LICENSE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2019 MLforHealth | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
11 changes: 11 additions & 0 deletions
11
examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/SQL_Queries/codes.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
-- Per ICU stay, aggregate the ICD-9 diagnosis codes of the admission into an
-- array ordered by seq_num. '{hadm_id}' is a placeholder that the caller
-- substitutes before the query is run.
SET SEARCH_PATH TO public,mimiciii;
SELECT
    i.icustay_id,
    d.subject_id,
    d.hadm_id,
    array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes
FROM diagnoses_icd d
    LEFT OUTER JOIN (
        SELECT ccs_matched_id, icd9_code
        FROM ccs_dx
    ) c
        ON c.icd9_code = d.icd9_code
    INNER JOIN icustays i
        ON i.hadm_id = d.hadm_id
        AND i.subject_id = d.subject_id
WHERE d.hadm_id IN ('{hadm_id}')
    AND seq_num IS NOT NULL  -- rows without a sequence number are dropped
GROUP BY
    i.icustay_id,
    d.subject_id,
    d.hadm_id
15 changes: 15 additions & 0 deletions
15
examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/SQL_Queries/debug_codes.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
-- Debug variant of the diagnosis-code query: no admission filter, and the
-- matched CCS ids are aggregated alongside the ICD-9 codes (both arrays are
-- ordered by seq_num).
\echo "DEBUG ONLY"
SET search_path TO mimiciii;
SELECT
    i.icustay_id,
    d.subject_id,
    d.hadm_id,
    array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes,
    array_agg(c.ccs_matched_id ORDER BY seq_num ASC) AS ccs_codes
FROM mimiciii.diagnoses_icd d
    LEFT OUTER JOIN (
        SELECT ccs_matched_id, icd9_code
        FROM mimiciii.ccs_dx
    ) c
        ON c.icd9_code = d.icd9_code
    INNER JOIN icustays i
        ON i.hadm_id = d.hadm_id
        AND i.subject_id = d.subject_id
WHERE seq_num IS NOT NULL  -- rows without a sequence number are dropped
GROUP BY
    i.icustay_id,
    d.subject_id,
    d.hadm_id
|
81 changes: 81 additions & 0 deletions
81
examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/SQL_Queries/debug_statics.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
-- Debug query: one row of static (per-stay) attributes for each ICU stay that
-- passes the cohort filters in the WHERE clause, enriched with code status,
-- SOFA / SAPS-II / OASIS scores and a 30-day readmission flag.
\echo "This file is just for debugging"
SET search_path TO public,mimiciii;
select distinct
    i.subject_id,
    i.hadm_id,
    i.icustay_id,
    i.gender,
    i.age as age,
    i.ethnicity,
    i.admission_type,
    i.hospital_expire_flag,
    i.hospstay_seq,
    i.los_icu,
    i.admittime,
    i.dischtime,
    i.intime,
    i.outtime,
    a.diagnosis AS diagnosis_at_admission,
    a.insurance,
    a.deathtime,
    a.discharge_location,
    -- Death recorded inside the ICU stay / inside the hospital admission.
    CASE when a.deathtime between i.intime and i.outtime THEN 1 ELSE 0 END AS mort_icu,
    CASE when a.deathtime between i.admittime and i.dischtime THEN 1 ELSE 0 END AS mort_hosp,
    s.first_careunit,
    c.fullcode_first,
    c.dnr_first,
    c.fullcode,
    c.dnr,
    -- c.timednr_chart,
    c.dnr_first_charttime,
    c.cmo_first,
    c.cmo_last,
    c.cmo,
    c.cmo_ds,
    -- c.timecmo_chart,
    c.cmo_first_charttime,
    -- c.timecmo_nursingnote,
    c.cmo_nursingnote_charttime,
    sofa.sofa,
    -- Each SOFA component gets its own alias; previously all six were
    -- aliased to the same name "sofa_" and could not be told apart.
    sofa.respiration as sofa_respiration,
    sofa.coagulation as sofa_coagulation,
    sofa.liver as sofa_liver,
    sofa.cardiovascular as sofa_cardiovascular,
    sofa.cns as sofa_cns,
    sofa.renal as sofa_renal,
    sapsii.sapsii,
    sapsii.sapsii_prob,
    oasis.oasis,
    oasis.oasis_prob,
    -- Stays without a matching row in f are reported as 0.
    COALESCE(f.readmission_30, 0) AS readmission_30
FROM icustay_detail i
    INNER JOIN admissions a ON i.hadm_id = a.hadm_id
    INNER JOIN icustays s ON i.icustay_id = s.icustay_id
    INNER JOIN code_status c ON i.icustay_id = c.icustay_id
    -- f: stays d for which the same subject has another stay c with a larger
    -- icustay_id that begins no more than 30 days after d ends, where c is
    -- the earliest-ending stay starting after d's outtime.
    LEFT OUTER JOIN (SELECT d.icustay_id, 1 as readmission_30
                     FROM icustays c, icustays d
                     WHERE c.subject_id=d.subject_id
                       AND c.icustay_id > d.icustay_id
                       AND c.intime - d.outtime <= interval '30 days'
                       AND c.outtime = (SELECT MIN(e.outtime) from icustays e
                                        WHERE e.subject_id=c.subject_id
                                          AND e.intime>d.outtime)) f
        ON i.icustay_id=f.icustay_id
    LEFT OUTER JOIN (SELECT icustay_id, sofa, respiration, coagulation, liver, cardiovascular, cns, renal
                     FROM sofa) sofa
        ON i.icustay_id=sofa.icustay_id
    LEFT OUTER JOIN (SELECT icustay_id, sapsii, sapsii_prob
                     FROM sapsii) sapsii
        ON sapsii.icustay_id=i.icustay_id
    LEFT OUTER JOIN (SELECT icustay_id, oasis, oasis_prob
                     FROM oasis) oasis
        ON oasis.icustay_id=i.icustay_id
-- Cohort filters: non-NICU first care unit, first hospital stay and first ICU
-- stay of the subject, age >= 16, los_icu >= 1, and an ICU stay lasting
-- between 12 and 250 hours.
WHERE s.first_careunit NOT like 'NICU'
    and i.hadm_id is not null and i.icustay_id is not null
    and i.hospstay_seq = 1
    and i.icustay_seq = 1
    and i.age >= 16
    and i.los_icu >= 1
    and (i.outtime >= (i.intime + interval '12 hours'))
    and (i.outtime <= (i.intime + interval '250 hours'))
ORDER BY subject_id
7 changes: 7 additions & 0 deletions
7
examples/mia/LOS/mimiciii_prepration/MIMIC_Extract/SQL_Queries/notes.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
-- Notes attached to the requested admissions/subjects, paired with every ICU
-- stay of the same admission. '{hadm_id}' and '{subject_id}' are placeholders
-- that the caller substitutes before the query is run.
SELECT
    n.subject_id,
    n.hadm_id,
    i.icustay_id,
    n.chartdate,
    n.charttime,
    n.category,
    n.description,
    n.text
FROM noteevents n
    INNER JOIN icustays i
        ON i.hadm_id = n.hadm_id
WHERE
    iserror IS NULL  -- keep only notes with no error flag set
    -- Keep notes dated no later than the end of the ICU stay.
    AND (n.chartdate <= i.outtime OR n.charttime <= i.outtime)
    AND n.hadm_id IN ('{hadm_id}')
    AND n.subject_id IN ('{subject_id}')
Oops, something went wrong.