Skip to content

Commit

Permalink
addresses the issues in #251
Browse files Browse the repository at this point in the history
The mpnst and mpnstpdx code now builds.
  • Loading branch information
sgosline committed Nov 27, 2024
1 parent fa1b225 commit d0dc5ed
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 21 deletions.
2 changes: 1 addition & 1 deletion build/docker/Dockerfile.mpnst
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM r-base:4.3.2
FROM r-base:4.3.3

# Set environment to noninteractive
ENV DEBIAN_FRONTEND=noninteractive
Expand Down
40 changes: 27 additions & 13 deletions build/mpnst/00_sample_gen.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,25 @@ library(dplyr)

##adding a command line argument
args = commandArgs(trailingOnly=TRUE)
if(length(args)!=2){
stop("Need a sample file and synapse token as argument. Rscript 00_sample_gen.R [samplefile] [synapse token]")
if(length(args) > 1 ){
stop("Up to one argument is allowed. This is the filepath to the previously run samples file.")
}


if (length(args) == 0 || is.na(args[1]) || args[1] == "" || !file.exists(args[1])) {
orig_samples <- ""
} else {
orig_samples <- fread(args[1])
}

orig_samples<-fread(args[1])

synapser::synLogin(authToken=args[2])
# Check if Synapse token is available from the environment
synapse_token <- Sys.getenv("SYNAPSE_AUTH_TOKEN")
if (synapse_token == "") {
stop("Error: SYNAPSE_AUTH_TOKEN environment variable is not set.")
}

synapser::synLogin(authToken=synapse_token)
manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|>
as.data.frame()

Expand All @@ -32,23 +43,18 @@ manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|>
##first create samples for the original tumors
tumorTable<-manifest|>
dplyr::select(common_name='Sample')|>
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='tumor')|>
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Homo sapiens (Human)',model_type='tumor')|>
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)

##then create samples for the PDX
sampTable<-manifest|>
dplyr::select(common_name='Sample',MicroTissueDrugFolder)|>
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='patient derived xenograft')|>
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Homo sapiens (Human)',model_type='patient derived xenograft')|>
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)


pdxmt<-manifest|>
dplyr::select(common_name='Sample',MicroTissueDrugFolder)|>
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='organoid')|>
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)

##third, generate a sample for the MTs if they were generated
#pdxmt<-subset(sampTable,!is.na(MicroTissueDrugFolder))
pdxmt<-subset(sampTable,!is.na(MicroTissueDrugFolder))
pdxmt$model_type=rep('organoid',nrow(pdxmt))
print(pdxmt)

Expand All @@ -58,7 +64,15 @@ main<-rbind(sampTable,pdxmt)|>

#main <- fread("mpnst/NF_MPNST_samples.csv")
#previous_aml <- fread(args[1])#"beatAML/beataml_samples.csv")
max_id <- max(orig_samples$improve_sample_id)

# Work out the highest improve_sample_id that has already been assigned so that
# new IDs continue directly after it. `orig_samples` is the "" sentinel when no
# previous samples file was supplied; otherwise it is the table read from that
# file.
if (identical(orig_samples, "")) {
  prev_ids <- numeric(0)
} else {
  prev_ids <- orig_samples$improve_sample_id
}
prev_ids <- prev_ids[!is.na(prev_ids)]

# max_id is the last ID already in use; numbering resumes at max_id + 1.
# With no usable previous IDs it must be 0 so the first new sample gets ID 1.
# Checking the length first also avoids max() returning -Inf (with a warning)
# for an empty or all-NA column.
if (length(prev_ids) == 0) {
  max_id <- 0
} else {
  max_id <- max(prev_ids)
}


main$improve_sample_id <- seq(from = max_id + 1, length.out = nrow(main))

#synapse_main <- fread("mpnst/synapse_NF-MPNST_samples.csv")
Expand Down
2 changes: 1 addition & 1 deletion build/mpnst/03_get_drug_response_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ org_samps<-subset(samples_df,model_type=='organoid')

##now get the manifest from synapse
manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|>
as.data.frame()|>
as.data.table()|>
dplyr::rename(common_name='Sample')


Expand Down
2 changes: 1 addition & 1 deletion build/mpnst/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ directory. Currently using the test files as input.
`mpnst_samples.csv` file. This pulls from the latest synapse
project metadata table.
```
docker run -v $PWD:/tmp -e SYNAPSE_AUTH_TOKEN=$SYNAPSE_AUTH_TOKEN mpnst sh build_samples.sh /tmp/build/build_test/test_samples.csv
docker run -v $PWD:/tmp -e SYNAPSE_AUTH_TOKEN=$SYNAPSE_AUTH_TOKEN mpnst sh build_samples.sh /tmp/build/build_test/test_samples.csv
```

3. Pull the data and map it to the samples. This uses the metadata
Expand Down
4 changes: 2 additions & 2 deletions build/mpnst/build_drugs.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
set -euo pipefail
#set -euo pipefail

trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR
#trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR

echo "Running 02_get_drug_data.R with /tmp/mpnst_drugs.tsv and $1."
Rscript 02_get_drug_data.R /tmp/mpnst_drugs.tsv $1
Expand Down
4 changes: 2 additions & 2 deletions build/mpnst/requirements.r
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ install.packages('remotes')
remotes::install_version('rjson', version = '0.2.21', repos = 'https://cloud.r-project.org')
install.packages('synapser', repos = c('http://ran.synapse.org', 'https://cloud.r-project.org'))
install.packages("dplyr")
install.packages("data.table")
install.packages("synapser", repos = c("http://ran.synapse.org", "https://cloud.r-project.org"))
install.packages("data.table")
install.packages("R.utils")
install.packages("stringr")
install.packages("tidyr")
install.packages("tidyr")
3 changes: 2 additions & 1 deletion build/utils/build_drug_desc.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@ def main():
cansmiles = [a for a in set(tab.canSMILES) if str(a)!='nan']
# isosmiles = list(set(tab.isoSMILES))
morgs = smiles_to_fingerprint(cansmiles)

# print(morgs)
ids = pd.DataFrame(tab[['improve_drug_id','canSMILES']]).drop_duplicates()
# print(ids)
id_morg = ids.rename({"canSMILES":'smile'},axis=1).merge(morgs)[['improve_drug_id','structural_descriptor','descriptor_value']]

mords = smiles_to_mordred(cansmiles,nproc=ncors)
Expand Down

0 comments on commit d0dc5ed

Please sign in to comment.