diff --git a/.gitignore b/.gitignore index abe11b1..fb27253 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ data/ - +executions/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/compute1/launch_notebook.sh b/compute1/launch_notebook.sh new file mode 100644 index 0000000..d9db527 --- /dev/null +++ b/compute1/launch_notebook.sh @@ -0,0 +1,4 @@ +export LSF_DOCKER_VOLUMES="/storage1/fs1/dinglab:/storage1/fs1/dinglab /scratch1/fs1/dinglab:/scratch1/fs1/dinglab /home/estorrs:/home/estorrs" +export PATH="/miniconda/envs/ancestry/bin:$PATH" + +LSF_DOCKER_PORTS='8282:8888' bsub -R 'select[mem>10GB,port8282=1] rusage[mem=10GB] span[hosts=1]' -M 11GB -q general-interactive -G compute-dinglab -Is -a 'docker(estorrs/ancestry-pipeline:0.0.1)' 'jupyter notebook --port 8888 --no-browser --ip=0.0.0.0' diff --git a/notebooks/make_test_data.ipynb b/notebooks/make_test_data.ipynb new file mode 100644 index 0000000..1078006 --- /dev/null +++ b/notebooks/make_test_data.ipynb @@ -0,0 +1,596 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "a63150d1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "accd9693", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
# sample_namecasediseaseexperimental_strategysample_typedata_pathfilesizedata_formatreferenceUUIDsystem
29C1230738.WXS.T.ADNA_eb44394c.hg38C1230738GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...34561898412BAMhg38f6c72dde-4426-4a98-9716-e4490b425df3storage1
31C1230738.WXS.T.hg38C1230738GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...34971128893BAMhg38b06c50fe-383f-4cce-a9b0-9f70f118d5e0storage1
51C1245129.WXS.T.ADNA_f4f0a623.hg38C1245129GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...37402084587BAMhg389e05a653-da61-49f4-b2a7-b58f4781e760storage1
53C1245129.WXS.T.hg38C1245129GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...39273198570BAMhg38923cea70-4973-4a54-ad99-151028fe7669storage1
81C204057.WXS.T.ADNA_fb79d37d.hg38C204057GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...36138400689BAMhg386a93dee0-802a-47cc-90a5-37f686a9aebfstorage1
....................................
7190C761370.WXS.T.ADNA_260f1df4.hg38C761370GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...44821987147BAMhg38fd66e1f9-ce3b-4a76-9e48-23bf78568f41storage1
7192C761370.WXS.T.hg38C761370GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...34824648092BAMhg388541768d-a439-4abf-987e-d08039a4ec1dstorage1
7214C827913.WXS.T.hg38C827913GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...31571237792BAMhg38295d757c-a37e-49f5-9d26-65dff2dbbf13storage1
7238C846363.WXS.T.ADNA_51dcc4a3.hg38C846363GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...47361689745BAMhg3883a989e8-836e-42b2-a240-f87f0858d189storage1
7240C846363.WXS.T.hg38C846363GBMWXStumor/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...22124861490BAMhg38e3e1eeb8-e749-4520-83e2-3ae1091a7207storage1
\n", + "

137 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " # sample_name case disease \\\n", + "29 C1230738.WXS.T.ADNA_eb44394c.hg38 C1230738 GBM \n", + "31 C1230738.WXS.T.hg38 C1230738 GBM \n", + "51 C1245129.WXS.T.ADNA_f4f0a623.hg38 C1245129 GBM \n", + "53 C1245129.WXS.T.hg38 C1245129 GBM \n", + "81 C204057.WXS.T.ADNA_fb79d37d.hg38 C204057 GBM \n", + "... ... ... ... \n", + "7190 C761370.WXS.T.ADNA_260f1df4.hg38 C761370 GBM \n", + "7192 C761370.WXS.T.hg38 C761370 GBM \n", + "7214 C827913.WXS.T.hg38 C827913 GBM \n", + "7238 C846363.WXS.T.ADNA_51dcc4a3.hg38 C846363 GBM \n", + "7240 C846363.WXS.T.hg38 C846363 GBM \n", + "\n", + " experimental_strategy sample_type \\\n", + "29 WXS tumor \n", + "31 WXS tumor \n", + "51 WXS tumor \n", + "53 WXS tumor \n", + "81 WXS tumor \n", + "... ... ... \n", + "7190 WXS tumor \n", + "7192 WXS tumor \n", + "7214 WXS tumor \n", + "7238 WXS tumor \n", + "7240 WXS tumor \n", + "\n", + " data_path filesize \\\n", + "29 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 34561898412 \n", + "31 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 34971128893 \n", + "51 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 37402084587 \n", + "53 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 39273198570 \n", + "81 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 36138400689 \n", + "... ... ... \n", + "7190 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 44821987147 \n", + "7192 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 34824648092 \n", + "7214 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 31571237792 \n", + "7238 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 47361689745 \n", + "7240 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 
22124861490 \n", + "\n", + " data_format reference UUID system \n", + "29 BAM hg38 f6c72dde-4426-4a98-9716-e4490b425df3 storage1 \n", + "31 BAM hg38 b06c50fe-383f-4cce-a9b0-9f70f118d5e0 storage1 \n", + "51 BAM hg38 9e05a653-da61-49f4-b2a7-b58f4781e760 storage1 \n", + "53 BAM hg38 923cea70-4973-4a54-ad99-151028fe7669 storage1 \n", + "81 BAM hg38 6a93dee0-802a-47cc-90a5-37f686a9aebf storage1 \n", + "... ... ... ... ... \n", + "7190 BAM hg38 fd66e1f9-ce3b-4a76-9e48-23bf78568f41 storage1 \n", + "7192 BAM hg38 8541768d-a439-4abf-987e-d08039a4ec1d storage1 \n", + "7214 BAM hg38 295d757c-a37e-49f5-9d26-65dff2dbbf13 storage1 \n", + "7238 BAM hg38 83a989e8-836e-42b2-a240-f87f0858d189 storage1 \n", + "7240 BAM hg38 e3e1eeb8-e749-4520-83e2-3ae1091a7207 storage1 \n", + "\n", + "[137 rows x 11 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('/storage1/fs1/dinglab/Active/Projects/estorrs/CPTAC3.catalog/BamMap/storage1.BamMap.dat',\n", + " sep='\\t')\n", + "df = df[df['data_format']=='BAM']\n", + "df = df[df['reference']=='hg38']\n", + "df = df[df['experimental_strategy']=='WXS']\n", + "df = df[df['disease']=='GBM']\n", + "df = df[df['sample_type']=='tumor']\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c4da7ec6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sample_idfilepath
29C1230738.WXS.T.ADNA_eb44394c.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
31C1230738.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
51C1245129.WXS.T.ADNA_f4f0a623.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
53C1245129.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
81C204057.WXS.T.ADNA_fb79d37d.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
.........
7190C761370.WXS.T.ADNA_260f1df4.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
7192C761370.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
7214C827913.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
7238C846363.WXS.T.ADNA_51dcc4a3.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
7240C846363.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
\n", + "

137 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " sample_id \\\n", + "29 C1230738.WXS.T.ADNA_eb44394c.hg38 \n", + "31 C1230738.WXS.T.hg38 \n", + "51 C1245129.WXS.T.ADNA_f4f0a623.hg38 \n", + "53 C1245129.WXS.T.hg38 \n", + "81 C204057.WXS.T.ADNA_fb79d37d.hg38 \n", + "... ... \n", + "7190 C761370.WXS.T.ADNA_260f1df4.hg38 \n", + "7192 C761370.WXS.T.hg38 \n", + "7214 C827913.WXS.T.hg38 \n", + "7238 C846363.WXS.T.ADNA_51dcc4a3.hg38 \n", + "7240 C846363.WXS.T.hg38 \n", + "\n", + " filepath \n", + "29 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "31 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "51 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "53 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "81 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "... ... \n", + "7190 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "7192 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "7214 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "7238 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "7240 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "\n", + "[137 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df[['# sample_name', 'data_path']]\n", + "df.columns = ['sample_id', 'filepath']\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a9e97e74", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sample_idfilepath
29C1230738.WXS.T.ADNA_eb44394c.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
31C1230738.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
51C1245129.WXS.T.ADNA_f4f0a623.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
53C1245129.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
81C204057.WXS.T.ADNA_fb79d37d.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
.........
7190C761370.WXS.T.ADNA_260f1df4.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
7192C761370.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
7214C827913.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
7238C846363.WXS.T.ADNA_51dcc4a3.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
7240C846363.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
\n", + "

69 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " sample_id \\\n", + "29 C1230738.WXS.T.ADNA_eb44394c.hg38 \n", + "31 C1230738.WXS.T.hg38 \n", + "51 C1245129.WXS.T.ADNA_f4f0a623.hg38 \n", + "53 C1245129.WXS.T.hg38 \n", + "81 C204057.WXS.T.ADNA_fb79d37d.hg38 \n", + "... ... \n", + "7190 C761370.WXS.T.ADNA_260f1df4.hg38 \n", + "7192 C761370.WXS.T.hg38 \n", + "7214 C827913.WXS.T.hg38 \n", + "7238 C846363.WXS.T.ADNA_51dcc4a3.hg38 \n", + "7240 C846363.WXS.T.hg38 \n", + "\n", + " filepath \n", + "29 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "31 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "51 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "53 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "81 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "... ... \n", + "7190 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "7192 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "7214 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "7238 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "7240 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... 
\n", + "\n", + "[69 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mask = [True if os.path.exists(fp) else False for fp in df['filepath']]\n", + "df = df[mask]\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "55bd3c7c", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv('../executions/cptac_test/gbm_sample_to_bam.txt', sep='\\t', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2639b817", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/run_ancestry.ipynb b/notebooks/run_ancestry.ipynb new file mode 100644 index 0000000..bac7bec --- /dev/null +++ b/notebooks/run_ancestry.ipynb @@ -0,0 +1,903 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 58, + "id": "a971c34e", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "import subprocess\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "\n", + "from wombat.bsub import batch_bsub_commands, write_command_file, DEFAULT_ARGS\n", + "from wombat.utils import listfiles" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "3860ecd6", + "metadata": {}, + "outputs": [], + "source": [ + "run_dir = '/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test'\n", + "Path(run_dir).mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "id": "8d3fdc26", + "metadata": {}, + "source": [ + "## define inputs" + ] + 
}, + { + "cell_type": "markdown", + "id": "db1ae8ea", + "metadata": {}, + "source": [ + "thousand genomes sample panel used for training" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "80e694dd", + "metadata": {}, + "outputs": [], + "source": [ + "thousand_genomes_panel = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/integrated_call_samples_v3.20130502.ALL.panel'" + ] + }, + { + "cell_type": "markdown", + "id": "b78b039d", + "metadata": {}, + "source": [ + "reference genome (preferably the same reference used to align the .bams that are having ancestry called on them)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "fa48babe", + "metadata": {}, + "outputs": [], + "source": [ + "reference_fasta = '/storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa'" + ] + }, + { + "cell_type": "markdown", + "id": "593b4c00", + "metadata": {}, + "source": [ + "vcf and bed files for the thousand genomes project samples.\n", + "\n", + "make sure to use the vcf/bed pair that matches your reference (hg19 vs hg38) and whether the genome coordinates start with \"chr\" or not\n", + "\n", + "the available versions of these files are listed below\n", + "\n", + "hg38 and genome coordinates start with chr\n", + "+ thousand_genomes_vcf = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.vcf'\n", + "+ thousand_genomes_bed = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed'\n", + "\n", + "hg38 and genome coordinates do not start with chr\n", + "+ thousand_genomes_vcf = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.vcf'\n", + "+ thousand_genomes_bed = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.bed'\n", + "\n", + "hg19 and genome coordinates start with chr\n", + "+ thousand_genomes_vcf =
'/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg19/all.coding.sorted.02maf.snps.chr.vcf'\n", + "+ thousand_genomes_bed = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg19/all.coding.sorted.02maf.snps.chr.bed'\n", + "\n", + "hg19 and genome coordinates do not start with chr\n", + "+ thousand_genomes_vcf = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg19/all.coding.sorted.02maf.snps.vcf'\n", + "+ thousand_genomes_bed = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg19/all.coding.sorted.02maf.snps.bed'" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "898bc4bd", + "metadata": {}, + "outputs": [], + "source": [ + "thousand_genomes_vcf = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.vcf'\n", + "thousand_genomes_bed = '/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed'" + ] + }, + { + "cell_type": "markdown", + "id": "fe406ed2", + "metadata": {}, + "source": [ + "sample bam locations" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "a11b763d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sample_idfilepath
0C1230738.WXS.T.ADNA_eb44394c.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
1C1230738.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
2C1245129.WXS.T.ADNA_f4f0a623.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
3C1245129.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
4C204057.WXS.T.ADNA_fb79d37d.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
.........
64C761370.WXS.T.ADNA_260f1df4.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
65C761370.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
66C827913.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
67C846363.WXS.T.ADNA_51dcc4a3.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
68C846363.WXS.T.hg38/storage1/fs1/dinglab/Active/Primary/CPTAC3.sh...
\n", + "

69 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " sample_id \\\n", + "0 C1230738.WXS.T.ADNA_eb44394c.hg38 \n", + "1 C1230738.WXS.T.hg38 \n", + "2 C1245129.WXS.T.ADNA_f4f0a623.hg38 \n", + "3 C1245129.WXS.T.hg38 \n", + "4 C204057.WXS.T.ADNA_fb79d37d.hg38 \n", + ".. ... \n", + "64 C761370.WXS.T.ADNA_260f1df4.hg38 \n", + "65 C761370.WXS.T.hg38 \n", + "66 C827913.WXS.T.hg38 \n", + "67 C846363.WXS.T.ADNA_51dcc4a3.hg38 \n", + "68 C846363.WXS.T.hg38 \n", + "\n", + " filepath \n", + "0 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "1 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "2 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "3 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "4 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + ".. ... \n", + "64 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "65 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "66 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "67 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "68 /storage1/fs1/dinglab/Active/Primary/CPTAC3.sh... \n", + "\n", + "[69 rows x 2 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('/storage1/fs1/dinglab/Active/Projects/estorrs/ancestry-pipeline/executions/cptac_test/gbm_sample_to_bam.txt',\n", + " sep='\\t')\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "08cf2f19", + "metadata": {}, + "source": [ + "sanity check to make sure genome coordinates match; in this test case we expect every chromosome name to be prefixed with \"chr\"" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "c3d88215", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
0chr11327313273
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 chr1 13273 13273" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv(thousand_genomes_bed, sep='\\t', header=None, nrows=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "ac29eb22", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'chr1'" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp = df['filepath'][0]\n", + "cmd = f'samtools view {fp} | head -n 1'\n", + "out = subprocess.check_output(cmd, shell=True) # chromosome coordinate is in third column\n", + "out.decode().split('\\t')[2]" + ] + }, + { + "cell_type": "markdown", + "id": "6dff02a7", + "metadata": {}, + "source": [ + "## setting bsub related parameters" + ] + }, + { + "cell_type": "markdown", + "id": "8efa5eb7", + "metadata": {}, + "source": [ + "## step 1: extract readcounts with bam-readcount" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "b65deb39", + "metadata": {}, + "outputs": [], + "source": [ + "execution_dir = os.path.join(run_dir, '1.slate')\n", + "log_dir = os.path.join(execution_dir, 'logs')\n", + "result_dir = os.path.join(execution_dir, 'results')\n", + "Path(log_dir).mkdir(parents=True, exist_ok=True)\n", + "Path(result_dir).mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "247b5e2a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_slate_command(input_bam_fp, readcount_fp, trimmed_bam_fp):\n", + " return f'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 \\\n", + "--fasta {reference_fasta} --positions {thousand_genomes_bed} \\\n", + "--readcount-output {readcount_fp} --filtered-bam-output {trimmed_bam_fp} {input_bam_fp}'" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "ef02a722", + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "{'mem': 10,\n", + " 'n_processes': 1,\n", + " 'max_mem': None,\n", + " 'docker': 'python:3.8',\n", + " 'queue': 'dinglab',\n", + " 'gpu_model': 'TeslaV100_SXM2_32GB',\n", + " 'gpu_mem': '30',\n", + " 'gpu_num': 1,\n", + " 'use_gpu': False,\n", + " 'group': 'compute-dinglab',\n", + " 'group_name': None,\n", + " 'n_concurrent': 10,\n", + " 'interactive': False,\n", + " 'username': 'estorrs'}" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "volumes = ['/storage1/fs1/dinglab', '/scratch1/fs1/dinglab', '/home/estorrs'] # compute1 needs to know which directories to map, replace /home/ if you are not me\n", + "exports = ['/miniconda/bin'] # need to override compute1 PATH defaults\n", + "args = DEFAULT_ARGS.copy()\n", + "args" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "926f080b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['mkdir -p /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs',\n", + " 'export LSF_DOCKER_VOLUMES=\"/storage1/fs1/dinglab:/storage1/fs1/dinglab /scratch1/fs1/dinglab:/scratch1/fs1/dinglab /home/estorrs:/home/estorrs /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs:/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs\"',\n", + " 'bgadd -L 100 /estorrs/ancestry_slate',\n", + " 'export PATH=\"/opt/java/openjdk/bin:/miniconda/bin:$PATH\"',\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C1230738.WXS.T.ADNA_eb44394c.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C1230738.WXS.T.ADNA_eb44394c.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions 
/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C1230738.WXS.T.ADNA_eb44394c.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C1230738.WXS.T.ADNA_eb44394c.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/f6c72dde-4426-4a98-9716-e4490b425df3/062a4cb7-1a73-4382-9f16-fdd891218a97_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C1230738.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C1230738.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C1230738.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C1230738.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/b06c50fe-383f-4cce-a9b0-9f70f118d5e0/aee72b24-81c3-47b5-856d-35ff22208f84_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C1245129.WXS.T.ADNA_f4f0a623.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C1245129.WXS.T.ADNA_f4f0a623.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta 
/storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C1245129.WXS.T.ADNA_f4f0a623.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C1245129.WXS.T.ADNA_f4f0a623.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/9e05a653-da61-49f4-b2a7-b58f4781e760/5b739159-0dff-476e-932e-8198cb3c76e4_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C1245129.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C1245129.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C1245129.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C1245129.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/923cea70-4973-4a54-ad99-151028fe7669/fb858b07-76dc-48b4-9096-adb0aed38c31_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C204057.WXS.T.ADNA_fb79d37d.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C204057.WXS.T.ADNA_fb79d37d.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 
--min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C204057.WXS.T.ADNA_fb79d37d.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C204057.WXS.T.ADNA_fb79d37d.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/6a93dee0-802a-47cc-90a5-37f686a9aebf/b1b62bc9-91c7-416b-92c8-35f6e197980a_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C204057.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C204057.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C204057.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C204057.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/a3e622c4-b84b-41a3-810a-97bf521ba9a9/73fbc689-0c62-4ccb-8aaf-6d3697fc06e1_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C207624.WXS.T.ADNA_8ab52100.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C207624.WXS.T.ADNA_8ab52100.hg38.txt 'python /slate/slate/slate.py 
--min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C207624.WXS.T.ADNA_8ab52100.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C207624.WXS.T.ADNA_8ab52100.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/370da836-9a04-4ad5-9b4f-11270f994e3c/fd0af1cc-2bd5-4433-b288-a8f95f0f4eb9_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C207624.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C207624.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C207624.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C207624.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/1252d351-fbb5-4716-b862-028f33399922/0af3abb3-d73f-47f4-9f8a-9ac41d230bca_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C208854.WXS.T.ADNA_15b44502.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C208854.WXS.T.ADNA_15b44502.hg38.txt 'python 
/slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C208854.WXS.T.ADNA_15b44502.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C208854.WXS.T.ADNA_15b44502.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/18f09f19-5cc3-413d-968f-a574a8a6084d/eb327532-25bd-4c10-a414-185474bdb7ef_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C208854.WXS.T.ADNA_93ae1041.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C208854.WXS.T.ADNA_93ae1041.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C208854.WXS.T.ADNA_93ae1041.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C208854.WXS.T.ADNA_93ae1041.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/af9ba009-d30c-4629-98a5-80c96e605e94/9c504d59-e2a4-492c-ae3a-993e3f802cef_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C208854.WXS.T.hg38 -oo 
/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C208854.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C208854.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C208854.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/20773aad-0e5f-4b6d-8542-6981c9bf31a9/39d0b9dc-ead3-409f-87fc-484fee59f592_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C217710.WXS.T.ADNA_e91fb26d.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C217710.WXS.T.ADNA_e91fb26d.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C217710.WXS.T.ADNA_e91fb26d.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C217710.WXS.T.ADNA_e91fb26d.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/8bbd9c83-206a-4547-93fb-1dbc329c4128/cdfb6ee2-5d43-478c-8f60-80c3c5ef589e_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J 
C217710.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C217710.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C217710.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C217710.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/1d5ad431-811e-436c-8f11-dc1badb61be6/49aea928-c7da-432e-aa39-ec92de5d9f00_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C219555.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C219555.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C219555.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C219555.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/1d6f28a8-c2fe-4259-b1ed-b59d4b65d934/b8cc75d6-b8c2-4e04-8808-9c99797b2115_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C224106.WXS.T.ADNA_4208c7cd.hg38 -oo 
/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C224106.WXS.T.ADNA_4208c7cd.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C224106.WXS.T.ADNA_4208c7cd.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C224106.WXS.T.ADNA_4208c7cd.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/68b960fe-389d-45f1-a5b1-4500a8670c48/03758b31-f5e1-4bea-8001-a5147945dafa_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C224106.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C224106.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C224106.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C224106.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/3345bb07-d0be-4306-be6e-ea632f8c5fc8/a5fb9d82-568c-4f92-9d20-ff80d6bce284_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J 
C224967.WXS.T.ADNA_ef1b8e33.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C224967.WXS.T.ADNA_ef1b8e33.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C224967.WXS.T.ADNA_ef1b8e33.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C224967.WXS.T.ADNA_ef1b8e33.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/406c1599-e9aa-41f3-95d8-3126dece3c82/75069e02-ca1b-4e5d-bd9a-7e0996430dec_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C224967.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C224967.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C224967.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C224967.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/63f77087-cd4f-4b87-a777-57a55abe245f/aac08359-e9fe-4cf7-b5e1-12cc6dfa6cf4_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g 
/estorrs/ancestry_slate -J C229764.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C229764.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C229764.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C229764.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/0858e1ec-6d27-4654-9a9d-ac8d08173e1a/ac8d4e2a-cbf6-4679-b022-ab0518e1ae21_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C231855.WXS.T.ADNA_40a9c1b9.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C231855.WXS.T.ADNA_40a9c1b9.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C231855.WXS.T.ADNA_40a9c1b9.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C231855.WXS.T.ADNA_40a9c1b9.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/aea7e706-e796-4b1f-bb99-40a333b400e9/8b5d164f-881e-476b-ad10-84e8b23a9601_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 
'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C231855.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C231855.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C231855.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C231855.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/f12a807d-d1e9-4c32-a4ac-798ff0359c82/ca322924-b07a-4fbb-8634-78e26e197671_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C234192.WXS.T.ADNA_a92ddca1.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C234192.WXS.T.ADNA_a92ddca1.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C234192.WXS.T.ADNA_a92ddca1.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C234192.WXS.T.ADNA_a92ddca1.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/391bf1c9-43e5-4e52-b602-a8014c059dbc/4eae5599-7c20-444f-8253-fc4a0195d96f_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G 
compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C234192.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C234192.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C234192.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C234192.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/00d428d0-e8fc-4662-a0fa-cabc09e83c20/d774c07e-df68-4037-983e-1b21d267226b_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C2415597.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C2415597.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C2415597.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C2415597.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/d7016be0-b5fd-4b35-bfcb-b9f68116a669/bc166e0c-63e2-409c-a09f-7100fc4d0399_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 
'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C243048.WXS.T.ADNA_94a7b89d.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C243048.WXS.T.ADNA_94a7b89d.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C243048.WXS.T.ADNA_94a7b89d.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C243048.WXS.T.ADNA_94a7b89d.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ba5c13e9-a560-4436-90f2-011d77ee7531/e4e6f65e-c590-48d7-8fba-1bfc0f10d029_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C243048.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C243048.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C243048.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C243048.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/8bdfe531-ae92-49f4-87c0-02ee2ad09779/6c5fd8de-99ba-4d63-8dd6-7587d7e72478_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G 
compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C243909.WXS.T.ADNA_74725183.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C243909.WXS.T.ADNA_74725183.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C243909.WXS.T.ADNA_74725183.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C243909.WXS.T.ADNA_74725183.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/fff94d78-afc1-4c90-9e46-7d90e2f05b5f/d013c55d-d3bd-488f-95cd-6c6e9228d817_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C243909.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C243909.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C243909.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C243909.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/c7bd5850-d12c-44e5-ab30-20b39a264488/87b93eca-6649-48fe-8311-56853e65a862_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q 
general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C247230.WXS.T.ADNA_0aa190bd.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C247230.WXS.T.ADNA_0aa190bd.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C247230.WXS.T.ADNA_0aa190bd.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C247230.WXS.T.ADNA_0aa190bd.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/f79420a5-f8b3-4853-85e4-8103a56fc3f9/44d33820-4b25-4059-b10b-6f7b9038ed39_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C247230.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C247230.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C247230.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C247230.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/94b2e77b-dacb-475b-bbef-febab3c064c2/eafce93e-0fd9-44bb-942d-7820b6f68744_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 
6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C259161.WXS.T.ADNA_5c7e3295.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C259161.WXS.T.ADNA_5c7e3295.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C259161.WXS.T.ADNA_5c7e3295.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C259161.WXS.T.ADNA_5c7e3295.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/942805cd-2142-4c93-b3d2-a8980b14e829/71aff02e-d6dd-4292-8627-d33c7d95fea5_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C259161.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C259161.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C259161.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C259161.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/2fe9a9b8-b5c4-4e72-8fb7-71cac06dd773/c72c1846-16af-4899-a429-9f5b7c954a58_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] 
span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C266664.WXS.T.ADNA_5dbc58a2.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C266664.WXS.T.ADNA_5dbc58a2.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C266664.WXS.T.ADNA_5dbc58a2.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C266664.WXS.T.ADNA_5dbc58a2.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/ad1b5b7b-5c70-44e1-aa5c-f1e397fbb55a/670c02b3-383c-4ca4-9a0d-2fc479296d0c_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C266664.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C266664.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C266664.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C266664.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/7fa81b59-b344-463c-91a7-3bf91763635f/3af09701-86aa-4741-aa59-9aed8b02f2bf_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] 
rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C283515.WXS.T.ADNA_16970afa.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C283515.WXS.T.ADNA_16970afa.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C283515.WXS.T.ADNA_16970afa.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C283515.WXS.T.ADNA_16970afa.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/fb59bee7-446d-4669-bd78-6234196d0153/2a1d8b76-daf0-4068-8ad2-61c163e545a3_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C283515.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C283515.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C283515.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C283515.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/5aa549bf-6059-4691-9a1a-dc2e334019b0/e251ee1d-1c35-4852-89fe-50fc2a4ac948_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 
'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C287328.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C287328.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C287328.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C287328.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/bc0a38db-f504-49c1-b50f-25b1cc7af943/198d468f-807f-4fbc-80c3-db5d8d0b859f_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C289419.WXS.T.ADNA_ebf29e44.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C289419.WXS.T.ADNA_ebf29e44.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C289419.WXS.T.ADNA_ebf29e44.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C289419.WXS.T.ADNA_ebf29e44.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/56d4c848-6cf4-4228-ad97-d3ab5056812d/4d704111-c691-4670-b26a-c087157c5729_wxs_gdc_realn.bam'\",\n", + " 
\"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C289419.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C289419.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C289419.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C289419.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/7ca5f822-55be-4e81-81e4-5542cf2b6fe1/1db3a7f3-a22c-4898-82e3-8957c1238080_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C298152.WXS.T.ADNA_cb6d6363.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C298152.WXS.T.ADNA_cb6d6363.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C298152.WXS.T.ADNA_cb6d6363.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C298152.WXS.T.ADNA_cb6d6363.hg38.filtered.bam 
/storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/96617688-1ee2-4e4a-b128-6e170cee9966/2359e710-f546-4a7d-9b12-c9c3028c8b92_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C298152.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C298152.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C298152.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C298152.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/83633780-d4aa-48d6-bf32-90efafb8eb91/45164d3a-0a7f-498f-a025-eae34981591f_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C317832.WXS.T.ADNA_4b7f0398.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C317832.WXS.T.ADNA_4b7f0398.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C317832.WXS.T.ADNA_4b7f0398.hg38.readcount --filtered-bam-output 
/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C317832.WXS.T.ADNA_4b7f0398.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/5b5d151b-3c7f-423a-9684-a301d78e9be5/a8de32a7-4eaf-498c-8b3f-17a2c36aba44_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C317832.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C317832.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C317832.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C317832.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/a3eb79f6-7744-45d2-b66f-8ab2c0a4a670/88fc297c-04d4-4bbe-885c-794e65593177_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C330255.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C330255.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C330255.WXS.T.hg38.readcount --filtered-bam-output 
/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C330255.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/1342f51b-95be-4e96-9d04-5ce16089b486/cad8ffba-1e4f-4f8f-9378-0bd507f2444a_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C383391.WXS.T.ADNA_4db2e37f.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C383391.WXS.T.ADNA_4db2e37f.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C383391.WXS.T.ADNA_4db2e37f.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C383391.WXS.T.ADNA_4db2e37f.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/7341902c-8501-419d-a4a6-d88859d9a8d8/0d5862af-2f59-42b9-b6d8-5c1d84d662e9_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C383391.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C383391.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C383391.WXS.T.hg38.readcount 
--filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C383391.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/cff641f8-776b-4b10-8983-71008ed827f4/f7d2db47-e553-4569-b883-51d139f98ef5_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C3L-04081.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C3L-04081.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-04081.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-04081.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/4f239bb8-5070-4cf0-b14c-599bbb7bfd94/fef12b13-bc4b-49fe-8975-9e7b25380154_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C3L-05481.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C3L-05481.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-05481.WXS.T.hg38.readcount --filtered-bam-output 
/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-05481.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/e6b33d67-a8b0-42ce-8994-6e3e5e782da4/a8b62182-6e82-446f-bd1f-e829cf7979c7_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C3L-06625.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C3L-06625.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-06625.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-06625.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/926f6d61-0f53-4923-96cf-73626b0e0fe7/41fbd2d6-fccb-42f0-ae30-f26011c2e96e_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C3L-07202.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C3L-07202.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-07202.WXS.T.hg38.readcount --filtered-bam-output 
/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-07202.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/5c4c8930-d564-4e1a-97bd-5971d536af18/ce1dba67-d732-46e3-9e72-096f6f0b75b9_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C3L-07213.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C3L-07213.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-07213.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-07213.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/57461fc1-881f-4b14-9c8c-6c795a225e1f/63cdc123-6efd-4d15-a5cb-4c8205b9eefe_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C3L-07611.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C3L-07611.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-07611.WXS.T.hg38.readcount --filtered-bam-output 
/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3L-07611.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/66ea66b9-4620-4c09-a59d-f9b1c0bcb09f/f135e074-b46a-473b-8015-439c78e8f6fd_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C3N-01197.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C3N-01197.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3N-01197.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3N-01197.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/813f42ab-91e9-4dbc-8d09-bf15289946f8/8a4a56fd-dd41-4555-b2ac-8f80c06a75e4_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C3N-04113.WXS.T.RDNA_o25vvXX.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C3N-04113.WXS.T.RDNA_o25vvXX.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3N-04113.WXS.T.RDNA_o25vvXX.hg38.readcount 
--filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3N-04113.WXS.T.RDNA_o25vvXX.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/e4493502-3425-4529-96af-414aae684e99/1ac39c4d-4742-4eea-ac1a-074503b7b5a4_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C3N-05991.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C3N-05991.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3N-05991.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C3N-05991.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/7c48aae6-c19b-46e4-a8aa-205117088f07/0ca72fda-2cdf-415a-b5a4-53470bf3af60_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C464694.WXS.T.ADNA_7319b796.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C464694.WXS.T.ADNA_7319b796.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output 
/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C464694.WXS.T.ADNA_7319b796.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C464694.WXS.T.ADNA_7319b796.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/00eb9000-e8b4-4b57-8f08-45b92aaba916/737a37b7-31a5-4187-a4fa-5e7171bb6569_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C464694.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C464694.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C464694.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C464694.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/44dd95c6-0058-43cc-8f40-781d00ea6f46/31b51dd4-c4fe-4d97-ae09-698217519667_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C524103.WXS.T.ADNA_aee3de0f.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C524103.WXS.T.ADNA_aee3de0f.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed 
--readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C524103.WXS.T.ADNA_aee3de0f.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C524103.WXS.T.ADNA_aee3de0f.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/35d02b21-b274-4b7c-ac81-9f4a5dd1176d/4c95a675-d17a-40a7-b77f-a2389895f7fe_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C524103.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C524103.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C524103.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C524103.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/2b0b81ce-7556-4be8-bf5f-6f359239cc39/ffd6163a-085f-4213-89e4-5bbc68bca404_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C621273.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C621273.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed 
--readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C621273.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C621273.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/4daf5e7f-6a2a-4c49-a2cb-abb712bacfae/3f83ce79-0cd9-4e7e-9352-726042aff7c8_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C685110.WXS.T.ADNA_224ab8cc.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C685110.WXS.T.ADNA_224ab8cc.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C685110.WXS.T.ADNA_224ab8cc.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C685110.WXS.T.ADNA_224ab8cc.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/0da7ee04-c471-488a-900b-44c538d9b274/9cd89b80-7f2d-4caf-9599-4457c80de4cd_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C685110.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C685110.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions 
/storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C685110.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C685110.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/533ec6cf-ac77-4d87-ad54-c8f34025c447/1e2e1134-2810-4845-91cb-feed8c0cb7af_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C760878.WXS.T.ADNA_b1ddc47c.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C760878.WXS.T.ADNA_b1ddc47c.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C760878.WXS.T.ADNA_b1ddc47c.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C760878.WXS.T.ADNA_b1ddc47c.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/dae070e2-2c6a-49ba-ae92-6f2319325e2a/9efda7a7-d54a-492b-9158-88f2644e909f_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C760878.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C760878.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa 
--positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C760878.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C760878.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/5495a6ee-951b-4613-b9d3-3344fe76a916/ad3ce926-8ffc-46c4-87c9-64fba596ac12_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C761370.WXS.T.ADNA_260f1df4.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C761370.WXS.T.ADNA_260f1df4.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C761370.WXS.T.ADNA_260f1df4.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C761370.WXS.T.ADNA_260f1df4.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/fd66e1f9-ce3b-4a76-9e48-23bf78568f41/59ba09c2-abf4-46dd-b566-c703d8da80e5_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C761370.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C761370.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta 
/storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C761370.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C761370.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/8541768d-a439-4abf-987e-d08039a4ec1d/94dc4f39-5f41-450e-bc88-b5efb574ab4b_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C827913.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C827913.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C827913.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C827913.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/295d757c-a37e-49f5-9d26-65dff2dbbf13/04ca4248-176f-4eda-89af-30230af7817d_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C846363.WXS.T.ADNA_51dcc4a3.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C846363.WXS.T.ADNA_51dcc4a3.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta 
/storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C846363.WXS.T.ADNA_51dcc4a3.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C846363.WXS.T.ADNA_51dcc4a3.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/83a989e8-836e-42b2-a240-f87f0858d189/9dd34d93-e75b-4bc5-b49a-b095ba8daf3e_wxs_gdc_realn.bam'\",\n", + " \"bsub -R 'select[mem>5GB] rusage[mem=5GB] span[hosts=1]' -M 6GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/slate:0.0.2)' -g /estorrs/ancestry_slate -J C846363.WXS.T.hg38 -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/logs/C846363.WXS.T.hg38.txt 'python /slate/slate/slate.py --min-base-quality 20 --min-mapping-quality 20 --fasta /storage1/fs1/dinglab/Active/Projects/PECGS/ref_genome/GRCh38.d1.vd1.fa --positions /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.bed --readcount-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C846363.WXS.T.hg38.readcount --filtered-bam-output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results/C846363.WXS.T.hg38.filtered.bam /storage1/fs1/dinglab/Active/Primary/CPTAC3.share/CPTAC3-GDC/GDC_import/data/e3e1eeb8-e749-4520-83e2-3ae1091a7207/d895aa40-8999-4f0b-be15-694f1bdb380c_wxs_gdc_realn.bam'\"]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "args['username'] = 'estorrs' # change username if you arent me :) \n", + "args['docker'] = 'estorrs/slate:0.0.2'\n", + "args['queue'] = 'general'\n", + "args['n_concurrent'] = 100\n", + "args['mem'] = 5\n", + "args['group_name'] = 'ancestry_slate' # job group name\n", + "\n", + 
"commands = []\n", + "for s_id, fp in zip(df['sample_id'], df['filepath']):\n", + "\n", + " command = get_slate_command(fp,\n", + " os.path.join(result_dir, f'{s_id}.readcount'),\n", + " os.path.join(result_dir, f'{s_id}.filtered.bam'))\n", + "\n", + " commands.append(command)\n", + " \n", + "bsub_commands = batch_bsub_commands(commands, df['sample_id'].to_list(), log_dir, args,\n", + " volumes=volumes, exports=exports)\n", + "bsub_commands" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "289b3d04", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/run_slate.sh'" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp = os.path.join(execution_dir, 'run_slate.sh')\n", + "write_command_file(bsub_commands, fp)\n", + "fp" + ] + }, + { + "cell_type": "markdown", + "id": "ddbb0f74", + "metadata": {}, + "source": [ + "sanity check to make sure all runs completed successfully" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "5e551369", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(69, 69)" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fps = sorted(listfiles(log_dir, regex=r'.txt$'))\n", + "count = 0\n", + "for fp in fps:\n", + " f = open(fp)\n", + " if 'Successfully completed.' in f.read():\n", + " count += 1\n", + " f.close()\n", + "len(fps), count" + ] + }, + { + "cell_type": "markdown", + "id": "d2b5b012", + "metadata": {}, + "source": [ + "After running, make sure to clean up slate directory, it produces trim bammed as an output and they take up a lot of space. 
Run the below command to delete the unnecessary files" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "e032d66d", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "69" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fps = sorted(listfiles(result_dir, regex=r'.filtered.bam$'))\n", + "len(fps)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "0120524a", + "metadata": {}, + "outputs": [], + "source": [ + "for fp in fps:\n", + " os.remove(fp)" + ] + }, + { + "cell_type": "markdown", + "id": "2a539285", + "metadata": {}, + "source": [ + "## step 2: run genotype caller" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "ddd42f42", + "metadata": {}, + "outputs": [], + "source": [ + "execution_dir = os.path.join(run_dir, '2.genotype_caller')\n", + "log_dir = os.path.join(execution_dir, 'logs')\n", + "result_dir = os.path.join(execution_dir, 'results')\n", + "Path(log_dir).mkdir(parents=True, exist_ok=True)\n", + "Path(result_dir).mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "f5dc8982", + "metadata": {}, + "outputs": [], + "source": [ + "def get_genotype_calling_command(script, readcount_dir, output_fp):\n", + " return f'python {script} \\\n", + "--readcount-dir {readcount_dir} --genomes-vcf {thousand_genomes_vcf} \\\n", + "--output {output_fp}'" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "82996a39", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'mem': 10,\n", + " 'n_processes': 1,\n", + " 'max_mem': None,\n", + " 'docker': 'python:3.8',\n", + " 'queue': 'dinglab',\n", + " 'gpu_model': 'TeslaV100_SXM2_32GB',\n", + " 'gpu_mem': '30',\n", + " 'gpu_num': 1,\n", + " 'use_gpu': False,\n", + " 'group': 'compute-dinglab',\n", + " 'group_name': None,\n", + " 'n_concurrent': 10,\n", + " 'interactive': 
False,\n", + " 'username': 'estorrs'}" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "volumes = ['/storage1/fs1/dinglab', '/scratch1/fs1/dinglab', '/home/estorrs'] # compute1 needs to know which directories to map, replace /home/ if you are not me\n", + "exports = ['/miniconda/envs/ancestry/bin'] # need to override compute1 PATH defaults\n", + "args = DEFAULT_ARGS.copy()\n", + "args" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "3819a903", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['mkdir -p /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/2.genotype_caller/logs',\n", + " 'export LSF_DOCKER_VOLUMES=\"/storage1/fs1/dinglab:/storage1/fs1/dinglab /scratch1/fs1/dinglab:/scratch1/fs1/dinglab /home/estorrs:/home/estorrs /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/2.genotype_caller/logs:/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/2.genotype_caller/logs\"',\n", + " 'export PATH=\"/opt/java/openjdk/bin:/miniconda/envs/ancestry/bin:$PATH\"',\n", + " \"bsub -R 'select[mem>100GB] rusage[mem=100GB] span[hosts=1]' -M 101GB -n 1 -q general -G compute-dinglab -a 'docker(estorrs/ancestry-pipeline:0.0.1)' -oo /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/2.genotype_caller/logs/c.txt 'python /ancestry-pipeline/ancestry/readcount_caller.py --readcount-dir /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/1.slate/results --genomes-vcf /storage1/fs1/dinglab/Active/Projects/Ancestry_Data/hg38/all.coding.sorted.02maf.snps.chr.vcf --output /scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/2.genotype_caller/results/output.vcf'\"]" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "args['username'] = 'estorrs' # change username if you arent me :) \n", + "args['docker'] = 'estorrs/ancestry-pipeline:0.0.1'\n", + "args['queue'] = 'general'\n", + 
"args['mem'] = 100\n", + "\n", + "command = get_genotype_calling_command(\n", + " '/ancestry-pipeline/ancestry/readcount_caller.py',\n", + " os.path.join(run_dir, '1.slate', 'results'),\n", + " os.path.join(result_dir, 'output.vcf'))\n", + "\n", + " \n", + "bsub_commands = batch_bsub_commands([command], 'call readcounts', log_dir, args, volumes=volumes, exports=exports)\n", + "bsub_commands" + ] + }, + { + "cell_type": "markdown", + "id": "444bfb55", + "metadata": {}, + "source": [ + "run the below script" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "b8cd5ca8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/scratch1/fs1/dinglab/estorrs/ancestry/executions/gbm_test/2.genotype_caller/run_readcount_caller.sh'" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp = os.path.join(execution_dir, 'run_readcount_caller.sh')\n", + "write_command_file(bsub_commands, fp)\n", + "fp" + ] + }, + { + "cell_type": "markdown", + "id": "38fee9f5", + "metadata": {}, + "source": [ + "## step 3: predict ancestry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0b591bf", + "metadata": {}, + "outputs": [], + "source": [ + "execution_dir = os.path.join(run_dir, '3.predict_ancestry')\n", + "log_dir = os.path.join(execution_dir, 'logs')\n", + "result_dir = os.path.join(execution_dir, 'results')\n", + "Path(log_dir).mkdir(parents=True, exist_ok=True)\n", + "Path(result_dir).mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "472e79e0", + "metadata": {}, + "outputs": [], + "source": [ + "def get_ancestry_prediction_command(script, output_dir, sample_vcf_fp):\n", + " return f'python {script} \\\n", + "--output-dir {output_dir} {thousand_genomes_vcf} {thousand_genomes_panel} {sample_vcf_fp}'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df4357c6", + "metadata": {}, + "outputs": 
[], + "source": [ + "volumes = ['/storage1/fs1/dinglab', '/scratch1/fs1/dinglab', '/home/estorrs'] # compute1 needs to know which directories to map, replace /home/ if you are not me\n", + "exports = ['/miniconda/envs/ancestry/bin'] # need to override compute1 PATH defaults\n", + "args = DEFAULT_ARGS.copy()\n", + "args" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "211f3336", + "metadata": {}, + "outputs": [], + "source": [ + "args['username'] = 'estorrs' # change username if you arent me :) \n", + "args['docker'] = 'estorrs/ancestry-pipeline:0.0.1'\n", + "args['queue'] = 'general'\n", + "args['mem'] = 100\n", + "\n", + "command = get_ancestry_prediction_command(\n", + " '/ancestry-pipeline/ancestry/ancestry_cli.py',\n", + " result_dir,\n", + " os.path.join(run_dir, '2.genotype_caller', 'results', 'output.vcf'))\n", + " \n", + "bsub_commands = batch_bsub_commands([command], 'predict ancestry', log_dir, args, volumes=volumes)\n", + "bsub_commands" + ] + }, + { + "cell_type": "markdown", + "id": "5ee88ca2", + "metadata": {}, + "source": [ + "run the below script" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a179f13", + "metadata": {}, + "outputs": [], + "source": [ + "fp = os.path.join(execution_dir, 'predict_ancestry.sh')\n", + "write_command_file(bsub_commands, fp)\n", + "fp" + ] + }, + { + "cell_type": "markdown", + "id": "ba4be768", + "metadata": {}, + "source": [ + "## inspect results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba3671af", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + 
"nbformat": 4, + "nbformat_minor": 5 +}