diff --git a/Figure3/0_Load_All_Sample/script/load_samples.r b/Figure3/0_Load_All_Sample/script/load_samples.r index 2cfdd70..e0f5247 100644 --- a/Figure3/0_Load_All_Sample/script/load_samples.r +++ b/Figure3/0_Load_All_Sample/script/load_samples.r @@ -11,7 +11,7 @@ genetic_clone_sheet = read_tsv(str_glue('{ANALYSIS_FOLDER}/script/OCT_genetic_cl # Load sample sheet sample_sheet_url = "" -sample_sheet = read_sheet(, sheet = 'Sample_level') +sample_sheet = read_sheet(sample_sheet_url, sheet = 'Sample_level') sheet_use = sample_sheet %>% filter(LibraryName %in% genetic_clone_sheet$sample_id) # Load all data diff --git a/Figure6/0_data/table/OCT_genetic_clones_2023-09-07.tsv b/Figure6/0_data/table/OCT_genetic_clones_2023-09-07.tsv new file mode 100644 index 0000000..e9e9de6 --- /dev/null +++ b/Figure6/0_data/table/OCT_genetic_clones_2023-09-07.tsv @@ -0,0 +1,428 @@ +sample_id Filtered_tumor_regions genetic_clone +HT308B1-S1H1Fc2U1Z1Bs1 1 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 10 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 11 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 12 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 14 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 15 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 16 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 17 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 19 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 20 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 21 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 22 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 24 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 25 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 26 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 27 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 28 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 29 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 3 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 30 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 31 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 32 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 33 clone_2 +HT308B1-S1H1Fc2U1Z1Bs1 5 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 1 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 10 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 11 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 12 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 13 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 14 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 15 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 16 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 17 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 2 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 20 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 21 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 22 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 23 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 27 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 29 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 3 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 30 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 31 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 32 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 33 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 34 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 35 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 4 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 5 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 6 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 7 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 8 clone_2 +HT308B1-S1H4Fc2U1Z1Bs1 9 clone_2 +HT308B1-S1H5Fc2U1Z1Bs1 1 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 10 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 11 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 12 clone_2 +HT308B1-S1H5Fc2U1Z1Bs1 14 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 16 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 17 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 18 clone_2 +HT308B1-S1H5Fc2U1Z1Bs1 19 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 2 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 20 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 21 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 25 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 29 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 3 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 30 clone_2 +HT308B1-S1H5Fc2U1Z1Bs1 32 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 35 clone_2 +HT308B1-S1H5Fc2U1Z1Bs1 36 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 37 clone_2 +HT308B1-S1H5Fc2U1Z1Bs1 39 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 4 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 
5 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 6 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 7 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 8 clone_3 +HT308B1-S1H5Fc2U1Z1Bs1 9 clone_3 +HT308B1-S2H5Fc2U1Z1Bs1 1 clone_1 +HT308B1-S2H5Fc2U1Z1Bs1 2 clone_1 +HT308B1-S2H5Fc2U1Z1Bs1 3 clone_1 +HT308B1-S2H5Fc2U1Z1Bs1 4 clone_1 +HT308B1-S2H5Fc2U1Z1Bs1 5 clone_1 +HT206B1-S1Fc1U2Z1B1 1 clone_1 +HT206B1-S1Fc1U2Z1B1 11 clone_1 +HT206B1-S1Fc1U2Z1B1 12 clone_1 +HT206B1-S1Fc1U2Z1B1 13 clone_1 +HT206B1-S1Fc1U2Z1B1 14 clone_1 +HT206B1-S1Fc1U2Z1B1 15 clone_1 +HT206B1-S1Fc1U2Z1B1 16 clone_1 +HT206B1-S1Fc1U2Z1B1 17 clone_1 +HT206B1-S1Fc1U2Z1B1 18 clone_2 +HT206B1-S1Fc1U2Z1B1 19 clone_1 +HT206B1-S1Fc1U2Z1B1 20 clone_1 +HT206B1-S1Fc1U2Z1B1 22 clone_1 +HT206B1-S1Fc1U2Z1B1 23 clone_1 +HT206B1-S1Fc1U2Z1B1 24 clone_2 +HT206B1-S1Fc1U2Z1B1 25 clone_1 +HT206B1-S1Fc1U2Z1B1 26 clone_2 +HT206B1-S1Fc1U2Z1B1 27 clone_1 +HT206B1-S1Fc1U2Z1B1 28 clone_1 +HT206B1-S1Fc1U2Z1B1 29 clone_2 +HT206B1-S1Fc1U2Z1B1 3 clone_2 +HT206B1-S1Fc1U2Z1B1 31 clone_1 +HT206B1-S1Fc1U2Z1B1 34 clone_1 +HT206B1-S1Fc1U2Z1B1 35 clone_1 +HT206B1-S1Fc1U2Z1B1 37 clone_1 +HT206B1-S1Fc1U2Z1B1 38 clone_1 +HT206B1-S1Fc1U2Z1B1 4 clone_1 +HT206B1-S1Fc1U2Z1B1 41 clone_1 +HT206B1-S1Fc1U2Z1B1 42 clone_1 +HT206B1-S1Fc1U2Z1B1 6 clone_2 +HT206B1-S1Fc1U2Z1B1 7 clone_1 +HT206B1-S1Fc1U2Z1B1 8 clone_1 +HT206B1-S1Fc1U3Z1B1 11 clone_1 +HT206B1-S1Fc1U3Z1B1 12 clone_1 +HT206B1-S1Fc1U3Z1B1 13 clone_1 +HT206B1-S1Fc1U3Z1B1 14 clone_1 +HT206B1-S1Fc1U3Z1B1 15 clone_1 +HT206B1-S1Fc1U3Z1B1 18 clone_1 +HT206B1-S1Fc1U3Z1B1 19 clone_1 +HT206B1-S1Fc1U3Z1B1 20 clone_1 +HT206B1-S1Fc1U3Z1B1 21 clone_1 +HT206B1-S1Fc1U3Z1B1 22 clone_1 +HT206B1-S1Fc1U3Z1B1 23 clone_1 +HT206B1-S1Fc1U3Z1B1 24 clone_2 +HT206B1-S1Fc1U3Z1B1 25 clone_1 +HT206B1-S1Fc1U3Z1B1 27 clone_1 +HT206B1-S1Fc1U3Z1B1 30 clone_1 +HT206B1-S1Fc1U3Z1B1 31 clone_1 +HT206B1-S1Fc1U3Z1B1 32 clone_1 +HT206B1-S1Fc1U3Z1B1 35 clone_1 +HT206B1-S1Fc1U3Z1B1 36 clone_2 +HT206B1-S1Fc1U3Z1B1 37 clone_2 +HT206B1-S1Fc1U3Z1B1 39 clone_1 +HT206B1-S1Fc1U3Z1B1 4 clone_2 +HT206B1-S1Fc1U3Z1B1 40 clone_1 +HT206B1-S1Fc1U3Z1B1 42 clone_1 +HT206B1-S1Fc1U3Z1B1 44 clone_1 +HT206B1-S1Fc1U3Z1B1 46 clone_1 +HT206B1-S1Fc1U3Z1B1 47 clone_1 +HT206B1-S1Fc1U3Z1B1 50 clone_1 +HT206B1-S1Fc1U3Z1B1 52 clone_2 +HT206B1-S1Fc1U3Z1B1 53 clone_1 +HT206B1-S1Fc1U3Z1B1 57 clone_1 +HT206B1-S1Fc1U3Z1B1 58 clone_2 +HT206B1-S1Fc1U3Z1B1 59 clone_2 +HT206B1-S1Fc1U3Z1B1 60 clone_1 +HT206B1-S1Fc1U3Z1B1 7 clone_1 +HT206B1-S1Fc1U4Z1B1 11 clone_1 +HT206B1-S1Fc1U4Z1B1 12 clone_1 +HT206B1-S1Fc1U4Z1B1 13 clone_1 +HT206B1-S1Fc1U4Z1B1 14 clone_1 +HT206B1-S1Fc1U4Z1B1 17 clone_1 +HT206B1-S1Fc1U4Z1B1 18 clone_1 +HT206B1-S1Fc1U4Z1B1 19 clone_1 +HT206B1-S1Fc1U4Z1B1 20 clone_1 +HT206B1-S1Fc1U4Z1B1 21 clone_1 +HT206B1-S1Fc1U4Z1B1 22 clone_1 +HT206B1-S1Fc1U4Z1B1 23 clone_1 +HT206B1-S1Fc1U4Z1B1 24 clone_2 +HT206B1-S1Fc1U4Z1B1 29 clone_2 +HT206B1-S1Fc1U4Z1B1 31 clone_1 +HT206B1-S1Fc1U4Z1B1 32 clone_1 +HT206B1-S1Fc1U4Z1B1 34 clone_1 +HT206B1-S1Fc1U4Z1B1 37 clone_1 +HT206B1-S1Fc1U4Z1B1 38 clone_1 +HT206B1-S1Fc1U4Z1B1 39 clone_2 +HT206B1-S1Fc1U4Z1B1 4 clone_2 +HT206B1-S1Fc1U4Z1B1 40 clone_1 +HT206B1-S1Fc1U4Z1B1 41 clone_1 +HT206B1-S1Fc1U4Z1B1 46 clone_2 +HT206B1-S1Fc1U4Z1B1 53 clone_1 +HT206B1-S1Fc1U4Z1B1 55 clone_1 +HT206B1-S1Fc1U4Z1B1 56 clone_1 +HT206B1-S1Fc1U4Z1B1 57 clone_1 +HT206B1-S1Fc1U4Z1B1 58 clone_1 +HT206B1-S1Fc1U4Z1B1 59 clone_1 +HT206B1-S1Fc1U4Z1B1 60 clone_1 +HT206B1-S1Fc1U4Z1B1 7 clone_1 +HT206B1-S1Fc1U4Z1B1 8 clone_1 +HT206B1-S1Fc1U5Z1B1 11 clone_1 +HT206B1-S1Fc1U5Z1B1 12 clone_1 +HT206B1-S1Fc1U5Z1B1 13 clone_1 
+HT206B1-S1Fc1U5Z1B1 14 clone_1 +HT206B1-S1Fc1U5Z1B1 15 clone_1 +HT206B1-S1Fc1U5Z1B1 17 clone_1 +HT206B1-S1Fc1U5Z1B1 18 clone_1 +HT206B1-S1Fc1U5Z1B1 19 clone_1 +HT206B1-S1Fc1U5Z1B1 2 clone_2 +HT206B1-S1Fc1U5Z1B1 21 clone_1 +HT206B1-S1Fc1U5Z1B1 22 clone_1 +HT206B1-S1Fc1U5Z1B1 23 clone_1 +HT206B1-S1Fc1U5Z1B1 24 clone_1 +HT206B1-S1Fc1U5Z1B1 25 clone_1 +HT206B1-S1Fc1U5Z1B1 26 clone_2 +HT206B1-S1Fc1U5Z1B1 29 clone_2 +HT206B1-S1Fc1U5Z1B1 33 clone_1 +HT206B1-S1Fc1U5Z1B1 34 clone_1 +HT206B1-S1Fc1U5Z1B1 38 clone_1 +HT206B1-S1Fc1U5Z1B1 39 clone_1 +HT206B1-S1Fc1U5Z1B1 4 clone_1 +HT206B1-S1Fc1U5Z1B1 40 clone_1 +HT206B1-S1Fc1U5Z1B1 41 clone_2 +HT206B1-S1Fc1U5Z1B1 48 clone_1 +HT206B1-S1Fc1U5Z1B1 54 clone_1 +HT206B1-S1Fc1U5Z1B1 55 clone_2 +HT206B1-S1Fc1U5Z1B1 56 clone_1 +HT206B1-S1Fc1U5Z1B1 58 clone_1 +HT206B1-S1Fc1U5Z1B1 7 clone_1 +HT206B1-S1Fc1U5Z1B1 8 clone_1 +HT206B1-U1_ST_Bn1 1 clone_1 +HT206B1-U1_ST_Bn1 10 clone_1 +HT206B1-U1_ST_Bn1 13 clone_1 +HT206B1-U1_ST_Bn1 14 clone_1 +HT206B1-U1_ST_Bn1 16 clone_1 +HT206B1-U1_ST_Bn1 18 clone_1 +HT206B1-U1_ST_Bn1 2 clone_1 +HT206B1-U1_ST_Bn1 21 clone_2 +HT206B1-U1_ST_Bn1 22 clone_2 +HT206B1-U1_ST_Bn1 23 clone_2 +HT206B1-U1_ST_Bn1 3 clone_1 +HT206B1-U1_ST_Bn1 4 clone_1 +HT206B1-U1_ST_Bn1 5 clone_1 +HT206B1-U1_ST_Bn1 7 clone_2 +HT206B1-U1_ST_Bn1 9 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 1 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 10 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 12 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 13 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 14 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 15 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 2 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 3 clone_2 +HT265B1-S1H1Fc2U1Z1Bs1 4 clone_2 +HT265B1-S1H1Fc2U1Z1Bs1 5 clone_2 +HT265B1-S1H1Fc2U1Z1Bs1 6 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 7 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 8 clone_1 +HT265B1-S1H1Fc2U1Z1Bs1 9 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 1 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 11 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 12 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 14 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 16 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 17 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 18 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 21 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 22 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 25 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 27 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 28 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 29 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 3 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 31 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 4 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 5 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 6 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 7 clone_1 +HT339B1-S1H3Fc2U1Z1Bs1 9 clone_1 +HT339B1-S1H3Fc2U2Bs2 1 clone_1 +HT339B1-S1H3Fc2U2Bs2 11 clone_1 +HT339B1-S1H3Fc2U2Bs2 13 clone_1 +HT339B1-S1H3Fc2U2Bs2 14 clone_1 +HT339B1-S1H3Fc2U2Bs2 17 clone_1 +HT339B1-S1H3Fc2U2Bs2 18 clone_1 +HT339B1-S1H3Fc2U2Bs2 19 clone_1 +HT339B1-S1H3Fc2U2Bs2 2 clone_1 +HT339B1-S1H3Fc2U2Bs2 21 clone_1 +HT339B1-S1H3Fc2U2Bs2 22 clone_1 +HT339B1-S1H3Fc2U2Bs2 24 clone_1 +HT339B1-S1H3Fc2U2Bs2 25 clone_1 +HT339B1-S1H3Fc2U2Bs2 26 clone_1 +HT339B1-S1H3Fc2U2Bs2 28 clone_1 +HT339B1-S1H3Fc2U2Bs2 3 clone_1 +HT339B1-S1H3Fc2U2Bs2 34 clone_1 +HT339B1-S1H3Fc2U2Bs2 35 clone_1 +HT339B1-S1H3Fc2U2Bs2 36 clone_1 +HT339B1-S1H3Fc2U2Bs2 37 clone_1 +HT339B1-S1H3Fc2U2Bs2 6 clone_1 +HT339B1-S1H3Fc2U2Bs2 7 clone_1 +HT339B1-S1H3Fc2U2Bs2 8 clone_1 +HT339B1-S1H3Fc2U2Bs2 9 clone_1 +HT268B1-Th1H3Fc2U12Z1Bs1 1 clone_2 +HT268B1-Th1H3Fc2U12Z1Bs1 10 clone_1 +HT268B1-Th1H3Fc2U12Z1Bs1 2 clone_2 +HT268B1-Th1H3Fc2U12Z1Bs1 3 clone_1 +HT268B1-Th1H3Fc2U12Z1Bs1 4 clone_1 +HT268B1-Th1H3Fc2U12Z1Bs1 5 clone_1 +HT268B1-Th1H3Fc2U12Z1Bs1 6 clone_2 +HT268B1-Th1H3Fc2U12Z1Bs1 7 clone_2 +HT268B1-Th1H3Fc2U12Z1Bs1 9 clone_1 
+HT268B1-Th1H3Fc2U22Z1Bs1 1 clone_2 +HT268B1-Th1H3Fc2U22Z1Bs1 10 clone_1 +HT268B1-Th1H3Fc2U22Z1Bs1 2 clone_2 +HT268B1-Th1H3Fc2U22Z1Bs1 4 clone_2 +HT268B1-Th1H3Fc2U22Z1Bs1 5 clone_1 +HT268B1-Th1H3Fc2U22Z1Bs1 6 clone_1 +HT268B1-Th1H3Fc2U22Z1Bs1 7 clone_1 +HT268B1-Th1H3Fc2U22Z1Bs1 8 clone_1 +HT268B1-Th1H3Fc2U22Z1Bs1 9 clone_1 +HT268B1-Th1H3Fc2U2Z1Bs1 1 clone_2 +HT268B1-Th1H3Fc2U2Z1Bs1 2 clone_2 +HT268B1-Th1H3Fc2U2Z1Bs1 3 clone_1 +HT268B1-Th1H3Fc2U2Z1Bs1 4 clone_1 +HT268B1-Th1H3Fc2U2Z1Bs1 5 clone_2 +HT268B1-Th1H3Fc2U2Z1Bs1 6 clone_1 +HT268B1-Th1H3Fc2U2Z1Bs1 8 clone_1 +HT268B1-Th1H3Fc2U32Z1Bs1 1 clone_2 +HT268B1-Th1H3Fc2U32Z1Bs1 10 clone_1 +HT268B1-Th1H3Fc2U32Z1Bs1 2 clone_2 +HT268B1-Th1H3Fc2U32Z1Bs1 3 clone_1 +HT268B1-Th1H3Fc2U32Z1Bs1 4 clone_1 +HT268B1-Th1H3Fc2U32Z1Bs1 5 clone_1 +HT268B1-Th1H3Fc2U32Z1Bs1 6 clone_2 +HT268B1-Th1H3Fc2U32Z1Bs1 7 clone_1 +HT268B1-Th1H3Fc2U32Z1Bs1 8 clone_1 +HT268B1-Th1K3Fc2U1Z1Bs1 1 clone_1 +HT268B1-Th1K3Fc2U1Z1Bs1 10 clone_1 +HT268B1-Th1K3Fc2U1Z1Bs1 2 clone_1 +HT268B1-Th1K3Fc2U1Z1Bs1 3 clone_2 +HT268B1-Th1K3Fc2U1Z1Bs1 4 clone_2 +HT268B1-Th1K3Fc2U1Z1Bs1 5 clone_2 +HT268B1-Th1K3Fc2U1Z1Bs1 6 clone_2 +HT268B1-Th1K3Fc2U1Z1Bs1 7 clone_2 +HT268B1-Th1K3Fc2U1Z1Bs1 8 clone_2 +HT268B1-Th1K3Fc2U1Z1Bs1 9 clone_1 +HT112C1-U1_ST_Bn1 1 clone_2 +HT112C1-U1_ST_Bn1 2 clone_2 +HT112C1-U1_ST_Bn1 3 clone_3 +HT112C1-U1_ST_Bn1 4 clone_1 +HT112C1-U1_ST_Bn1 5 clone_1 +HT112C1-U1_ST_Bn1 6 clone_3 +HT112C1-U2_ST_Bn1 1 clone_2 +HT112C1-U2_ST_Bn1 2 clone_3 +HT112C1-U2_ST_Bn1 3 clone_3 +HT112C1-U2_ST_Bn1 4 clone_1 +HT112C1-U2_ST_Bn1 5 clone_1 +HT112C1-U2_ST_Bn1 6 clone_1 +HT112C1-U2_ST_Bn1 7 clone_2 +HT253C1-Th1K1Fc2U1Z1Bs1 1 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 10 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 11 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 12 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 13 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 14 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 15 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 17 clone_2 +HT253C1-Th1K1Fc2U1Z1Bs1 18 clone_2 +HT253C1-Th1K1Fc2U1Z1Bs1 2 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 3 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 4 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 5 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 6 clone_1 +HT253C1-Th1K1Fc2U1Z1Bs1 9 clone_1 +HT260C1-Th1K1Fc2U1Z1Bs1 1 clone_2 +HT260C1-Th1K1Fc2U1Z1Bs1 10 clone_1 +HT260C1-Th1K1Fc2U1Z1Bs1 11 clone_2 +HT260C1-Th1K1Fc2U1Z1Bs1 12 clone_1 +HT260C1-Th1K1Fc2U1Z1Bs1 2 clone_2 +HT260C1-Th1K1Fc2U1Z1Bs1 3 clone_2 +HT260C1-Th1K1Fc2U1Z1Bs1 4 clone_1 +HT260C1-Th1K1Fc2U1Z1Bs1 5 clone_1 +HT260C1-Th1K1Fc2U1Z1Bs1 6 clone_1 +HT260C1-Th1K1Fc2U1Z1Bs1 7 clone_1 +HT260C1-Th1K1Fc2U1Z1Bs1 8 clone_1 +HT260C1-Th1K1Fc2U1Z1Bs1 9 clone_1 +HT270P1-H2Fc2U1Z1Bs1 1 clone_2 +HT270P1-H2Fc2U1Z1Bs1 10 clone_1 +HT270P1-H2Fc2U1Z1Bs1 11 clone_1 +HT270P1-H2Fc2U1Z1Bs1 12 clone_1 +HT270P1-H2Fc2U1Z1Bs1 13 clone_1 +HT270P1-H2Fc2U1Z1Bs1 14 clone_1 +HT270P1-H2Fc2U1Z1Bs1 15 clone_1 +HT270P1-H2Fc2U1Z1Bs1 16 clone_1 +HT270P1-H2Fc2U1Z1Bs1 17 clone_1 +HT270P1-H2Fc2U1Z1Bs1 18 clone_1 +HT270P1-H2Fc2U1Z1Bs1 19 clone_1 +HT270P1-H2Fc2U1Z1Bs1 2 clone_2 +HT270P1-H2Fc2U1Z1Bs1 20 clone_1 +HT270P1-H2Fc2U1Z1Bs1 21 clone_1 +HT270P1-H2Fc2U1Z1Bs1 22 clone_1 +HT270P1-H2Fc2U1Z1Bs1 23 clone_2 +HT270P1-H2Fc2U1Z1Bs1 3 clone_2 +HT270P1-H2Fc2U1Z1Bs1 4 clone_1 +HT270P1-H2Fc2U1Z1Bs1 5 clone_2 +HT270P1-H2Fc2U1Z1Bs1 6 clone_1 +HT270P1-H2Fc2U1Z1Bs1 7 clone_1 +HT270P1-H2Fc2U1Z1Bs1 8 clone_1 +HT270P1-H2Fc2U1Z1Bs1 9 clone_1 +HT288P1-S1H4Fc2U1Z1Bs1 1 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 10 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 11 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 12 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 13 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 14 clone_2 
+HT288P1-S1H4Fc2U1Z1Bs1 15 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 16 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 17 clone_1 +HT288P1-S1H4Fc2U1Z1Bs1 18 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 2 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 20 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 21 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 22 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 23 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 24 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 3 clone_3 +HT288P1-S1H4Fc2U1Z1Bs1 4 clone_3 +HT288P1-S1H4Fc2U1Z1Bs1 5 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 6 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 7 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 8 clone_2 +HT288P1-S1H4Fc2U1Z1Bs1 9 clone_2 +HT306P1-S1H1Fc2U1Z1Bs1 1 clone_1 +HT306P1-S1H1Fc2U1Z1Bs1 10 clone_1 +HT306P1-S1H1Fc2U1Z1Bs1 11 clone_3 +HT306P1-S1H1Fc2U1Z1Bs1 12 clone_3 +HT306P1-S1H1Fc2U1Z1Bs1 2 clone_2 +HT306P1-S1H1Fc2U1Z1Bs1 3 clone_1 +HT306P1-S1H1Fc2U1Z1Bs1 4 clone_3 +HT306P1-S1H1Fc2U1Z1Bs1 5 clone_1 +HT306P1-S1H1Fc2U1Z1Bs1 6 clone_1 +HT306P1-S1H1Fc2U1Z1Bs1 7 clone_1 +HT306P1-S1H1Fc2U1Z1Bs1 8 clone_1 +HT306P1-S1H1Fc2U1Z1Bs1 9 clone_1 diff --git a/Figure6/0_palette/script/color_STsubclone.r b/Figure6/0_palette/script/color_STsubclone.r new file mode 100644 index 0000000..e4292bf --- /dev/null +++ b/Figure6/0_palette/script/color_STsubclone.r @@ -0,0 +1,74 @@ +# Cancer color from Clara: +source('/diskmnt/Projects/Users/cliu/pancan_ST/color.R') + +color_cancer_discover_full = c(Breast = "#d386b7", Colorectal = "#b75420", Pancreas = "#5d9db9" +) +color_cancer_discover = c(BRCA = "#d386b7", CRC = "#b75420", PDAC = "#5d9db9") +color_cancer_all = c(color_cancer_discover, + ccRCC = "#BB9946", + RCC = "#BB9946", # "#22bc6c", + CHOL = "#bfa8db", #Cholangiocarcinoma = "#bfa8db", + UCEC = "#f39c12" +) + +color_n_section = c('5' = '#3862a5','4'='#588fcc','3'='#85b1dd','2'='#abcfee','1'='#dfe9f1') + +color_organ = c( + "Pancreas" = "#8EA0C7", + "Kidney" = "#E9BE55", + "Breast" = "#d386b7", + "Liver" = "#55ae90", + "Lung" = "#88bbee", + "Colon" = "#E9916E", + "Bile Duct" = "#b15e22", + "Uretus" = "#f39c12", + "Cholangio" = "#bfa8db", + "Prostate" = "#2980b9" +) + +color_tumor_type = c( + Metastasis = "#C54F69", + Primary = "#55749D" +) + +color_assay = c('FFPE' = '#865C8F', 'OCT' = '#99AE62') + +color_cohort = c('Validation' = "#0072B2", 'Discovery' = '#D55E00') + +# A distinct color reference set +# by GPT and manual selection +color_distinct = c( +"#e74c3c", #(bright red) +"#c0392b", #(deep red) +"#b15e22", #(burnt orange) +"#b9471f", #(rusty orange) +"#e7b3a3", #(pale peach) +"#e89c84", #(coral) +"#f39c12", #(orange) +"#f1c40f", #(bright yellow) +"#e5b15f", #(mustard yellow) +"#e4bf80", #(pale gold) +"#d3b98d", #(pale yellow-brown) +"#d9c9aa", #(pale beige) +"#27ae60", #(deep green) +"#1abc9c", +"#a8bfb1", #(gray-green) +"#8299a2", #(slate blue) +"#c0d2c5", #(pale sage green) +"#5fa4ad", #(seafoam green) +"#a5b1c2", #(pale blue-gray) +"#70b6c1", #(pale teal) +"#2980b9", #(bright blue) +"#bfa8db", #(pale lavender) +"#b894d0", #(pale lilac) +"#d8a8d9", #(pale pink) +"#d1a6bc", #(pale mauve) +"#c88fbf", #(mauve-pink) +"#d68ab5" #(soft pink) +) + +color_size_group = c('small' = '#4E6F99', 'mid' = '#65D196', 'large' = '#E7E431') +color_primary_met = c('Primary' = '#7B9AD0', 'Metastasis' = '#EE9933') + + +# c('Brain' = '#0072B2', 'Kidney' = '#D55E00', 'Liver' = '#CC79A7', 'Lung' = "#CCff12",'Lymph Node' = '#11ffbb',"Muscle" = "#22FF13",'Skin' = '#5572B2', 'Spleen' = '#D55E33', 'Stomach' = '#CC7912'), \ No newline at end of file diff --git a/Figure6/1_run_PASTE2/runPASTE.sh b/Figure6/1_run_PASTE2/runPASTE.sh new file mode 100644 index 0000000..1119031 --- 
/dev/null
+++ b/Figure6/1_run_PASTE2/runPASTE.sh
@@ -0,0 +1,66 @@
+#! /bin/bash
+# Activate the conda environment in a bash script
+eval "$(conda shell.bash hook)"
+conda activate PASTE
+
+# --------------------------------------------------
+# Run variables. Change these
+SAMPLE_NAME="HT206B1"
+# Sample list
+# Sample Paths
+SLIDE_NAMES=(
+"HT206B1-S1H1U2"
+"HT206B1-S1H1U3"
+"HT206B1-S1H1U4"
+"HT206B1-S1H1U5"
+)
+SLIDE_PATHS=(
+"PATH/TO/HT206B1/H1/HT206B1-S1Fc1U2Z1B1/outs"
+"PATH/TO/HT206B1/H1/HT206B1-S1Fc1U3Z1B1/outs"
+"PATH/TO/HT206B1/H1/HT206B1-S1Fc1U4Z1B1/outs"
+"PATH/TO/HT206B1/H1/HT206B1-S1Fc1U5Z1B1/outs"
+)
+
+# --------------------------------------------------
+# Project root. Change if needed
+PROJECT_ROOT="ANALYSIS_PATH/1_run_paste2/out/"
+
+# Script constants. No need to change
+RUN_PASTE_SCRIPT="ANALYSIS_PATH/script/src/1_runPASTE.py"
+
+# Get PASTE2 path
+# This can be obtained by cloning the PASTE2 GitHub repo:
+# git clone git@github.com:raphael-group/paste2.git
+PASTE2_PATH="ANALYSIS_PATH/0_paste2_github/src/"
+
+# --------------------------------------------------
+# Options for the 1_runPASTE.py script
+#   -h, --help            show this help message and exit
+#   -1 SLIDE1_PATH, --slide1_path=SLIDE1_PATH
+#                         slide1 path
+#   -2 SLIDE2_PATH, --slide2_path=SLIDE2_PATH
+#                         slide2 path
+#   -a SLIDE1_NAME, --slide1_name=SLIDE1_NAME
+#                         slide1 name
+#   -b SLIDE2_NAME, --slide2_name=SLIDE2_NAME
+#                         slide2 name
+#   -o OUT_ROOT, --out_root=OUT_ROOT
+#                         output path
+#   -p PASTE2_PATH, --paste2_path=PASTE2_PATH
+#                         paste2 path
+#   -r RUN_PART, --run_part=RUN_PART
+#                         run part. 1=PASTE1, 2=PASTE2, 3=PASTE2-histology. 12=PASTE1+PASTE2. 13=PASTE1+PASTE2-histology. 23=PASTE2+PASTE2-histology. 123=PASTE1+PASTE2+PASTE2-histology
+# --------------------------------------------------
+# Run script
+# Loop through adjacent pairs of slides and align each pair
+for (( i=0; i<${#SLIDE_NAMES[@]}-1; i++ ))
+do
+    python3 $RUN_PASTE_SCRIPT \
+        --paste2_path $PASTE2_PATH \
+        --slide1_path ${SLIDE_PATHS[$i]} \
+        --slide2_path ${SLIDE_PATHS[$i+1]} \
+        --slide1_name ${SLIDE_NAMES[$i]} \
+        --slide2_name ${SLIDE_NAMES[$i+1]} \
+        --out_root "$PROJECT_ROOT/$SAMPLE_NAME" \
+        --run_part 123
+done
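+# Example of the single invocation the loop above produces for the first pair
+# of slides (paths illustrative, taken from the SLIDE_PATHS placeholders):
+#   python3 $RUN_PASTE_SCRIPT \
+#     --paste2_path $PASTE2_PATH \
+#     --slide1_path PATH/TO/HT206B1/H1/HT206B1-S1Fc1U2Z1B1/outs \
+#     --slide2_path PATH/TO/HT206B1/H1/HT206B1-S1Fc1U3Z1B1/outs \
+#     --slide1_name HT206B1-S1H1U2 --slide2_name HT206B1-S1H1U3 \
+#     --out_root "$PROJECT_ROOT/HT206B1" --run_part 123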
default="/diskmnt/Datasets/Spatial_Transcriptomics/Analysis/ST_subclone/30-PASTE2/3_PASTE2_runs/0_paste2_github/src/", + dest="paste2_path", help="paste2 path") +# Option to select which part to run. Default only PASTE1. +parser.add_option('-r', '--run_part', action="store", default="123", dest="run_part", help="run part. 1=PASTE1, 2=PASTE2, 3=PASTE2-histology. 12=PASTE1+PASTE2. 13=PASTE1+PASTE2-histology. 23=PASTE2+PASTE2-histology. 123=PASTE1+PASTE2+PASTE2-histology") +# Set maxinum number of iterations for PASTE1 +parser.add_option('-m', '--max_iter', action="store", default=1000, dest="max_iter", help="max number of iterations for PASTE1") + +options, args = parser.parse_args() +slide1_path = options.slide1_path +slide2_path = options.slide2_path +slide1_name = options.slide1_name +slide2_name = options.slide2_name +out_root = options.out_root +paste2_path = options.paste2_path +run_part = options.run_part +max_iter = int(options.max_iter) + +# create sample name +sample_name_list = [slide1_name, slide2_name] +sample_name_list.sort() +sample_run_name = f"{sample_name_list[0]}_{sample_name_list[1]}" +print(f"Running PASTE for {sample_run_name}") + +# create out_path +out_path = f"{out_root}/{sample_run_name}" +import os +if not os.path.exists(out_path): + os.makedirs(out_path) + +# Load and Preprocess slices +def load_and_preprocess(sample_path: str): + visium_sample = sq.read.visium(sample_path) + sc.pp.filter_genes(visium_sample, min_counts = 15) + sc.pp.filter_cells(visium_sample, min_counts = 100) + # make variable name unique + visium_sample.var_names_make_unique() + return visium_sample + + +# Load visium +file_list = [ + slide1_path, + slide2_path +] + +slices = [load_and_preprocess(file_path) for file_path in file_list] +slice1 = slices[0] +slice2 = slices[1] + +# Arguments from command line PASTE: https://github.com/raphael-group/paste/blob/main/paste-cmd-line.py +args_paste={ + 'alpha' :0.1, + 'cost' :"kl", +} + +# Plot parameters +spot_size = 70 + +############################################## +# Use PASTE2 +############################################## +# preprocess +# first add rgb to slices +slice1.obsm['rgb'] = slice1.obsm['spatial'] +slice2.obsm['rgb'] = slice2.obsm['spatial'] + +# import paste2 +# add paste2 to the path +import sys +sys.path.append(paste2_path) +from paste2.PASTE2 import partial_pairwise_align, partial_pairwise_align_histology + +""" + Optimal partial alignment of two slices using both gene expression and histological image information. + + sliceA, sliceB must be AnnData objects that contain .obsm['rgb'], which stores the RGB value of each spot in the histology image. 
+""" +if(run_part.find("2") != -1): + # Check if output file exists already + output_part2_path = f"{out_path}/2_PASTE2_partial_pairwise_numpy.npy" + if os.path.exists(output_part2_path): + print("PASTE2 already run, loading results") + slice12_al = np.load(output_part2_path) + else: + # Run PASTE2 alignment + print("Running pairwise alignment from PASTE2") + slice12_al = partial_pairwise_align(slice1, slice2, s= args_paste['alpha'], dissimilarity=args_paste['cost']) # this will take a while + # save both results as npy + np.save(output_part2_path, slice12_al) + + # PLOT ------------------------------------------------ + # To visualize the alignment you can stack the slices + # according to the alignment pi + slices, pis = [slices[0], slices[1]], [slice12_al] + new_slices = pst.stack_slices_pairwise(slices, pis) + + slice_colors = ['#e41a1c','#377eb8'] + plt.figure(figsize=(7,7)) + for i in range(len(new_slices)): + pst.plot_slice(new_slices[i],slice_colors[i],s=spot_size) + + plt.legend(handles=[mpatches.Patch(color=slice_colors[0], label=sample_name_list[0]), + mpatches.Patch(color=slice_colors[1], label=sample_name_list[1])]) + plt.gca().invert_yaxis() + plt.axis('off') + #plt.show() + plt.savefig(f"{out_path}/2_PASTE2_partial_pairwise.pdf", format="pdf", bbox_inches="tight") + + # Generate new coordinates + # From the command line script: https://github.com/raphael-group/paste/blob/main/paste-cmd-line.py + n_slices = 2 + for i in range(n_slices): + # Format to match original tissue_positions.csv + # barcode,in_tissue,array_row,array_col,pxl_row_in_fullres,pxl_col_in_fullres,pxl_row_in_fullres_new,pxl_col_in_fullres_new + df_tissue = slices[i].obs[['in_tissue','array_row','array_col']] #.reset_index().rename(columns = {'index':'barcode'}) + df_coord_orig = pd.DataFrame(slices[i].obsm['spatial'], index = df_tissue.index, columns = ['pxl_row_in_fullres','pxl_col_in_fullres']) + df_coord_new = pd.DataFrame(new_slices[i].obsm['spatial'], index = df_tissue.index, columns = ['pxl_row_in_fullres_new','pxl_col_in_fullres_new']) + df_coord = pd.concat([df_tissue,df_coord_orig, df_coord_new], axis = 1).reset_index().rename(columns = {'index':'barcode'}) + df_coord.to_csv(f"{out_path}/2_PASTE2_partial_pairwise_" + sample_name_list[i] + "_new_tissue_positions.csv", index = False) + + + +############################################## +# PASTE2 algnment using histology +############################################## +if(run_part.find("3") != -1): + # Check if output file exists already + npy_out_part3_path=f"{out_path}/3_PASTE2_partial_pairwise_histology_numpy.npy" + if(os.path.isfile(npy_out_part3_path)): + print("PASTE2 histology already run, loading results") + # Load file directly# Load PASTE2 histology alignment + slice12_al_hist = np.load(npy_out_part3_path, allow_pickle=True) + else: + # Run PASTE2 histology alignment + print("Running pairwise alignment from PASTE2 using histology") + slice12_al_hist = partial_pairwise_align_histology(slice1, slice2, s= args_paste['alpha'], dissimilarity=args_paste['cost']) # this will take a while + np.save(npy_out_part3_path, slice12_al_hist) + + + # Plot ------------------------------------------------ + # To visualize the alignment you can stack the slices + # according to the alignment pi + slices, pis = [slices[0], slices[1]], [slice12_al_hist] + new_slices = pst.stack_slices_pairwise(slices, pis) + + slice_colors = ['#e41a1c','#377eb8'] + plt.figure(figsize=(7,7)) + for i in range(len(new_slices)): + 
+        pst.plot_slice(new_slices[i],slice_colors[i],s=spot_size)
+
+    plt.legend(handles=[mpatches.Patch(color=slice_colors[0], label=sample_name_list[0]),
+                        mpatches.Patch(color=slice_colors[1], label=sample_name_list[1])])
+    plt.gca().invert_yaxis()
+    plt.axis('off')
+    plt.savefig(f"{out_path}/3_PASTE2_partial_pairwise_histology.pdf", format="pdf", bbox_inches="tight")
+
+    n_slices = 2
+    for i in range(n_slices):
+        # Format to match the original tissue_positions.csv
+        # barcode,in_tissue,array_row,array_col,pxl_row_in_fullres,pxl_col_in_fullres,pxl_row_in_fullres_new,pxl_col_in_fullres_new
+        df_tissue = slices[i].obs[['in_tissue','array_row','array_col']] #.reset_index().rename(columns = {'index':'barcode'})
+        df_coord_orig = pd.DataFrame(slices[i].obsm['spatial'], index = df_tissue.index, columns = ['pxl_row_in_fullres','pxl_col_in_fullres'])
+        df_coord_new = pd.DataFrame(new_slices[i].obsm['spatial'], index = df_tissue.index, columns = ['pxl_row_in_fullres_new','pxl_col_in_fullres_new'])
+        df_coord = pd.concat([df_tissue,df_coord_orig, df_coord_new], axis = 1).reset_index().rename(columns = {'index':'barcode'})
+        df_coord.to_csv(f"{out_path}/3_PASTE2_partial_pairwise_histology_" + sample_name_list[i] + "_new_tissue_positions.csv", index = False)
diff --git a/Figure6/2_PASTE2_analysis/1_plot_paste_result/script/check_alignment_batch2.r b/Figure6/2_PASTE2_analysis/1_plot_paste_result/script/check_alignment_batch2.r
new file mode 100644
index 0000000..d38eae6
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/1_plot_paste_result/script/check_alignment_batch2.r
@@ -0,0 +1,142 @@
+library(tidyverse)
+library(patchwork)
+
+# Out
+project_path = ""
+out_path = str_glue("{project_path}/2_PASTE2_analysis/1_plot_paste_result/out/")
+
+dir.create(out_path, recursive = T, showWarnings = F)
+
+# List all case/piece to plot
+paste_result_path = str_glue("{project_path}/1_run_PASTE2/out")
+all_piece_names = paste_result_path %>% {intersect(list.files(.), list.dirs(., full.names= F))}
+
+for(piece_use in all_piece_names){
+    # Coord path
+    coord_path = str_glue("{paste_result_path}/{piece_use}")
+
+    sample_names = list.files(coord_path, pattern = 'csv') %>% str_remove('.csv')
+
+    all_coord = map(sample_names, function(sample_nm){
+        read_csv(str_glue("{coord_path}/{sample_nm}.csv")) %>%
+            mutate(sample_name = sample_nm)
+    }) %>% bind_rows()
+
+
+    # Generate pairwise plots
+    piece_name = str_remove(sample_names, '[0-9]$') %>% unique
+    number_range = str_remove(sample_names, piece_name) %>% as.numeric %>% sort
+    n_slice = length(sample_names)
+    plot_group = data.frame(
+        section1 = str_c(piece_name, seq(min(number_range), max(number_range-1))),
+        section2 = str_c(piece_name, seq(min(number_range) + 1, max(number_range)))
+    )
+
+
+    p_pairwise = pmap(plot_group, function(section1, section2){
+        all_coord %>% filter(sample_name %in% c(section1, section2)) %>%
+            ggplot(aes(x = x, y = y, color = sample_name)) +
+            geom_point(size = 0.7) +
+            labs(title = str_glue("{section1} vs {section2}")) +
+            theme_bw() + theme(aspect.ratio = 1)
+    })
+
+    p_all = ggplot(all_coord, aes(x = x, y = y, color = sample_name)) +
+        geom_point(size = 0.7) +
+        theme_bw() + theme(aspect.ratio = 1) +
+        labs(title = "All sections")
+
+    # put plots in a separate folder per piece
+    outpath_piece = str_glue("{out_path}/{piece_use}/")
+    dir.create(outpath_piece, recursive = T, showWarnings = F)
+    pdf(str_glue("{outpath_piece}/{piece_name}_all_and_pairwise.pdf"), width = 10, height = 10)
+    wrap_plots(c(list(p_all), p_pairwise), ncol = 2) %>% print()
+    dev.off()
+
+}
+
+
+### ------------------ 2. Check alignment result ------------------ ###
+library(googlesheets4)
+# Parameters (piece_name carries over from part 1 above)
+piece_name
+# Part 2 uses Seurat clusters to check the alignment result
+gs4_deauth()
+google_sheet_url = ""
+sheet_use = read_sheet(google_sheet_url, sheet = 'Sample_multisections') %>%
+    filter(PASTEBatch == 'Batch2')
+
+# Load seurat objects
+obj_list = pmap(sheet_use, function(SeuratObject,...){
+    readRDS(SeuratObject)
+}) %>% setNames(sheet_use$PASTEName)
+
+# Test loading sample
+for(piece_use in all_piece_names){
+    message(piece_use)
+    # Load object
+    sheet_piece_use = sheet_use %>% filter(PASTEpieceName == piece_use)
+    obj_list_use = pmap(sheet_piece_use, function(SeuratObject,...){
+        readRDS(SeuratObject)
+    }) %>% setNames(sheet_piece_use$PASTEFileName)
+
+    # Coord path
+    coord_path = str_glue("{paste_result_path}/{piece_use}")
+
+    sample_names = list.files(coord_path, pattern = 'csv') %>% str_remove('.csv')
+
+    all_coord = map(sample_names, function(sample_nm){
+        read_csv(str_glue("{coord_path}/{sample_nm}.csv")) %>%
+            mutate(sample_name = sample_nm)
+    }) %>% bind_rows()
+
+    # Add seurat clusters to the table
+    all_clusters = obj_list_use %>% imap(function(obj, sample_nm){
+        obj@meta.data %>% rownames_to_column('barcode') %>%
+            select(barcode, seurat_clusters) %>%
+            mutate(sample_name = sample_nm)
+    }) %>% bind_rows()
+
+    all_coord_clusters = left_join(all_coord, all_clusters, by = c('sample_name', 'barcode'))
+
+    # PLOT
+    # Generate pairwise plots
+    piece_name = sheet_piece_use$PASTEpieceName %>% unique
+    # number_range = str_remove(sample_names, piece_name) %>% as.numeric %>% sort
+    n_slice = length(sample_names)
+    plot_group = data.frame(
+        section1 = sample_names[1:(n_slice-1)],
+        section2 = sample_names[2:n_slice]
+    )
+
+    p_pairwise = pmap(plot_group, function(section1, section2){
+        all_coord_clusters %>%
+            filter(sample_name %in% c(section1, section2)) %>%
+            ggplot(aes(x = x, y = y, color = seurat_clusters)) +
+            geom_point(size = 0.7, alpha = 0.4) +
+            labs(title = str_glue("{section1} vs {section2}")) +
+            #scale_color_manual(values = c('black', 'red')) +
+            theme_bw() + theme(aspect.ratio = 1)
+    })
+    p_individuals = map(sample_names, function(slice_use){
+        all_coord_clusters %>% filter(sample_name %in% c(slice_use)) %>%
+            ggplot(aes(x = x, y = y, color = seurat_clusters)) +
+            geom_point(size = 0.7) +
+            labs(title = str_glue("{slice_use}")) +
+            theme_bw() + theme(aspect.ratio = 1)
+    })
+
+    p_all = ggplot(all_coord_clusters, aes(x = x, y = y, color = seurat_clusters)) +
+        geom_point(size = 0.7, alpha = 0.4) +
+        theme_bw() + theme(aspect.ratio = 1) +
+        labs(title = "All sections")
+
+    # put plots in a separate folder per piece
+    outpath_piece = str_glue("{out_path}/{piece_use}/")
+    dir.create(outpath_piece, recursive = T, showWarnings = F)
+    pdf(str_glue("{outpath_piece}/{piece_name}_all_and_pairwise_seurat_clusters.pdf"), width = 10, height = 10)
+    wrap_plots(c(list(p_all), p_pairwise), ncol = 2) %>% print()
+    wrap_plots(p_individuals, ncol = 2) %>% print()
+    dev.off()
+
+}
diff --git a/Figure6/2_PASTE2_analysis/2_get_matching_spot/script/1_get_matching_spot.workflow_batch2.sh b/Figure6/2_PASTE2_analysis/2_get_matching_spot/script/1_get_matching_spot.workflow_batch2.sh
new file mode 100644
index 0000000..883a936
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/2_get_matching_spot/script/1_get_matching_spot.workflow_batch2.sh
@@ -0,0 +1,8 @@
+eval "$(conda shell.bash hook)" # activate conda environment
+conda activate seurat4.3
+
+PROJECT_PATH=""
+analysis_path="$PROJECT_PATH/2_get_matching_spot/script"
+
+mkdir -p $analysis_path/log
+Rscript --no-save --no-restore --verbose $analysis_path/1_get_matching_spot_batch2.r > $analysis_path/log/run.log 2> $analysis_path/log/run.err
\ No newline at end of file
diff --git a/Figure6/2_PASTE2_analysis/2_get_matching_spot/script/1_get_matching_spot_batch2.r b/Figure6/2_PASTE2_analysis/2_get_matching_spot/script/1_get_matching_spot_batch2.r
new file mode 100644
index 0000000..4aff00e
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/2_get_matching_spot/script/1_get_matching_spot_batch2.r
@@ -0,0 +1,124 @@
+# This script finds, for each spot on the current slice, the closest spot on the adjacent slice
+library(tidyverse)
+library(patchwork)
+library(googlesheets4)
+library(tictoc)
+library(future)
+library(furrr)
+
+plan(multisession, workers = 10)
+
+# Out
+project_path = ""
+out_path = str_glue("{project_path}/2_PASTE2_analysis/2_get_matching_spot/out/")
+dir.create(out_path, recursive = T, showWarnings = F)
+
+# Load tracking sheet
+gs4_deauth()
+google_sheet_url = ""
+sheet_multi = read_sheet(google_sheet_url, sheet = 'Sample_multisections')
+
+# Load subclone table
+subclone_df = read_tsv(str_glue('{project_path}/0_data/table/OCT_genetic_clones_2023-09-18.tsv'))
+
+# List all case/piece to plot
+paste2_result_root = str_glue('{project_path}/1_run_PASTE2/out')
+all_piece_names = paste2_result_root %>% {intersect(list.files(.), list.dirs(., full.names= F))}
+
+for(piece_use in all_piece_names){
+    ## ------------------ 1. Combine all alignment results and meta.data ------------------ ###
+    # 1. Load PASTE2 coordinates
+    message(piece_use)
+    coord_path = str_glue("{paste2_result_root}/{piece_use}")
+
+    sample_names = list.files(coord_path, pattern = 'csv') %>% str_remove('.csv') %>% gtools::mixedsort(.)
+
+    all_coord = map(sample_names, function(sample_nm){
+        read_csv(str_glue("{coord_path}/{sample_nm}.csv")) %>%
+            mutate(
+                PASTEpieceName = piece_use,
+                PASTEFileName = sample_nm
+            )
+    }) %>% bind_rows()
+
+    # 2. Load morph regions
+    sheet_piece_use = sheet_multi %>% filter(PASTEpieceName == piece_use)
+    morph_df = pmap(sheet_piece_use, ~with(list(...), {
+        read_tsv(MorphOutput) %>%
+            dplyr::rename(barcode = '...1') %>%
+            # Clean columns
+            select(barcode, `Filtered tumor regions`) %>%
+            dplyr::rename(Filtered_tumor_regions = `Filtered tumor regions`) %>%
+            mutate(
+                PASTEFileName = all_of(PASTEFileName),
+                PASTEpieceName = all_of(PASTEpieceName),
+                LibraryName = all_of(LibraryName)
+            )
+    })) %>%
+        bind_rows()
+    # Add morph regions to coord
+    all_coord = left_join(all_coord, morph_df, by = c('PASTEpieceName', 'PASTEFileName', 'barcode'))
+
+    # 3. Add genetic subclone and LibraryName
+    sheet_multi_use = sheet_multi %>% filter(PASTEpieceName == piece_use) %>% select(PASTEpieceName, PASTEFileName, LibraryName)
+    all_coord_subclone = left_join(all_coord, subclone_df, c('LibraryName' = 'sample_id','Filtered_tumor_regions'))
+
+    # ------------------------------------------ ###
+    # Make a connection between each spot and the adjacent slice
+    # 4. Build the pairs of adjacent sections
+    sheet_piece_use = sheet_multi %>% filter(PASTEpieceName == piece_use)
+    piece_name = sheet_piece_use$PASTEpieceName %>% unique
+    # number_range = str_remove(sample_names, piece_name) %>% as.numeric %>% sort
+    n_slice = length(sample_names)
+    plot_group = data.frame(
+        section1 = sample_names[1:(n_slice-1)],
+        section2 = sample_names[2:n_slice]
+    )
+
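+    # NOTE (illustrative alternative, not used here): a k-d tree lookup such as
+    # RANN::nn2 can find each spot's nearest neighbor on the adjacent section in
+    # a single call, typically much faster than the row-wise search below:
+    #   nn = RANN::nn2(section2_df[, c('x_2','y_2')], section1_df[, c('x_1','y_1')], k = 1)
+    #   # nn$nn.idx: row of the closest spot in section2_df; nn$nn.dists: its distance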
+    # Method 1b: calculate the minimum distance per spot (takes longer).
+    # Reduce the search space to +/- 100 around each spot
+    tic('Outer loop')
+    result_list = pmap(plot_group, function(section1, section2){
+        tic('Inner loop')
+        message(section1, ' and ', section2)
+        section1_df = all_coord_subclone %>% filter(PASTEFileName == section1) %>% setNames(str_c(names(.), '_1'))
+        section2_df = all_coord_subclone %>% filter(PASTEFileName == section2) %>% setNames(str_c(names(.), '_2'))
+
+        section1_2_df = section1_df %>%
+            future_pmap(~with(list(...), {
+                section2_df %>%
+                    # Add section 1 info - spot level
+                    mutate(
+                        #x_1 = all_of(x_1), y_1 = all_of(y_1),
+                        barcode_1 = all_of(barcode_1),
+                        Filtered_tumor_regions_1 = all_of(Filtered_tumor_regions_1),
+                        genetic_clone_1 = all_of(genetic_clone_1),
+                        PASTEpieceName_1 = all_of(PASTEpieceName_1),
+                        PASTEFileName_1 = all_of(PASTEFileName_1),
+                        LibraryName_1 = all_of(LibraryName_1)
+                    ) %>%
+                    # reduce the search space to +/- 100
+                    filter(between(x_2, x_1 - 100, x_1 + 100), between(y_2, y_1 - 100, y_1 + 100)) %>%
+                    mutate(spot_dist = sqrt((x_1 - x_2)^2 + (y_1 - y_2)^2)) %>%
+                    slice(which.min(spot_dist)) %>%
+                    mutate(x_1 = all_of(x_1), y_1 = all_of(y_1))
+            })) %>% bind_rows() %>%
+            # section level info
+            mutate(section1 = section1, section2 = section2)
+        # Reorder to put all _1 columns at the front
+        section1_2_df = section1_2_df %>% select(section1, section2, spot_dist, ends_with('_1'), ends_with('_2'))
+        toc(); print(Sys.time())
+        return(section1_2_df)
+    })
+    toc(); print(Sys.time())
+
+    # Save result
+    #dir.create(str_glue("{out_path}"), recursive = T, showWarnings = F)
+    result_list %>% bind_rows() %>%
+        write_csv(str_glue("{out_path}/{piece_name}_min_dist.csv"))
+
+}
+
+
+
diff --git a/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/1_get_multi_section_regions.r b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/1_get_multi_section_regions.r
new file mode 100644
index 0000000..95b2f6d
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/1_get_multi_section_regions.r
@@ -0,0 +1,408 @@
+# SEARCH CRITERIA for the multi-section region definition
+library(tidyverse)
+library(patchwork)
+library(googlesheets4)
+library(tictoc)
+library(future)
+library(furrr)
+library(igraph)
+
+# Functions
+# Create dummy sankey links
+source('PATH/TO/ANALYSIS/FOLDER/4_multi_section_regions/script/src/function_sankey_make_dummy_link.r')
+# Colors
+source('PATH/TO/ANALYSIS/FOLDER/4_multi_section_regions/script/src/function_sankey_colors.r')
+
+
+## optparse list for the necessary inputs
+library(optparse)
+option_list = list(
+    make_option(c("-a", "--analysis_folder"), type="character", default=NULL,
+                help="analysis_folder"),
+    make_option(c("-p", "--piece_use"), type="character", default=NULL,
+                help="piece_use"),
+    make_option(c("-m", "--matching_out_path"), type="character", default=NULL,
+                help="matching_out_path"),
+    # n_spot threshold
+    make_option(c("-n", "--n_spot_minimum"), type="numeric", default=NULL,
+                help="n_spot_minimum")
+)
+
+opt_parser = OptionParser(option_list=option_list)
+opt = parse_args(opt_parser)
+
+
+# parameters
+piece_use = opt$piece_use
+n_spot_minimum = opt$n_spot_minimum
+
+analysis_folder = opt$analysis_folder
+out_path = str_glue("{analysis_folder}/out/{piece_use}")
+dir.create(out_path, recursive = T, showWarnings = F)
+
+
+######## 0. Loading data and info ########################################################
+# Load tracking sheet
+gs4_deauth()
+sample_info_url = ""
+sheet_multi = read_sheet(sample_info_url, sheet = 'Sample_multisections') %>%
+    filter(!is.na(PASTEFileName)) %>%
+    filter(PASTEpieceName == piece_use)
+
+
+# Load individual PASTE2 coordinates
+paste2_df_list = pmap(sheet_multi, ~with(list(...), {
+    message(LibraryName, ' ', piece_id, ' ', PASTEFileName)
+    read_csv(PASTE2XinHaoCoordinate) %>%
+        mutate(
+            PASTEpieceName = PASTEpieceName,
+            PASTEFileName = PASTEFileName,
+            LibraryName = LibraryName
+        )
+})) %>% setNames(sheet_multi$LibraryName)
+
+# Load matching spots
+matching_out_path = opt$matching_out_path
+if(!file.exists(str_glue('{matching_out_path}/{piece_use}_min_dist.csv'))){
+    message(str_glue('{matching_out_path}/{piece_use}_min_dist.csv does not exist'))
+    message("Quit this run.")
+    quit()
+}
+matching_df = read_csv(str_glue('{matching_out_path}/{piece_use}_min_dist.csv'))
+
+# Load objects
+obj_list = pmap(sheet_multi, ~with(list(...), {
+    message("Loading: ", LibraryName, " object")
+    read_rds(SeuratObject)
+})) %>% setNames(sheet_multi$LibraryName)
+
+# 1. create networks of spots on the same slide
+##############################################################################################
+##############################################################################################
+# 1. Count connections between slices
+# 1a. Get adjacent spots from the same sections
+# NEW: 2023/10/31 Filter out connections with fewer than n_spot_minimum spots
+count_connection_df = matching_df %>%
+    # filter out NonTumor spots
+    filter(
+        Filtered_tumor_regions_1 != '0',
+        Filtered_tumor_regions_2 != '0'
+    ) %>%
+    count(section1, section2, Filtered_tumor_regions_1, Filtered_tumor_regions_2) %>%
+    # v BUG: THIS SOMEHOW DOESN'T WORK. NOT SURE WHY.
+    filter(n > n_spot_minimum) %>% # keep connections with more than n_spot_minimum spots
+    group_by(Filtered_tumor_regions_1) %>%
+    mutate(percent = n/sum(n)) %>%
+    mutate(
+        region1 = str_c(section1, "-", Filtered_tumor_regions_1),
+        region2 = str_c(section2, "-", Filtered_tumor_regions_2)
+    )
+
+write_tsv(count_connection_df, str_glue('{out_path}/1_count_connection.tsv'))
+
+# 2. Make sankey
+### Functions ----------------------------------------------------------------------------------------
+# Set colors in the sankey JavaScript format
+library(colorspace)
+define_colors = function(ident, colors){
+    ident_string = ident %>% paste(collapse = "','")
+    colors_string = colors %>% paste(collapse = "','")
+    my_color <- str_glue("d3.scaleOrdinal() .domain(['{ident_string}']) .range(['{colors_string}'])")
+    return(my_color)
+}
+my_color = define_colors(c('Th1H3U1','Th1H3U2'), c('#1f77b4','#ff7f0e'))
+
+
+# Library
+library(networkD3)
+library(dplyr)
+library(gtools)
+
+links = count_connection_df %>% mutate(value = n)
+
+# Works, but dummy levels need to be added iteratively for each level
+missing_2nd_level = setdiff(
+    links %>% filter(section1 == 'Th1H3U2') %>% pull(region1),
+    links %>% filter(section1 == 'Th1H3U1') %>% pull(region2)
+)
+links = links %>% ungroup %>% add_row(region1 = 'Dummy_level1', region2 = missing_2nd_level, value = 0.1)
+#### --------------
+# From these flows we need to create a node data frame: it lists every entity involved in the flow
+nodes <- data.frame(
+    name = c(as.character(links$region1),
+             as.character(links$region2)) %>% unique()
+)
+
+# With networkD3, connections must be provided using ids, not real names as in the links dataframe, so we need to reformat them.
+links$IDsource <- match(links$region1, nodes$name)-1
+links$IDtarget <- match(links$region2, nodes$name)-1
+
+
+# Make the network
+p <- sankeyNetwork(Links = links, Nodes = nodes,
+                   Source = "IDsource", Target = "IDtarget",
+                   Value = "value", NodeID = "name",
+                   #iterations = 0 ,
+                   colourScale = my_color,
+                   sinksRight=FALSE)
+p
+
+# Save the result as html
+# https://stackoverflow.com/questions/56086341/how-to-save-simplenetwork-output-from-networkd3-in-pdf-jpeg-tiff-format
+require(htmlwidgets)
+saveWidget(p, file=str_glue("{out_path}/2_sankey.html"))
+
+require(webshot) # needs: conda install phantomjs
+webshot(url = str_glue("{out_path}/2_sankey.html"),
+        file = str_glue("{out_path}/2_sankey.pdf"),
+        vwidth = 700,
+        vheight = 400,
+)
+
+
+
+###### 3. Make spatial plot to check the distribution ----------------------------------------------
+# 3. color by microregion
+spatial_map_df = bind_rows(
+    matching_df[,c('x_1','y_1','Filtered_tumor_regions_1','section1')] %>% rename(x=x_1, y=y_1, Filtered_tumor_regions=Filtered_tumor_regions_1, section=section1),
+    matching_df[,c('x_2','y_2','Filtered_tumor_regions_2','section2')] %>% rename(x=x_2, y=y_2, Filtered_tumor_regions=Filtered_tumor_regions_2, section=section2)
+    ) %>%
+    filter(
+        Filtered_tumor_regions != '0'
+    ) %>%
+    mutate(section = factor(section, levels = mixedsort(unique(section))))
+write_tsv(spatial_map_df, str_glue('{out_path}/3_Microregion_spatial_map.tsv'))
+spatial_label_df = spatial_map_df %>% group_by(section, Filtered_tumor_regions) %>% summarize(across(where(is.numeric), mean))
+p_microregion = ggplot(spatial_map_df, aes(x=x, y=-y, color=as.character(Filtered_tumor_regions))) +
+    facet_wrap(~section) +
+    geom_point(size = 0.5) +
+    geom_text(data = spatial_label_df, aes(label = Filtered_tumor_regions), color = 'gray20') +
+    cowplot::theme_cowplot() +
+    theme(aspect.ratio = 1) +
+    theme(
+        legend.position = "bottom"
+    ) +
+    labs(title = 'Microregion')
+ggsave(str_glue("{out_path}/3_Microregion_spatial_map.pdf"), p_microregion, width = 10, height = 5, units = "in", dpi = 300)
+
+
+############################################################################
+# 4. 3D cluster
+# 4a. Assign 3D clusters
+# CRITERIA:
+# Rule of connection: two regions belong to the same cluster if they share
+# more than n_spot_minimum connected spots OR more than 20% of connections
+connected_df = count_connection_df %>%
+    mutate(if_connect = ifelse(n > n_spot_minimum | percent > 0.2, 1, 0)) %>%
+    filter(if_connect == 1)
+
+# make graph
+connected_region_graph = graph_from_data_frame(connected_df[,c('region1','region2')], directed = F)
+# Get components (connected regions)
+# https://stackoverflow.com/questions/56332850/how-can-i-extract-clusters-from-an-igraph-network
+connected_region_graph_components = components(connected_region_graph)
+threeD_cluster = connected_region_graph_components$membership
+
+# Assign a component to each region
+connected_df = connected_df %>% mutate(
+    volumn_cluster1 = connected_region_graph_components$membership[region1],
+    volumn_cluster2 = connected_region_graph_components$membership[region2]
+    )
+write_tsv(connected_df, str_glue('{out_path}/4a_3D_cluster.tsv'))
+
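+# A minimal toy sketch of the grouping logic above (region names illustrative):
+# regions linked by any qualifying edge fall into the same graph component,
+# and the component id becomes the 3D (volume) cluster label.
+#   toy_edges = data.frame(region1 = c('U1-1','U2-1'), region2 = c('U2-1','U3-2'))
+#   igraph::components(igraph::graph_from_data_frame(toy_edges, directed = FALSE))$membership
+#   # U1-1 U2-1 U3-2
+#   #    1    1    1
+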
+# 4b. Assign clusters on the spatial plot
+slice_library_name_match = sheet_multi$LibraryName %>% setNames(sheet_multi$PASTEFileName)
+# Make a spatial plot to check
+spatial_map_df =
+    bind_rows(
+    matching_df[,c('x_1','y_1','Filtered_tumor_regions_1','section1')] %>% rename(x=x_1, y=y_1, Filtered_tumor_regions=Filtered_tumor_regions_1, section=section1),
+    matching_df[,c('x_2','y_2','Filtered_tumor_regions_2','section2')] %>% rename(x=x_2, y=y_2, Filtered_tumor_regions=Filtered_tumor_regions_2, section=section2)
+    ) %>%
+    filter(
+        Filtered_tumor_regions != '0'
+    ) %>%
+    mutate(section = factor(section, levels = mixedsort(unique(section)))) %>%
+    mutate(region = str_c(section, "-", Filtered_tumor_regions)) %>%
+    # Assign component
+    mutate(volumn_cluster = threeD_cluster[match(region, names(threeD_cluster))]) %>%
+    mutate(volumn_cluster = str_c('3D_', volumn_cluster)) %>%
+    # Add piece name
+    mutate(PASTEpieceName = piece_use) %>%
+    # add LibraryName
+    mutate(LibraryName = slice_library_name_match[section])
+
+write_tsv(spatial_map_df, str_glue('{out_path}/4b_3D_cluster_spatial_map.tsv'))
+
+spatial_label_df = spatial_map_df %>% group_by(section, Filtered_tumor_regions) %>% summarize(across(where(is.numeric), mean))
+p_spatial_3dcluster = ggplot(spatial_map_df, aes(x=x, y=-y, color=as.character(volumn_cluster))) +
+    facet_wrap(~section) +
+    geom_point(size = 0.5) +
+    geom_text(data = spatial_label_df, aes(label = Filtered_tumor_regions), color = 'gray20') +
+    cowplot::theme_cowplot() +
+    coord_fixed() +
+    theme(
+        legend.position = "bottom"
+    ) +
+    labs(title = '3D cluster')
+ggsave(str_glue("{out_path}/4b_3D_cluster_spatial_map.pdf"), p_spatial_3dcluster, width = 10, height = 5, units = "in", dpi = 300)
+
+# 5. save a simple 3D cluster label table
+volumn_cluster_clean_df = spatial_map_df %>%
+    mutate(
+        PASTEpieceName = piece_use
+    ) %>%
+    dplyr::rename(
+        PASTE_x = x,
+        PASTE_y = y,
+        PASTEFileName = section
+    ) %>% select(LibraryName, Filtered_tumor_regions, volumn_cluster, PASTEFileName) %>%
+    distinct()
+
+write_tsv(volumn_cluster_clean_df, str_glue('{out_path}/5_3D_cluster_label.tsv'))
+
+#############################################################################################
+#############################################################################################
+# NEW. 2023/10/19
+
+# 6. Make sankey colored by volume cluster --------------------------------------------------------
+# Library
+library(networkD3)
+library(dplyr)
+library(gtools)
+# Assign a color to each region based on its volume cluster
+# fix NA.
+volumn_cluster_region_df = spatial_map_df %>% distinct(volumn_cluster, region) %>%
+    mutate(volumn_cluster = ifelse(is.na(volumn_cluster), 'NA', volumn_cluster))
+# Get the unique volume clusters and assign a color to each
+color_volumn_cluster = unique(volumn_cluster_region_df$volumn_cluster) %>%
+    mixedsort %>%
+    setNames(colorspace::rainbow_hcl(length(.)), .)
+
+# Assign a color to each volume cluster
+my_color_volumn_group = define_colors(names(color_volumn_cluster), color_volumn_cluster)
+group_color_df = volumn_cluster_region_df %>% setNames(c("node_name", "color_group"))
+color_group = color_volumn_cluster
+color_group_sankey_format = define_colors(names(color_group), color_group)
+
+# Get links info
+# NOTE: This format is important; AddDummyLinks expects these 5 columns.
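+# Expected links_df layout (one row per edge; values illustrative):
+#   From_layer  To_layer  From_node   To_node     value
+#   Th1H3U1     Th1H3U2   Th1H3U1-4   Th1H3U2-2   12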
+links_df = count_connection_df %>% mutate(value = n) %>%
+    ungroup %>%
+    select(section1, section2, region1, region2, value) %>%
+    setNames(c("From_layer","To_layer",'From_node','To_node','value'))
+
+
+### ------------------------------------------------------------
+# A. Define the layer order. Can be set manually; if not provided (NULL), mixedsort determines it.
+layer_ranks = gtools::mixedsort(union(links_df$From_layer, links_df$To_layer))
+links_df = AddDummyLinks(links_df) #, layer_ranks)
+# ------------------------------------------------------------
+# From these flows we need to create a node data frame: it lists every entity involved in the flow
+nodes_df <- data.frame(
+    node_name = union(as.character(links_df$From_node), as.character(links_df$To_node))
+)
+
+# Add color group to links, merged by From_node
+links_df = links_df %>% left_join(group_color_df, by = c('From_node' = 'node_name')) # link color determined by From_node
+nodes_df = nodes_df %>% left_join(group_color_df, by = c('node_name' = 'node_name'))
+
+# With networkD3, connections must be provided using ids, not real names as in the links dataframe, so we need to reformat them.
+links_df$IDsource <- match(links_df$From_node, nodes_df$node_name)-1
+links_df$IDtarget <- match(links_df$To_node, nodes_df$node_name)-1
+
+
+# Make the network
+p <- sankeyNetwork(Links = links_df, Nodes = nodes_df,
+                   Source = "IDsource", Target = "IDtarget",
+                   Value = "value", NodeID = "node_name",
+                   #iterations = 0 ,
+                   colourScale = color_group_sankey_format,
+                   LinkGroup = 'color_group',
+                   NodeGroup = 'color_group',
+                   sinksRight=FALSE)
+p
+
+# Save the result as html
+require(htmlwidgets)
+saveWidget(p, file=str_glue("{out_path}/6_sankey_color_group.html"))
+
+require(webshot) # needs: conda install phantomjs
+webshot(url = str_glue("{out_path}/6_sankey_color_group.html"),
+        file = str_glue("{out_path}/6_sankey_color_group.pdf"),
+        vwidth = 700,
+        vheight = 400,
+)
+
+
+# ----------------------------------------------------------------------------------------
+# 7. Make sankey colored by volume cluster --------------------------------------------------------
+# with simpler node names
+# Library
+library(networkD3)
+library(dplyr)
+library(gtools)
+# Assign a color to each region based on its volume cluster
+# fix NA.
+volumn_cluster_region_df = spatial_map_df %>% distinct(volumn_cluster, region) %>%
+    mutate(volumn_cluster = ifelse(is.na(volumn_cluster), 'NA', volumn_cluster))
+# Get the unique volume clusters and assign a color to each
+color_volumn_cluster = unique(volumn_cluster_region_df$volumn_cluster) %>%
+    mixedsort %>%
+    setNames(colorspace::rainbow_hcl(length(.)), .)
+
+# Assign a color to each volume cluster
+my_color_volumn_group = define_colors(names(color_volumn_cluster), color_volumn_cluster)
+group_color_df = volumn_cluster_region_df %>% setNames(c("node_name", "color_group"))
+color_group = color_volumn_cluster
+color_group_sankey_format = define_colors(names(color_group), color_group)
+
+# Get links info
+# NOTE: This format is important; AddDummyLinks expects these 5 columns.
+links_df = count_connection_df %>% mutate(value = n) %>%
+    ungroup %>%
+    select(section1, section2, region1, region2, value) %>%
+    setNames(c("From_layer","To_layer",'From_node','To_node','value'))
+
+
+### ------------------------------------------------------------
+# A. Define the layer order. Can be set manually; if not provided (NULL), mixedsort determines it.
+layer_ranks = gtools::mixedsort(union(links_df$From_layer, links_df$To_layer))
+links_df = AddDummyLinks(links_df) #, layer_ranks)
+# ------------------------------------------------------------
+# From these flows we need to create a node data frame: it lists every entity involved in the flow
+nodes_df <- data.frame(
+    node_name = union(as.character(links_df$From_node), as.character(links_df$To_node))
+)
+
+# Add color group to links, merged by From_node
+links_df = links_df %>% left_join(group_color_df, by = c('From_node' = 'node_name')) # link color determined by From_node
+nodes_df = nodes_df %>% left_join(group_color_df, by = c('node_name' = 'node_name'))
+nodes_df = nodes_df %>% mutate(node_name_simple = str_extract(node_name, "-[0-9]+$") %>% str_remove('-')) # Add a simple node name: just the microregion number
+
+# With networkD3, connections must be provided using ids, not real names as in the links dataframe, so we need to reformat them.
+links_df$IDsource <- match(links_df$From_node, nodes_df$node_name)-1
+links_df$IDtarget <- match(links_df$To_node, nodes_df$node_name)-1
+
+
+# Make the network
+p <- sankeyNetwork(Links = links_df, Nodes = nodes_df,
+                   Source = "IDsource", Target = "IDtarget",
+                   Value = "value", NodeID = "node_name_simple",
+                   #iterations = 0 ,
+                   colourScale = color_group_sankey_format,
+                   LinkGroup = 'color_group',
+                   NodeGroup = 'color_group',
+                   sinksRight=FALSE)
+p
+
+# Save the result as html
+require(htmlwidgets)
+saveWidget(p, file=str_glue("{out_path}/7_sankey_color_group_simple.html"))
+
+require(webshot) # needs: conda install phantomjs
+webshot(url = str_glue("{out_path}/7_sankey_color_group_simple.html"),
+        file = str_glue("{out_path}/7_sankey_color_group_simple.pdf"),
+        vwidth = 700,
+        vheight = 400,
+)
+
diff --git a/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/src/function_sankey_colors.r b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/src/function_sankey_colors.r
new file mode 100644
index 0000000..01676dd
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/src/function_sankey_colors.r
@@ -0,0 +1,16 @@
+# usage:
+# my_color = define_colors(c('Th1H3U1','Th1H3U2'), c('#1f77b4','#ff7f0e'))
+# Then in the sankey call:
+# p <- sankeyNetwork(Links = links, Nodes = nodes,
+#                    Source = "IDsource", Target = "IDtarget",
+#                    Value = "value", NodeID = "name",
+#                    #iterations = 0 ,
+#                    colourScale = my_color,
+#                    sinksRight=FALSE)
+
+define_colors = function(ident, colors){
+    ident_string = ident %>% paste(collapse = "','")
+    colors_string = colors %>% paste(collapse = "','")
+    my_color <- str_glue("d3.scaleOrdinal() .domain(['{ident_string}']) .range(['{colors_string}'])")
+    return(my_color)
+}
\ No newline at end of file
diff --git a/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/src/function_sankey_make_dummy_link.r b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/src/function_sankey_make_dummy_link.r
new file mode 100644
index 0000000..4e0f114
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/src/function_sankey_make_dummy_link.r
@@ -0,0 +1,81 @@
+# This helper function makes dummy links for the sankey plot so that nodes land at the right level
+# The format for the layer-node-rank df is:
+#   Layer     Node        Layer_rank
+#
+# 1 Th1H3U1   Th1H3U1-4   1
+# 2 Th1H3U1   Th1H3U1-3   1
+# 3 Th1H3U1   Th1H3U1-6   1
+# 4 Th1H3U12  Th1H3U12-6  3
+# 5 Th1H3U12  Th1H3U12-10 3
+# 6 Th1H3U12  Th1H3U12-5  3
\ No newline at end of file
diff --git a/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/src/function_sankey_make_dummy_link.r b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/src/function_sankey_make_dummy_link.r
new file mode 100644
index 0000000..4e0f114
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/script/src/function_sankey_make_dummy_link.r
@@ -0,0 +1,81 @@
+# This helper makes dummy links for the sankey plot so that nodes sit at the correct level
+# The expected format of the layer-node-rank df is:
+#        Layer      Node          Layer_rank
+# 1      Th1H3U1    Th1H3U1-4     1
+# 2      Th1H3U1    Th1H3U1-3     1
+# 3      Th1H3U1    Th1H3U1-6     1
+# 4      Th1H3U12   Th1H3U12-6    3
+# 5      Th1H3U12   Th1H3U12-10   3
+# 6      Th1H3U12   Th1H3U12-5    3
+# 7      Th1H3U2    Th1H3U2-2     2
+# 8      Th1H3U2    Th1H3U2-1     2
+# 9      Th1H3U2    Th1H3U2-5     2
+
+make_dummy_link_one_node = function(missing_node, layer_node_rank_df){
+    # 0. get layer ranks
+    layer_ranked = layer_node_rank_df %>% distinct(Layer, Layer_rank) %>% arrange(Layer_rank) %>% pull(Layer)
+    # 1. get the rank and layer of the missing node
+    missing_node_rank = layer_node_rank_df %>% filter(Node == missing_node) %>% pull(Layer_rank)
+    missing_node_layer = layer_node_rank_df %>% filter(Node == missing_node) %>% pull(Layer)
+    # First, create the link from the last dummy layer (n-1) to the actual node. e.g. rank = 4: Dummy_node_3 -> missing_node
+    dummy_links_df = data.frame(
+        From_layer = layer_ranked[[missing_node_rank-1]],
+        To_layer = missing_node_layer,
+        From_node = str_glue("Dummy_node_{missing_node_rank-1}"),
+        To_node = missing_node,
+        value = 1
+    )
+    # Second, if rank > 2, create the links between all prior dummy layers. e.g. rank = 4: create links 1-2 and 2-3 (2 layers)
+    if(missing_node_rank > 2){
+        dummy_links_prior_df = map(seq(1, missing_node_rank-2, 1), function(missing_rank){
+            data.frame(
+                From_layer = layer_ranked[[missing_rank]],
+                To_layer = layer_ranked[[missing_rank+1]],
+                From_node = str_glue("Dummy_node_{missing_rank}"),
+                To_node = str_glue("Dummy_node_{missing_rank+1}"),
+                value = 1
+            )
+        }) %>% bind_rows() %>% distinct()
+        dummy_links_df = bind_rows(dummy_links_prior_df, dummy_links_df)
+    }
+
+    return(dummy_links_df)
+}
+
+
+# Main function to add dummy links
+AddDummyLinks = function(links, layer_ranks = NULL){
+    # A. set parameters
+    if(is.null(layer_ranks)){
+        message("layer_ranks not provided. automatically determined by mixedsort")
+        layer_ranks = gtools::mixedsort(union(links$From_layer, links$To_layer))
+        message("layer_ranks: ", toString(layer_ranks))
+    }
+    first_layer_name = layer_ranks[[1]]
+    # helper for natural sorting: https://stackoverflow.com/questions/32378108/using-gtoolsmixedsort-or-alternatives-with-dplyrarrange
+    mixedrank = function(x) order(gtools::mixedorder(x))
+
+    # B. create a Layer-Node-Layer_rank map to help create the dummy layers
+    layer_node_rank_df = links %>% {bind_rows(
+        select(., contains('From_')) %>% setNames(c("Layer", "Node")),
+        select(., contains('To_')) %>% setNames(c("Layer", "Node"))
+    )} %>% distinct %>% arrange(mixedrank(Layer), Node) %>%
+        mutate(Layer_rank = factor(Layer, levels = layer_ranks) %>% as.numeric)
+
+
+    # C. Find nodes missing levels
+    true_first_layer_nodes = links %>% filter(From_layer == first_layer_name) %>% pull(From_node)
+    current_top_level_nodes = setdiff(links$From_node, links$To_node)
+    nodes_missing_levels = setdiff(current_top_level_nodes, true_first_layer_nodes)
+
+    # D. create dummy rows for all missing nodes
+    dummy_links_df = map(nodes_missing_levels, function(missing_node){
+        make_dummy_link_one_node(missing_node, layer_node_rank_df)
+    }) %>% bind_rows() %>% distinct()
+    message("Adding the following dummy links:")
+    print(dummy_links_df)
+    # Add to links
+    links = bind_rows(links, dummy_links_df) %>% distinct()
+    return(links)
+}
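+# Illustrative example (hypothetical toy data, not from the analysis): a node whose
+# earliest appearance is in the third layer gets chained dummy links so it is not
+# drawn at the first level
+if(FALSE){
+    toy_links = data.frame(
+        From_layer = c('U1', 'U2', 'U3'),
+        To_layer   = c('U2', 'U3', 'U4'),
+        From_node  = c('U1-1', 'U2-1', 'U3-9'),
+        To_node    = c('U2-1', 'U3-1', 'U4-1'),
+        value = 1
+    )
+    AddDummyLinks(toy_links)
+    # adds Dummy_node_1 -> Dummy_node_2 -> U3-9, keeping U3-9 at layer 3
+}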
diff --git a/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/workflow/1_get_multi_section_regions.workflow.sh b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/workflow/1_get_multi_section_regions.workflow.sh
new file mode 100644
index 0000000..79ba2bc
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/3_multi_section_regions_N_min_3/workflow/1_get_multi_section_regions.workflow.sh
@@ -0,0 +1,19 @@
+eval "$(conda shell.bash hook)"
+conda activate seurat4.3
+# Run 1_get_multi_section_regions.r on every sample in 2_get_matching_spot
+PROJECT_FOLDER="/diskmnt/Datasets/Spatial_Transcriptomics/Analysis/ST_subclone/30-PASTE2/1_PASTE2_result"
+MATCHING_OUT_PATH="$PROJECT_FOLDER/2_get_matching_spot/out"
+ANALYSIS_PATH="$PROJECT_FOLDER/4_multi_section_regions_Ver20231031_N_min_3/"
+SCRIPT_PATH="$ANALYSIS_PATH/script/1_get_multi_section_regions.r"
+mkdir -p "$ANALYSIS_PATH/log" # make sure the log folder exists before redirecting into it
+
+# File name format: {piece_name}_min_dist.csv in the MATCHING_OUT_PATH folder
+for file in $MATCHING_OUT_PATH/*_min_dist.csv; do
+    piece_name=$(basename "$file" _min_dist.csv)
+    echo $piece_name
+    # Launch one background job per piece
+    Rscript $SCRIPT_PATH \
+        --analysis_folder $ANALYSIS_PATH \
+        --piece_use $piece_name \
+        --n_spot_minimum 3 \
+        --matching_out_path $MATCHING_OUT_PATH > $ANALYSIS_PATH/log/$piece_name.log 2> $ANALYSIS_PATH/log/$piece_name.err &
+done
diff --git a/Figure6/2_PASTE2_analysis/4_mergeobj_and_3dcluster_N_min_3/script/1_merge_seurat_add3dcluster.r b/Figure6/2_PASTE2_analysis/4_mergeobj_and_3dcluster_N_min_3/script/1_merge_seurat_add3dcluster.r
new file mode 100644
index 0000000..5351165
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/4_mergeobj_and_3dcluster_N_min_3/script/1_merge_seurat_add3dcluster.r
@@ -0,0 +1,236 @@
+library(optparse)
+library(tidyverse)
+library(googlesheets4)
+library(tictoc)
+library(future)
+library(furrr)
+library(Seurat)
+library(qs)
+
+plan(multisession, workers = 20)
+options(future.globals.maxSize = Inf)
+
+## Define the optparse list for the necessary inputs
+option_list = list(
+    make_option(c("-a", "--analysis_folder"), type="character", default=NULL,
+        help="analysis_folder"),
+    make_option(c("-p", "--piece_use"), type="character", default=NULL,
+        help="piece_use"),
+    make_option(c("-m", "--matching_out_path"), type="character", default=NULL,
+        help="matching_out_path"),
+    make_option(c("-v", "--volumncluster_path"), type="character", default=NULL,
+        help="volumncluster_path")
+)
+
+opt_parser = OptionParser(option_list=option_list)
+opt = parse_args(opt_parser)
+
+
+# Parameters
+#piece_use = 'ht268'
+piece_use = opt$piece_use
+
+#analysis_folder='/diskmnt/Datasets/Spatial_Transcriptomics/Analysis/ST_subclone/30-PASTE2/1_PASTE2_result/4_multi_section_regions'
+analysis_folder = opt$analysis_folder
+out_path = str_glue("{analysis_folder}/out/{piece_use}")
+dir.create(out_path, recursive = T, showWarnings = F)
+
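+## Illustrative sketch (hypothetical args): how the workflow's CLI flags surface in opt
+if(FALSE){
+    demo_opt = parse_args(
+        OptionParser(option_list = option_list),
+        args = c('--piece_use', 'ht268', '--analysis_folder', '/tmp/demo')
+    )
+    demo_opt$piece_use  # "ht268"
+}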
+######## 0. Loading data and info ########################################################
+# Load tracking sheet
+gs4_deauth()
+google_sheet_url = ""
+sheet_multi = read_sheet(google_sheet_url, sheet = 'Sample_multisections') %>%
+    filter(!is.na(PASTEFileName)) %>%
+    filter(PASTEpieceName == piece_use)
+
+# Quit if empty
+if(nrow(sheet_multi) == 0){
+    message(str_glue('No PASTE2 data for {piece_use}'))
+    message("Quit this run.")
+    quit()
+}
+
+# Load Seurat objects
+obj_list = pmap(sheet_multi, ~with(list(...), {
+    message("loading: ", LibraryName, ' ', piece_id, ' ', PASTEFileName)
+    readRDS(SeuratObject)
+})) %>% setNames(sheet_multi$LibraryName)
+
+# Load individual PASTE2 coordinates
+paste2_df_list = pmap(sheet_multi, ~with(list(...), {
+    message(LibraryName, ' ', piece_id, ' ', PASTEFileName)
+    read_csv(PASTE2XinHaoCoordinate) %>%
+        mutate(
+            PASTEpieceName = PASTEpieceName,
+            PASTEFileName = PASTEFileName,
+            LibraryName = LibraryName
+        ) %>%
+        # Rename x and y into PASTE_x, PASTE_y
+        dplyr::rename(PASTE_x = x, PASTE_y = y)
+})) %>% setNames(sheet_multi$LibraryName)
+
+# Load matching spots
+matching_out_path = opt$matching_out_path
+if(!file.exists(str_glue('{matching_out_path}/{piece_use}_min_dist.csv'))){
+    message(str_glue('{matching_out_path}/{piece_use}_min_dist.csv does not exist'))
+    message("Quit this run.")
+    quit()
+}
+matching_df = read_csv(str_glue('{matching_out_path}/{piece_use}_min_dist.csv'))
+# Keep only the columns ending in "_2" (info from the next slice),
+# then turn into a list, split by LibraryName_1 (library name of the upper slice)
+matching_df_list = matching_df %>% split(f = .$LibraryName_1) %>%
+    map(~as.data.frame(.) %>%
+        column_to_rownames('barcode_1') %>%
+        select(ends_with('_2')) %>%
+        setNames(str_replace(colnames(.), '_2', '_NextSlice')) %>%
+        dplyr::rename(
+            PASTE_x_NextSlice = x_NextSlice,
+            PASTE_y_NextSlice = y_NextSlice
+        )
+    )
+
+## Load Morph output
+morph_df = pmap(sheet_multi, ~with(list(...), {
+    read_tsv(MorphOutput) %>%
+        dplyr::rename(barcode = '...1') %>% # the first (unnamed) column holds the barcodes
+        # Clean columns
+        select(barcode, `Filtered tumor regions`) %>%
+        dplyr::rename(Filtered_tumor_regions = `Filtered tumor regions`) %>%
+        mutate(
+            PASTEFileName = PASTEFileName,
+            PASTEpieceName = PASTEpieceName,
+            LibraryName = LibraryName
+        )
+})) %>% bind_rows()
+
+
+## Load 3D cluster result
+volumn_cluster_path = opt$volumncluster_path
+if(!file.exists(str_glue('{volumn_cluster_path}/{piece_use}/5_3D_cluster_label.tsv'))){
+    message(str_glue('{volumn_cluster_path}/{piece_use}/5_3D_cluster_label.tsv does not exist'))
+    message("Quit this run.")
+    quit()
+}
+volumn_cluster_df = read_tsv(str_glue('{volumn_cluster_path}/{piece_use}/5_3D_cluster_label.tsv')) %>% distinct()
+
+morph_3d_cluster_df = left_join(morph_df, volumn_cluster_df, by = c('LibraryName', 'Filtered_tumor_regions', 'PASTEFileName'))
+
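+# Illustrative sketch (toy data, not from the analysis): pmap() passes each sheet
+# row as named arguments, and with(list(...), ...) lets the body refer to the
+# columns by name
+if(FALSE){
+    toy_sheet = tibble(LibraryName = c('L1', 'L2'), piece_id = c('p1', 'p2'))
+    pmap(toy_sheet, ~with(list(...), str_glue('{LibraryName} / {piece_id}')))
+    #> list("L1 / p1", "L2 / p2")
+}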
+################### Analysis ################################################################
+# 1. Add metadata to each individual sample and save the meta.data
+obj_list_w_meta = obj_list
+for(library_use in names(obj_list)){
+    message(library_use)
+    # 1a. Add PASTEpieceName, PASTEFileName, PASTE_x, PASTE_y using paste2_df_list
+    paste_df_use = paste2_df_list[[library_use]] %>% as.data.frame %>% column_to_rownames('barcode')
+    obj_list_w_meta[[library_use]] = AddMetaData(obj_list_w_meta[[library_use]], metadata = paste_df_use)
+
+    # 1b. Add the 3D column cluster using volumn_cluster_df
+    volumn_cluster_df_use = morph_3d_cluster_df %>% filter(LibraryName == library_use) %>%
+        select(barcode, Filtered_tumor_regions, volumn_cluster) %>%
+        as.data.frame %>% column_to_rownames('barcode')
+    obj_list_w_meta[[library_use]] = AddMetaData(obj_list_w_meta[[library_use]], metadata = volumn_cluster_df_use)
+
+    # 1c. Add the matching barcode from the adjacent section using matching_df_list
+    #!! NOTE. The last slice will not have this info!!
+    # skip if library_use is not in matching_df_list
+    if(!library_use %in% names(matching_df_list)){
+        message(str_glue('{library_use} not in matching_df_list'))
+        next
+    }
+    matching_df_use = matching_df_list[[library_use]]
+    obj_list_w_meta[[library_use]] = AddMetaData(obj_list_w_meta[[library_use]], metadata = matching_df_use)
+}
+
+# save meta.data only
+iwalk(obj_list_w_meta, function(obj, library_use){
+    message(library_use)
+    metadata = obj@meta.data %>% rownames_to_column('barcode_orig')
+    write_tsv(metadata, str_glue('{out_path}/1_PASTE_3Dcluster_metadata_{library_use}.tsv'))
+})
+
+# 2A. Create merged object - ALL
+# First check if the file already exists; skip if it does
+if(file.exists(str_glue('{out_path}/2_merged_obj.qs'))){
+    message(str_glue('{out_path}/2_merged_obj.qs already exists'))
+    message("Skip the 2A whole object merge.")
+}else{
+    message("merging objects")
+    obj_merged = merge(
+        obj_list_w_meta[[1]], obj_list_w_meta[-1],
+        add.cell.ids = names(obj_list_w_meta),
+        project = piece_use
+    )
+    # rerun analysis
+    message("rerun analysis")
+    DefaultAssay(obj_merged) <- "SCT"
+    VariableFeatures(obj_merged) <- map(obj_list_w_meta, VariableFeatures) %>% reduce(union)
+    obj_merged <- RunPCA(obj_merged, verbose = FALSE)
+    obj_merged <- FindNeighbors(obj_merged, dims = 1:30)
+    obj_merged <- FindClusters(obj_merged, verbose = FALSE)
+    obj_merged <- RunUMAP(obj_merged, dims = 1:30)
+
+    # Post processing after merge:
+    # reorder LibraryName; the sorted library names are the 'names' of the vector lib_names_sorted
+    lib_names_sorted = unique(obj_merged@meta.data$LibraryName) %>%
+        setNames(str_extract(., 'U[0-9]+'), .) %>%
+        gtools::mixedsort()
+    # Fix NA in volumn_cluster
+    obj_merged@meta.data = obj_merged@meta.data %>%
+        mutate(volumn_cluster = ifelse(is.na(volumn_cluster), 'NA', volumn_cluster))
+
+    # Save object
+    qs::qsave(obj_merged, str_glue('{out_path}/2_merged_obj.qs'), nthreads = 20)
+}
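+# Illustrative sketch (toy object, not from the analysis): AddMetaData aligns the
+# metadata rows to cells by rowname; cells absent from the metadata get NA
+if(FALSE){
+    toy_counts = matrix(1:8, nrow = 2, dimnames = list(c('g1', 'g2'), paste0('cell', 1:4)))
+    toy_obj = CreateSeuratObject(toy_counts)
+    toy_meta = data.frame(volumn_cluster = c('1', '2'), row.names = c('cell1', 'cell3'))
+    toy_obj = AddMetaData(toy_obj, metadata = toy_meta)
+    table(toy_obj$volumn_cluster, useNA = 'ifany')  # cell2 and cell4 are NA
+}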
+
+# 2B. Create merged object - Tumor Only
+if(file.exists(str_glue('{out_path}/2_merged_obj_tumor.qs'))){
+    message(str_glue('{out_path}/2_merged_obj_tumor.qs already exists'))
+    message("Skip the 2B tumor object merge.")
+}else{
+    message("subsetting TUMOR cells")
+    obj_list_tumor = obj_list_w_meta %>% imap(function(obj, sample_id){
+        tumor_cells = obj@meta.data %>% filter(Filtered_tumor_regions != '0') %>% rownames()
+        print(sample_id); print(length(tumor_cells))
+        # Note: if Morph failed, the tumor cell count will be 0.
+        # Return NULL to drop the sample
+        if(length(tumor_cells) == 0){ return(NULL) }
+        obj = subset(obj, cells = tumor_cells) %>% FindVariableFeatures()
+        return(obj)
+    }) %>% compact()
+    print(str_c("Samples with tumor: ", toString(names(obj_list_tumor))))
+    print(str_c("Samples without tumor: ", toString(setdiff(names(obj_list_w_meta), names(obj_list_tumor)))))
+
+    message("merging TUMOR objects")
+    obj_tumor = merge(
+        obj_list_tumor[[1]], obj_list_tumor[-1],
+        add.cell.ids = names(obj_list_tumor),
+        project = piece_use
+    )
+    # rerun analysis
+    message("rerun analysis")
+    DefaultAssay(obj_tumor) <- "SCT"
+    VariableFeatures(obj_tumor) <- map(obj_list_tumor, VariableFeatures) %>% reduce(union)
+    obj_tumor <- RunPCA(obj_tumor, verbose = FALSE)
+    obj_tumor <- FindNeighbors(obj_tumor, dims = 1:30)
+    obj_tumor <- FindClusters(obj_tumor, verbose = FALSE)
+    obj_tumor <- RunUMAP(obj_tumor, dims = 1:30)
+
+    # Post processing after merge:
+    # reorder LibraryName; the sorted library names are the 'names' of the vector lib_names_sorted
+    lib_names_sorted = unique(obj_tumor@meta.data$LibraryName) %>%
+        setNames(str_extract(., 'U[0-9]+'), .) %>%
+        gtools::mixedsort()
+    # Fix NA in volumn_cluster
+    obj_tumor@meta.data = obj_tumor@meta.data %>%
+        mutate(volumn_cluster = ifelse(is.na(volumn_cluster), 'NA', volumn_cluster))
+
+    message("saving Tumor object")
+    qs::qsave(obj_tumor, str_glue('{out_path}/2_merged_obj_tumor.qs'), nthreads = 20)
+}
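+# Illustrative note (hypothetical objects): merge() with add.cell.ids prefixes each
+# object's barcodes, so identical barcodes from different sections stay unique
+if(FALSE){
+    toy_merged = merge(toy_obj_a, list(toy_obj_b), add.cell.ids = c('S1', 'S2'))
+    head(colnames(toy_merged))  # e.g. "S1_AAAC...", "S2_AAAC..."
+}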
\ No newline at end of file
diff --git a/Figure6/2_PASTE2_analysis/4_mergeobj_and_3dcluster_N_min_3/script/2_plot_3d_cluster.r b/Figure6/2_PASTE2_analysis/4_mergeobj_and_3dcluster_N_min_3/script/2_plot_3d_cluster.r
new file mode 100644
index 0000000..da76bc5
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/4_mergeobj_and_3dcluster_N_min_3/script/2_plot_3d_cluster.r
@@ -0,0 +1,124 @@
+library(optparse)
+library(tidyverse)
+library(patchwork)
+library(googlesheets4)
+library(tictoc)
+library(future)
+library(furrr)
+library(Seurat)
+library(qs)
+
+plan(multisession, workers = 20)
+options(future.globals.maxSize = Inf)
+
+## Define the optparse list for the necessary inputs
+option_list = list(
+    make_option(c("-a", "--analysis_folder"), type="character", default=NULL,
+        help="analysis_folder"),
+    make_option(c("-p", "--piece_use"), type="character", default=NULL,
+        help="piece_use"),
+    make_option(c("-m", "--merged_obj_path"), type="character", default=NULL,
+        help="merged_obj_path")
+)
+
+opt_parser = OptionParser(option_list=option_list)
+opt = parse_args(opt_parser)
+
+
+# Parameters
+#piece_use = 'ht268'
+piece_use = opt$piece_use
+
+#analysis_folder='/diskmnt/Datasets/Spatial_Transcriptomics/Analysis/ST_subclone/30-PASTE2/1_PASTE2_result/5_mergeobj_and_3dcluster'
+analysis_folder = opt$analysis_folder
+out_path = str_glue("{analysis_folder}/out/{piece_use}")
+dir.create(out_path, recursive = T, showWarnings = F)
+
+# Folder holding the merged objects
+# merged_folder="/diskmnt/Datasets/Spatial_Transcriptomics/Analysis/ST_subclone/30-PASTE2/1_PASTE2_result/5_mergeobj_and_3dcluster/out"
+merged_folder = opt$merged_obj_path
+
+######## 0. Loading data and info ########################################################
+# Load tracking sheet
+gs4_deauth()
+sheet_multi = read_sheet('https://docs.google.com/spreadsheets/d/1MhZ98AVoUQoUxIiE4pX4pbL-eEy-RQeF9kV_xLwRz9E/edit#gid=1451401776', sheet = 'Sample_multisections') %>%
+    filter(!is.na(PASTEFileName)) %>%
+    filter(PASTEpieceName == piece_use)
+
+# Load merged objects
+merged_path = str_glue('{out_path}/2_merged_obj.qs')
+merged_tumor_path = str_glue('{out_path}/2_merged_obj_tumor.qs')
+obj_merged = qs::qread(merged_path, nthreads = 10)
+obj_tumor = qs::qread(merged_tumor_path, nthreads = 10)
+
+######## A. Make Plots ########################################################
+# Output numbering starts at 3, continuing from the merge script's outputs
+n_slice = length(Images(obj_merged))
+
+#### Preprocess
+# reorder LibraryName; the sorted library names are the 'names' of the vector lib_names_sorted
+lib_names_sorted = unique(obj_merged@meta.data$LibraryName) %>%
+    setNames(str_extract(., 'U[0-9]+'), .) %>%
+    gtools::mixedsort()
+# Fix NA in volumn_cluster
+obj_merged@meta.data = obj_merged@meta.data %>%
+    mutate(volumn_cluster = ifelse(is.na(volumn_cluster), 'NA', volumn_cluster))
+
+#### PLOT
+obj_merged@meta.data$LibraryName = factor(obj_merged@meta.data$LibraryName,
+    levels = names(lib_names_sorted))
+
+# 3a. SpatialDimPlot
+idents_plot = c('seurat_clusters', 'Filtered_tumor_regions', 'volumn_cluster')
+for(ident_use in idents_plot){
+    message(ident_use)
+    p_st = SpatialPlot(obj_merged, group.by = ident_use,
+        stroke = NA, crop = F, image.alpha = 0.8,
+        label = T, label.box = F, label.color = 'black', combine = F,
+    ) %>% wrap_plots(guides = 'collect', nrow = 1) & theme(legend.position = 'bottom') & NoLegend()
+    pdf(str_glue('{out_path}/3a_{ident_use}_SpatialDimPlot.pdf'), width = 20, height = 5)
+    print(p_st)
+    dev.off()
+
+    p_umap_merged = DimPlot(obj_merged, group.by = ident_use, raster = T)
+    p_umap_split = DimPlot(obj_merged, group.by = ident_use, split.by = 'LibraryName', raster = T, combine = F, label = T, repel = T)
+    p_umap_all = c(list(p_umap_merged), p_umap_split) %>%
+        wrap_plots(guides = 'collect', nrow = 1, widths = c(1, n_slice)) &
+        theme(legend.position = 'bottom') & coord_fixed()
+    pdf(str_glue('{out_path}/3a_{ident_use}_umap.pdf'), width = 20, height = 5)
+    print(p_umap_all)
+    dev.off()
+}
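+# Illustrative sketch (toy plots, not from the analysis): in patchwork, `&` applies
+# a modifier to every panel of the assembled plot, while `+` targets only the last one
+if(FALSE){
+    toy_p1 = ggplot(mtcars, aes(wt, mpg)) + geom_point()
+    toy_p2 = ggplot(mtcars, aes(hp, mpg)) + geom_point()
+    wrap_plots(list(toy_p1, toy_p2), nrow = 1) & theme(legend.position = 'bottom')
+}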
+
+######## B. Make Plots for tumor only ########################################################
+# reorder LibraryName; the sorted library names are the 'names' of the vector lib_names_sorted
+lib_names_sorted = unique(obj_tumor@meta.data$LibraryName) %>%
+    setNames(str_extract(., 'U[0-9]+'), .) %>%
+    gtools::mixedsort()
+# Fix NA in volumn_cluster
+obj_tumor@meta.data = obj_tumor@meta.data %>%
+    mutate(volumn_cluster = ifelse(is.na(volumn_cluster), 'NA', volumn_cluster))
+
+# 3b. SpatialDimPlot on tumor only
+idents_plot = c('seurat_clusters', 'Filtered_tumor_regions', 'volumn_cluster')
+#idents_plot ='seurat_clusters'
+for(ident_use in idents_plot){
+    message(ident_use)
+    p_st = SpatialPlot(obj_tumor, group.by = ident_use,
+        stroke = NA, crop = F, image.alpha = 0.8,
+        label = T, label.box = F, label.color = 'black', combine = F,
+    ) %>% wrap_plots(guides = 'collect', nrow = 1) & theme(legend.position = 'bottom')
+    pdf(str_glue('{out_path}/3b_tumor_{ident_use}_SpatialDimPlot.pdf'), width = 20, height = 5)
+    print(p_st)
+    dev.off()
+
+    p_umap_merged = DimPlot(obj_tumor, group.by = ident_use, raster = T)
+    p_umap_split = DimPlot(obj_tumor, group.by = ident_use, split.by = 'LibraryName', raster = T, combine = F, label = T, repel = T)
+    p_umap_all = c(list(p_umap_merged), p_umap_split) %>%
+        wrap_plots(guides = 'collect', nrow = 1, widths = c(1, n_slice)) &
+        theme(legend.position = 'bottom') & coord_fixed()
+    pdf(str_glue('{out_path}/3b_tumor_{ident_use}_umap.pdf'), width = 20, height = 5)
+    print(p_umap_all)
+    dev.off()
+}
diff --git a/Figure6/2_PASTE2_analysis/4_mergeobj_and_3dcluster_N_min_3/workflow/1_merge_seurat_add3dcluster.workflow.parallel.sh b/Figure6/2_PASTE2_analysis/4_mergeobj_and_3dcluster_N_min_3/workflow/1_merge_seurat_add3dcluster.workflow.parallel.sh
new file mode 100644
index 0000000..5de0497
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/4_mergeobj_and_3dcluster_N_min_3/workflow/1_merge_seurat_add3dcluster.workflow.parallel.sh
@@ -0,0 +1,35 @@
+eval "$(conda shell.bash hook)"
+conda activate seurat4.3
+
+# File name format: {piece_name}_min_dist.csv in the MATCHING_OUT_PATH folder
+export PROJECT_FOLDER=""
+export MATCHING_OUT_PATH="$PROJECT_FOLDER/2_get_matching_spot/out"
+export VOLUMN_CLUSTER_PATH="$PROJECT_FOLDER/3_multi_section_regions_Ver20231031_N_min_3/out"
+export MERGED_OBJECT_OUT="$PROJECT_FOLDER/4_mergeobj_and_3dcluster_N_min_3/"
+export SCRIPT_PATH="$MERGED_OBJECT_OUT/script/1_merge_seurat_add3dcluster.r"
+mkdir -p "$MERGED_OBJECT_OUT/log" # make sure the log folder exists before redirecting into it
+
+# 1. Merge objects, 4 pieces at a time
+ls $MATCHING_OUT_PATH/*_min_dist.csv | parallel --jobs 4 \
+    'piece_name=$(basename {} _min_dist.csv); \
+    echo $piece_name; \
+    Rscript $SCRIPT_PATH \
+        --analysis_folder $MERGED_OBJECT_OUT \
+        --piece_use $piece_name \
+        --matching_out_path $MATCHING_OUT_PATH \
+        --volumncluster_path $VOLUMN_CLUSTER_PATH > $MERGED_OBJECT_OUT/log/$piece_name.log 2> $MERGED_OBJECT_OUT/log/$piece_name.err'
+
+# 2. Make plots
+PLOT_SCRIPT_PATH="$PROJECT_FOLDER/5_mergeobj_and_3dcluster/script/2_plot_3d_cluster.r"
+export PLOT_SCRIPT_PATH
+export MERGED_OBJECT_OUT
+export MATCHING_OUT_PATH
+
+ls $MATCHING_OUT_PATH/*_min_dist.csv | parallel -j 4 '
+    piece_name=$(basename {} _min_dist.csv)
+    echo $piece_name
+    Rscript $PLOT_SCRIPT_PATH \
+        --analysis_folder $MERGED_OBJECT_OUT \
+        --piece_use $piece_name \
+        --merged_obj_path $MERGED_OBJECT_OUT >> $MERGED_OBJECT_OUT/log/$piece_name.log 2>> $MERGED_OBJECT_OUT/log/$piece_name.err
+'
diff --git a/Figure6/2_PASTE2_analysis/5_calculate_metrics/script/1_get_metrics.r b/Figure6/2_PASTE2_analysis/5_calculate_metrics/script/1_get_metrics.r
new file mode 100644
index 0000000..0d86dd1
--- /dev/null
+++ b/Figure6/2_PASTE2_analysis/5_calculate_metrics/script/1_get_metrics.r
@@ -0,0 +1,215 @@
+# conda activate seurat4.3
+library(optparse)
+library(tidyverse)
+library(patchwork)
+library(googlesheets4)
+library(tictoc)
+library(future)
+library(furrr)
+library(igraph)
+library(ggraph)
+library(Seurat)
+
+# Metrics computed in this script (see the illustrative sketch below):
+# 1. Overall level: number of components, i.e. the number of 3D volumes
+# 2. 3D volume level: number of cycles (handles)
+# 3. 3D volume level: maximum degree of each volume (especially useful for samples with only 2 sections, since there will be no loops)
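+# Illustrative sketch (toy graph, not from the analysis): the three metrics on a
+# graph with two components, one of which contains a cycle
+if(FALSE){
+    toy_g = graph_from_literal(A - B, B - C, C - A, D - E)
+    components(toy_g)$no                                   # 1. number of components: 2
+    ecount(toy_g) - vcount(toy_g) + components(toy_g)$no   # 2. independent cycles: 1
+    max(degree(toy_g))                                     # 3. maximum degree: 2
+}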
+
+# color palette
+project_folder = ""
+analysis_folder = str_glue('{project_folder}/2_PASTE2_analysis/5_calculate_metrics')
+source(str_glue('{project_folder}/0_palette/color_STsubclone.r'))
+
+# parameters
+out_path = str_glue("{analysis_folder}/out")
+dir.create(out_path, recursive = T, showWarnings = F)
+
+
+######## 0. Loading data and info ########################################################
+# Load tracking sheet
+gs4_deauth()
+google_sheet_url = ""
+sheet_multi = read_sheet(google_sheet_url, sheet = 'Sample_multisections') %>%
+    filter(!is.na(PASTEFileName))
+
+
+# Load individual PASTE2 coordinates
+paste2_df_list = pmap(sheet_multi, ~with(list(...), {
+    message(LibraryName, ' ', piece_id, ' ', PASTEFileName)
+    read_csv(PASTE2XinHaoCoordinate) %>%
+        mutate(
+            PASTEpieceName = PASTEpieceName,
+            PASTEFileName = PASTEFileName,
+            LibraryName = LibraryName
+        )
+})) %>% setNames(sheet_multi$LibraryName)
+
+# Load objects
+obj_list = pmap(sheet_multi, ~with(list(...), {
+    message("Loading: ", LibraryName, " object")
+    read_rds(SeuratObject)
+})) %>% setNames(sheet_multi$LibraryName)
+
+# Load region connections
+three_d_cluster_root = str_glue("{analysis_folder}/4_multi_section_regions_Ver20231031_N_min_3")
+all_pieces = list.files(str_glue('{three_d_cluster_root}/out'))
+three_d_cluster_list = map(all_pieces, ~{
+    read_tsv(str_glue('{three_d_cluster_root}/out/{.x}/4a_3D_cluster.tsv'))
+}) %>% setNames(all_pieces)
+
+## ----- ANALYSIS ------------------------------------------------------------
+# 1. Create graphs
+message("Using undirected graph")
+graph_list = map(three_d_cluster_list, function(three_d_cluster_df_use){
+    # Split by each volume
+    g_list = three_d_cluster_df_use %>% split(., f = .$volumn_cluster1) %>%
+        map(function(df){
+            edges_df = df %>% select(region1, region2) %>% setNames(c('From', 'To'))
+            nodes_df = data.frame(name = union(df$region1, df$region2))
+
+            g <- graph_from_data_frame(edges_df, directed = FALSE, vertices = nodes_df)
+            return(g)
+        })
+}) %>% unlist(recursive = F) %>% as.list()
+
+## FUNCTIONS ------------------------------------------------------------
+
+#### NEW 2023/10/31
+count_loop = function(graph_obj){
+    # n loops = n edges - n nodes + 1 (circuit rank; assumes each volume graph is connected)
+    length(E(graph_obj)) - length(V(graph_obj)) + 1
+}
+
+plot_degree_distribution = function(graph_obj, title = 'Degree distribution'){
+    data.frame(
+        degree_frequency = degree_distribution(graph_obj)
+    ) %>% mutate(degree = 0:(nrow(.)-1)) %>% # degree_distribution() covers degrees 0..max
+        ggplot(aes(x = degree, y = degree_frequency, fill = degree_frequency)) +
+        geom_bar(stat = 'identity') +
+        cowplot::theme_cowplot() +
+        labs(title = title, x = 'Degree', y = 'Frequency') +
+        scale_fill_viridis_c()
+}
+
+multiple_degree_distribution_df = function(graph_list){
+    imap(graph_list, ~{
+        data.frame(
+            degree_frequency = degree_distribution(.x)
+        ) %>% mutate(degree = 0:(nrow(.)-1)) %>%
+            mutate(volume_id = .y)
+    }) %>% bind_rows()
+}
+plot_multiple_degree_distribution = function(graph_list){
+    multiple_degree_distribution_df(graph_list) %>%
+        ggplot(aes(x = degree, y = degree_frequency, fill = degree)) +
+        geom_bar(stat = 'identity') +
+        facet_wrap(~volume_id) +
+        theme_bw() +
+        labs(x = 'Degree', y = 'Frequency') +
+        scale_fill_viridis_c()
+}
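+# Illustrative check (toy graph): count_loop() is the circuit rank of a connected
+# graph; a triangle (3 edges, 3 nodes) has exactly one independent cycle
+if(FALSE){
+    count_loop(graph_from_literal(A - B, B - C, C - A))  # 3 - 3 + 1 = 1
+}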
+
+## ----- ANALYSIS ------------------------------------------------------------
+# Calculate metrics
+### Metric: LOOPS
+analysis_loop_out = str_glue('{out_path}/0_loop_per_volume/'); dir.create(analysis_loop_out, recursive = T, showWarnings = F)
+loop_per_volume_df = map(graph_list, count_loop) %>% unlist %>% as.data.frame %>% setNames('n_loop') %>% rownames_to_column('volume_id') %>%
+    separate(volume_id, into = c('piece', 'volume_id'), sep = '\\.', remove = F)
+write_tsv(loop_per_volume_df, str_glue('{analysis_loop_out}/0_loop_per_volume.tsv'))
+# Non-zero entries
+loop_per_volume_df %>% filter(n_loop > 0)
+
+### Metric: DEGREE
+# A. Plot the full degree distribution
+analysis_a_out = str_glue('{out_path}/A_degree_distribution_all/'); dir.create(analysis_a_out, recursive = T, showWarnings = F)
+for(piece_use in all_pieces){
+    graph_list_use = graph_list %>% .[str_subset(names(.), piece_use)]
+    if(length(graph_list_use) == 0) next()
+    distribution_df = multiple_degree_distribution_df(graph_list_use)
+    write_tsv(distribution_df, str_glue('{analysis_a_out}/A_degree_distribution_{piece_use}.tsv'))
+    pdf(str_glue('{analysis_a_out}/0_degree_distribution_{piece_use}.pdf'))
+    plot_multiple_degree_distribution(graph_list_use) %>% print
+    dev.off()
+}
+
+# 2. Plot metrics per sample
+# B. Plot the maximum degree distribution
+analysis_b_out = str_glue('{out_path}/B_degree_distribution_max/'); dir.create(analysis_b_out, recursive = T, showWarnings = F)
+for(piece_use in all_pieces){
+    graph_list_use = graph_list %>% .[str_subset(names(.), piece_use)]
+    if(length(graph_list_use) == 0) next()
+    distribution_df = multiple_degree_distribution_df(graph_list_use) %>%
+        dplyr::rename(degree_frequency_per_volume = degree_frequency)
+    # keep the max degree of each volume
+    distribution_max_df = distribution_df %>% group_by(volume_id) %>% slice_max(degree) %>%
+        dplyr::rename(max_degree = degree)
+    write_tsv(distribution_max_df, str_glue('{analysis_b_out}/B_degree_distribution_max_{piece_use}.tsv'))
+    # plot
+    pdf(str_glue('{analysis_b_out}/B_degree_distribution_max_{piece_use}.pdf'))
+    p_dist_max = distribution_max_df %>%
+        ggplot(aes(x = volume_id, y = max_degree, fill = max_degree)) +
+        geom_bar(stat = 'identity') +
+        theme_bw() +
+        labs(x = 'volume ID', y = 'Maximum Degree per volume') #+
+        #scale_fill_viridis_d()
+    print(p_dist_max)
+    dev.off()
+}
+
+# C. Plot the maximum degree distribution of all samples at once
+analysis_c_out = str_glue('{out_path}/C_degree_distribution_max_all/'); dir.create(analysis_c_out, recursive = T, showWarnings = F)
+distribution_max_allsamples_df = map(all_pieces, function(piece_use){
+    graph_list_use = graph_list %>% .[str_subset(names(.), piece_use)]
+    if(length(graph_list_use) == 0) return(NULL)
+    distribution_df = multiple_degree_distribution_df(graph_list_use) %>%
+        dplyr::rename(degree_frequency_per_volume = degree_frequency)
+    # keep the max degree of each volume
+    distribution_max_df = distribution_df %>% group_by(volume_id) %>% slice_max(degree) %>%
+        dplyr::rename(max_degree = degree)
+    return(distribution_max_df)
+}) %>% bind_rows() %>%
+separate(volume_id, into = c('piece', 'volume'), sep = '\\.', remove = F)
+
+# c2. Add in metadata: cancer type, metastasis, n sections
+sheet_multi_clean = sheet_multi %>% distinct(PASTEpieceName, N_sections, cancer_type, sample_type_tumor) %>% dplyr::rename(piece = PASTEpieceName)
+distribution_max_allsamples_df = left_join(distribution_max_allsamples_df, sheet_multi_clean, by = 'piece')
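+# Illustrative sketch (toy data): slice_max(degree), as used above, keeps the
+# highest-degree row per volume; ties return multiple rows (with_ties = TRUE by default)
+if(FALSE){
+    tibble(volume_id = c('v1', 'v1', 'v2'), degree = c(1, 3, 2)) %>%
+        group_by(volume_id) %>%
+        slice_max(degree)
+    #> v1 3 ; v2 2
+}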
+
+# c3. Add n components
+distribution_max_allsamples_df = distribution_max_allsamples_df %>%
+    group_by(piece) %>%
+    mutate(n_component = n()) %>%
+    # sort samples by n_component within each cancer type
+    arrange(cancer_type, n_component)
+sample_order = distribution_max_allsamples_df$piece %>% unique %>% rev
+write_tsv(distribution_max_allsamples_df, str_glue('{analysis_c_out}/C3_degree_distribution_max_all.tsv'))
+
+p_max_dist_all = distribution_max_allsamples_df %>% ggplot(aes(x = factor(piece, levels = sample_order), y = max_degree, fill = cancer_type)) +
+    facet_grid(.~cancer_type, space = 'free', scales = 'free') +
+    geom_violin(color = 'transparent', scale = 'width', alpha = 0.6) +
+    geom_boxplot(width = 0.1, alpha = 0.3, outlier.shape = NA) +
+    # geom_jitter itself has no seed argument, so set it through position_jitter
+    geom_jitter(aes(fill = cancer_type), color = 'gray20', size = 3, alpha = 0.7, shape = 21,
+        position = position_jitter(height = 0, seed = 123)) +
+    labs(title = 'Maximum degree distribution per sample', x = 'Sample', y = 'Maximum degree') +
+    theme_bw() + NoLegend() +
+    scale_fill_manual(values = color_cancer_all)
+
+pdf(str_glue('{analysis_c_out}/C3_degree_distribution_max_all.pdf'), w = 10, h = 5)
+p_max_dist_all %>% print
+dev.off()
+
+
+# c4. Plot n components per cancer type
+p_n_component_cancer_type = distribution_max_allsamples_df %>%
+    distinct(piece, n_component, cancer_type) %>%
+    ggplot(aes(x = factor(piece, levels = sample_order), y = n_component, fill = cancer_type)) +
+    facet_grid(.~cancer_type, space = 'free', scales = 'free') +
+    geom_bar(stat = 'identity') +
+    # Add the count label near the middle of each bar
+    geom_text(aes(label = n_component, y = n_component/2), vjust = -0.5, size = 3) +
+    labs(title = 'Number of components (tumor volumes) per sample', x = 'Sample piece', y = 'Number of components (tumor volumes)') +
+    theme_bw() + NoLegend() +
+    scale_fill_manual(values = color_cancer_all)
+
+pdf(str_glue('{analysis_c_out}/C4_n_component_per_sample.pdf'), w = 10, h = 4)
+p_n_component_cancer_type %>% print
+dev.off()
+