From 1de8a717f20336e689d5db3901d35ca6d635dc45 Mon Sep 17 00:00:00 2001
From: David McManamon <dmcmanam@gmail.com>
Date: Thu, 12 Sep 2024 09:20:01 -0400
Subject: [PATCH 1/3] Add ONT flowcell to the output summary.csv file

---
 scripts/ont_stats.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/scripts/ont_stats.py b/scripts/ont_stats.py
index aeb384c..81c2be5 100644
--- a/scripts/ont_stats.py
+++ b/scripts/ont_stats.py
@@ -4,6 +4,7 @@
 import glob
 import os
 from collections import OrderedDict
+import re
 
 # TODO get barcode info from lims
 # check if the run is pooled
@@ -14,7 +15,7 @@ def if_pooled(sequencing_summary_df):
     return pooled
 
 # get stats metric if the run is not pooled
-def get_read_length_and_summary(sequencing_summary_df):
+def get_read_length_and_summary(sequencing_summary_df, flowcell):
     read_length = sequencing_summary_df[sequencing_summary_df["passes_filtering"]]["sequence_length_template"].tolist()
     if len(read_length) != 0:
         read_length.sort(reverse = True)
@@ -30,10 +31,10 @@ def get_read_length_and_summary(sequencing_summary_df):
         median = 0
         N50_value = 0
         N50 = 0
-    return(len(read_length), N50_value * 2 / 1000000000, N50, median)
+    return(len(read_length), N50_value * 2 / 1000000000, N50, median, flowcell)
 
 # get stats metric if the run is pooled
-def get_read_length_and_summary_pooled(sequencing_summary_df, sample_name):
+def get_read_length_and_summary_pooled(sequencing_summary_df, sample_name, flowcell):
     sample_dict = {}
     samples = sequencing_summary_df["barcode_arrangement"].unique()
     for sample in samples:
@@ -45,13 +46,21 @@ def get_read_length_and_summary_pooled(sequencing_summary_df, sample_name):
             sample_dict[sample_sub] = get_read_length_and_summary(sample_df)
     return sample_dict
 
+def extract_flowcell(text):
+    # Regular expression to match the characters after 'sequencing_summary_' and before the next '_'
+    match = re.search(r'sequencing_summary_([^_]+)', text)
+    if match:
+        return match.group(1)
+    else:
+        return None
+
 def write_to_csv(sample_dict):
     file_name = "summary.csv"
     print("Writing stats file: " + file_name)
     with open(file_name,'w') as file:
-        file.write("sample_id, Reads, Bases, N50, Meidan Read Length\n")
+        file.write("sample_id, Reads, Bases, N50, Median Read Length, Flowcell\n")
         for key, value in sample_dict.items():
-            file.write("{}, {}, {}, {}, {}\n".format(key, value[0], value[1], value[2], value[3]))
+            file.write("{}, {}, {}, {}, {}, {}\n".format(key, value[0], value[1], value[2], value[3], value[4]))
 
 if __name__ == '__main__':
     # Usage: python ont_stats.py [project_directory]
@@ -70,16 +79,19 @@ def write_to_csv(sample_dict):
             file_count = 0
             for i in file:
                 file_count += 1
+                flowcell = extract_flowcell(i)
+                print("Processing file: " + i + " from flowcell: " + flowcell)
                 summary_matrix = pd.read_csv(i, delimiter = "\t")
                 pooled = if_pooled(summary_matrix)
                 # give different sample name for multi runs on one flow cell
                 if file_count != 1:
                     sample = sample + "_" + str(file_count)
                 if pooled:
-                    sample_dict_sub = get_read_length_and_summary_pooled(summary_matrix, sample)
+                    sample_dict_sub = get_read_length_and_summary_pooled(summary_matrix, sample, flowcell)
                     sample_dict.update(sample_dict_sub)
                 else:
-                    sample_dict[sample] = get_read_length_and_summary(summary_matrix)
+                    sample_dict[sample] = get_read_length_and_summary(summary_matrix, flowcell)
+                print(sample_dict)
 
     write_to_csv(sample_dict)
     print("ONT stats complete for: " + project_directory)

From 4a69d464a6167d58dc2cecfa89b74480961cec0a Mon Sep 17 00:00:00 2001
From: David McManamon <dmcmanam@gmail.com>
Date: Thu, 26 Sep 2024 13:32:59 -0400
Subject: [PATCH 2/3] Store ONT stats to LIMS

---
 scripts/ont_stats.py | 56 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 14 deletions(-)

diff --git a/scripts/ont_stats.py b/scripts/ont_stats.py
index 81c2be5..8fe60d9 100644
--- a/scripts/ont_stats.py
+++ b/scripts/ont_stats.py
@@ -1,4 +1,5 @@
 import pandas as pd
+import requests
 import statistics
 import sys
 import glob
@@ -15,7 +16,7 @@ def if_pooled(sequencing_summary_df):
     return pooled
 
 # get stats metric if the run is not pooled
-def get_read_length_and_summary(sequencing_summary_df, flowcell):
+def get_read_length_and_summary(sequencing_summary_df, flowcell, position):
     read_length = sequencing_summary_df[sequencing_summary_df["passes_filtering"]]["sequence_length_template"].tolist()
     if len(read_length) != 0:
         read_length.sort(reverse = True)
@@ -31,19 +32,19 @@ def get_read_length_and_summary(sequencing_summary_df, flowcell):
         median = 0
         N50_value = 0
         N50 = 0
-    return(len(read_length), N50_value * 2 / 1000000000, N50, median, flowcell)
+    return(len(read_length), N50_value * 2 / 1000000000, N50, median, flowcell, position)
 
 # get stats metric if the run is pooled
-def get_read_length_and_summary_pooled(sequencing_summary_df, sample_name, flowcell):
+def get_read_length_and_summary_pooled(sequencing_summary_df, sample_name, flowcell, position):
     sample_dict = {}
     samples = sequencing_summary_df["barcode_arrangement"].unique()
     for sample in samples:
         sample_df = sequencing_summary_df.loc[sequencing_summary_df['barcode_arrangement'] == sample]
         sample_sub = sample_name + "_" + sample
-        stats = get_read_length_and_summary(sample_df)
+        stats = get_read_length_and_summary(sample_df, flowcell, position)
         # only record barcodes with more than 10000 reads
         if stats[0] > 10000:
-            sample_dict[sample_sub] = get_read_length_and_summary(sample_df)
+            sample_dict[sample_sub] = get_read_length_and_summary(sample_df, flowcell, position)
     return sample_dict
 
 def extract_flowcell(text):
@@ -54,13 +55,12 @@ def extract_flowcell(text):
     else:
         return None
 
-def write_to_csv(sample_dict):
-    file_name = "summary.csv"
+def write_to_csv(sample_dict, file_name):
     print("Writing stats file: " + file_name)
     with open(file_name,'w') as file:
-        file.write("sample_id, Reads, Bases, N50, Median Read Length, Flowcell\n")
+        file.write("sample_id, Reads, Bases, N50, Median Read Length, Flowcell, Position\n")
         for key, value in sample_dict.items():
-            file.write("{}, {}, {}, {}, {}, {}\n".format(key, value[0], value[1], value[2], value[3], value[4]))
+            file.write("{}, {}, {}, {}, {}, {}, {}\n".format(key, value[0], value[1], value[2], value[3], value[4], value[5]))
 
 if __name__ == '__main__':
     # Usage: python ont_stats.py [project_directory]
@@ -79,19 +79,47 @@ def write_to_csv(sample_dict):
             file_count = 0
             for i in file:
                 file_count += 1
+                position = i.split("/")[-2].split("_")[2]
                 flowcell = extract_flowcell(i)
-                print("Processing file: " + i + " from flowcell: " + flowcell)
+                print("Processing file: " + i + " from flowcell: " + flowcell + " at position:" + position)
                 summary_matrix = pd.read_csv(i, delimiter = "\t")
                 pooled = if_pooled(summary_matrix)
                 # give different sample name for multi runs on one flow cell
                 if file_count != 1:
                     sample = sample + "_" + str(file_count)
                 if pooled:
-                    sample_dict_sub = get_read_length_and_summary_pooled(summary_matrix, sample, flowcell)
+                    sample_dict_sub = get_read_length_and_summary_pooled(summary_matrix, sample, flowcell, position)
                     sample_dict.update(sample_dict_sub)
                 else:
-                    sample_dict[sample] = get_read_length_and_summary(summary_matrix, flowcell)
+                    sample_dict[sample] = get_read_length_and_summary(summary_matrix, flowcell, position)
                 print(sample_dict)
 
-    write_to_csv(sample_dict)
-    print("ONT stats complete for: " + project_directory)
+    print(sample_dict)
+    write_to_csv(sample_dict, "summary.csv")
+    print("ONT stats .csv complete for: " + project_directory)
+    
+    # Parameter names for the LIMS request, in the same order as the per-sample values (igoId last); columns "estimatedCoverage", "bamCoverage", "sequencerName" are skipped
+    parameter_names = ["reads", "bases", "N50", "medianReadLength", "flowcell", "sequencerPosition", "igoId"]
+
+    # Convert initial dictionary to a nested dictionary with parameter names
+    converted_sample_dict = {}
+    for key, values in sample_dict.items():
+        # Create a nested dictionary by zipping parameter names and values
+        converted_sample_dict[key] = dict(zip(parameter_names, values.append(key)))
+    print(converted_sample_dict)
+
+    # Write to LIMS endpoint with a GET:
+    # /LimsRest/updateLimsSampleLevelSequencingQcONT?igoId=04540_U_26_1_1_1_1_1&flowcell=PAY61078&reads=19775442&bases=9103016668&N50=16508&medianReadLength=766&estimatedCoverage=0&bamCoverage=0&sequencerPosition=1A&sequencerName=zeppelin
+    LIMS_ENDPOINT="https://igo-lims02.mskcc.org:8443/LimsRest/updateLimsSampleLevelSequencingQcONT"
+    for sample_id, params in converted_sample_dict.items():
+        # Send one GET per sample; params is a dict, so it must go through str() to be concatenated
+        print("Sending LIMS get request for: " + str(params))
+        # NOTE(review): verify=False skips TLS certificate verification -- confirm this is intended
+        response = requests.get(LIMS_ENDPOINT, params=params, verify=False)
+        # Check the response status and print the output
+        if response.status_code == 200:
+            print(f"Request for {sample_id} successful!")
+            print("Response Data:", response.json())
+        else:
+            print(f"Request for {sample_id} failed with status code {response.status_code}")
+            print("Error details:", response.text)
\ No newline at end of file

From 023fcebe51218027db22348c39321979a754176c Mon Sep 17 00:00:00 2001
From: David McManamon <dmcmanam@gmail.com>
Date: Thu, 26 Sep 2024 14:08:44 -0400
Subject: [PATCH 3/3] change how igoId is appended to the array

---
 scripts/ont_stats.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/ont_stats.py b/scripts/ont_stats.py
index 8fe60d9..37d743a 100644
--- a/scripts/ont_stats.py
+++ b/scripts/ont_stats.py
@@ -105,7 +105,8 @@ def write_to_csv(sample_dict, file_name):
     converted_sample_dict = {}
     for key, values in sample_dict.items():
         # Create a nested dictionary by zipping parameter names and values
-        converted_sample_dict[key] = dict(zip(parameter_names, values.append(key)))
+        values = values + (key,)
+        converted_sample_dict[key] = dict(zip(parameter_names, values))
     print(converted_sample_dict)
 
     # Write to LIMS endpoint with a GET: