From 4950c463c0be4cc7a172f3a5f49e1b5ed2d427f4 Mon Sep 17 00:00:00 2001
From: Xin Zhou <xzhou82@gmail.com>
Date: Wed, 5 Feb 2025 10:29:48 -0600
Subject: [PATCH 01/10] add a hardcoded geneset group name for blitzgseas

---
 client/plots/gsea.js | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/client/plots/gsea.js b/client/plots/gsea.js
index 23087874f1..7ab06f8498 100644
--- a/client/plots/gsea.js
+++ b/client/plots/gsea.js
@@ -104,6 +104,11 @@ class gsea {
 				{ label: 'CC: subset of GO', value: 'CC: subset of GO' },
 				{ label: 'WikiPathways subset of CP', value: 'WikiPathways subset of CP' },
 				{ label: 'REACTOME subset of CP', value: 'REACTOME subset of CP' },
+				/* QUICK FIX
+				geneset name ending in "--blitzgsea" signals to use built-in genesets but not msigdb
+				later a proper fix is to add a radio toggle of Blitzgsea versus MSigDB, and do not use such hardcode
+				*/
+				{ label: 'REACTOME (blitzgsea)', value: 'REACTOME--blitzgsea' },
 				{ label: 'H: hallmark gene sets', value: 'H: hallmark gene sets' }
 			]
 		}

From 39931b8896d42132d66732910e750549e0c9b157 Mon Sep 17 00:00:00 2001
From: kgangwan <kgangwan@stjude.org>
Date: Wed, 5 Feb 2025 11:44:10 -0500
Subject: [PATCH 02/10] clean up gsea script and add comments

---
 server/utils/gsea.py | 226 ++++++++++++++++++++++---------------------
 1 file changed, 114 insertions(+), 112 deletions(-)

diff --git a/server/utils/gsea.py b/server/utils/gsea.py
index 244c87e429..bf0fa525ad 100755
--- a/server/utils/gsea.py
+++ b/server/utils/gsea.py
@@ -1,129 +1,131 @@
-# cat ~/sjpp/test.txt | python gsea.py
+import blitzgsea as blitz  
+import json 
+import time  
+import sys  
+import sqlite3  
+import os  
+import numpy as np  
+import pandas as pd  
 
-import blitzgsea as blitz
-import json
-import time
-import sys
-import sqlite3
-import os
-import numpy as np
-import pandas as pd
-            
+# Helper function to extract gene symbols from a dictionary
 def extract_symbols(x):
-   return x['symbol']
-
-def extract_plot_data(signature, geneset, library, result, center=True):
-   signature = signature.copy()
-   signature.columns = ["i","v"]
-   signature = signature.sort_values("v", ascending=False).set_index("i")
-   signature = signature[~signature.index.duplicated(keep='first')]
-   if center:
-       signature.loc[:,"v"] -= np.mean(signature.loc[:,"v"])
-   signature_map = {}
-   for i,h in enumerate(signature.index):
-       signature_map[h] = i
-   
-   gs = set(library[geneset])
-   hits = [i for i,x in enumerate(signature.index) if x in gs]
-   
-   running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:,0])), signature_map, gs)
-   running_sum = list(running_sum)
-   nn = np.where(np.abs(running_sum)==np.max(np.abs(running_sum)))[0][0]
-   #print ("nn:",nn)
-   #print ("running_sum:",running_sum)
-   #print ("es:",es)
-   running_sum_str=[str(elem) for elem in running_sum]
-   print ('result: {"nn":'+str(nn)+',"running_sum":"'+",".join(running_sum_str)+'","es":'+str(es)+'}')
-
+    return x['symbol']  # Return the 'symbol' field from the dictionary
 
-# Main function   
+# Main function
 try:
-    # Try to read a single character from stdin without blocking
+    # Check if there is input from stdin
     if sys.stdin.read(1):
-        # Read from stdin
+        # Read each line from stdin
         for line in sys.stdin:
-            # Process each line
+            # Parse the JSON input
             json_object = json.loads(line)
-            cachedir=json_object['cachedir']
-            genes=json_object['genes']
-            fold_change=json_object['fold_change']
-            table_name=json_object['geneset_group']
-            filter_non_coding_genes=json_object['filter_non_coding_genes']
-            df = {'Genes': genes, 'fold_change': fold_change}
-            signature=pd.DataFrame(df)
-            db=json_object['db']
+            cachedir = json_object['cachedir']  # Get the cache directory from the JSON object
+            genes = json_object['genes']  # Get the genes from the JSON object
+            fold_change = json_object['fold_change']  # Get the fold change values from the JSON object
+            table_name = json_object['geneset_group']  # Get the gene set group from the JSON object
+            filter_non_coding_genes = json_object['filter_non_coding_genes']  # Get the filter_non_coding_genes flag from the JSON object
+            db = json_object['db']  # Get the database path from the JSON object
+            
+            # Create a DataFrame for the signature
+            df = {'Genes': genes, 'fold_change': fold_change}  # Create a dictionary with genes and fold change
+            signature = pd.DataFrame(df)  # Convert the dictionary to a DataFrame
+            
             # Connect to the SQLite database
-            conn = sqlite3.connect(db)
+            conn = sqlite3.connect(db)  # Connect to the SQLite database
+            cursor = conn.cursor()  # Create a cursor object
             
-            # Create a cursor object using the cursor() method
-            cursor = conn.cursor()
+            # Query to get gene set IDs
+            query = f"SELECT id FROM terms WHERE parent_id='{table_name}'"  # SQL query to get gene set IDs
+            cursor.execute(query)  # Execute the query
             
-            # SQL query to select all data from the table
-            query = f"select id from terms where parent_id='" + table_name  + "'"
-            # Execute the SQL query
-            cursor.execute(query)
-            if filter_non_coding_genes == True:
-                 # SQL query to code all the protein coding genes
-                 coding_genes_query = f"select * from codingGenes"
-                 genedb = json_object['genedb']
-                 gene_conn = sqlite3.connect(genedb)
-                 gene_cursor = gene_conn.cursor()
-                 gene_cursor.execute(coding_genes_query)
-                 coding_genes_list=gene_cursor.fetchall()
-                 coding_genes_list=list(map(lambda x: x[0],coding_genes_list))
-                 signature=signature[signature['Genes'].isin(coding_genes_list)]
-                 
-            # Fetch all rows from the executed SQL query
-            rows = cursor.fetchall()
+            # Filter out non-coding genes if specified
+            if filter_non_coding_genes:
+                coding_genes_query = "SELECT * FROM codingGenes"  # SQL query to get coding genes
+                genedb = json_object['genedb']  # Get the gene database path from the JSON object
+                gene_conn = sqlite3.connect(genedb)  # Connect to the gene database
+                gene_cursor = gene_conn.cursor()  # Create a cursor object for the gene database
+                gene_cursor.execute(coding_genes_query)  # Execute the query to get coding genes
+                coding_genes_list = gene_cursor.fetchall()  # Fetch all coding genes
+                coding_genes_list = list(map(lambda x: x[0], coding_genes_list))  # Extract the gene symbols
+                signature = signature[signature['Genes'].isin(coding_genes_list)]  # Filter the signature to include only coding genes
             
-            start_loop_time = time.time()
-            msigdb_library={} 
-            # Iterate over the rows and print them
+            # Fetch all gene set IDs
+            rows = cursor.fetchall()  # Fetch all rows from the executed query
+            
+            start_loop_time = time.time()  # Record the start time of the loop
+            msigdb_library = {}  # Initialize an empty dictionary for the gene set library
+            
+            # Iterate over gene set IDs and fetch corresponding genes
             for row in rows:
-                #print(row[0])
-                query2=f"select genes from term2genes where id='" + row[0]  + "'"
-                cursor.execute(query2)
-                rows2 = cursor.fetchall()
-                row3=json.loads(rows2[0][0])
-                msigdb_library[row[0]] = list(map(extract_symbols,row3))
+                query2 = f"SELECT genes FROM term2genes WHERE id='{row[0]}'"  # SQL query to get genes for a gene set ID
+                cursor.execute(query2)  # Execute the query
+                rows2 = cursor.fetchall()  # Fetch all rows from the executed query
+                row3 = json.loads(rows2[0][0])  # Parse the JSON data
+                msigdb_library[row[0]] = list(map(extract_symbols, row3))  # Extract gene symbols and add to the library
             
-            #print ("msigdb_library:",msigdb_library)
             # Close the cursor and connection to the database
-            cursor.close()
-            conn.close()
-            stop_loop_time = time.time()
-            execution_time = stop_loop_time - start_loop_time
-            print(f"Execution time: {execution_time} seconds")
-            try: # Extract ES data to be plotted on client side
-               geneset_name=json_object['geneset_name'] # Checks if geneset_name is present, if yes it indicates the server request is for generating the image. It retrieves the result.pkl file and generates the image without having to recompute gsea again.
-               pickle_file=json_object['pickle_file']
-               result = pd.read_pickle(os.path.join(cachedir,pickle_file))
-               fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True)
-               random_num = np.random.rand()
-               png_filename = "gsea_plot_" + str(random_num) + ".png"
-               fig.savefig(os.path.join(cachedir,png_filename), bbox_inches='tight')
-               #extract_plot_data(signature, geneset_name, msigdb_library, result) # This returns raw data to client side, not currently used
-               print ('image: {"image_file":"' + png_filename + '"}')
-            except KeyError: #Initial GSEA calculation, result saved to a result.pkl pickle file               
-               # run enrichment analysis
-               start_gsea_time = time.time()
-               if __name__ == "__main__":
-                  result = blitz.gsea(signature, msigdb_library).T
-                  random_num = np.random.rand()
-                  pickle_filename="gsea_result_"+ str(random_num) +".pkl"
-                  result.to_pickle(os.path.join(cachedir,pickle_filename))
-                  gsea_str='{"data":' + result.to_json() + '}'
-                  pickle_str='{"pickle_file":"' + pickle_filename + '"}'
-                  #print ("pickle_file:",pickle_str)
-                  gsea_dict = json.loads(gsea_str)
-                  pickle_dict = json.loads(pickle_str)
-                  result_dict = {**gsea_dict, **pickle_dict}
-                  print ("result:",json.dumps(result_dict))
-               stop_gsea_time = time.time()   
-               gsea_time = stop_gsea_time - start_gsea_time
-               print (f"GSEA time: {gsea_time} seconds")
+            cursor.close()  # Close the cursor
+            conn.close()  # Close the connection
+            
+            stop_loop_time = time.time()  # Record the stop time of the loop
+            execution_time = stop_loop_time - start_loop_time  # Calculate the execution time
+            print(f"Execution time: {execution_time} seconds")  # Print the execution time
             
+            try:
+                # Check if geneset_name and pickle_file are present for generating the plot
+                geneset_name = json_object['geneset_name']  # Get the gene set name from the JSON object
+                pickle_file = json_object['pickle_file']  # Get the pickle file name from the JSON object
+                result = pd.read_pickle(os.path.join(cachedir, pickle_file))  # Load the result from the pickle file
+                fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True)  # Generate the running sum plot
+                random_num = np.random.rand()  # Generate a random number
+                png_filename = f"gsea_plot_{random_num}.png"  # Create a filename for the plot
+                fig.savefig(os.path.join(cachedir, png_filename), bbox_inches='tight')  # Save the plot as a PNG file
+                print(f'image: {{"image_file": "{png_filename}"}}')  # Print the image file path in JSON format
+            except KeyError:
+                # Initial GSEA calculation and save the result to a pickle file
+                start_gsea_time = time.time()  # Record the start time of GSEA
+                if __name__ == "__main__":
+                    result = blitz.gsea(signature, msigdb_library, permutations=1000).T  # Perform GSEA and transpose the result
+                    random_num = np.random.rand()  # Generate a random number
+                    pickle_filename = f"gsea_result_{random_num}.pkl"  # Create a filename for the pickle file
+                    result.to_pickle(os.path.join(cachedir, pickle_filename))  # Save the result to the pickle file
+                    gsea_str = f'{{"data": {result.to_json()}}}'  # Convert the result to JSON format
+                    pickle_str = f'{{"pickle_file": "{pickle_filename}"}}'  # Create a JSON string for the pickle file
+                    gsea_dict = json.loads(gsea_str)  # Parse the JSON string
+                    pickle_dict = json.loads(pickle_str)  # Parse the JSON string
+                    result_dict = {**gsea_dict, **pickle_dict}  # Merge the dictionaries
+                    print(f"result: {json.dumps(result_dict)}")  # Print the result in JSON format
+                stop_gsea_time = time.time()  # Record the stop time of GSEA
+                gsea_time = stop_gsea_time - start_gsea_time  # Calculate the GSEA execution time
+                print(f"GSEA time: {gsea_time} seconds")  # Print the GSEA execution time
     else:
-       pass 
+        pass  # Do nothing if there is no input from stdin
 except (EOFError, IOError):
-    pass
+    pass  # Handle EOFError and IOError exceptions gracefully
+
+# Function to extract plot data for GSEA visualization
+# def extract_plot_data(signature, geneset, library, result, center=True):
+
+#     print("signature", signature)
+#     print("result", result)
+#     print("geneset", geneset)
+#     print("library", library)
+#     signature = signature.copy()  # Create a copy of the signature DataFrame
+#     signature.columns = ["i", "v"]  # Rename columns to 'i' and 'v'
+#     signature = signature.sort_values("v", ascending=False).set_index("i")  # Sort by 'v' in descending order and set 'i' as index
+#     signature = signature[~signature.index.duplicated(keep='first')]  # Remove duplicate indices, keeping the first occurrence
+    
+#     if center:
+#         signature.loc[:, "v"] -= np.mean(signature.loc[:, "v"])  # Center the signature values by subtracting the mean
+    
+#     signature_map = {h: i for i, h in enumerate(signature.index)}  # Create a mapping of signature indices
+    
+#     gs = set(library[geneset])  # Get the gene set from the library
+#     hits = [i for i, x in enumerate(signature.index) if x in gs]  # Find the indices of hits in the signature
+    
+#     running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:, 0])), signature_map, gs)  # Compute running sum and enrichment score
+#     running_sum = list(running_sum)  # Convert running sum to a list
+#     nn = np.where(np.abs(running_sum) == np.max(np.abs(running_sum)))[0][0]  # Find the index of the maximum absolute running sum
+    
+#     running_sum_str = [str(elem) for elem in running_sum]  # Convert running sum elements to strings
+#     print(f'result: {{"nn": {nn}, "running_sum": "{",".join(running_sum_str)}", "es": {es}}}')  # Print the result in JSON format
\ No newline at end of file

From d1808f7ac0ff6cc435f5fc7ff1fa6cdea46df284 Mon Sep 17 00:00:00 2001
From: robinpaul85 <robinpaul85@gmail.com>
Date: Wed, 5 Feb 2025 16:29:53 -0600
Subject: [PATCH 03/10] Added support for blitzgsea reactome, KEGG and
 WikiPathways

---
 client/plots/gsea.js |   2 +
 server/utils/gsea.py | 118 +++++++++++++++++++++++--------------------
 2 files changed, 65 insertions(+), 55 deletions(-)

diff --git a/client/plots/gsea.js b/client/plots/gsea.js
index 7ab06f8498..15fe602d65 100644
--- a/client/plots/gsea.js
+++ b/client/plots/gsea.js
@@ -109,6 +109,8 @@ class gsea {
 				later a proper fix is to add a radio toggle of Blitzgsea versus MSigDB, and do not use such hardcode
 				*/
 				{ label: 'REACTOME (blitzgsea)', value: 'REACTOME--blitzgsea' },
+				{ label: 'KEGG (blitzgsea)', value: 'KEGG--blitzgsea' },
+				{ label: 'WikiPathways (blitzgsea)', value: 'WikiPathways--blitzgsea' },
 				{ label: 'H: hallmark gene sets', value: 'H: hallmark gene sets' }
 			]
 		}
diff --git a/server/utils/gsea.py b/server/utils/gsea.py
index bf0fa525ad..83d238f37e 100755
--- a/server/utils/gsea.py
+++ b/server/utils/gsea.py
@@ -1,3 +1,5 @@
+# Test syntax: cat ~/sjpp/test.txt | time python gsea.py
+# test.txt contains the json string autogenerated by the commented out nodejs code.
 import blitzgsea as blitz  
 import json 
 import time  
@@ -33,11 +35,40 @@ def extract_symbols(x):
             # Connect to the SQLite database
             conn = sqlite3.connect(db)  # Connect to the SQLite database
             cursor = conn.cursor()  # Create a cursor object
-            
-            # Query to get gene set IDs
-            query = f"SELECT id FROM terms WHERE parent_id='{table_name}'"  # SQL query to get gene set IDs
-            cursor.execute(query)  # Execute the query
-            
+
+            msigdb_library = {}  # Initialize an empty dictionary for the gene set library
+            if table_name == "REACTOME--blitzgsea": # Parse from blitzgsea reactome library
+              msigdb_library = blitz.enrichr.get_library("Reactome_2022")
+            elif table_name == "KEGG--blitzgsea": # Parse from blitzgsea KEGG library
+              msigdb_library = blitz.enrichr.get_library("KEGG_2021_Human")
+            elif table_name == "WikiPathways--blitzgsea": # Parse from blitzgsea WikiPathways library
+              msigdb_library = blitz.enrichr.get_library("WikiPathways_2019_Human")              
+            else: # Use geneset groups from msigdb    
+              # Query to get gene set IDs
+              query = f"SELECT id FROM terms WHERE parent_id='{table_name}'"  # SQL query to get gene set IDs
+              cursor.execute(query)  # Execute the query
+              
+              # Fetch all gene set IDs
+              rows = cursor.fetchall()  # Fetch all rows from the executed query
+              
+              start_loop_time = time.time()  # Record the start time of the loop
+              
+              # Iterate over gene set IDs and fetch corresponding genes
+              for row in rows:
+                  query2 = f"SELECT genes FROM term2genes WHERE id='{row[0]}'"  # SQL query to get genes for a gene set ID
+                  cursor.execute(query2)  # Execute the query
+                  rows2 = cursor.fetchall()  # Fetch all rows from the executed query
+                  row3 = json.loads(rows2[0][0])  # Parse the JSON data
+                  msigdb_library[row[0]] = list(map(extract_symbols, row3))  # Extract gene symbols and add to the library
+              
+              # Close the cursor and connection to the database
+              cursor.close()  # Close the cursor
+              conn.close()  # Close the connection
+              
+              stop_loop_time = time.time()  # Record the stop time of the loop
+              execution_time = stop_loop_time - start_loop_time  # Calculate the execution time
+              print(f"Execution time: {execution_time} seconds")  # Print the execution time
+
             # Filter out non-coding genes if specified
             if filter_non_coding_genes:
                 coding_genes_query = "SELECT * FROM codingGenes"  # SQL query to get coding genes
@@ -49,35 +80,13 @@ def extract_symbols(x):
                 coding_genes_list = list(map(lambda x: x[0], coding_genes_list))  # Extract the gene symbols
                 signature = signature[signature['Genes'].isin(coding_genes_list)]  # Filter the signature to include only coding genes
             
-            # Fetch all gene set IDs
-            rows = cursor.fetchall()  # Fetch all rows from the executed query
-            
-            start_loop_time = time.time()  # Record the start time of the loop
-            msigdb_library = {}  # Initialize an empty dictionary for the gene set library
-            
-            # Iterate over gene set IDs and fetch corresponding genes
-            for row in rows:
-                query2 = f"SELECT genes FROM term2genes WHERE id='{row[0]}'"  # SQL query to get genes for a gene set ID
-                cursor.execute(query2)  # Execute the query
-                rows2 = cursor.fetchall()  # Fetch all rows from the executed query
-                row3 = json.loads(rows2[0][0])  # Parse the JSON data
-                msigdb_library[row[0]] = list(map(extract_symbols, row3))  # Extract gene symbols and add to the library
-            
-            # Close the cursor and connection to the database
-            cursor.close()  # Close the cursor
-            conn.close()  # Close the connection
-            
-            stop_loop_time = time.time()  # Record the stop time of the loop
-            execution_time = stop_loop_time - start_loop_time  # Calculate the execution time
-            print(f"Execution time: {execution_time} seconds")  # Print the execution time
-            
             try:
                 # Check if geneset_name and pickle_file are present for generating the plot
                 geneset_name = json_object['geneset_name']  # Get the gene set name from the JSON object
                 pickle_file = json_object['pickle_file']  # Get the pickle file name from the JSON object
                 result = pd.read_pickle(os.path.join(cachedir, pickle_file))  # Load the result from the pickle file
                 fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True)  # Generate the running sum plot
-                random_num = np.random.rand()  # Generate a random number
+                random_num = np.random.rand()  # Generate a random number for unique png filename
                 png_filename = f"gsea_plot_{random_num}.png"  # Create a filename for the plot
                 fig.savefig(os.path.join(cachedir, png_filename), bbox_inches='tight')  # Save the plot as a PNG file
                 print(f'image: {{"image_file": "{png_filename}"}}')  # Print the image file path in JSON format
@@ -86,7 +95,7 @@ def extract_symbols(x):
                 start_gsea_time = time.time()  # Record the start time of GSEA
                 if __name__ == "__main__":
                     result = blitz.gsea(signature, msigdb_library, permutations=1000).T  # Perform GSEA and transpose the result
-                    random_num = np.random.rand()  # Generate a random number
+                    random_num = np.random.rand()  # Generate a random number for unique pickle filename
                     pickle_filename = f"gsea_result_{random_num}.pkl"  # Create a filename for the pickle file
                     result.to_pickle(os.path.join(cachedir, pickle_filename))  # Save the result to the pickle file
                     gsea_str = f'{{"data": {result.to_json()}}}'  # Convert the result to JSON format
@@ -103,29 +112,28 @@ def extract_symbols(x):
 except (EOFError, IOError):
     pass  # Handle EOFError and IOError exceptions gracefully
 
-# Function to extract plot data for GSEA visualization
-# def extract_plot_data(signature, geneset, library, result, center=True):
-
-#     print("signature", signature)
-#     print("result", result)
-#     print("geneset", geneset)
-#     print("library", library)
-#     signature = signature.copy()  # Create a copy of the signature DataFrame
-#     signature.columns = ["i", "v"]  # Rename columns to 'i' and 'v'
-#     signature = signature.sort_values("v", ascending=False).set_index("i")  # Sort by 'v' in descending order and set 'i' as index
-#     signature = signature[~signature.index.duplicated(keep='first')]  # Remove duplicate indices, keeping the first occurrence
-    
-#     if center:
-#         signature.loc[:, "v"] -= np.mean(signature.loc[:, "v"])  # Center the signature values by subtracting the mean
-    
-#     signature_map = {h: i for i, h in enumerate(signature.index)}  # Create a mapping of signature indices
-    
-#     gs = set(library[geneset])  # Get the gene set from the library
-#     hits = [i for i, x in enumerate(signature.index) if x in gs]  # Find the indices of hits in the signature
-    
-#     running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:, 0])), signature_map, gs)  # Compute running sum and enrichment score
-#     running_sum = list(running_sum)  # Convert running sum to a list
-#     nn = np.where(np.abs(running_sum) == np.max(np.abs(running_sum)))[0][0]  # Find the index of the maximum absolute running sum
-    
-#     running_sum_str = [str(elem) for elem in running_sum]  # Convert running sum elements to strings
-#     print(f'result: {{"nn": {nn}, "running_sum": "{",".join(running_sum_str)}", "es": {es}}}')  # Print the result in JSON format
\ No newline at end of file
+# Function to extract plot data for GSEA visualization (NOT currently being used, but will be used for generating client side gsea plots)
+def extract_plot_data(signature, geneset, library, result, center=True):
+     print("signature", signature)
+     print("result", result)
+     print("geneset", geneset)
+     print("library", library)
+     signature = signature.copy()  # Create a copy of the signature DataFrame
+     signature.columns = ["i", "v"]  # Rename columns to 'i' and 'v'
+     signature = signature.sort_values("v", ascending=False).set_index("i")  # Sort by 'v' in descending order and set 'i' as index
+     signature = signature[~signature.index.duplicated(keep='first')]  # Remove duplicate indices, keeping the first occurrence
+   
+     if center:
+         signature.loc[:, "v"] -= np.mean(signature.loc[:, "v"])  # Center the signature values by subtracting the mean
+   
+     signature_map = {h: i for i, h in enumerate(signature.index)}  # Create a mapping of signature indices
+   
+     gs = set(library[geneset])  # Get the gene set from the library
+     hits = [i for i, x in enumerate(signature.index) if x in gs]  # Find the indices of hits in the signature
+   
+     running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:, 0])), signature_map, gs)  # Compute running sum and enrichment score
+     running_sum = list(running_sum)  # Convert running sum to a list
+     nn = np.where(np.abs(running_sum) == np.max(np.abs(running_sum)))[0][0]  # Find the index of the maximum absolute running sum
+   
+     running_sum_str = [str(elem) for elem in running_sum]  # Convert running sum elements to strings
+     print(f'result: {{"nn": {nn}, "running_sum": "{",".join(running_sum_str)}", "es": {es}}}')  # Print the result in JSON format

From d1313c34f99c74cc2ca0d73f6aac62989ec7b93d Mon Sep 17 00:00:00 2001
From: robinpaul85 <robinpaul85@gmail.com>
Date: Thu, 6 Feb 2025 11:36:28 -0600
Subject: [PATCH 04/10] Sorting pathways in ascending order of FDR

---
 client/plots/gsea.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/client/plots/gsea.js b/client/plots/gsea.js
index 15fe602d65..020a8352a2 100644
--- a/client/plots/gsea.js
+++ b/client/plots/gsea.js
@@ -201,6 +201,7 @@ add:
 
 		// Generating the table
 		self.gsea_table_rows = []
+		Object.keys(output.data).sort((i, j) => Number(i.fdr) - Number(j.fdr)) // Sorting pathways in ascending order of FDR
 		for (const pathway_name of Object.keys(output.data)) {
 			const pathway = output.data[pathway_name]
 			if (

From ac8d76b97cb4e432bc31ac6ea1e128eaedf8bed0 Mon Sep 17 00:00:00 2001
From: robinpaul85 <robinpaul85@gmail.com>
Date: Thu, 6 Feb 2025 14:08:22 -0600
Subject: [PATCH 05/10] Using title case for gsea labels

---
 client/plots/gsea.js | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/client/plots/gsea.js b/client/plots/gsea.js
index 020a8352a2..07c8dcd960 100644
--- a/client/plots/gsea.js
+++ b/client/plots/gsea.js
@@ -16,7 +16,7 @@ const gsea_table_cols = [
 	{ label: 'Pathway name' },
 	{ label: 'enrichment score' },
 	{ label: 'normalized enrichment score' },
-	{ label: 'Geneset size' },
+	{ label: 'Geneset total size' },
 	{ label: 'pvalue' },
 	{ label: 'sidak' },
 	{ label: 'FDR' },
@@ -53,7 +53,7 @@ class gsea {
 		this.dom.controlsDiv.selectAll('*').remove()
 		const inputs = [
 			{
-				label: 'P-value filter cutoff (linear scale)',
+				label: 'P-value Filter Cutoff (Linear Scale)',
 				type: 'number',
 				chartType: 'gsea',
 				settingsKey: 'pvalue',
@@ -62,18 +62,18 @@ class gsea {
 				max: 1
 			},
 			{
-				label: 'P-value filter type',
+				label: 'P-value Filter Type',
 				type: 'radio',
 				chartType: 'gsea',
 				settingsKey: 'adjusted_original_pvalue',
 				title: 'Toggle between original and adjusted pvalues for volcano plot',
 				options: [
-					{ label: 'adjusted', value: 'adjusted' },
-					{ label: 'original', value: 'original' }
+					{ label: 'Adjusted', value: 'adjusted' },
+					{ label: 'Original', value: 'original' }
 				]
 			},
 			{
-				label: 'Gene set size filter cutoff',
+				label: 'Gene Set Size Filter Cutoff',
 				type: 'number',
 				chartType: 'gsea',
 				settingsKey: 'gene_set_size_cutoff',
@@ -82,7 +82,7 @@ class gsea {
 				max: 20000
 			},
 			{
-				label: 'Filter non-coding genes',
+				label: 'Filter Non-coding Genes',
 				type: 'checkbox',
 				chartType: 'gsea',
 				settingsKey: 'filter_non_coding_genes',
@@ -92,7 +92,7 @@ class gsea {
 		]
 
 		const geneSet = {
-			label: 'Gene set group',
+			label: 'Gene Set Group',
 			type: 'dropdown',
 			chartType: 'gsea',
 			settingsKey: 'pathway',

From ec42c55674f61eaae8a9068ef2427cff17015bc9 Mon Sep 17 00:00:00 2001
From: robinpaul85 <robinpaul85@gmail.com>
Date: Thu, 6 Feb 2025 16:38:20 -0600
Subject: [PATCH 06/10] Put blitzgsea genesets and filters behind serverconfig
 flag: gsea_test

---
 client/plots/gsea.js | 81 +++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/client/plots/gsea.js b/client/plots/gsea.js
index 07c8dcd960..49e327ff7f 100644
--- a/client/plots/gsea.js
+++ b/client/plots/gsea.js
@@ -108,12 +108,19 @@ class gsea {
 				geneset name ending in "--blitzgsea" signals to use built-in genesets but not msigdb
 				later a proper fix is to add a radio toggle of Blitzgsea versus MSigDB, and do not use such hardcode
 				*/
-				{ label: 'REACTOME (blitzgsea)', value: 'REACTOME--blitzgsea' },
-				{ label: 'KEGG (blitzgsea)', value: 'KEGG--blitzgsea' },
-				{ label: 'WikiPathways (blitzgsea)', value: 'WikiPathways--blitzgsea' },
 				{ label: 'H: hallmark gene sets', value: 'H: hallmark gene sets' }
 			]
 		}
+
+		// Now blitzgsea geneSets are inside serverconfig flag
+		if (JSON.parse(sessionStorage.getItem('optionalFeatures')).gsea_test == true) {
+			geneSet.options.push(
+				{ label: 'REACTOME (blitzgsea)', value: 'REACTOME--blitzgsea' },
+				{ label: 'KEGG (blitzgsea)', value: 'KEGG--blitzgsea' },
+				{ label: 'WikiPathways (blitzgsea)', value: 'WikiPathways--blitzgsea' }
+			)
+		}
+
 		if (!this.settings.pathway) {
 			geneSet.options.unshift({ label: '-', value: '-' })
 			this.settings.pathway = '-'
@@ -204,31 +211,7 @@ add:
 		Object.keys(output.data).sort((i, j) => Number(i.fdr) - Number(j.fdr)) // Sorting pathways in ascending order of FDR
 		for (const pathway_name of Object.keys(output.data)) {
 			const pathway = output.data[pathway_name]
-			if (
-				self.settings.adjusted_original_pvalue == 'adjusted' &&
-				self.settings.pvalue >= pathway.fdr &&
-				self.settings.gene_set_size_cutoff > pathway.geneset_size
-			) {
-				const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es
-				const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes
-				const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval
-				const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak
-				const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr
-				self.gsea_table_rows.push([
-					{ value: pathway_name },
-					{ value: es },
-					{ value: nes },
-					{ value: pathway.geneset_size },
-					{ value: pval },
-					{ value: sidak },
-					{ value: fdr },
-					{ value: pathway.leading_edge }
-				])
-			} else if (
-				self.settings.adjusted_original_pvalue == 'original' &&
-				self.settings.pvalue >= pathway.pval &&
-				self.settings.gene_set_size_cutoff > pathway.geneset_size
-			) {
+			if (JSON.parse(sessionStorage.getItem('optionalFeatures')).gsea_test == true) {
 				const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es
 				const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes
 				const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval
@@ -244,6 +227,48 @@ add:
 					{ value: fdr },
 					{ value: pathway.leading_edge }
 				])
+			} else {
+				if (
+					self.settings.adjusted_original_pvalue == 'adjusted' &&
+					self.settings.pvalue >= pathway.fdr &&
+					self.settings.gene_set_size_cutoff > pathway.geneset_size
+				) {
+					const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es
+					const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes
+					const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval
+					const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak
+					const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr
+					self.gsea_table_rows.push([
+						{ value: pathway_name },
+						{ value: es },
+						{ value: nes },
+						{ value: pathway.geneset_size },
+						{ value: pval },
+						{ value: sidak },
+						{ value: fdr },
+						{ value: pathway.leading_edge }
+					])
+				} else if (
+					self.settings.adjusted_original_pvalue == 'original' &&
+					self.settings.pvalue >= pathway.pval &&
+					self.settings.gene_set_size_cutoff > pathway.geneset_size
+				) {
+					const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es
+					const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes
+					const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval
+					const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak
+					const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr
+					self.gsea_table_rows.push([
+						{ value: pathway_name },
+						{ value: es },
+						{ value: nes },
+						{ value: pathway.geneset_size },
+						{ value: pval },
+						{ value: sidak },
+						{ value: fdr },
+						{ value: pathway.leading_edge }
+					])
+				}
 			}
 		}
 

From ac5cdd473f829b79748aca77abb0a55907dba0f7 Mon Sep 17 00:00:00 2001
From: robinpaul85 <robinpaul85@gmail.com>
Date: Fri, 7 Feb 2025 14:31:02 -0600
Subject: [PATCH 07/10] Fixed download table bug in gsea and geneORA

---
 client/plots/geneORA.js |  3 ++-
 client/plots/gsea.js    | 28 ++++++++++++++--------------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/client/plots/geneORA.js b/client/plots/geneORA.js
index 97bede8817..fed9090278 100644
--- a/client/plots/geneORA.js
+++ b/client/plots/geneORA.js
@@ -6,6 +6,7 @@ import { getCompInit, copyMerge } from '#rx'
 import { Menu } from '../dom/menu'
 import { newSandboxDiv } from '../dom/sandbox.ts'
 import { select, pointer } from 'd3-selection'
+import { downloadTable } from '../dom/table'
 import { roundValueAuto } from '#shared/roundValue.js'
 
 const hlcolor = '#ffa200'
@@ -114,7 +115,7 @@ class geneORA {
 			})
 		}
 		this.components.controls.on('downloadClick.geneORA', () => {
-			downloadTable(this.table_rows, this.table_cols)
+			downloadTable(this.gene_ora_table_rows, this.gene_ora_table_cols)
 		})
 	}
 	getState(appState) {
diff --git a/client/plots/gsea.js b/client/plots/gsea.js
index 49e327ff7f..8e32d5614a 100644
--- a/client/plots/gsea.js
+++ b/client/plots/gsea.js
@@ -7,22 +7,11 @@ import { controlsInit } from './controls'
 import { getCompInit, copyMerge } from '#rx'
 import { Menu } from '../dom/menu'
 import { scaleLog, scaleLinear } from 'd3-scale'
+import { downloadTable } from '../dom/table'
 import { roundValueAuto } from '#shared/roundValue.js'
 
 const tip = new Menu()
 
-// table columns showing analysis results for each gene set
-const gsea_table_cols = [
-	{ label: 'Pathway name' },
-	{ label: 'enrichment score' },
-	{ label: 'normalized enrichment score' },
-	{ label: 'Geneset total size' },
-	{ label: 'pvalue' },
-	{ label: 'sidak' },
-	{ label: 'FDR' },
-	{ label: 'Leading edge' }
-]
-
 class gsea {
 	constructor() {
 		this.type = 'gsea'
@@ -136,7 +125,7 @@ class gsea {
 			})
 		}
 		this.components.controls.on('downloadClick.gsea', () => {
-			downloadTable(this.table_rows, this.table_cols)
+			downloadTable(this.gsea_table_rows, this.gsea_table_cols)
 		})
 	}
 	getState(appState) {
@@ -274,8 +263,19 @@ add:
 
 		self.dom.tableDiv.selectAll('*').remove()
 		const d_gsea = self.dom.tableDiv.append('div')
+		// table columns showing analysis results for each gene set
+		self.gsea_table_cols = [
+			{ label: 'Pathway name' },
+			{ label: 'enrichment score' },
+			{ label: 'normalized enrichment score' },
+			{ label: 'Geneset total size' },
+			{ label: 'pvalue' },
+			{ label: 'sidak' },
+			{ label: 'FDR' },
+			{ label: 'Leading edge' }
+		]
 		renderTable({
-			columns: gsea_table_cols,
+			columns: self.gsea_table_cols,
 			rows: self.gsea_table_rows,
 			div: d_gsea,
 			showLines: true,

From 16721dacdb3ed97ff13f6bcd389656c99932525c Mon Sep 17 00:00:00 2001
From: robinpaul85 <robinpaul85@gmail.com>
Date: Fri, 7 Feb 2025 15:47:50 -0600
Subject: [PATCH 08/10] Changed treatment to case group

---
 client/plots/DEanalysis.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/client/plots/DEanalysis.js b/client/plots/DEanalysis.js
index caa58859c4..0a8cb6b725 100644
--- a/client/plots/DEanalysis.js
+++ b/client/plots/DEanalysis.js
@@ -493,7 +493,7 @@ add:
 				value: sample_size1
 			},
 			{
-				label: self.config.samplelst.groups[1].name + ' sample size (treatment group)',
+				label: self.config.samplelst.groups[1].name + ' sample size (case group)',
 				value: sample_size2
 			}
 		]

From ae3b912f6413bdd7d2d568e8f2c712011f1dde11 Mon Sep 17 00:00:00 2001
From: robinpaul85 <robinpaul85@gmail.com>
Date: Mon, 10 Feb 2025 11:29:33 -0600
Subject: [PATCH 09/10] Filtering in GSEA out of serverconfig feature flag

---
 client/plots/gsea.js | 68 ++++++++++++++++----------------------------
 1 file changed, 25 insertions(+), 43 deletions(-)

diff --git a/client/plots/gsea.js b/client/plots/gsea.js
index 8e32d5614a..f17cba5630 100644
--- a/client/plots/gsea.js
+++ b/client/plots/gsea.js
@@ -200,7 +200,31 @@ add:
 		Object.keys(output.data).sort((i, j) => Number(i.fdr) - Number(j.fdr)) // Sorting pathways in ascending order of FDR
 		for (const pathway_name of Object.keys(output.data)) {
 			const pathway = output.data[pathway_name]
-			if (JSON.parse(sessionStorage.getItem('optionalFeatures')).gsea_test == true) {
+			if (
+				self.settings.adjusted_original_pvalue == 'adjusted' &&
+				self.settings.pvalue >= pathway.fdr &&
+				self.settings.gene_set_size_cutoff > pathway.geneset_size
+			) {
+				const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es
+				const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes
+				const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval
+				const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak
+				const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr
+				self.gsea_table_rows.push([
+					{ value: pathway_name },
+					{ value: es },
+					{ value: nes },
+					{ value: pathway.geneset_size },
+					{ value: pval },
+					{ value: sidak },
+					{ value: fdr },
+					{ value: pathway.leading_edge }
+				])
+			} else if (
+				self.settings.adjusted_original_pvalue == 'original' &&
+				self.settings.pvalue >= pathway.pval &&
+				self.settings.gene_set_size_cutoff > pathway.geneset_size
+			) {
 				const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es
 				const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes
 				const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval
@@ -216,48 +240,6 @@ add:
 					{ value: fdr },
 					{ value: pathway.leading_edge }
 				])
-			} else {
-				if (
-					self.settings.adjusted_original_pvalue == 'adjusted' &&
-					self.settings.pvalue >= pathway.fdr &&
-					self.settings.gene_set_size_cutoff > pathway.geneset_size
-				) {
-					const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es
-					const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes
-					const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval
-					const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak
-					const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr
-					self.gsea_table_rows.push([
-						{ value: pathway_name },
-						{ value: es },
-						{ value: nes },
-						{ value: pathway.geneset_size },
-						{ value: pval },
-						{ value: sidak },
-						{ value: fdr },
-						{ value: pathway.leading_edge }
-					])
-				} else if (
-					self.settings.adjusted_original_pvalue == 'original' &&
-					self.settings.pvalue >= pathway.pval &&
-					self.settings.gene_set_size_cutoff > pathway.geneset_size
-				) {
-					const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es
-					const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes
-					const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval
-					const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak
-					const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr
-					self.gsea_table_rows.push([
-						{ value: pathway_name },
-						{ value: es },
-						{ value: nes },
-						{ value: pathway.geneset_size },
-						{ value: pval },
-						{ value: sidak },
-						{ value: fdr },
-						{ value: pathway.leading_edge }
-					])
-				}
 			}
 		}
 

From 9e427a976e83b86b158d0c4db0af274d7c241f8a Mon Sep 17 00:00:00 2001
From: xzhou <xzhou82@gmail.com>
Date: Mon, 10 Feb 2025 13:02:10 -0600
Subject: [PATCH 10/10] fix gsea table header

---
 client/plots/gsea.js | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/client/plots/gsea.js b/client/plots/gsea.js
index f17cba5630..efbe66577b 100644
--- a/client/plots/gsea.js
+++ b/client/plots/gsea.js
@@ -247,14 +247,14 @@ add:
 		const d_gsea = self.dom.tableDiv.append('div')
 		// table columns showing analysis results for each gene set
 		self.gsea_table_cols = [
-			{ label: 'Pathway name' },
-			{ label: 'enrichment score' },
-			{ label: 'normalized enrichment score' },
-			{ label: 'Geneset total size' },
-			{ label: 'pvalue' },
-			{ label: 'sidak' },
+			{ label: 'Geneset' },
+			{ label: 'Enrichment Score' },
+			{ label: 'Normalized Enrichment Score' },
+			{ label: 'Geneset Size' },
+			{ label: 'P value' },
+			{ label: 'Sidak' },
 			{ label: 'FDR' },
-			{ label: 'Leading edge' }
+			{ label: 'Leading Edge' }
 		]
 		renderTable({
 			columns: self.gsea_table_cols,