From 4950c463c0be4cc7a172f3a5f49e1b5ed2d427f4 Mon Sep 17 00:00:00 2001 From: Xin Zhou Date: Wed, 5 Feb 2025 10:29:48 -0600 Subject: [PATCH 01/10] add a hardcoded geneset group name for blitzgseas --- client/plots/gsea.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/client/plots/gsea.js b/client/plots/gsea.js index 23087874f1..7ab06f8498 100644 --- a/client/plots/gsea.js +++ b/client/plots/gsea.js @@ -104,6 +104,11 @@ class gsea { { label: 'CC: subset of GO', value: 'CC: subset of GO' }, { label: 'WikiPathways subset of CP', value: 'WikiPathways subset of CP' }, { label: 'REACTOME subset of CP', value: 'REACTOME subset of CP' }, + /* QUICK FIX + geneset name ending in "--blitzgsea" signals to use built-in genesets but not msigdb + later a proper fix is to add a radio toggle of Blitzgsea versus MSigDB, and do not use such hardcode + */ + { label: 'REACTOME (blitzgsea)', value: 'REACTOME--blitzgsea' }, { label: 'H: hallmark gene sets', value: 'H: hallmark gene sets' } ] } From 39931b8896d42132d66732910e750549e0c9b157 Mon Sep 17 00:00:00 2001 From: kgangwan Date: Wed, 5 Feb 2025 11:44:10 -0500 Subject: [PATCH 02/10] clean up gsea script and add comments --- server/utils/gsea.py | 226 ++++++++++++++++++++++--------------------- 1 file changed, 114 insertions(+), 112 deletions(-) diff --git a/server/utils/gsea.py b/server/utils/gsea.py index 244c87e429..bf0fa525ad 100755 --- a/server/utils/gsea.py +++ b/server/utils/gsea.py @@ -1,129 +1,131 @@ -# cat ~/sjpp/test.txt | python gsea.py +import blitzgsea as blitz +import json +import time +import sys +import sqlite3 +import os +import numpy as np +import pandas as pd -import blitzgsea as blitz -import json -import time -import sys -import sqlite3 -import os -import numpy as np -import pandas as pd - +# Helper function to extract gene symbols from a dictionary def extract_symbols(x): - return x['symbol'] - -def extract_plot_data(signature, geneset, library, result, center=True): - signature = signature.copy() - signature.columns = ["i","v"] - signature = signature.sort_values("v", ascending=False).set_index("i") - signature = signature[~signature.index.duplicated(keep='first')] - if center: - signature.loc[:,"v"] -= np.mean(signature.loc[:,"v"]) - signature_map = {} - for i,h in enumerate(signature.index): - signature_map[h] = i - - gs = set(library[geneset]) - hits = [i for i,x in enumerate(signature.index) if x in gs] - - running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:,0])), signature_map, gs) - running_sum = list(running_sum) - nn = np.where(np.abs(running_sum)==np.max(np.abs(running_sum)))[0][0] - #print ("nn:",nn) - #print ("running_sum:",running_sum) - #print ("es:",es) - running_sum_str=[str(elem) for elem in running_sum] - print ('result: {"nn":'+str(nn)+',"running_sum":"'+",".join(running_sum_str)+'","es":'+str(es)+'}') - + return x['symbol'] # Return the 'symbol' field from the dictionary -# Main function +# Main function try: - # Try to read a single character from stdin without blocking + # Check if there is input from stdin if sys.stdin.read(1): - # Read from stdin + # Read each line from stdin for line in sys.stdin: - # Process each line + # Parse the JSON input json_object = json.loads(line) - cachedir=json_object['cachedir'] - genes=json_object['genes'] - fold_change=json_object['fold_change'] - table_name=json_object['geneset_group'] - filter_non_coding_genes=json_object['filter_non_coding_genes'] - df = {'Genes': genes, 'fold_change': fold_change} - signature=pd.DataFrame(df) - db=json_object['db'] + cachedir = json_object['cachedir'] # Get the cache directory from the JSON object + genes = json_object['genes'] # Get the genes from the JSON object + fold_change = json_object['fold_change'] # Get the fold change values from the JSON object + table_name = json_object['geneset_group'] # Get the gene set group from the JSON object + filter_non_coding_genes = json_object['filter_non_coding_genes'] # Get the filter_non_coding_genes flag from the JSON object + db = json_object['db'] # Get the database path from the JSON object + + # Create a DataFrame for the signature + df = {'Genes': genes, 'fold_change': fold_change} # Create a dictionary with genes and fold change + signature = pd.DataFrame(df) # Convert the dictionary to a DataFrame + # Connect to the SQLite database - conn = sqlite3.connect(db) + conn = sqlite3.connect(db) # Connect to the SQLite database + cursor = conn.cursor() # Create a cursor object - # Create a cursor object using the cursor() method - cursor = conn.cursor() + # Query to get gene set IDs + query = f"SELECT id FROM terms WHERE parent_id='{table_name}'" # SQL query to get gene set IDs + cursor.execute(query) # Execute the query - # SQL query to select all data from the table - query = f"select id from terms where parent_id='" + table_name + "'" - # Execute the SQL query - cursor.execute(query) - if filter_non_coding_genes == True: - # SQL query to code all the protein coding genes - coding_genes_query = f"select * from codingGenes" - genedb = json_object['genedb'] - gene_conn = sqlite3.connect(genedb) - gene_cursor = gene_conn.cursor() - gene_cursor.execute(coding_genes_query) - coding_genes_list=gene_cursor.fetchall() - coding_genes_list=list(map(lambda x: x[0],coding_genes_list)) - signature=signature[signature['Genes'].isin(coding_genes_list)] - - # Fetch all rows from the executed SQL query - rows = cursor.fetchall() + # Filter out non-coding genes if specified + if filter_non_coding_genes: + coding_genes_query = "SELECT * FROM codingGenes" # SQL query to get coding genes + genedb = json_object['genedb'] # Get the gene database path from the JSON object + gene_conn = sqlite3.connect(genedb) # Connect to the gene database + gene_cursor = gene_conn.cursor() # Create a cursor object for the gene database + gene_cursor.execute(coding_genes_query) # Execute the query to get coding genes + coding_genes_list = gene_cursor.fetchall() # Fetch all coding genes + coding_genes_list = list(map(lambda x: x[0], coding_genes_list)) # Extract the gene symbols + signature = signature[signature['Genes'].isin(coding_genes_list)] # Filter the signature to include only coding genes - start_loop_time = time.time() - msigdb_library={} - # Iterate over the rows and print them + # Fetch all gene set IDs + rows = cursor.fetchall() # Fetch all rows from the executed query + + start_loop_time = time.time() # Record the start time of the loop + msigdb_library = {} # Initialize an empty dictionary for the gene set library + + # Iterate over gene set IDs and fetch corresponding genes for row in rows: - #print(row[0]) - query2=f"select genes from term2genes where id='" + row[0] + "'" - cursor.execute(query2) - rows2 = cursor.fetchall() - row3=json.loads(rows2[0][0]) - msigdb_library[row[0]] = list(map(extract_symbols,row3)) + query2 = f"SELECT genes FROM term2genes WHERE id='{row[0]}'" # SQL query to get genes for a gene set ID + cursor.execute(query2) # Execute the query + rows2 = cursor.fetchall() # Fetch all rows from the executed query + row3 = json.loads(rows2[0][0]) # Parse the JSON data + msigdb_library[row[0]] = list(map(extract_symbols, row3)) # Extract gene symbols and add to the library - #print ("msigdb_library:",msigdb_library) # Close the cursor and connection to the database - cursor.close() - conn.close() - stop_loop_time = time.time() - execution_time = stop_loop_time - start_loop_time - print(f"Execution time: {execution_time} seconds") - try: # Extract ES data to be plotted on client side - geneset_name=json_object['geneset_name'] # Checks if geneset_name is present, if yes it indicates the server request is for generating the image. It retrieves the result.pkl file and generates the image without having to recompute gsea again. - pickle_file=json_object['pickle_file'] - result = pd.read_pickle(os.path.join(cachedir,pickle_file)) - fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True) - random_num = np.random.rand() - png_filename = "gsea_plot_" + str(random_num) + ".png" - fig.savefig(os.path.join(cachedir,png_filename), bbox_inches='tight') - #extract_plot_data(signature, geneset_name, msigdb_library, result) # This returns raw data to client side, not currently used - print ('image: {"image_file":"' + png_filename + '"}') - except KeyError: #Initial GSEA calculation, result saved to a result.pkl pickle file - # run enrichment analysis - start_gsea_time = time.time() - if __name__ == "__main__": - result = blitz.gsea(signature, msigdb_library).T - random_num = np.random.rand() - pickle_filename="gsea_result_"+ str(random_num) +".pkl" - result.to_pickle(os.path.join(cachedir,pickle_filename)) - gsea_str='{"data":' + result.to_json() + '}' - pickle_str='{"pickle_file":"' + pickle_filename + '"}' - #print ("pickle_file:",pickle_str) - gsea_dict = json.loads(gsea_str) - pickle_dict = json.loads(pickle_str) - result_dict = {**gsea_dict, **pickle_dict} - print ("result:",json.dumps(result_dict)) - stop_gsea_time = time.time() - gsea_time = stop_gsea_time - start_gsea_time - print (f"GSEA time: {gsea_time} seconds") + cursor.close() # Close the cursor + conn.close() # Close the connection + + stop_loop_time = time.time() # Record the stop time of the loop + execution_time = stop_loop_time - start_loop_time # Calculate the execution time + print(f"Execution time: {execution_time} seconds") # Print the execution time + try: + # Check if geneset_name and pickle_file are present for generating the plot + geneset_name = json_object['geneset_name'] # Get the gene set name from the JSON object + pickle_file = json_object['pickle_file'] # Get the pickle file name from the JSON object + result = pd.read_pickle(os.path.join(cachedir, pickle_file)) # Load the result from the pickle file + fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True) # Generate the running sum plot + random_num = np.random.rand() # Generate a random number + png_filename = f"gsea_plot_{random_num}.png" # Create a filename for the plot + fig.savefig(os.path.join(cachedir, png_filename), bbox_inches='tight') # Save the plot as a PNG file + print(f'image: {{"image_file": "{png_filename}"}}') # Print the image file path in JSON format + except KeyError: + # Initial GSEA calculation and save the result to a pickle file + start_gsea_time = time.time() # Record the start time of GSEA + if __name__ == "__main__": + result = blitz.gsea(signature, msigdb_library, permutations=1000).T # Perform GSEA and transpose the result + random_num = np.random.rand() # Generate a random number + pickle_filename = f"gsea_result_{random_num}.pkl" # Create a filename for the pickle file + result.to_pickle(os.path.join(cachedir, pickle_filename)) # Save the result to the pickle file + gsea_str = f'{{"data": {result.to_json()}}}' # Convert the result to JSON format + pickle_str = f'{{"pickle_file": "{pickle_filename}"}}' # Create a JSON string for the pickle file + gsea_dict = json.loads(gsea_str) # Parse the JSON string + pickle_dict = json.loads(pickle_str) # Parse the JSON string + result_dict = {**gsea_dict, **pickle_dict} # Merge the dictionaries + print(f"result: {json.dumps(result_dict)}") # Print the result in JSON format + stop_gsea_time = time.time() # Record the stop time of GSEA + gsea_time = stop_gsea_time - start_gsea_time # Calculate the GSEA execution time + print(f"GSEA time: {gsea_time} seconds") # Print the GSEA execution time else: - pass + pass # Do nothing if there is no input from stdin except (EOFError, IOError): - pass + pass # Handle EOFError and IOError exceptions gracefully + +# Function to extract plot data for GSEA visualization +# def extract_plot_data(signature, geneset, library, result, center=True): + +# print("signature", signature) +# print("result", result) +# print("geneset", geneset) +# print("library", library) +# signature = signature.copy() # Create a copy of the signature DataFrame +# signature.columns = ["i", "v"] # Rename columns to 'i' and 'v' +# signature = signature.sort_values("v", ascending=False).set_index("i") # Sort by 'v' in descending order and set 'i' as index +# signature = signature[~signature.index.duplicated(keep='first')] # Remove duplicate indices, keeping the first occurrence + +# if center: +# signature.loc[:, "v"] -= np.mean(signature.loc[:, "v"]) # Center the signature values by subtracting the mean + +# signature_map = {h: i for i, h in enumerate(signature.index)} # Create a mapping of signature indices + +# gs = set(library[geneset]) # Get the gene set from the library +# hits = [i for i, x in enumerate(signature.index) if x in gs] # Find the indices of hits in the signature + +# running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:, 0])), signature_map, gs) # Compute running sum and enrichment score +# running_sum = list(running_sum) # Convert running sum to a list +# nn = np.where(np.abs(running_sum) == np.max(np.abs(running_sum)))[0][0] # Find the index of the maximum absolute running sum + +# running_sum_str = [str(elem) for elem in running_sum] # Convert running sum elements to strings +# print(f'result: {{"nn": {nn}, "running_sum": "{",".join(running_sum_str)}", "es": {es}}}') # Print the result in JSON format \ No newline at end of file From d1808f7ac0ff6cc435f5fc7ff1fa6cdea46df284 Mon Sep 17 00:00:00 2001 From: robinpaul85 Date: Wed, 5 Feb 2025 16:29:53 -0600 Subject: [PATCH 03/10] Added support for blitzgsea reactome, KEGG and WikiPathways --- client/plots/gsea.js | 2 + server/utils/gsea.py | 118 +++++++++++++++++++++++-------------------- 2 files changed, 65 insertions(+), 55 deletions(-) diff --git a/client/plots/gsea.js b/client/plots/gsea.js index 7ab06f8498..15fe602d65 100644 --- a/client/plots/gsea.js +++ b/client/plots/gsea.js @@ -109,6 +109,8 @@ class gsea { later a proper fix is to add a radio toggle of Blitzgsea versus MSigDB, and do not use such hardcode */ { label: 'REACTOME (blitzgsea)', value: 'REACTOME--blitzgsea' }, + { label: 'KEGG (blitzgsea)', value: 'KEGG--blitzgsea' }, + { label: 'WikiPathways (blitzgsea)', value: 'WikiPathways--blitzgsea' }, { label: 'H: hallmark gene sets', value: 'H: hallmark gene sets' } ] } diff --git a/server/utils/gsea.py b/server/utils/gsea.py index bf0fa525ad..83d238f37e 100755 --- a/server/utils/gsea.py +++ b/server/utils/gsea.py @@ -1,3 +1,5 @@ +# Test syntax: cat ~/sjpp/test.txt | time python gsea.py +# test.txt contains the json string autogenerated by the commented out nodejs code. import blitzgsea as blitz import json import time @@ -33,11 +35,40 @@ def extract_symbols(x): # Connect to the SQLite database conn = sqlite3.connect(db) # Connect to the SQLite database cursor = conn.cursor() # Create a cursor object - - # Query to get gene set IDs - query = f"SELECT id FROM terms WHERE parent_id='{table_name}'" # SQL query to get gene set IDs - cursor.execute(query) # Execute the query - + + msigdb_library = {} # Initialize an empty dictionary for the gene set library + if table_name == "REACTOME--blitzgsea": # Parse from blitzgsea reactome library + msigdb_library = blitz.enrichr.get_library("Reactome_2022") + elif table_name == "KEGG--blitzgsea": # Parse from blitzgsea KEGG library + msigdb_library = blitz.enrichr.get_library("KEGG_2021_Human") + elif table_name == "WikiPathways--blitzgsea": # Parse from blitzgsea WikiPathways library + msigdb_library = blitz.enrichr.get_library("WikiPathways_2019_Human") + else: # Use geneset groups from msigdb + # Query to get gene set IDs + query = f"SELECT id FROM terms WHERE parent_id='{table_name}'" # SQL query to get gene set IDs + cursor.execute(query) # Execute the query + + # Fetch all gene set IDs + rows = cursor.fetchall() # Fetch all rows from the executed query + + start_loop_time = time.time() # Record the start time of the loop + + # Iterate over gene set IDs and fetch corresponding genes + for row in rows: + query2 = f"SELECT genes FROM term2genes WHERE id='{row[0]}'" # SQL query to get genes for a gene set ID + cursor.execute(query2) # Execute the query + rows2 = cursor.fetchall() # Fetch all rows from the executed query + row3 = json.loads(rows2[0][0]) # Parse the JSON data + msigdb_library[row[0]] = list(map(extract_symbols, row3)) # Extract gene symbols and add to the library + + # Close the cursor and connection to the database + cursor.close() # Close the cursor + conn.close() # Close the connection + + stop_loop_time = time.time() # Record the stop time of the loop + execution_time = stop_loop_time - start_loop_time # Calculate the execution time + print(f"Execution time: {execution_time} seconds") # Print the execution time + # Filter out non-coding genes if specified if filter_non_coding_genes: coding_genes_query = "SELECT * FROM codingGenes" # SQL query to get coding genes @@ -49,35 +80,13 @@ def extract_symbols(x): coding_genes_list = list(map(lambda x: x[0], coding_genes_list)) # Extract the gene symbols signature = signature[signature['Genes'].isin(coding_genes_list)] # Filter the signature to include only coding genes - # Fetch all gene set IDs - rows = cursor.fetchall() # Fetch all rows from the executed query - - start_loop_time = time.time() # Record the start time of the loop - msigdb_library = {} # Initialize an empty dictionary for the gene set library - - # Iterate over gene set IDs and fetch corresponding genes - for row in rows: - query2 = f"SELECT genes FROM term2genes WHERE id='{row[0]}'" # SQL query to get genes for a gene set ID - cursor.execute(query2) # Execute the query - rows2 = cursor.fetchall() # Fetch all rows from the executed query - row3 = json.loads(rows2[0][0]) # Parse the JSON data - msigdb_library[row[0]] = list(map(extract_symbols, row3)) # Extract gene symbols and add to the library - - # Close the cursor and connection to the database - cursor.close() # Close the cursor - conn.close() # Close the connection - - stop_loop_time = time.time() # Record the stop time of the loop - execution_time = stop_loop_time - start_loop_time # Calculate the execution time - print(f"Execution time: {execution_time} seconds") # Print the execution time - try: # Check if geneset_name and pickle_file are present for generating the plot geneset_name = json_object['geneset_name'] # Get the gene set name from the JSON object pickle_file = json_object['pickle_file'] # Get the pickle file name from the JSON object result = pd.read_pickle(os.path.join(cachedir, pickle_file)) # Load the result from the pickle file fig = blitz.plot.running_sum(signature, geneset_name, msigdb_library, result=result.T, compact=True) # Generate the running sum plot - random_num = np.random.rand() # Generate a random number + random_num = np.random.rand() # Generate a random number for unique png filename png_filename = f"gsea_plot_{random_num}.png" # Create a filename for the plot fig.savefig(os.path.join(cachedir, png_filename), bbox_inches='tight') # Save the plot as a PNG file print(f'image: {{"image_file": "{png_filename}"}}') # Print the image file path in JSON format @@ -86,7 +95,7 @@ def extract_symbols(x): start_gsea_time = time.time() # Record the start time of GSEA if __name__ == "__main__": result = blitz.gsea(signature, msigdb_library, permutations=1000).T # Perform GSEA and transpose the result - random_num = np.random.rand() # Generate a random number + random_num = np.random.rand() # Generate a random number for unique pickle filename pickle_filename = f"gsea_result_{random_num}.pkl" # Create a filename for the pickle file result.to_pickle(os.path.join(cachedir, pickle_filename)) # Save the result to the pickle file gsea_str = f'{{"data": {result.to_json()}}}' # Convert the result to JSON format @@ -103,29 +112,28 @@ def extract_symbols(x): except (EOFError, IOError): pass # Handle EOFError and IOError exceptions gracefully -# Function to extract plot data for GSEA visualization -# def extract_plot_data(signature, geneset, library, result, center=True): - -# print("signature", signature) -# print("result", result) -# print("geneset", geneset) -# print("library", library) -# signature = signature.copy() # Create a copy of the signature DataFrame -# signature.columns = ["i", "v"] # Rename columns to 'i' and 'v' -# signature = signature.sort_values("v", ascending=False).set_index("i") # Sort by 'v' in descending order and set 'i' as index -# signature = signature[~signature.index.duplicated(keep='first')] # Remove duplicate indices, keeping the first occurrence - -# if center: -# signature.loc[:, "v"] -= np.mean(signature.loc[:, "v"]) # Center the signature values by subtracting the mean - -# signature_map = {h: i for i, h in enumerate(signature.index)} # Create a mapping of signature indices - -# gs = set(library[geneset]) # Get the gene set from the library -# hits = [i for i, x in enumerate(signature.index) if x in gs] # Find the indices of hits in the signature - -# running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:, 0])), signature_map, gs) # Compute running sum and enrichment score -# running_sum = list(running_sum) # Convert running sum to a list -# nn = np.where(np.abs(running_sum) == np.max(np.abs(running_sum)))[0][0] # Find the index of the maximum absolute running sum - -# running_sum_str = [str(elem) for elem in running_sum] # Convert running sum elements to strings -# print(f'result: {{"nn": {nn}, "running_sum": "{",".join(running_sum_str)}", "es": {es}}}') # Print the result in JSON format \ No newline at end of file +# Function to extract plot data for GSEA visualization (NOT currently being used, but will be used for generating client side gsea plots) +def extract_plot_data(signature, geneset, library, result, center=True): + print("signature", signature) + print("result", result) + print("geneset", geneset) + print("library", library) + signature = signature.copy() # Create a copy of the signature DataFrame + signature.columns = ["i", "v"] # Rename columns to 'i' and 'v' + signature = signature.sort_values("v", ascending=False).set_index("i") # Sort by 'v' in descending order and set 'i' as index + signature = signature[~signature.index.duplicated(keep='first')] # Remove duplicate indices, keeping the first occurrence + + if center: + signature.loc[:, "v"] -= np.mean(signature.loc[:, "v"]) # Center the signature values by subtracting the mean + + signature_map = {h: i for i, h in enumerate(signature.index)} # Create a mapping of signature indices + + gs = set(library[geneset]) # Get the gene set from the library + hits = [i for i, x in enumerate(signature.index) if x in gs] # Find the indices of hits in the signature + + running_sum, es = blitz.enrichment_score(np.array(np.abs(signature.iloc[:, 0])), signature_map, gs) # Compute running sum and enrichment score + running_sum = list(running_sum) # Convert running sum to a list + nn = np.where(np.abs(running_sum) == np.max(np.abs(running_sum)))[0][0] # Find the index of the maximum absolute running sum + + running_sum_str = [str(elem) for elem in running_sum] # Convert running sum elements to strings + print(f'result: {{"nn": {nn}, "running_sum": "{",".join(running_sum_str)}", "es": {es}}}') # Print the result in JSON format From d1313c34f99c74cc2ca0d73f6aac62989ec7b93d Mon Sep 17 00:00:00 2001 From: robinpaul85 Date: Thu, 6 Feb 2025 11:36:28 -0600 Subject: [PATCH 04/10] Sorting pathways in ascending order of FDR --- client/plots/gsea.js | 1 + 1 file changed, 1 insertion(+) diff --git a/client/plots/gsea.js b/client/plots/gsea.js index 15fe602d65..020a8352a2 100644 --- a/client/plots/gsea.js +++ b/client/plots/gsea.js @@ -201,6 +201,7 @@ add: // Generating the table self.gsea_table_rows = [] + Object.keys(output.data).sort((i, j) => Number(i.fdr) - Number(j.fdr)) // Sorting pathways in ascending order of FDR for (const pathway_name of Object.keys(output.data)) { const pathway = output.data[pathway_name] if ( From ac8d76b97cb4e432bc31ac6ea1e128eaedf8bed0 Mon Sep 17 00:00:00 2001 From: robinpaul85 Date: Thu, 6 Feb 2025 14:08:22 -0600 Subject: [PATCH 05/10] Using title case for gsea labels --- client/plots/gsea.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/client/plots/gsea.js b/client/plots/gsea.js index 020a8352a2..07c8dcd960 100644 --- a/client/plots/gsea.js +++ b/client/plots/gsea.js @@ -16,7 +16,7 @@ const gsea_table_cols = [ { label: 'Pathway name' }, { label: 'enrichment score' }, { label: 'normalized enrichment score' }, - { label: 'Geneset size' }, + { label: 'Geneset total size' }, { label: 'pvalue' }, { label: 'sidak' }, { label: 'FDR' }, @@ -53,7 +53,7 @@ class gsea { this.dom.controlsDiv.selectAll('*').remove() const inputs = [ { - label: 'P-value filter cutoff (linear scale)', + label: 'P-value Filter Cutoff (Linear Scale)', type: 'number', chartType: 'gsea', settingsKey: 'pvalue', @@ -62,18 +62,18 @@ class gsea { max: 1 }, { - label: 'P-value filter type', + label: 'P-value Filter Type', type: 'radio', chartType: 'gsea', settingsKey: 'adjusted_original_pvalue', title: 'Toggle between original and adjusted pvalues for volcano plot', options: [ - { label: 'adjusted', value: 'adjusted' }, - { label: 'original', value: 'original' } + { label: 'Adjusted', value: 'adjusted' }, + { label: 'Original', value: 'original' } ] }, { - label: 'Gene set size filter cutoff', + label: 'Gene Set Size Filter Cutoff', type: 'number', chartType: 'gsea', settingsKey: 'gene_set_size_cutoff', @@ -82,7 +82,7 @@ class gsea { max: 20000 }, { - label: 'Filter non-coding genes', + label: 'Filter Non-coding Genes', type: 'checkbox', chartType: 'gsea', settingsKey: 'filter_non_coding_genes', @@ -92,7 +92,7 @@ class gsea { ] const geneSet = { - label: 'Gene set group', + label: 'Gene Set Group', type: 'dropdown', chartType: 'gsea', settingsKey: 'pathway', From ec42c55674f61eaae8a9068ef2427cff17015bc9 Mon Sep 17 00:00:00 2001 From: robinpaul85 Date: Thu, 6 Feb 2025 16:38:20 -0600 Subject: [PATCH 06/10] Put blitzgsea genesets and filters behind serverconfig flag: gsea_test --- client/plots/gsea.js | 81 +++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/client/plots/gsea.js b/client/plots/gsea.js index 07c8dcd960..49e327ff7f 100644 --- a/client/plots/gsea.js +++ b/client/plots/gsea.js @@ -108,12 +108,19 @@ class gsea { geneset name ending in "--blitzgsea" signals to use built-in genesets but not msigdb later a proper fix is to add a radio toggle of Blitzgsea versus MSigDB, and do not use such hardcode */ - { label: 'REACTOME (blitzgsea)', value: 'REACTOME--blitzgsea' }, - { label: 'KEGG (blitzgsea)', value: 'KEGG--blitzgsea' }, - { label: 'WikiPathways (blitzgsea)', value: 'WikiPathways--blitzgsea' }, { label: 'H: hallmark gene sets', value: 'H: hallmark gene sets' } ] } + + // Now blitzgsea geneSets are inside serverconfig flag + if (JSON.parse(sessionStorage.getItem('optionalFeatures')).gsea_test == true) { + geneSet.options.push( + { label: 'REACTOME (blitzgsea)', value: 'REACTOME--blitzgsea' }, + { label: 'KEGG (blitzgsea)', value: 'KEGG--blitzgsea' }, + { label: 'WikiPathways (blitzgsea)', value: 'WikiPathways--blitzgsea' } + ) + } + if (!this.settings.pathway) { geneSet.options.unshift({ label: '-', value: '-' }) this.settings.pathway = '-' @@ -204,31 +211,7 @@ add: Object.keys(output.data).sort((i, j) => Number(i.fdr) - Number(j.fdr)) // Sorting pathways in ascending order of FDR for (const pathway_name of Object.keys(output.data)) { const pathway = output.data[pathway_name] - if ( - self.settings.adjusted_original_pvalue == 'adjusted' && - self.settings.pvalue >= pathway.fdr && - self.settings.gene_set_size_cutoff > pathway.geneset_size - ) { - const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es - const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes - const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval - const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak - const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr - self.gsea_table_rows.push([ - { value: pathway_name }, - { value: es }, - { value: nes }, - { value: pathway.geneset_size }, - { value: pval }, - { value: sidak }, - { value: fdr }, - { value: pathway.leading_edge } - ]) - } else if ( - self.settings.adjusted_original_pvalue == 'original' && - self.settings.pvalue >= pathway.pval && - self.settings.gene_set_size_cutoff > pathway.geneset_size - ) { + if (JSON.parse(sessionStorage.getItem('optionalFeatures')).gsea_test == true) { const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval @@ -244,6 +227,48 @@ add: { value: fdr }, { value: pathway.leading_edge } ]) + } else { + if ( + self.settings.adjusted_original_pvalue == 'adjusted' && + self.settings.pvalue >= pathway.fdr && + self.settings.gene_set_size_cutoff > pathway.geneset_size + ) { + const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es + const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes + const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval + const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak + const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr + self.gsea_table_rows.push([ + { value: pathway_name }, + { value: es }, + { value: nes }, + { value: pathway.geneset_size }, + { value: pval }, + { value: sidak }, + { value: fdr }, + { value: pathway.leading_edge } + ]) + } else if ( + self.settings.adjusted_original_pvalue == 'original' && + self.settings.pvalue >= pathway.pval && + self.settings.gene_set_size_cutoff > pathway.geneset_size + ) { + const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es + const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes + const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval + const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak + const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr + self.gsea_table_rows.push([ + { value: pathway_name }, + { value: es }, + { value: nes }, + { value: pathway.geneset_size }, + { value: pval }, + { value: sidak }, + { value: fdr }, + { value: pathway.leading_edge } + ]) + } } } From ac5cdd473f829b79748aca77abb0a55907dba0f7 Mon Sep 17 00:00:00 2001 From: robinpaul85 Date: Fri, 7 Feb 2025 14:31:02 -0600 Subject: [PATCH 07/10] Fixed download table bug in gsea and geneORA --- client/plots/geneORA.js | 3 ++- client/plots/gsea.js | 28 ++++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/client/plots/geneORA.js b/client/plots/geneORA.js index 97bede8817..fed9090278 100644 --- a/client/plots/geneORA.js +++ b/client/plots/geneORA.js @@ -6,6 +6,7 @@ import { getCompInit, copyMerge } from '#rx' import { Menu } from '../dom/menu' import { newSandboxDiv } from '../dom/sandbox.ts' import { select, pointer } from 'd3-selection' +import { downloadTable } from '../dom/table' import { roundValueAuto } from '#shared/roundValue.js' const hlcolor = '#ffa200' @@ -114,7 +115,7 @@ class geneORA { }) } this.components.controls.on('downloadClick.geneORA', () => { - downloadTable(this.table_rows, this.table_cols) + downloadTable(this.gene_ora_table_rows, this.gene_ora_table_cols) }) } getState(appState) { diff --git a/client/plots/gsea.js b/client/plots/gsea.js index 49e327ff7f..8e32d5614a 100644 --- a/client/plots/gsea.js +++ b/client/plots/gsea.js @@ -7,22 +7,11 @@ import { controlsInit } from './controls' import { getCompInit, copyMerge } from '#rx' import { Menu } from '../dom/menu' import { scaleLog, scaleLinear } from 'd3-scale' +import { downloadTable } from '../dom/table' import { roundValueAuto } from '#shared/roundValue.js' const tip = new Menu() -// table columns showing analysis results for each gene set -const gsea_table_cols = [ - { label: 'Pathway name' }, - { label: 'enrichment score' }, - { label: 'normalized enrichment score' }, - { label: 'Geneset total size' }, - { label: 'pvalue' }, - { label: 'sidak' }, - { label: 'FDR' }, - { label: 'Leading edge' } -] - class gsea { constructor() { this.type = 'gsea' @@ -136,7 +125,7 @@ class gsea { }) } this.components.controls.on('downloadClick.gsea', () => { - downloadTable(this.table_rows, this.table_cols) + downloadTable(this.gsea_table_rows, this.gsea_table_cols) }) } getState(appState) { @@ -274,8 +263,19 @@ add: self.dom.tableDiv.selectAll('*').remove() const d_gsea = self.dom.tableDiv.append('div') + // table columns showing analysis results for each gene set + self.gsea_table_cols = [ + { label: 'Pathway name' }, + { label: 'enrichment score' }, + { label: 'normalized enrichment score' }, + { label: 'Geneset total size' }, + { label: 'pvalue' }, + { label: 'sidak' }, + { label: 'FDR' }, + { label: 'Leading edge' } + ] renderTable({ - columns: gsea_table_cols, + columns: self.gsea_table_cols, rows: self.gsea_table_rows, div: d_gsea, showLines: true, From 16721dacdb3ed97ff13f6bcd389656c99932525c Mon Sep 17 00:00:00 2001 From: robinpaul85 Date: Fri, 7 Feb 2025 15:47:50 -0600 Subject: [PATCH 08/10] Changed treatment to case group --- client/plots/DEanalysis.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/plots/DEanalysis.js b/client/plots/DEanalysis.js index caa58859c4..0a8cb6b725 100644 --- a/client/plots/DEanalysis.js +++ b/client/plots/DEanalysis.js @@ -493,7 +493,7 @@ add: value: sample_size1 }, { - label: self.config.samplelst.groups[1].name + ' sample size (treatment group)', + label: self.config.samplelst.groups[1].name + ' sample size (case group)', value: sample_size2 } ] From ae3b912f6413bdd7d2d568e8f2c712011f1dde11 Mon Sep 17 00:00:00 2001 From: robinpaul85 Date: Mon, 10 Feb 2025 11:29:33 -0600 Subject: [PATCH 09/10] Filtering in GSEA out of serverconfig feature flag --- client/plots/gsea.js | 68 ++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 43 deletions(-) diff --git a/client/plots/gsea.js b/client/plots/gsea.js index 8e32d5614a..f17cba5630 100644 --- a/client/plots/gsea.js +++ b/client/plots/gsea.js @@ -200,7 +200,31 @@ add: Object.keys(output.data).sort((i, j) => Number(i.fdr) - Number(j.fdr)) // Sorting pathways in ascending order of FDR for (const pathway_name of Object.keys(output.data)) { const pathway = output.data[pathway_name] - if (JSON.parse(sessionStorage.getItem('optionalFeatures')).gsea_test == true) { + if ( + self.settings.adjusted_original_pvalue == 'adjusted' && + self.settings.pvalue >= pathway.fdr && + self.settings.gene_set_size_cutoff > pathway.geneset_size + ) { + const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es + const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes + const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval + const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak + const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr + self.gsea_table_rows.push([ + { value: pathway_name }, + { value: es }, + { value: nes }, + { value: pathway.geneset_size }, + { value: pval }, + { value: sidak }, + { value: fdr }, + { value: pathway.leading_edge } + ]) + } else if ( + self.settings.adjusted_original_pvalue == 'original' && + self.settings.pvalue >= pathway.pval && + self.settings.gene_set_size_cutoff > pathway.geneset_size + ) { const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval @@ -216,48 +240,6 @@ add: { value: fdr }, { value: pathway.leading_edge } ]) - } else { - if ( - self.settings.adjusted_original_pvalue == 'adjusted' && - self.settings.pvalue >= pathway.fdr && - self.settings.gene_set_size_cutoff > pathway.geneset_size - ) { - const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es - const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes - const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval - const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak - const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr - self.gsea_table_rows.push([ - { value: pathway_name }, - { value: es }, - { value: nes }, - { value: pathway.geneset_size }, - { value: pval }, - { value: sidak }, - { value: fdr }, - { value: pathway.leading_edge } - ]) - } else if ( - self.settings.adjusted_original_pvalue == 'original' && - self.settings.pvalue >= pathway.pval && - self.settings.gene_set_size_cutoff > pathway.geneset_size - ) { - const es = pathway.es ? roundValueAuto(pathway.es) : pathway.es - const nes = pathway.nes ? roundValueAuto(pathway.nes) : pathway.nes - const pval = pathway.pval ? roundValueAuto(pathway.pval) : pathway.pval - const sidak = pathway.sidak ? roundValueAuto(pathway.sidak) : pathway.sidak - const fdr = pathway.fdr ? roundValueAuto(pathway.fdr) : pathway.fdr - self.gsea_table_rows.push([ - { value: pathway_name }, - { value: es }, - { value: nes }, - { value: pathway.geneset_size }, - { value: pval }, - { value: sidak }, - { value: fdr }, - { value: pathway.leading_edge } - ]) - } } } From 9e427a976e83b86b158d0c4db0af274d7c241f8a Mon Sep 17 00:00:00 2001 From: xzhou Date: Mon, 10 Feb 2025 13:02:10 -0600 Subject: [PATCH 10/10] fix gsea table header --- client/plots/gsea.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/client/plots/gsea.js b/client/plots/gsea.js index f17cba5630..efbe66577b 100644 --- a/client/plots/gsea.js +++ b/client/plots/gsea.js @@ -247,14 +247,14 @@ add: const d_gsea = self.dom.tableDiv.append('div') // table columns showing analysis results for each gene set self.gsea_table_cols = [ - { label: 'Pathway name' }, - { label: 'enrichment score' }, - { label: 'normalized enrichment score' }, - { label: 'Geneset total size' }, - { label: 'pvalue' }, - { label: 'sidak' }, + { label: 'Geneset' }, + { label: 'Enrichment Score' }, + { label: 'Normalized Enrichment Score' }, + { label: 'Geneset Size' }, + { label: 'P value' }, + { label: 'Sidak' }, { label: 'FDR' }, - { label: 'Leading edge' } + { label: 'Leading Edge' } ] renderTable({ columns: self.gsea_table_cols,