Skip to content

Commit

Permalink
Merge pull request #77 from SATAY-LL/update_volcano
Browse files Browse the repository at this point in the history
Update volcano
  • Loading branch information
leilaicruz authored Mar 22, 2022
2 parents 79d7c50 + 1bfa7ed commit 2aa1651
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 22 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.1.4] - 2022-01-25

### Modified

- Fix the file in the transposonmapper module inside the exporting submodule: save_as_wig.py
- The change is related to save the wig file as the standard convention which requires a lower case letter instead of a capital one that was the case of previous versions. Specifically, the change of the `VariableStep` line to `variableStep`.
- This change is not affecting the functionality of the code.
- Fix the 10% first truncation of the gene calculation. Basically redefine`N10percent` to `int(len(N_reads) * 0.1)`

## [1.1.3] - 2021-11-23

### Added
Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ message: "If you use this software, please cite it using these metadata."
repository-code: "https://github.com/SATAY-LL/Transposonmapper"
title: transposonmapper
doi: 10.5281/zenodo.4636301
version: 1.1.3
version: 1.1.4
...
40 changes: 33 additions & 7 deletions notebooks/pipeline_workflow.ipynb

Large diffs are not rendered by default.

Binary file added notebooks/volcano_WTvsdnrp1-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.1.3
current_version = 1.1.4

[bumpversion:file:transposonmapper/__version__.py]
search = __version__ = "{current_version}"
Expand Down
2 changes: 1 addition & 1 deletion tests/statistics/test_volcano.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ def test_output():
file_b= ['dnrp1-1_merged-DpnII-NlaIII-a_trimmed.sorted.bam_pergene.txt',
'dnrp1-1_merged-DpnII-NlaIII-b_trimmed.sorted.bam_pergene.txt']

data=volcano(path,file_a,path,file_b)
data=volcano(path,file_a,path,file_b,fold_change_interval=[1,-1],p_value_interval=[2,2])

assert isinstance(data,pandas.core.frame.DataFrame) , "It is expected a dataframe"
2 changes: 1 addition & 1 deletion transposonmapper/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.3"
__version__ = "1.1.4"
71 changes: 60 additions & 11 deletions transposonmapper/statistics/volcanoplot.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os, sys

import matplotlib.pyplot as plt
import numpy as np


from transposonmapper.statistics.volcano_helpers import apply_stats, info_from_datasets, make_datafile

def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', significance_threshold=0.01, normalize=True, trackgene_list=[], figure_title=""):
def volcano(path_a, filelist_a, path_b, filelist_b,fold_change_interval,p_value_interval,variable='read_per_gene',
significance_threshold=0.01, normalize=True, trackgene_list=[], figure_title="",savefigure=False):
"""This script creates a volcanoplot to show the significance of fold change between two datasets.
It is based on this website:
- https://towardsdatascience.com/inferential-statistics-series-t-test-using-numpy-2718f8f9bf2f
Expand Down Expand Up @@ -50,6 +52,16 @@ def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', si
The format of the pergene file should be TAB separated and NOT COMMA separated. if you have
it as comma separated you can convert to tab separated using the command line with
this command: `cat oldfile.txt | tr '[,]' '[\t]' > newfile.txt`
fold_change_interval : numpy.array
a vector of two elements where the first element(positive value) set the upper threshold for the positive
values and the second element(negative value) set the lower threshold for the negative values.
p_value_interval : numpy.array
a vector of two elements where the first element set the upper threshold for the positive
values and the second element set the lower threshold for the negative values.
variable : str, optional
tn_per_gene, read_per_gene or Nreadsperinsrt , by default 'read_per_gene'
significance_threshold : float, optional
Expand All @@ -61,6 +73,7 @@ def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', si
Enter a list of gene name(s) which will be highlighted in the plot (e.g. ['cdc42', 'nrp1']), by default []
figure_title : str, optional
The title of the figure if not empty, by default ""
savefigure : bool, optional
Returns
Expand Down Expand Up @@ -101,26 +114,29 @@ def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', si
grid = plt.GridSpec(1, 1, wspace=0.0, hspace=0.0)
ax = plt.subplot(grid[0,0])

colors = {False:'black', True:'red'} # based on p-value significance
sc = ax.scatter(x=volcano_df['fold_change'], y=volcano_df['p_value'], alpha=0.4, marker='.', c=volcano_df['significance'].apply(lambda x:colors[x]))
colors = {False:'gray', True:'red'} # based on p-value significance
sc = ax.scatter(x=volcano_df['fold_change'], y=volcano_df['p_value'], alpha=0.4,
s=100, c=volcano_df['significance'].apply(lambda x:colors[x]),label=variable)
ax.grid(True, which='major', axis='both', alpha=0.4)
ax.set_xlabel('Log2 FC')
ax.set_ylabel('-1*Log10 p-value')
ax.set_xlabel('Log2 FC',fontsize=16)
ax.set_ylabel('-*Log10 p-value',fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=14)
ax.legend(loc='best',fontsize=16)
if not figure_title == "":
ax.set_title(variable + " - " + figure_title)
ax.set_title(figure_title,fontsize=20)
else:
ax.set_title(variable)
ax.scatter(x=[],y=[],marker='.',color='black', label='p-value > {}'.format(significance_threshold)) #set empty scatterplot for legend
ax.scatter(x=[],y=[],marker='.',color='red', label='p-value < {}'.format(significance_threshold)) #set empty scatterplot for legend
ax.legend()
ax.set_title(variable,fontsize=20)
# ax.scatter(x=[],y=[],marker='.',color='black', label='p-value > {}'.format(significance_threshold)) #set empty scatterplot for legend
# ax.scatter(x=[],y=[],marker='.',color='red', label='p-value < {}'.format(significance_threshold)) #set empty scatterplot for legend
# ax.legend()
if not trackgene_list == []:
genenames_array = volcano_df['gene_names'].to_numpy()
for trackgene in trackgene_list:
trackgene = trackgene.upper()
if trackgene in genenames_array:
trackgene_index = tnread_gene_a.loc[tnread_gene_a['gene_names'] == trackgene].index[0]
trackgene_annot = ax.annotate(volcano_df.iloc[trackgene_index,:]['gene_names'], (volcano_df.iloc[trackgene_index,:]['fold_change'], volcano_df.iloc[trackgene_index,:]['p_value']),
size=10, c='green', bbox=dict(boxstyle="round", fc="w"))
size=12, c='green', bbox=dict(boxstyle="round", fc="w"))
trackgene_annot.get_bbox_patch().set_alpha(0.6)
else:
print('WARNING: %s not found' % trackgene)
Expand All @@ -133,6 +149,36 @@ def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', si
arrowprops=dict(arrowstyle="->"))
annot.set_visible(False)

## Annotate based on fold change and p values
if not fold_change_interval == [] or not p_value_interval == []:
target=volcano_df[(volcano_df["fold_change"]>fold_change_interval[0]) & (volcano_df["p_value"]>p_value_interval[0])]["gene_names"]
target_left=volcano_df[(volcano_df["fold_change"]<fold_change_interval[1]) & (volcano_df["p_value"]>p_value_interval[1])]["gene_names"]

if len(target)!=0:
for i in np.arange(0,len(target)):

index=np.where(volcano_df==target.iloc[i])[0][0]



trackgene_annot = ax.annotate(volcano_df.iloc[index,:]['gene_names'], (volcano_df.iloc[index,:]['fold_change'],
volcano_df.iloc[index,:]['p_value']),
size=12, c='green', bbox=dict(boxstyle="round", fc="w"))
trackgene_annot.get_bbox_patch().set_alpha(0.6)

if len(target_left)!=0:
for i in np.arange(0,len(target_left)):

index=np.where(volcano_df==target_left.iloc[i])[0][0]



trackgene_annot = ax.annotate(volcano_df.iloc[index,:]['gene_names'], (volcano_df.iloc[index,:]['fold_change'],
volcano_df.iloc[index,:]['p_value']),
size=12, c='green', bbox=dict(boxstyle="round", fc="w"))
trackgene_annot.get_bbox_patch().set_alpha(0.6)




def update_annot(ind):
Expand Down Expand Up @@ -162,6 +208,9 @@ def hover(event):

fig.canvas.mpl_connect("motion_notify_event", hover)

if savefigure:
fig.savefig("volcano_"+filelist_a[0].split("_")[0]+"vs"+filelist_b[0].split("_")[0]+".png", dpi=400)


## return function
return(volcano_df)
Expand Down

0 comments on commit 2aa1651

Please sign in to comment.