Skip to content

Commit

Permalink
Merge pull request #49 from sanjaynagi/diffexp-nb-gene-id-by-position…
Browse files Browse the repository at this point in the history
…-22-08-23
  • Loading branch information
sanjaynagi authored Aug 22, 2023
2 parents 6ea46f5 + 1b233ec commit b96e2b0
Show file tree
Hide file tree
Showing 10 changed files with 317 additions and 1,085 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ Documentation: https://sanjaynagi.github.io/AnoExpress/

<br></br>

[![Custom badge](https://img.shields.io/endpoint?color=white&logo=Google%20Colab&url=https%3A%2F%2Fraw.githubusercontent.com%2Fsanjaynagi%2FAnoExpress%2Fmain%2Fgraphics%2Fbadge-diffexp.json)](https://colab.research.google.com/github/sanjaynagi/AnoExpress/blob/main/workflow/notebooks/differential-expression-meta-analysis.ipynb)


#### Contributing datasets

If you would like to contribute a dataset from a major malaria vector, please raise an [issue](https://github.com/sanjaynagi/AnoExpress/issues) or [email me!](mailto:[email protected]?subject=AnoExpress-datasets) The project works with data starting from the raw read count stage.
12 changes: 10 additions & 2 deletions anoexpress/anoexpress.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def data(data_type, analysis, microarray=False, gene_id=None, sort_by=None, anno
microarray: bool, optional
whether to include the IR-Tex microarray data in the requested data. Default is False.
gene_id: str or list, optional
A string (AGAP/AFUN identifier), or list of strings, or path to a file containing a list of gene ids in the first column.
A string (AGAP/AFUN identifier or genomic span in the format 2L:500-10000), or list of strings, or path to a file containing a list of gene ids in the first column.
Input file can be .tsv, .txt, .csv, or .xlsx.
sort_by: {"median", "mean", "agap", "position", None}, optional
sort by median/mean of fold changes (descending), or by AGAP, or by position in the genome, or don't sort input gene ids.
Expand Down Expand Up @@ -130,7 +130,15 @@ def data(data_type, analysis, microarray=False, gene_id=None, sort_by=None, anno
# subset to the gene ids of interest including reading file
if gene_id is not None:
if isinstance(gene_id, str):
if gene_id.endswith(('.tsv', '.txt')):
if gene_id.startswith(('2L', '2R', '3L', '3R', 'X', '2RL', '3RL')):
import malariagen_data
if analysis == 'fun':
assert "Unfortunately the genome feature file in malariagen_data does not contain AFUN identifiers, so we cannot subset by genomic span for An. funestus."
else:
ag3 = malariagen_data.Ag3()
gff = ag3.genome_features(region=gene_id).query("type == 'gene'")
gene_id = gff.ID.to_list()
elif gene_id.endswith(('.tsv', '.txt')):
gene_id = pd.read_csv(gene_id, sep="\t", header=None).iloc[:, 0].to_list()
elif gene_id.endswith('.csv'):
gene_id = pd.read_csv(gene_id, header=None).iloc[:, 0].to_list()
Expand Down
6 changes: 6 additions & 0 deletions graphics/badge-diffexp.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"schemaVersion": 1,
"label": "AnoExpress",
"message": "Reproduce differential expression meta-analysis in Colab",
"logoSvg": "https://upload.wikimedia.org/wikipedia/commons/d/d0/Google_Colaboratory_SVG_Logo.svg"
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "anoexpress"
version = "0.1.8"
version = "0.2.0"
description = "A package to access insecticide resistance gene expression meta-analyses in Anopheles mosquitoes"
authors = [
"Sanjay Nagi <[email protected]>",
Expand Down
15 changes: 14 additions & 1 deletion tests/test_anoexpress.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,20 @@ def test_plot_gene_expression_type(plot_type):
microarray=False,
plot_type=plot_type,
)



@pytest.mark.parametrize(
    "gene_id",
    ["2L:28,500,500-28,520,000", "X:8,500,500-8,530,000"],
)
def test_plot_gene_expression_spans(gene_id):
    """Smoke-test ``plot_gene_expression`` with a genomic span as ``gene_id``.

    Each parametrized case passes a ``contig:start-end`` string instead of a
    gene identifier. The test makes no assertions on the output; it passes as
    long as the call completes without raising.
    """
    # Collect the keyword arguments first so the call under test stays a
    # single, easily-spotted line.
    plot_kwargs = {
        "gene_id": gene_id,
        "analysis": "gamb_colu",
        "microarray": False,
        "plot_type": "strip",
    }
    xpress.plot_gene_expression(**plot_kwargs)

def test_contig_expression():

Expand Down
Loading

0 comments on commit b96e2b0

Please sign in to comment.