Skip to content

Commit

Permalink
Update GitHub workflow to test the new version and lightly reformat the
Browse files Browse the repository at this point in the history
script
  • Loading branch information
jpjarnoux committed Nov 29, 2021
1 parent 6f52583 commit 414fd26
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 34 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,11 @@ jobs:
shell: bash -l {0}
run: |
cd testingDataset
ppanggolin align --pangenome mybasicpangenome/pangenome.h5 --proteins some_chlam_proteins.fasta --output test_align --draw_related --getinfo
ppanggolin align --pangenome mybasicpangenome/pangenome.h5 --anno NC_010287.gbff --output test_align -f
ppanggolin align --pangenome mybasicpangenome/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_align --draw_related --getinfo
cd -
- name: testing context command
shell: bash -l {0}
run: |
cd testingDataset
ppanggolin context --pangenome myannopang/pangenome.h5 --proteins some_chlam_proteins.fasta --output test_context
ppanggolin context --pangenome myannopang/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_context
ppanggolin context --pangenome readclusterpang/pangenome.h5 --family some_chlam_families.txt --output test_context -f
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.2.45
1.2.46
26 changes: 12 additions & 14 deletions ppanggolin/align/alignOnPang.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
from collections import defaultdict

# local libraries
from ppanggolin.formats import checkPangenomeInfo, writeGeneSequencesFromAnnotations
from ppanggolin.formats import checkPangenomeInfo
from ppanggolin.utils import mkOutdir, read_compressed_or_not
from ppanggolin.pangenome import Pangenome
from ppanggolin.annotate import detect_filetype, read_org_gff, read_org_gbff
from ppanggolin.RGP.genomicIsland import compute_org_rgp
from ppanggolin.figures.draw_spot import drawSelectedSpots, subgraph


Expand All @@ -30,13 +28,12 @@ def createdb(fileObj, tmpdir):
:rtype: _io.TextIOWrapper
"""
seqdb = tempfile.NamedTemporaryFile(mode="w", dir=tmpdir.name)
cmd = ["mmseqs", "createdb", fileObj.name, seqdb.name, '--dbtype','0']
cmd = ["mmseqs", "createdb", fileObj.name, seqdb.name, '--dbtype', '0']
subprocess.run(cmd, stdout=subprocess.DEVNULL)
return seqdb


def alignSeqToPang(pangFile, seqFile, output, tmpdir, cpu=1, no_defrag=False, identity=0.8, coverage=0.8,
code=11):
def alignSeqToPang(pangFile, seqFile, output, tmpdir, cpu=1, no_defrag=False, identity=0.8, coverage=0.8):
pang_db = createdb(pangFile, tmpdir)
seq_db = createdb(seqFile, tmpdir)
cov_mode = "0" # coverage of query and target
Expand Down Expand Up @@ -140,7 +137,7 @@ def draw_spot_gexf(spots, output, multigenics, fam2mod, set_size=3):
subgraph(spot, fname, set_size=set_size, multigenics=multigenics, fam2mod=fam2mod)


def getSeqInfo(seq2pang, pangenome, output, cpu, draw_related, disable_bar=False):
def getSeqInfo(seq2pang, pangenome, output, draw_related, disable_bar=False):
logging.getLogger().info("Writing RGP and spot information related to hits in the pangenome")
multigenics = pangenome.get_multigenics(pangenome.parameters["RGP"]["dup_margin"])

Expand Down Expand Up @@ -191,7 +188,7 @@ def get_seq2pang(pangenome, sequenceFile, output, tmpdir, cpu=1, no_defrag=False
:param output: Output directory
:type output: str
:param tmpdir: Temporary directory
:type tmpdir: str
:type tmpdir: tempfile.TemporaryDirectory
:param cpu: number of CPU cores to use
:type cpu: int
:param no_defrag: do not use the defrag workflow if true
Expand Down Expand Up @@ -220,7 +217,7 @@ def get_seq2pang(pangenome, sequenceFile, output, tmpdir, cpu=1, no_defrag=False


def align(pangenome, sequenceFile, output, tmpdir, identity=0.8, coverage=0.8, no_defrag=False, cpu=1, getinfo=False,
draw_related=False, priority='name,ID', disable_bar=False):
draw_related=False, disable_bar=False):
if pangenome.status["geneFamilySequences"] not in ["inFile", "Loaded", "Computed"]:
raise Exception("Cannot use this function as your pangenome does not have gene families representatives "
"associated to it. For now this works only if the clustering is realised by PPanGGOLiN.")
Expand All @@ -240,10 +237,10 @@ def align(pangenome, sequenceFile, output, tmpdir, identity=0.8, coverage=0.8, n
new_tmpdir = tempfile.TemporaryDirectory(dir=tmpdir)

seqSet, alignFile, seq2pang = get_seq2pang(pangenome, sequenceFile, output, new_tmpdir,
cpu, no_defrag, identity, coverage)
cpu, no_defrag, identity, coverage)

if getinfo or draw_related:
getSeqInfo(seq2pang, pangenome, output, cpu, draw_related, disable_bar=disable_bar)
getSeqInfo(seq2pang, pangenome, output, draw_related, disable_bar=disable_bar)
else:
partProj = projectPartition(seq2pang, seqSet, output) # write the partition assignation only
logging.getLogger().info(f"sequences partition projection : '{partProj}'")
Expand All @@ -264,14 +261,15 @@ def launch(args):
if args.sequences is not None:
align(pangenome=pangenome, sequenceFile=args.sequences, output=args.output, tmpdir=args.tmpdir, cpu=args.cpu,
identity=args.identity, coverage=args.coverage, no_defrag=args.no_defrag, getinfo=args.getinfo,
draw_related=args.draw_related, priority=args.label_priority, disable_bar=args.disable_prog_bar)
draw_related=args.draw_related, disable_bar=args.disable_prog_bar)


def alignSubparser(subparser):
parser = subparser.add_parser("align", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
required = parser.add_argument_group(title="Required arguments",
description="All of the following arguments are required :")
required.add_argument('-S','--sequences', required=True, type=str,
help="sequences (nucleotides or amino acids) to align on the pangenome gene families")
required.add_argument('-S', '--sequences', required=True, type=str,
help="sequences (nucleotides or amino acids) to align on the pangenome gene families")

required.add_argument('-p', '--pangenome', required=True, type=str, help="The pangenome .h5 file")
required.add_argument('-o', '--output', required=True, type=str,
Expand Down
36 changes: 20 additions & 16 deletions ppanggolin/context/searchGeneContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class geneContext:
Methods
-------
"""

def __init__(self, ID, families=None):
""" Initial methods
Expand Down Expand Up @@ -61,8 +62,8 @@ def add_family(self, family):
self.families.add(family)


def search_geneContext_in_pangenome(pangenome, output, tmpdir, sequences=None, families=None, transitive=4, identity=0.5,
coverage=0.8, jaccard=0.85, no_defrag=False, cpu=1, disable_bar=True):
def search_geneContext_in_pangenome(pangenome, output, tmpdir, sequences=None, families=None, transitive=4,
identity=0.5, coverage=0.8, jaccard=0.85, no_defrag=False, cpu=1, disable_bar=True):
"""
Main function to search common gene contexts between sequence set and pangenome families
Expand Down Expand Up @@ -104,7 +105,7 @@ def search_geneContext_in_pangenome(pangenome, output, tmpdir, sequences=None, f
# Alignment of sequences on pangenome families
new_tmpdir = tempfile.TemporaryDirectory(dir=tmpdir)
seq_set, _, seq2pan = get_seq2pang(pangenome, sequences, output, new_tmpdir, cpu, no_defrag,
identity, coverage)
identity, coverage)
projectPartition(seq2pan, seq_set, output)
new_tmpdir.cleanup()
for k, v in seq2pan.items():
Expand Down Expand Up @@ -206,7 +207,7 @@ def extract_gene_context(gene, contig, families, t=4):
:rtype: (int, Bool, int, Bool)
"""
pos_left, pos_right = (max(0, gene.position - t),
min(gene.position + t, len(contig)-1)) # Gene positions to compare family
min(gene.position + t, len(contig) - 1)) # Gene positions to compare family
in_context_left, in_context_right = (False, False)
while pos_left < gene.position and not in_context_left:
if contig[pos_left].family in families.values():
Expand Down Expand Up @@ -247,7 +248,8 @@ def fam2seq(seq2pan):
"""
Create a dictionary with gene families as keys and list of sequences id as values
:param seq2pan: Dictionary storing the sequence ids as keys and the gene families to which they are assigned as values
:param seq2pan: Dictionary storing the sequence ids as keys and the gene families
to which they are assigned as values
:type seq2pan: dict
:return: Dictionary reversed
Expand All @@ -262,24 +264,24 @@ def fam2seq(seq2pan):
return fam_2_seq


def export_to_dataframe(families, geneContexts, fam_2_seq, output):
def export_to_dataframe(families, gene_contexts, fam_2_seq, output):
""" Export the results into dataFrame
:param families: Families related to the connected components
:type families: set
:param geneContexts: connected components found in the pangenome
:type geneContexts: set
:param gene_contexts: connected components found in the pangenome
:type gene_contexts: set
:param fam_2_seq: Dictionary with gene families as keys and list of sequence ids as values
:type fam_2_seq: dict
:param output: output path
:type output: str
"""
logging.getLogger().debug(f"There are {len(families)} families among {len(geneContexts)} gene contexts")
logging.getLogger().debug(f"There are {len(families)} families among {len(gene_contexts)} gene contexts")

lines = []
for geneContext in geneContexts:
for family in geneContext.families:
line = [geneContext.ID]
for gene_context in gene_contexts:
for family in gene_context.families:
line = [gene_context.ID]
if fam_2_seq is None or fam_2_seq.get(family.ID) is None:
line += [family.name, None, len(family.organisms)]
else:
Expand All @@ -298,10 +300,12 @@ def launch(args):
mkOutdir(args.output, args.force)
pangenome = Pangenome()
pangenome.addFile(args.pangenome)
search_geneContext_in_pangenome(pangenome=pangenome, sequences=args.sequences, families=args.family, output=args.output,
identity=args.identity, coverage=args.coverage, jaccard=args.jaccard,
transitive=args.transitive, cpu=args.cpu, tmpdir=args.tmpdir, no_defrag=args.no_defrag,
disable_bar=args.disable_prog_bar)
search_geneContext_in_pangenome(pangenome=pangenome, sequences=args.sequences, families=args.family,
output=args.output,
identity=args.identity, coverage=args.coverage, jaccard=args.jaccard,
transitive=args.transitive, cpu=args.cpu, tmpdir=args.tmpdir,
no_defrag=args.no_defrag,
disable_bar=args.disable_prog_bar)


def contextSubparser(sub_parser):
Expand Down

0 comments on commit 414fd26

Please sign in to comment.