Skip to content

Commit

Permalink
Version 1.3:
Browse files Browse the repository at this point in the history
- Removed the biopython package from the SynTracker_env.yml file
- Changed the execution of Blast processes in blast.py so that Bio.Blast.Applications module (deprecated) is not needed anymore and they are executed directly by the Subprocess module.
  • Loading branch information
inbalpaz authored and ipaz committed Jul 15, 2024
1 parent e58cdfc commit 99da7ba
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 36 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Create a new conda environment using the ‘SynTracker_env.yml’ file (located
`conda env create -f SynTracker_env.yml`

Activate the newly created environment:
`conda activate SynTracker_1_2`
`conda activate SynTracker_1_3`


## Input
Expand Down
Binary file modified SynTracker_Manual.docx
Binary file not shown.
Binary file modified SynTracker_Manual.pdf
Binary file not shown.
3 changes: 1 addition & 2 deletions SynTracker_env.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
name: SynTracker_1_2
name: SynTracker_1_3
channels:
- conda-forge
- bioconda
dependencies:
- python
- r-base=4.0.5
- bioconductor-decipher
- biopython
- r-tidyverse=1.3.0
2 changes: 1 addition & 1 deletion syntracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def main():
config.flanking_length, config.minimal_flanking_length,
config.minimal_full_length,
config.minimal_identity, config.minimal_coverage,
config.blast_num_threads))
config.blast_num_threads, config.logfile_path))
# Start the process
process.start()

Expand Down
67 changes: 45 additions & 22 deletions syntracker_first_stage/blast.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,47 @@
from Bio.Blast.Applications import NcbimakeblastdbCommandline
from Bio.Blast.Applications import NcbiblastnCommandline
#from Bio.Blast.Applications import NcbimakeblastdbCommandline
#from Bio.Blast.Applications import NcbiblastnCommandline
import os
import sys
import csv
import re
import config
import subprocess


def make_blast_db(logfile):

input_file = config.combined_renamed_genomes_file_path

command = NcbimakeblastdbCommandline(input_file=input_file, out=config.blast_db_file_path, dbtype="nucl",
parse_seqids=True, title=config.blast_db_file)
command = "makeblastdb -in " + input_file + " -dbtype nucl -title " + config.blast_db_file + " -parse_seqids -out " \
+ config.blast_db_file_path

print("\nExecuting the following BLAST command:")
print(command)
logfile = open(config.logfile_path, "a")
logfile.write("\nExecuting the following BLAST command:\n" + str(command) + "\n")
logfile.close()
logfile.write("\nExecuting the following BLAST command:\n" + command + "\n")

try:
stdout, stderr = command()
print(stdout)
subprocess.run(["makeblastdb", "-in", input_file, "-dbtype", "nucl", "-title", config.blast_db_file,
"-parse_seqids", "-out", config.blast_db_file_path], check=True)
except subprocess.CalledProcessError as err:
print("\nThe following command has failed:")
print(command)
print(err)
logfile.write("\nThe following command has failed:\n" + command + "\n")
exit()
except Exception as err:
print("\nmakeblastdb command has failed for input file: " + input_file)
logfile = open(config.logfile_path, "a")
logfile.write("\nmakeblastdb command has failed for input file: " + input_file + "\n")
logfile.close()
print("\nThe following command has failed:")
print(command)
print(err)
logfile.write("\nThe following command has failed:\n" + command + "\n")
exit()

print("...Done!")
print("...Done!\n")


def blast_per_region_process(full_path_region_file, blast_region_outfile, blastdbcmd_region_outfile,
blastdbcmd_region_outfile_tmp, blast_db_file_path, flanking_length,
minimal_flanking_length, minimal_full_length, minimal_identity, minimal_coverage,
num_threads):
num_threads, logfile_path):

# Run blast for each region
exit_code = run_blastn(full_path_region_file, blast_region_outfile, blast_db_file_path, minimal_identity,
Expand All @@ -60,7 +65,7 @@ def blast_per_region_process(full_path_region_file, blast_region_outfile, blastd
end = row[2]
strand = row[3]

m = re.search("^(Sample\.\d+)_contig", sample_name)
m = re.search(r'^(Sample\.\d+)_contig', sample_name)
if m:
sample_name_only = m.group(1)

Expand Down Expand Up @@ -130,25 +135,43 @@ def blast_per_region_process(full_path_region_file, blast_region_outfile, blastd
else:
print("\nBLAST output file " + blast_region_outfile + " contains less than two valid samples - "
"skip it...\n")
logfile = open(logfile_path, "a")
logfile.write("\nBLAST output file " + blast_region_outfile + " contains less than two valid samples - "
"skip it...\n")
logfile.close()
sys.exit(1)

# The BLAST outfile contains less than 2 hits -> return failure for this region
else:
print("\nBLAST output file " + blast_region_outfile + " contains less than two samples - skip it...\n")
logfile = open(logfile_path, "a")
logfile.write("\nBLAST output file " + blast_region_outfile + " contains less than two valid samples - "
"skip it...\n")
logfile.close()
sys.exit(1)


def run_blastn(query_file, outfile, blast_db_file_path, minimal_identity, minimal_coverage, num_threads):
command = NcbiblastnCommandline(query=query_file, db=blast_db_file_path, out=outfile,
outfmt="6 sseqid sstart send sstrand",
max_target_seqs=10000, perc_identity=minimal_identity,
qcov_hsp_perc=minimal_coverage, num_threads=num_threads)

command = "blastn -query " + query_file + " -db " + blast_db_file_path + " -out " + outfile + \
" -outfmt 6 sseqid sstart send sstrand -max_target_seqs 10000 -perc_identity " + str(minimal_identity) + \
" -qcov_hsp_perc " + str(minimal_coverage) + " -num_threads " + str(num_threads)

try:
command()
subprocess.run(["blastn", "-query", query_file, "-db", blast_db_file_path, "-out", outfile,
"-outfmt", "6 sseqid sstart send sstrand", "-max_target_seqs", "10000",
"-perc_identity", str(minimal_identity), "-qcov_hsp_perc", str(minimal_coverage), "-num_threads"
, str(num_threads)], check=True)

except subprocess.CalledProcessError as err:
print("\nThe following command has failed:")
print(command)
print(err)
return 1

except Exception as err:
print("\nThe following command has failed:")
print(str(command))
print(command)
print(err)
return 1

Expand Down
20 changes: 10 additions & 10 deletions syntracker_first_stage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,55 +192,55 @@ def read_conf_file(old_conf_file, new_conf_file, mode):
in_processed_genomes_list = 0
for line in read_conf:
if re.search("^Reference genomes directory:", line):
m = re.search("^Reference.+:\s(\S+)\n", line)
m = re.search(r'^Reference.+: (\S+)\n', line)
if m:
ref_dir = m.group(1)

if mode == "continue_all_genomes":
out_param.write("\n" + line)

elif re.search("^Target", line):
m = re.search("^Target.+:\s(\S+)\n", line)
m = re.search(r'^Target.+: (\S+)\n', line)
if m:
target_dir = m.group(1)

if mode == "continue_all_genomes":
out_param.write("\n" + line)

elif re.search("^Output", line):
m = re.search("^Output.+:\s(\S+)\n", line)
m = re.search(r'^Output.+: (\S+)\n', line)
if m:
output_dir = m.group(1)

if mode == "continue_all_genomes":
out_param.write("\n" + line)

elif re.search("^Metadata", line):
m = re.search("^Metadata.+:\s(\S+)\n", line)
m = re.search(r'^Metadata.+: (\S+)\n', line)
if m:
metadata_file = m.group(1)

if mode == "continue_all_genomes":
out_param.write("\n" + line)

elif re.search("^Full", line):
m = re.search("^Full.+:\s(\d+)\n", line)
m = re.search(r'^Full.+: (\d+)\n', line)
if m:
full_length = m.group(1)

if mode == "continue_all_genomes":
out_param.write("\n" + line)

elif re.search("^Minimal coverage", line):
m = re.search("^Minimal coverage:\s(\d+)\n", line)
m = re.search(r'^Minimal coverage: (\d+)\n', line)
if m:
minimal_coverage = m.group(1)

if mode == "continue_all_genomes":
out_param.write("\n" + line)

elif re.search("^Minimal identity", line):
m = re.search("^Minimal identity:\s(\d+)\n", line)
m = re.search(r'^Minimal identity: (\d+)\n', line)
if m:
minimal_identity = m.group(1)

Expand All @@ -267,8 +267,8 @@ def read_conf_file(old_conf_file, new_conf_file, mode):
out_param.write("\n" + line)
out_param.write("--------------------\n\n")

elif re.search("^\S+\t\S+\n", line) and in_ref_genomes_list and in_processed_genomes_list == 0:
m = re.search("^(\S+)\t(\S+)\n", line)
elif re.search(r'^\S+\t\S+\n', line) and in_ref_genomes_list and in_processed_genomes_list == 0:
m = re.search(r'^(\S+)\t(\S+)\n', line)
genome_name = m.group(1)
genome_file = m.group(2)
config.genomes_dict[genome_name] = dict()
Expand All @@ -293,7 +293,7 @@ def read_conf_file(old_conf_file, new_conf_file, mode):
break

elif re.search("^ref_genome:", line) and in_processed_genomes_list:
m = re.search("^ref_genome:\s+(\S+)\n$", line)
m = re.search(r'^ref_genome:\s+(\S+)\n$', line)
current_genome_name = m.group(1)

elif re.search("BLAST finished", line) and in_processed_genomes_list:
Expand Down

0 comments on commit 99da7ba

Please sign in to comment.