Skip to content

Commit

Permalink
Memory optimisation of sequence file handling
Browse files Browse the repository at this point in the history
  • Loading branch information
SantiagoSanchezF committed Feb 7, 2025
1 parent e2a7da3 commit c06dd68
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ sanntis test/files/BGC0001472.fna
conda deactivate sanntis
```

### Support of preprocessed InterProScan outputs
#### Support of preprocessed InterProScan outputs

SanntiS can be executed using preprocessed InterProScan outputs along with a GenBank (GBK) file specifying the coding sequences (CDSs). This integration increases user flexibility.
```bash
Expand Down
6 changes: 2 additions & 4 deletions sanntis/modules/BGCdetection.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,14 @@ def transformCDSpredToCDScontigs(self, cdsPredFile, file_format):
if not os.path.isfile(cdsPredFile):
log.exception(f"{cdsPredFile} file not found")

recs = list(SeqIO.parse(open(cdsPredFile, "r"), file_format))

if file_format == "fasta":

_prodigal_pattern = re.compile(
r"_\d+\s#\s(\d+)\s#\s(\d+)\s#\s(-?1)\s#\sID=(\d+_\d+);partial=(\d{2});start_type="
r"(\w+);rbs_motif=(.+);rbs_spacer=(\S+);gc_cont=(\d+\.\d+)"
)

for record in recs:
for record in SeqIO.parse(open(cdsPredFile, "r"), file_format):
header = record.description
prodigal_match = _prodigal_pattern.search(header)
if not prodigal_match:
Expand All @@ -126,7 +124,7 @@ def transformCDSpredToCDScontigs(self, cdsPredFile, file_format):

elif file_format == "genbank":

for record in recs:
for record in SeqIO.parse(open(cdsPredFile, "r"), file_format):
for f in record.features:
if f.type == "CDS":
start, end = int(f.location.start) + 1, int(f.location.end)
Expand Down

0 comments on commit c06dd68

Please sign in to comment.