Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix seq compare in integrity #338

Merged
merged 3 commits into from
Apr 8, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/python/ensembl/io/genomio/manifest/check_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _check_md5sum(self, file_path: Path, md5sum: str) -> None:
if readable_hash != md5sum:
raise InvalidIntegrityError(f"Invalid md5 checksum for {file_path}")

- def prepare_integrity_data(self) -> None:
+ def prepare_integrity_data(self) -> None: # pylint: disable=too-many-branches
"""Read all the files and keep a record (IDs and their lengths)
for each cases to be compared later.
"""
Expand Down Expand Up @@ -187,6 +187,10 @@ def prepare_integrity_data(self) -> None:
seq_circular[seq["name"]] = seq.get("circular", False)
if seq["coord_system_level"] == "contig":
seqr_seqlevel[seq["name"]] = int(seq["length"])
+ # Also record synonyms (in case GFF file uses synonyms)
+ if "synonyms" in seq:
+ for synonym in seq["synonyms"]:
+ seq_lengths[synonym["name"]] = int(seq["length"])
self.lengths["seq_regions"] = seq_lengths
self.circular["seq_regions"] = seq_circular
self.seq_regions = seq_regions
Expand Down Expand Up @@ -731,6 +735,7 @@ def _compare_seqs(
seq_id not in comp["common"]
and seq_id not in comp["diff"]
and seq_id not in comp["diff_circular"]
+ and seq_id not in seqrs
):
comp["only_feat"].append(seq_id)

Expand Down
Loading