From d65015ee3e9307c3e93a420ff31ab56f026959e5 Mon Sep 17 00:00:00 2001 From: Matthieu Barba Date: Wed, 27 Mar 2024 16:34:50 +0000 Subject: [PATCH 1/3] Add synonyms for comparison with GFF --- src/python/ensembl/io/genomio/manifest/check_integrity.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/python/ensembl/io/genomio/manifest/check_integrity.py b/src/python/ensembl/io/genomio/manifest/check_integrity.py index 41eb55e6f..91962f86c 100644 --- a/src/python/ensembl/io/genomio/manifest/check_integrity.py +++ b/src/python/ensembl/io/genomio/manifest/check_integrity.py @@ -187,6 +187,10 @@ def prepare_integrity_data(self) -> None: seq_circular[seq["name"]] = seq.get("circular", False) if seq["coord_system_level"] == "contig": seqr_seqlevel[seq["name"]] = int(seq["length"]) + # Also record synonyms (in case GFF file uses synonyms) + if "synonyms" in seq: + for synonym in seq["synonyms"]: + seq_lengths[synonym["name"]] = int(seq["length"]) self.lengths["seq_regions"] = seq_lengths self.circular["seq_regions"] = seq_circular self.seq_regions = seq_regions From 0953b864d6dd59146404358d08f53e96c098bb81 Mon Sep 17 00:00:00 2001 From: Matthieu Barba Date: Wed, 27 Mar 2024 16:35:16 +0000 Subject: [PATCH 2/3] Fix count: also exclude seq_region names --- src/python/ensembl/io/genomio/manifest/check_integrity.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python/ensembl/io/genomio/manifest/check_integrity.py b/src/python/ensembl/io/genomio/manifest/check_integrity.py index 91962f86c..b1b8d80d3 100644 --- a/src/python/ensembl/io/genomio/manifest/check_integrity.py +++ b/src/python/ensembl/io/genomio/manifest/check_integrity.py @@ -735,6 +735,7 @@ def _compare_seqs( seq_id not in comp["common"] and seq_id not in comp["diff"] and seq_id not in comp["diff_circular"] + and seq_id not in seqrs ): comp["only_feat"].append(seq_id) From 14274d315b4ba5cc17ab9e80ee773ba2ef1158ad Mon Sep 17 00:00:00 2001 From: Matthieu Barba Date: Thu, 28 Mar 2024 09:36:57 +0000 Subject: [PATCH 3/3] Ignore too many branches for now --- src/python/ensembl/io/genomio/manifest/check_integrity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/ensembl/io/genomio/manifest/check_integrity.py b/src/python/ensembl/io/genomio/manifest/check_integrity.py index b1b8d80d3..7e8084eb1 100644 --- a/src/python/ensembl/io/genomio/manifest/check_integrity.py +++ b/src/python/ensembl/io/genomio/manifest/check_integrity.py @@ -154,7 +154,7 @@ def _check_md5sum(self, file_path: Path, md5sum: str) -> None: if readable_hash != md5sum: raise InvalidIntegrityError(f"Invalid md5 checksum for {file_path}") - def prepare_integrity_data(self) -> None: + def prepare_integrity_data(self) -> None: # pylint: disable=too-many-branches """Read all the files and keep a record (IDs and their lengths) for each cases to be compared later. """