From 5dad6dfb2d57c390c1ddf61e5133305d0a9c6e5b Mon Sep 17 00:00:00 2001 From: mboudet Date: Mon, 10 Jun 2024 16:09:46 +0000 Subject: [PATCH 1/4] Wrap mongo errors & manage empty sub --- .../genomes/annotation/parser/annotationParserGff3.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/imports/api/genomes/annotation/parser/annotationParserGff3.js b/imports/api/genomes/annotation/parser/annotationParserGff3.js index 8964cb47..df96c78f 100644 --- a/imports/api/genomes/annotation/parser/annotationParserGff3.js +++ b/imports/api/genomes/annotation/parser/annotationParserGff3.js @@ -358,6 +358,8 @@ class AnnotationProcessor { features.attributes, ); this.geneLevelHierarchy.attributes = attributesFiltered; + + this.geneLevelHierarchy.subfeatures = []; } else { // Create an array if not exists for the subfeatures (exons, cds ...) of // the gene. @@ -446,7 +448,7 @@ class AnnotationProcessor { GeneSchema.validate(geneWithoutId); } catch (err) { logger.error(err) - throw new Error('There is something wrong with the gene collection schema'); + throw new Error('Current gene is not valid, stopping'); } return true; }; @@ -538,6 +540,9 @@ class AnnotationProcessor { this.geneLevelHierarchy.children = this.geneLevelHierarchy.children.concat(protein_ids) + // Validate schema before adding to bulk + this.isValidateGeneSchema(); + // Add to bulk operation. 
this.geneBulkOperation.insert(this.geneLevelHierarchy) @@ -554,7 +559,8 @@ class AnnotationProcessor { // Arbitrary break up of batch size to save ram if (this.geneBulkOperation.length > 500) { this.isReset = true - return this.geneBulkOperation.execute(); + let execute = Meteor.wrapAsync(this.geneBulkOperation.execute, this.geneBulkOperation); + return execute() } } } else { From cc972a05d465316ddf26166f00178c0475f636ee Mon Sep 17 00:00:00 2001 From: mboudet Date: Tue, 11 Jun 2024 16:42:50 +0200 Subject: [PATCH 2/4] Manage CDS duplicate IDs --- .../genomes/annotation/addAnnotation.test.js | 8 ++++++++ .../annotation/parser/annotationParserGff3.js | 18 ++++++++++++++++-- private/data/Bnigra.gff3 | 1 + 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/imports/api/genomes/annotation/addAnnotation.test.js b/imports/api/genomes/annotation/addAnnotation.test.js index 5350f8ea..9772ed6b 100644 --- a/imports/api/genomes/annotation/addAnnotation.test.js +++ b/imports/api/genomes/annotation/addAnnotation.test.js @@ -64,6 +64,14 @@ describe('AddAnnotation', function testAnnotation() { chai.assert.equal(gene.end, 15401); chai.assert.lengthOf(gene.subfeatures, 13, 'Number of subfeatures is not 13'); + + // Check CDS with the same ID + has_default_cds = gene.subfeatures.some((sub) => sub.type == "CDS" && sub.ID == "Bni|B01g000010.2N.1.cds1") + has_new_cds = gene.subfeatures.some((sub) => sub.type == "CDS" && sub.ID == "Bni|B01g000010.2N.1.cds1.1") + + chai.assert.isTrue(has_default_cds, "Bni|B01g000010.2N.1.cds1 was not found") + chai.assert.isTrue(has_default_cds, "Bni|B01g000010.2N.1.cds1.1 was not found") + }); it('Should add multiple copies of genes with different annotation names', function addAnnotationGff3() { diff --git a/imports/api/genomes/annotation/parser/annotationParserGff3.js b/imports/api/genomes/annotation/parser/annotationParserGff3.js index df96c78f..3fb22629 100644 --- a/imports/api/genomes/annotation/parser/annotationParserGff3.js +++ 
b/imports/api/genomes/annotation/parser/annotationParserGff3.js @@ -51,6 +51,8 @@ class AnnotationProcessor { // (this.IdParents[parents[0]] in addChildren function). this.IdParents = {}; this.indexIdParent = 0; + + this.cds_ids = {}; } /** @@ -389,7 +391,18 @@ class AnnotationProcessor { ); } - const identifiant = features.ID + let identifiant = features.ID + + // Manage case of discontinuous CDS: Same ID -> we add a suffix to avoid crashing + if (typeAttr === 'CDS'){ + if identifiant in this.cds_ids: + identifiant = identifiant + "." + this.cds_ids[identifiant] + this.cds_ids[identifiant] += 1 + } else { + this.cds_ids[identifiant] = 1 + } + } + let proteinID // Complete ID parents. @@ -530,7 +543,7 @@ class AnnotationProcessor { // Increment. this.nAnnotation += 1; - const protein_ids = this.geneLevelHierarchy.subfeatures.flatMap(children => { + const protein_ids = this.geneLevelHierarchy.subfeatures.flatMap(children => { if(typeof children.protein_id === 'undefined'){ return [] } else { @@ -552,6 +565,7 @@ class AnnotationProcessor { this.shiftSequence = 0; this.IdParents = {}; this.indexIdParent = 0; + this.cds_ids = {}; // Init new gene. this.initGeneHierarchy(features); diff --git a/private/data/Bnigra.gff3 b/private/data/Bnigra.gff3 index 26a88232..cbc4a43d 100644 --- a/private/data/Bnigra.gff3 +++ b/private/data/Bnigra.gff3 @@ -8,6 +8,7 @@ B1 AAFC_GIFS exon 14210 14284 . - . ID=Bni%7CB01g000010.2N.1.exon4;Parent=Bni%7C B1 AAFC_GIFS exon 13970 14062 . - . ID=Bni%7CB01g000010.2N.1.exon5;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS exon 13640 13870 . - . ID=Bni%7CB01g000010.2N.1.exon6;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS CDS 14702 15401 . - 0 ID=Bni%7CB01g000010.2N.1.cds1;Parent=Bni%7CB01g000010.2N.1 +B1 AAFC_GIFS CDS 14702 15401 . - 0 ID=Bni%7CB01g000010.2N.1.cds1;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS CDS 14557 14636 . - 2 ID=Bni%7CB01g000010.2N.1.cds2;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS CDS 14403 14486 . 
- 0 ID=Bni%7CB01g000010.2N.1.cds3;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS CDS 14210 14284 . - 0 ID=Bni%7CB01g000010.2N.1.cds4;Parent=Bni%7CB01g000010.2N.1 From 4f884eb14d08515fb79d6eaf10bfbbf5fd8f1ac4 Mon Sep 17 00:00:00 2001 From: mboudet Date: Tue, 11 Jun 2024 15:19:03 +0000 Subject: [PATCH 3/4] fix tests --- imports/api/genomes/annotation/addAnnotation.test.js | 2 +- imports/api/genomes/annotation/parser/annotationParserGff3.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/imports/api/genomes/annotation/addAnnotation.test.js b/imports/api/genomes/annotation/addAnnotation.test.js index 9772ed6b..1179e144 100644 --- a/imports/api/genomes/annotation/addAnnotation.test.js +++ b/imports/api/genomes/annotation/addAnnotation.test.js @@ -63,7 +63,7 @@ describe('AddAnnotation', function testAnnotation() { chai.assert.equal(gene.start, 13640); chai.assert.equal(gene.end, 15401); - chai.assert.lengthOf(gene.subfeatures, 13, 'Number of subfeatures is not 13'); + chai.assert.lengthOf(gene.subfeatures, 14, 'Number of subfeatures is not 14'); // Check CDS with the same ID has_default_cds = gene.subfeatures.some((sub) => sub.type == "CDS" && sub.ID == "Bni|B01g000010.2N.1.cds1") diff --git a/imports/api/genomes/annotation/parser/annotationParserGff3.js b/imports/api/genomes/annotation/parser/annotationParserGff3.js index 3fb22629..72f2e626 100644 --- a/imports/api/genomes/annotation/parser/annotationParserGff3.js +++ b/imports/api/genomes/annotation/parser/annotationParserGff3.js @@ -395,7 +395,7 @@ class AnnotationProcessor { // Manage case of discontinuous CDS: Same ID -> we add a suffix to avoid crashing if (typeAttr === 'CDS'){ - if identifiant in this.cds_ids: + if (identifiant in this.cds_ids){ identifiant = identifiant + "." 
+ this.cds_ids[identifiant] this.cds_ids[identifiant] += 1 } else { From 4d44d804e1683f7c2181c3ed5d4281a8975ee0fb Mon Sep 17 00:00:00 2001 From: mboudet Date: Tue, 11 Jun 2024 15:26:14 +0000 Subject: [PATCH 4/4] Changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ee3528b..f4cd4e58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html) +## [0.4.16] Unreleased + +- Better handling of MongoDB errors when adding annotations +- Handle empty genes (genes with no subfeatures) +- Handle CDS features that share the same ID within a gene + ## [0.4.15] 2024-04-05 ### Fixed