Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various #70

Merged
merged 4 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html)

## [0.4.16] Unreleased

- Better handling of MongoDB errors when adding an annotation
- Handle empty genes (genes with no subfeatures)
- Handle CDS features that share the same ID within the same gene

## [0.4.15] 2024-04-05

### Fixed
Expand Down
10 changes: 9 additions & 1 deletion imports/api/genomes/annotation/addAnnotation.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,15 @@ describe('AddAnnotation', function testAnnotation() {
chai.assert.equal(gene.start, 13640);
chai.assert.equal(gene.end, 15401);

chai.assert.lengthOf(gene.subfeatures, 13, 'Number of subfeatures is not 13');
chai.assert.lengthOf(gene.subfeatures, 14, 'Number of subfeatures is not 14');

// Check CDS with the same ID
has_default_cds = gene.subfeatures.some((sub) => sub.type == "CDS" && sub.ID == "Bni|B01g000010.2N.1.cds1")
has_new_cds = gene.subfeatures.some((sub) => sub.type == "CDS" && sub.ID == "Bni|B01g000010.2N.1.cds1.1")

chai.assert.isTrue(has_default_cds, "Bni|B01g000010.2N.1.cds1 was not found")
chai.assert.isTrue(has_default_cds, "Bni|B01g000010.2N.1.cds1.1 was not found")

});

it('Should add multiple copies of genes with different annotation names', function addAnnotationGff3() {
Expand Down
28 changes: 24 additions & 4 deletions imports/api/genomes/annotation/parser/annotationParserGff3.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class AnnotationProcessor {
// (this.IdParents[parents[0]] in addChildren function).
this.IdParents = {};
this.indexIdParent = 0;

this.cds_ids = {};
}

/**
Expand Down Expand Up @@ -358,6 +360,8 @@ class AnnotationProcessor {
features.attributes,
);
this.geneLevelHierarchy.attributes = attributesFiltered;

this.geneLevelHierarchy.subfeatures = [];
} else {
// Create an array if not exists for the subfeatures (exons, cds ...) of
// the gene.
Expand Down Expand Up @@ -387,7 +391,18 @@ class AnnotationProcessor {
);
}

const identifiant = features.ID
let identifiant = features.ID

// Manage case of discontinuous CDS: Same ID -> we add a suffix to avoid crashing
if (typeAttr === 'CDS'){
if (identifiant in this.cds_ids){
identifiant = identifiant + "." + this.cds_ids[identifiant]
this.cds_ids[identifiant] += 1
} else {
this.cds_ids[identifiant] = 1
}
}

let proteinID

// Complete ID parents.
Expand Down Expand Up @@ -446,7 +461,7 @@ class AnnotationProcessor {
GeneSchema.validate(geneWithoutId);
} catch (err) {
logger.error(err)
throw new Error('There is something wrong with the gene collection schema');
throw new Error('Current gene is not valid, stopping');
}
return true;
};
Expand Down Expand Up @@ -528,7 +543,7 @@ class AnnotationProcessor {
// Increment.
this.nAnnotation += 1;

const protein_ids = this.geneLevelHierarchy.subfeatures.flatMap(children => {
const protein_ids = this.geneLevelHierarchy.subfeatures.flatMap(children => {
if(typeof children.protein_id === 'undefined'){
return []
} else {
Expand All @@ -538,6 +553,9 @@ class AnnotationProcessor {

this.geneLevelHierarchy.children = this.geneLevelHierarchy.children.concat(protein_ids)

// Validate schema before adding to bulk
this.isValidateGeneSchema();

// Add to bulk operation.
this.geneBulkOperation.insert(this.geneLevelHierarchy)

Expand All @@ -547,14 +565,16 @@ class AnnotationProcessor {
this.shiftSequence = 0;
this.IdParents = {};
this.indexIdParent = 0;
this.cds_ids = {};

// Init new gene.
this.initGeneHierarchy(features);

// Arbitrary break up of batch size to save ram
if (this.geneBulkOperation.length > 500) {
this.isReset = true
return this.geneBulkOperation.execute();
let execute = Meteor.wrapAsync(this.geneBulkOperation.execute, this.geneBulkOperation);
return execute()
}
}
} else {
Expand Down
1 change: 1 addition & 0 deletions private/data/Bnigra.gff3
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ B1 AAFC_GIFS exon 14210 14284 . - . ID=Bni%7CB01g000010.2N.1.exon4;Parent=Bni%7C
B1 AAFC_GIFS exon 13970 14062 . - . ID=Bni%7CB01g000010.2N.1.exon5;Parent=Bni%7CB01g000010.2N.1
B1 AAFC_GIFS exon 13640 13870 . - . ID=Bni%7CB01g000010.2N.1.exon6;Parent=Bni%7CB01g000010.2N.1
B1 AAFC_GIFS CDS 14702 15401 . - 0 ID=Bni%7CB01g000010.2N.1.cds1;Parent=Bni%7CB01g000010.2N.1
B1 AAFC_GIFS CDS 14702 15401 . - 0 ID=Bni%7CB01g000010.2N.1.cds1;Parent=Bni%7CB01g000010.2N.1
B1 AAFC_GIFS CDS 14557 14636 . - 2 ID=Bni%7CB01g000010.2N.1.cds2;Parent=Bni%7CB01g000010.2N.1
B1 AAFC_GIFS CDS 14403 14486 . - 0 ID=Bni%7CB01g000010.2N.1.cds3;Parent=Bni%7CB01g000010.2N.1
B1 AAFC_GIFS CDS 14210 14284 . - 0 ID=Bni%7CB01g000010.2N.1.cds4;Parent=Bni%7CB01g000010.2N.1
Expand Down
Loading