Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 0.4.5 #37

Merged
merged 18 commits into from
Sep 19, 2023
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html)

## [0.4.5] 2023-09-19

### Added

- Added Hectar loader

### Changed

- Changed GO API url due to changes

## [0.4.4] 2023-06-23

### Changed
Expand Down
39 changes: 39 additions & 0 deletions cli/genoboo.js
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,45 @@ Example:
})
.exitOverride(customExitOverride(addEggnog));

// Register the `add hectar` subcommand: it sends the path of a Hectar
// tab-separated output file to the 'addHectar' method of a running server.
const addHectar = add.command('hectar');

addHectar
  .description('Add Hectar results to a running GeneNoteBook server')
  .usage('[options] <Hectar tab output file>')
  .arguments('<file>')
  .requiredOption(
    '-u, --username <adminUsername>',
    'GeneNoteBook admin username'
  )
  .requiredOption(
    '-p, --password <adminPassword>',
    'GeneNoteBook admin password'
  )
  .option(
    '--port [port]',
    'Port on which GeneNoteBook is running. Default: 3000'
  )
  .action((file, options) => {
    // The port falls back to 3000 when the option is not supplied.
    const { username, password, port = 3000 } = options;

    // Show the command help when the positional file argument is unusable.
    if (typeof file !== 'string') {
      addHectar.help();
    }

    // The server receives an absolute path, so resolve it on the CLI side.
    const fileName = path.resolve(file);
    const hasAllInputs = Boolean(fileName && username && password);
    if (!hasAllInputs) {
      addHectar.help();
    }

    const connection = new GeneNoteBookConnection({ username, password, port });
    connection.call('addHectar', { fileName });
  })
  .on('--help', () => {
    // Usage example appended to the generated help output.
    console.log(`
Example:
genenotebook add hectar hectar_annotations.tab -u admin -p admin
`);
  })
  .exitOverride(customExitOverride(addHectar));

// add orthogroups.
const addOrthogroups = add.command('orthogroups');

Expand Down
2 changes: 2 additions & 0 deletions imports/api/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import './genomes/annotation/addAnnotation.js';
import './genes/interproscan.js';
import './genes/addInterproscan.js';
import './genes/eggnog/addEggnog.js';
import './genes/hectar/addHectar.js';
import './genes/scanGeneAttributes.js';
import './genes/updateAttributeInfo.js';
import './genes/updateGene.js';
Expand Down Expand Up @@ -54,5 +55,6 @@ import './jobqueue/process-blast.js';
import './jobqueue/process-download.js';
import './jobqueue/process-addGenome.js';
import './jobqueue/process-eggnog.js';
import './jobqueue/process-hectar.js';
import './jobqueue/process-similarsequences.js';
import './jobqueue/process-orthogroup.js';
6 changes: 6 additions & 0 deletions imports/api/genes/geneCollection.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ const GeneSchema = new SimpleSchema(
optional: true,
label: 'eggnog DB identifier (_id in eggnog collection)',
},
hectarId: {
type: String,
index: true,
optional: true,
label: 'Hectar identifier (_id in hectar collection)',
},
seqid: {
type: String,
label: 'ID of the sequence on which the gene is, e.g. chr1',
Expand Down
143 changes: 143 additions & 0 deletions imports/api/genes/hectar/addHectar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import { hectarCollection } from '/imports/api/genes/hectar/hectarCollection.js';
import jobQueue, { Job } from '/imports/api/jobqueue/jobqueue.js';
import { ValidatedMethod } from 'meteor/mdg:validated-method';
import { Genes } from '/imports/api/genes/geneCollection.js';
import logger from '/imports/api/util/logger.js';
import { Roles } from 'meteor/alanning:roles';
import SimpleSchema from 'simpl-schema';
import { Meteor } from 'meteor/meteor';

/**
 * Line-by-line processor for Hectar tab-separated output.
 *
 * Each data line is stored in the hectar collection (upsert keyed on
 * `protein_id`) and the matching gene document receives the `hectarId` of
 * that record. Lines whose protein id matches no gene subfeature are skipped
 * with a warning.
 */
class HectarProcessor {
  constructor() {
    // Raw collection handle (not a bulk operation) used to update genes.
    this.genesDb = Genes.rawCollection();
    // Number of Hectar lines that matched a gene and were upserted.
    this.nHectar = 0;
  }

  /**
   * Function that returns the total number of insertions or updates in the
   * hectar collection.
   * @function
   * @return {Number} Return the total number of insertions or updates of
   * hectar.
   */
  getNumberHectar() {
    return this.nHectar;
  }

  /**
   * Parse one line of a Hectar output file and store it.
   * @function
   * @param {String} line - One line of the file, without its line terminator.
   * @return {*} The result of the gene update when the protein id matched a
   * gene; `undefined` for the header row, lines without a tab, and protein
   * ids unknown to the genes collection.
   */
  parse = (line) => {
    const columns = line.split('\t');

    // Skip the header row and anything that is not a tab-separated record.
    if (line.startsWith('protein id') || columns.length <= 1) {
      return undefined;
    }

    // Get all hectar informations of the line, separated by tabs.
    const [
      proteinId,
      predictedTargetingCategory,
      signalPeptideScore,
      signalPeptideCleavageSite,
      typeIISignalAnchorScore,
      chloroplastScore,
      mitochondrionScore,
      otherScore,
    ] = columns;

    // Organize data in a dictionary.
    const annotations = {
      protein_id: proteinId,
      predicted_targeting_category: predictedTargetingCategory,
      signal_peptide_score: signalPeptideScore,
      signal_peptide_cleavage_site: signalPeptideCleavageSite,
      typeII_signal_anchor_score: typeIISignalAnchorScore,
      chloroplast_score: chloroplastScore,
      mitochondrion_score: mitochondrionScore,
      other_score: otherScore,
    };

    // Normalize the values: comma-separated data becomes an array, values
    // starting with a dash mean "no data" and become undefined.
    for (const [key, value] of Object.entries(annotations)) {
      if (typeof value !== 'string') {
        // Column absent from a short line: keep it undefined instead of
        // throwing while inspecting a value that does not exist.
        annotations[key] = undefined;
      } else if (value.includes(',')) {
        annotations[key] = value.split(',');
      } else if (value[0] === '-') {
        annotations[key] = undefined;
      }
    }

    // A gene matches when one of its subfeatures carries the protein id
    // either as ID or as protein_id (e.g: ID = MMUCEDO_000002-T1).
    const geneSelector = {
      $or: [
        { 'subfeatures.ID': proteinId },
        { 'subfeatures.protein_id': proteinId },
      ],
    };

    const matchingGene = Genes.findOne(geneSelector);
    if (typeof matchingGene === 'undefined') {
      logger.warn(`
Warning ! ${proteinId} hectar annotation did
not find a matching protein domain in the genes database.
${proteinId} is not added to the hectar database.`);
      return undefined;
    }

    // Increment hectar.
    this.nHectar += 1;

    // Update or insert if no matching documents were found.
    const { insertedId } = hectarCollection.upsert(
      { protein_id: proteinId }, // selector.
      annotations, // modifier.
    );

    // insertedId is only set when a new hectar document was created;
    // otherwise look up the _id of the existing one.
    let hectarId = insertedId;
    if (typeof hectarId === 'undefined') {
      hectarId = hectarCollection.findOne({ protein_id: proteinId })._id;
    }

    // Update hectarId in genes database.
    return this.genesDb.update(geneSelector, { $set: { hectarId } });
  };
}

/**
 * Meteor method 'addHectar': queues a high-priority 'addHectar' job for the
 * given file and waits until that job is either completed or failed.
 * Only logged-in users with the 'admin' role may call it.
 * @param {String} fileName - Path of the Hectar file handed to the job.
 * @return {Object} `{ result, jobStatus }` read from the finished job.
 * @throws {Meteor.Error} 'not-authorized' for anonymous or non-admin callers.
 */
const addHectar = new ValidatedMethod({
  name: 'addHectar',
  validate: new SimpleSchema({
    fileName: { type: String },
  }).validator(),
  applyOptions: {
    noRetry: true,
  },
  run({ fileName }) {
    // Anonymous callers and non-admin users are rejected the same way.
    const isAdmin = Boolean(this.userId)
      && Roles.userIsInRole(this.userId, 'admin');
    if (!isAdmin) {
      throw new Meteor.Error('not-authorized');
    }

    logger.log('file :', { fileName });
    const job = new Job(jobQueue, 'addHectar', { fileName });
    job.priority('high').save();

    let status = job.doc.status;
    logger.debug(`Job status: ${status}`);

    // Poll the job document until it reaches a terminal state.
    const isFinished = (state) => state === 'completed' || state === 'failed';
    while (!isFinished(status)) {
      status = job.refresh().doc.status;
    }

    return { result: job.doc.result, jobStatus: status };
  },
});

export default addHectar;
export { HectarProcessor };
64 changes: 64 additions & 0 deletions imports/api/genes/hectar/hectar.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/* eslint-env mocha */
import { resetDatabase } from 'meteor/xolvio:cleaner';
import chai from 'chai';
import logger from '../../util/logger';
import { hectarCollection } from './hectarCollection';
import addHectar from './addHectar';
import { addTestUsers, addTestGenome } from '../../../startup/server/fixtures/addTestData';
import '../../jobqueue/process-hectar';

// Integration test of the 'addHectar' method: authorization checks, then a
// full import of the bundled Hectar test file for an admin user.
describe('hectar', function testHectar() {
  let adminId;
  let newUserId;
  let adminContext;
  let userContext;

  logger.log('Testing Hectar methods');

  beforeEach(() => {
    ({ adminId, newUserId } = addTestUsers());
    adminContext = { userId: adminId };
    userContext = { userId: newUserId };
  });

  afterEach(() => {
    resetDatabase();
  });

  it('Should add Hectar tab file', function importhectar() {
    // Increase timeout
    this.timeout(20000);

    // Pass the flag positionally: `annot = true` would assign to an
    // undeclared variable instead of naming the parameter.
    addTestGenome(true);

    const hectarParams = {
      fileName: 'assets/app/data/Bnigra_hectar.tab',
    };

    // Should fail for non-logged in
    chai.expect(() => {
      addHectar._execute({}, hectarParams);
    }).to.throw('[not-authorized]');

    // Should fail for non admin user
    chai.expect(() => {
      addHectar._execute(userContext, hectarParams);
    }).to.throw('[not-authorized]');

    const { result } = addHectar._execute(adminContext, hectarParams);

    chai.assert.equal(result.nInserted, 1);

    const hecs = hectarCollection.find({ protein_id: 'BniB01g000010.2N.1' }).fetch();

    chai.assert.lengthOf(hecs, 1, 'No hectar data found');

    const hec = hecs[0];

    chai.assert.equal(hec.predicted_targeting_category, 'other localisation');
    chai.assert.equal(hec.signal_peptide_score, '0.0583');
    chai.assert.equal(hec.typeII_signal_anchor_score, '0.0228');
    chai.assert.equal(hec.mitochondrion_score, '0.1032');
    chai.assert.equal(hec.other_score, '0.8968');
  });
});
41 changes: 41 additions & 0 deletions imports/api/genes/hectar/hectarCollection.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import SimpleSchema from 'simpl-schema';
import { Mongo } from 'meteor/mongo';

// Every Hectar column is declared as a String field; only the label differs.
const hectarStringField = (label) => ({ type: String, label });

// Schema describing one Hectar prediction record (one line of the tab file).
const hectarSchema = new SimpleSchema({
  protein_id: hectarStringField('Query sequence name and type.'),
  predicted_targeting_category: hectarStringField(
    'Predicted sub-cellular localization.',
  ),
  signal_peptide_score: hectarStringField(
    'Probability (score) to be a signal peptide.',
  ),
  signal_peptide_cleavage_site: hectarStringField(
    'Predicted cleavage site of signal peptide.',
  ),
  typeII_signal_anchor_score: hectarStringField(
    'Probability (score) to be a type II signal anchor.',
  ),
  chloroplast_score: hectarStringField(
    'Probability (score) to be in chloroplast.',
  ),
  mitochondrion_score: hectarStringField(
    'Probability (score) to be in mitochondrion.',
  ),
  other_score: hectarStringField('Probability (score) to be elsewhere .'),
});

// Mongo collection named 'hectar' that holds the prediction records.
const hectarCollection = new Mongo.Collection('hectar');

export { hectarCollection, hectarSchema };
57 changes: 57 additions & 0 deletions imports/api/jobqueue/process-hectar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { HectarProcessor } from '/imports/api/genes/hectar/addHectar.js';
import logger from '/imports/api/util/logger.js';
import jobQueue from './jobqueue.js';
import readline from 'readline';
import fs from 'fs';

/**
 * Worker for 'addHectar' jobs: streams the Hectar file named in the job data
 * line by line through a HectarProcessor, reports progress every 100 lines,
 * and finishes the job with the number of stored records.
 *
 * The job is resolved exactly once: either `job.fail` on the first error or
 * `job.done` after the whole file was read, each followed by one `callback()`.
 */
jobQueue.processJobs(
  'addHectar',
  {
    concurrency: 4,
    payload: 1,
  },
  async (job, callback) => {
    const { fileName } = job.data;
    logger.log(`Add ${fileName} hectar file.`);

    const lineProcessor = new HectarProcessor();
    let nHectar = 0;

    try {
      // Stat the file BEFORE creating the readline interface: awaiting
      // between interface creation and iteration may lose lines.
      const { size: fileSize } = await fs.promises.stat(fileName);

      const rl = readline.createInterface({
        input: fs.createReadStream(fileName, 'utf8'),
        crlfDelay: Infinity,
      });

      let processedBytes = 0;
      let processedLines = 0;

      for await (const line of rl) {
        processedBytes += Buffer.byteLength(line, 'utf8') + 1; // also count \n
        processedLines += 1;

        if ((processedLines % 100) === 0) {
          await job.progress(
            // The newline estimate can overshoot on the last line; never
            // report more than the total.
            Math.min(processedBytes, fileSize),
            fileSize,
            { echo: true },
            (err) => {
              if (err) logger.error(err);
            },
          );
        }

        await lineProcessor.parse(line);
        nHectar = lineProcessor.getNumberHectar();
      }
    } catch (err) {
      // Stop at the first error: leaving the loop closes the reader, and the
      // early return prevents a later job.done() / second callback().
      logger.error(err);
      job.fail({ err });
      callback();
      return;
    }

    logger.log(`Inserted ${nHectar} Hectar`);
    job.done({ nInserted: nHectar });
    callback();
  },
);
Loading