Skip to content

Commit

Permalink
Merge pull request #35 from Kmassau/add_hectar
Browse files Browse the repository at this point in the history
Add hectar loader
  • Loading branch information
mboudet authored Aug 21, 2023
2 parents 1c9e160 + f5d16d9 commit 3b5db1a
Show file tree
Hide file tree
Showing 13 changed files with 584 additions and 0 deletions.
39 changes: 39 additions & 0 deletions cli/genoboo.js
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,45 @@ Example:
})
.exitOverride(customExitOverride(addEggnog));

// Register the `add hectar` sub-command: it hands a Hectar tabular output
// file to a running GeneNoteBook server through the `addHectar` method.
const addHectar = add.command('hectar');

addHectar
  .description('Add Hectar results to a running GeneNoteBook server')
  .usage('[options] <Hectar tab output file>')
  .arguments('<file>')
  .requiredOption('-u, --username <adminUsername>', 'GeneNoteBook admin username')
  .requiredOption('-p, --password <adminPassword>', 'GeneNoteBook admin password')
  .option('--port [port]', 'Port on which GeneNoteBook is running. Default: 3000')
  .action((file, options) => {
    // The port falls back to 3000 when the option is not supplied.
    const { username, password, port = 3000 } = options;

    // Without a file argument there is nothing to send: show the usage text.
    if (typeof file !== 'string') {
      addHectar.help();
    }

    // The server receives an absolute path, resolved from the CLI's cwd.
    const fileName = path.resolve(file);
    const hasAllInputs = fileName && username && password;
    if (!hasAllInputs) {
      addHectar.help();
    }

    const connection = new GeneNoteBookConnection({ username, password, port });
    connection.call('addHectar', { fileName });
  })
  .on('--help', () => {
    console.log(`
Example:
genenotebook add hectar hectar_annotations.tab -u admin -p admin
`);
  })
  .exitOverride(customExitOverride(addHectar));

// add orthogroups.
const addOrthogroups = add.command('orthogroups');

Expand Down
2 changes: 2 additions & 0 deletions imports/api/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import './genomes/annotation/addAnnotation.js';
import './genes/interproscan.js';
import './genes/addInterproscan.js';
import './genes/eggnog/addEggnog.js';
import './genes/hectar/addHectar.js';
import './genes/scanGeneAttributes.js';
import './genes/updateAttributeInfo.js';
import './genes/updateGene.js';
Expand Down Expand Up @@ -54,5 +55,6 @@ import './jobqueue/process-blast.js';
import './jobqueue/process-download.js';
import './jobqueue/process-addGenome.js';
import './jobqueue/process-eggnog.js';
import './jobqueue/process-hectar.js';
import './jobqueue/process-similarsequences.js';
import './jobqueue/process-orthogroup.js';
6 changes: 6 additions & 0 deletions imports/api/genes/geneCollection.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ const GeneSchema = new SimpleSchema(
optional: true,
label: 'eggnog DB identifier (_id in eggnog collection)',
},
hectarId: {
type: String,
index: true,
optional: true,
label: 'Hectar identifier (_id in hectar collection)',
},
seqid: {
type: String,
label: 'ID of the sequence on which the gene is, e.g. chr1',
Expand Down
143 changes: 143 additions & 0 deletions imports/api/genes/hectar/addHectar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import { hectarCollection } from '/imports/api/genes/hectar/hectarCollection.js';
import jobQueue, { Job } from '/imports/api/jobqueue/jobqueue.js';
import { ValidatedMethod } from 'meteor/mdg:validated-method';
import { Genes } from '/imports/api/genes/geneCollection.js';
import logger from '/imports/api/util/logger.js';
import { Roles } from 'meteor/alanning:roles';
import SimpleSchema from 'simpl-schema';
import { Meteor } from 'meteor/meteor';

/**
 * Line-by-line processor for Hectar tabular output.
 *
 * Each data row is turned into a document of the hectar collection (keyed by
 * `protein_id`) and the matching gene gets its `hectarId` set to the _id of
 * that document. Rows whose protein is unknown to the genes collection are
 * skipped with a warning.
 */
class HectarProcessor {
  constructor() {
    // Not a bulk mongo suite.
    this.genesDb = Genes.rawCollection();
    // Number of rows that matched a gene and were written so far.
    this.nHectar = 0;
  }

  /**
   * Function that returns the total number of insertions or updates in the
   * hectar collection.
   * @function
   * @return {Number} Return the total number of insertions or updates of
   * hectar.
   */
  getNumberHectar() {
    return this.nHectar;
  }

  /**
   * Parse one line of a Hectar tab-separated file and store it.
   * @function
   * @param {String} line - A single line of the file, without its newline.
   * @return {*} `undefined` for skipped lines (header, lines without a tab,
   * proteins not found in the genes collection); otherwise whatever
   * `genesDb.update` returns (the job processor awaits it, so presumably a
   * promise - confirm against the mongo driver in use).
   */
  parse = (line) => {
    const columns = line.split('\t');

    // Skip the header row and any line that is not tab separated.
    if (line.slice(0, 10) === 'protein id' || columns.length <= 1) {
      return undefined;
    }

    // Get all hectar informations separated by tabs.
    const [
      proteinId,
      predictedTargetingCategory,
      signalPeptideScore,
      signalPeptideCleavageSite,
      typeIISignalAnchorScore,
      chloroplastScore,
      mitochondrionScore,
      otherScore,
    ] = columns;

    // Organize data in a dictionary.
    const annotations = {
      protein_id: proteinId,
      predicted_targeting_category: predictedTargetingCategory,
      signal_peptide_score: signalPeptideScore,
      signal_peptide_cleavage_site: signalPeptideCleavageSite,
      typeII_signal_anchor_score: typeIISignalAnchorScore,
      chloroplast_score: chloroplastScore,
      mitochondrion_score: mitochondrionScore,
      other_score: otherScore,
    };

    // Filters undefined data (with a dash) and splits into an array for
    // comma-separated data.
    for (const [key, value] of Object.entries(annotations)) {
      // A row with fewer than eight columns leaves trailing values undefined;
      // they stay undefined instead of crashing on `value[0]`.
      if (typeof value === 'string') {
        if (value[0] === '-') {
          annotations[key] = undefined;
        }
        if (value.includes(',')) {
          annotations[key] = value.split(',');
        }
      }
    }

    // A gene matches when one of its subfeatures carries the protein id,
    // either as ID or as protein_id (e.g: ID = MMUCEDO_000002-T1).
    const geneSelector = {
      $or: [
        { 'subfeatures.ID': proteinId },
        { 'subfeatures.protein_id': proteinId },
      ],
    };

    const subfeatureIsFound = Genes.findOne(geneSelector);

    if (typeof subfeatureIsFound === 'undefined') {
      logger.warn(`
Warning ! ${proteinId} hectar annotation did
not find a matching protein domain in the genes database.
${proteinId} is not added to the hectar database.`);
      return undefined;
    }

    // Increment hectar.
    this.nHectar += 1;

    // Update or insert if no matching documents were found.
    const documentHectar = hectarCollection.upsert(
      { protein_id: proteinId }, // selector.
      annotations, // modifier.
    );

    // `insertedId` is only present when the upsert created the document;
    // otherwise the existing document has to be looked up to get its _id.
    const hectarId = typeof documentHectar.insertedId !== 'undefined'
      ? documentHectar.insertedId
      : hectarCollection.findOne({ protein_id: proteinId })._id;

    // Update hectarId in genes database.
    return this.genesDb.update(geneSelector, { $set: { hectarId } });
  };
}

/**
 * Meteor method `addHectar`: queues a high-priority `addHectar` job for the
 * given file and returns once that job is either completed or failed.
 * Restricted to logged-in users holding the `admin` role.
 */
const addHectar = new ValidatedMethod({
  name: 'addHectar',
  validate: new SimpleSchema({ fileName: { type: String } }).validator(),
  applyOptions: { noRetry: true },
  run({ fileName }) {
    // Anonymous callers and non-admin users get the same error.
    const isAdmin = Boolean(this.userId)
      && Roles.userIsInRole(this.userId, 'admin');
    if (!isAdmin) {
      throw new Meteor.Error('not-authorized');
    }

    logger.log('file :', { fileName });
    const job = new Job(jobQueue, 'addHectar', { fileName });
    job.priority('high').save();

    let currentStatus = job.doc.status;
    logger.debug(`Job status: ${currentStatus}`);

    // Keep refreshing the job document until it reaches a terminal state.
    const terminalStates = ['completed', 'failed'];
    while (!terminalStates.includes(currentStatus)) {
      currentStatus = job.refresh().doc.status;
    }

    return { result: job.doc.result, jobStatus: currentStatus };
  },
});

export default addHectar;
export { HectarProcessor };
64 changes: 64 additions & 0 deletions imports/api/genes/hectar/hectar.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/* eslint-env mocha */
import { resetDatabase } from 'meteor/xolvio:cleaner';
import chai from 'chai';
import logger from '../../util/logger';
import { hectarCollection } from './hectarCollection';
import addHectar from './addHectar';
import { addTestUsers, addTestGenome } from '../../../startup/server/fixtures/addTestData';
import '../../jobqueue/process-hectar';

// Integration test for the `addHectar` method: checks the authorization
// guards, then imports a Hectar tab file and inspects the stored document.
describe('hectar', function testHectar() {
  let adminId;
  let newUserId;
  let adminContext;
  let userContext;

  logger.log('Testing Hectar methods');

  // Fresh users (and method contexts) for every test case.
  beforeEach(() => {
    ({ adminId, newUserId } = addTestUsers());
    adminContext = { userId: adminId };
    userContext = { userId: newUserId };
  });

  afterEach(() => {
    resetDatabase();
  });

  it('Should add Hectar tab file', function importhectar() {
    // Increase timeout
    this.timeout(20000);

    // Pass `true` positionally. The previous `annot = true` was an assignment
    // to an undeclared identifier, not a named argument: it leaked a global
    // (and is a ReferenceError in strict mode).
    addTestGenome(true);

    const hectarParams = {
      fileName: 'assets/app/data/Bnigra_hectar.tab',
    };

    // Should fail for non-logged in
    chai.expect(() => {
      addHectar._execute({}, hectarParams);
    }).to.throw('[not-authorized]');

    // Should fail for non admin user
    chai.expect(() => {
      addHectar._execute(userContext, hectarParams);
    }).to.throw('[not-authorized]');

    const { result } = addHectar._execute(adminContext, hectarParams);

    // The job reports how many Hectar rows were written.
    chai.assert.equal(result.nInserted, 1);

    const hecs = hectarCollection.find({ protein_id: 'BniB01g000010.2N.1' }).fetch();

    chai.assert.lengthOf(hecs, 1, 'No hectar data found');

    const hec = hecs[0];

    chai.assert.equal(hec.predicted_targeting_category, 'other localisation');
    chai.assert.equal(hec.signal_peptide_score, '0.0583');
    chai.assert.equal(hec.typeII_signal_anchor_score, '0.0228');
    chai.assert.equal(hec.mitochondrion_score, '0.1032');
    chai.assert.equal(hec.other_score, '0.8968');
  });
});
41 changes: 41 additions & 0 deletions imports/api/genes/hectar/hectarCollection.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import SimpleSchema from 'simpl-schema';
import { Mongo } from 'meteor/mongo';

// Every Hectar column is declared the same way: a String field carrying only
// a human-readable label (no `optional` flag, no index).
const hectarStringField = (label) => ({ type: String, label });

// Schema describing one row of Hectar output, keyed by column name.
const hectarSchema = new SimpleSchema({
  protein_id: hectarStringField('Query sequence name and type.'),
  predicted_targeting_category: hectarStringField(
    'Predicted sub-cellular localization.',
  ),
  signal_peptide_score: hectarStringField(
    'Probability (score) to be a signal peptide.',
  ),
  signal_peptide_cleavage_site: hectarStringField(
    'Predicted cleavage site of signal peptide.',
  ),
  typeII_signal_anchor_score: hectarStringField(
    'Probability (score) to be a type II signal anchor.',
  ),
  chloroplast_score: hectarStringField(
    'Probability (score) to be in chloroplast.',
  ),
  mitochondrion_score: hectarStringField(
    'Probability (score) to be in mitochondrion.',
  ),
  other_score: hectarStringField('Probability (score) to be elsewhere .'),
});

// Mongo collection named 'hectar'; the schema above is exported alongside it
// but is not attached to the collection here.
const hectarCollection = new Mongo.Collection('hectar');

export { hectarCollection, hectarSchema };
57 changes: 57 additions & 0 deletions imports/api/jobqueue/process-hectar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { HectarProcessor } from '/imports/api/genes/hectar/addHectar.js';
import logger from '/imports/api/util/logger.js';
import jobQueue from './jobqueue.js';
import readline from 'readline';
import fs from 'fs';

// Worker for 'addHectar' jobs: streams the Hectar file line by line through a
// HectarProcessor, reports progress every 100 lines, and finishes the job
// with the number of rows written (`nInserted`).
jobQueue.processJobs(
  'addHectar',
  {
    concurrency: 4,
    payload: 1,
  },
  async (job, callback) => {
    const { fileName } = job.data;
    logger.log(`Add ${fileName} hectar file.`);

    const lineProcessor = new HectarProcessor();
    let nHectar = 0;
    let input;

    try {
      // Stat first so a missing/unreadable file fails the job before any
      // stream is opened.
      const { size: fileSize } = await fs.promises.stat(fileName);

      input = fs.createReadStream(fileName, 'utf8');
      const rl = readline.createInterface({
        input,
        crlfDelay: Infinity,
      });

      let processedBytes = 0;
      let processedLines = 0;

      for await (const line of rl) {
        // Progress is measured against the file size in bytes, so count the
        // encoded length of the line (plus one for the stripped \n).
        processedBytes += Buffer.byteLength(line, 'utf8') + 1;
        processedLines += 1;

        if ((processedLines % 100) === 0) {
          await job.progress(
            processedBytes,
            fileSize,
            { echo: true },
            (err) => {
              if (err) logger.error(err);
            },
          );
        }

        await lineProcessor.parse(line);
        nHectar = lineProcessor.getNumberHectar();
      }
    } catch (err) {
      // Stop at the first failure: the job is failed exactly once and the
      // callback is invoked exactly once, instead of carrying on with the
      // remaining lines and later marking the failed job as done.
      logger.error(err);
      job.fail({ err });
      callback();
      return;
    } finally {
      // Release the file handle even when the loop was aborted.
      if (input) input.destroy();
    }

    logger.log(`Inserted ${nHectar} Hectar`);
    job.done({ nInserted: nHectar });
    callback();
  },
);
Loading

0 comments on commit 3b5db1a

Please sign in to comment.