forked from genenotebook/genenotebook
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #35 from Kmassau/add_hectar
Add hectar loader
- Loading branch information
Showing
13 changed files
with
584 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import { hectarCollection } from '/imports/api/genes/hectar/hectarCollection.js'; | ||
import jobQueue, { Job } from '/imports/api/jobqueue/jobqueue.js'; | ||
import { ValidatedMethod } from 'meteor/mdg:validated-method'; | ||
import { Genes } from '/imports/api/genes/geneCollection.js'; | ||
import logger from '/imports/api/util/logger.js'; | ||
import { Roles } from 'meteor/alanning:roles'; | ||
import SimpleSchema from 'simpl-schema'; | ||
import { Meteor } from 'meteor/meteor'; | ||
|
||
/**
 * Line-by-line processor for tab-separated HECTAR output.
 *
 * Each data line is turned into an annotation document that is upserted into
 * the hectar collection, and the gene whose subfeature matches the protein
 * identifier is updated with the `hectarId` of that document.
 */
class HectarProcessor {
  constructor() {
    // Handle returned by Genes.rawCollection(); used for the single gene
    // update issued per line (not a bulk operation).
    this.genesDb = Genes.rawCollection();
    // Number of lines that matched a gene and were written to the collection.
    this.nHectar = 0;
  }

  /**
   * Function that returns the total number of insertions or updates in the
   * hectar collection.
   * @function
   * @return {Number} Return the total number of insertions or updates of
   * hectar.
   */
  getNumberHectar() {
    return this.nHectar;
  }

  /**
   * Parse one line of the HECTAR file and store it when a matching gene
   * subfeature exists.
   *
   * Header lines (starting with 'protein id') and lines without any tab are
   * skipped. Columns that are missing or start with a dash are stored as
   * undefined; comma-separated columns are stored as arrays.
   *
   * @function
   * @param {String} line - A single line of the HECTAR tabular file.
   * @return {*} Whatever `genesDb.update` returns when the annotation was
   * stored, undefined when the line was skipped or no gene matched.
   */
  parse = (line) => {
    const columns = line.split('\t');
    if (line.slice(0, 10) === 'protein id' || columns.length <= 1) {
      return undefined;
    }

    // Get all hectar informations of the line, separated by tabs.
    const [
      proteinId,
      predictedTargetingCategory,
      signalPeptideScore,
      signalPeptideCleavageSite,
      typeIISignalAnchorScore,
      chloroplastScore,
      mitochondrionScore,
      otherScore,
    ] = columns;

    // Organize data in a dictionary.
    const annotations = {
      protein_id: proteinId,
      predicted_targeting_category: predictedTargetingCategory,
      signal_peptide_score: signalPeptideScore,
      signal_peptide_cleavage_site: signalPeptideCleavageSite,
      typeII_signal_anchor_score: typeIISignalAnchorScore,
      chloroplast_score: chloroplastScore,
      mitochondrion_score: mitochondrionScore,
      other_score: otherScore,
    };

    // Filters undefined data (with a dash) and splits into an array for
    // comma-separated data.
    for (const [key, value] of Object.entries(annotations)) {
      // A row with fewer than eight columns leaves the trailing fields
      // undefined; they must be skipped instead of being indexed.
      if (value === undefined) {
        continue;
      }
      if (value[0] === '-') {
        annotations[key] = undefined;
      }
      if (value.includes(',')) {
        annotations[key] = value.split(',');
      }
    }

    // The same selector is used to look the gene up and to update it
    // (e.g: ID = MMUCEDO_000002-T1).
    const geneSelector = {
      $or: [
        { 'subfeatures.ID': proteinId },
        { 'subfeatures.protein_id': proteinId },
      ],
    };
    const matchingGene = Genes.findOne(geneSelector);

    if (matchingGene === undefined || matchingGene === null) {
      logger.warn(`
Warning ! ${proteinId} hectar annotation did
not find a matching protein domain in the genes database.
${proteinId} is not added to the hectar database.`);
      return undefined;
    }

    // Increment hectar.
    this.nHectar += 1;

    // Update or insert if no matching documents were found.
    const { insertedId } = hectarCollection.upsert(
      { protein_id: proteinId }, // selector.
      annotations, // modifier.
    );

    // insertedId is only present when a new hectar document was created;
    // otherwise the _id of the existing document is looked up.
    const hectarId = typeof insertedId !== 'undefined'
      ? insertedId
      : hectarCollection.findOne({ protein_id: proteinId })._id;

    // Update hectarId in genes database.
    return this.genesDb.update(geneSelector, { $set: { hectarId } });
  };
}
|
||
/**
 * Meteor method that queues an 'addHectar' job for the given file and waits
 * for it to finish.
 *
 * Only users with the 'admin' role may call it; anyone else gets a
 * 'not-authorized' Meteor.Error.
 *
 * @param {Object} params
 * @param {String} params.fileName - Path of the HECTAR tabular file to load.
 * @return {{ result: *, jobStatus: String }} The job result document and the
 * status the job ended in.
 */
const addHectar = new ValidatedMethod({
  name: 'addHectar',
  validate: new SimpleSchema({
    fileName: { type: String },
  }).validator(),
  applyOptions: {
    noRetry: true,
  },
  run({ fileName }) {
    if (!this.userId || !Roles.userIsInRole(this.userId, 'admin')) {
      throw new Meteor.Error('not-authorized');
    }

    logger.log('file :', { fileName });
    const job = new Job(jobQueue, 'addHectar', { fileName });
    job.priority('high').save();

    // A cancelled job never becomes 'completed' or 'failed'; without it in
    // this list the polling loop below would never end.
    const finalStatuses = ['completed', 'failed', 'cancelled'];

    let { status } = job.doc;
    logger.debug(`Job status: ${status}`);
    // Poll the job document until the worker has finished with it.
    while (!finalStatuses.includes(status)) {
      const { doc } = job.refresh();
      status = doc.status;
    }
    return { result: job.doc.result, jobStatus: status };
  },
});
|
||
export default addHectar; | ||
export { HectarProcessor }; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* eslint-env mocha */ | ||
import { resetDatabase } from 'meteor/xolvio:cleaner'; | ||
import chai from 'chai'; | ||
import logger from '../../util/logger'; | ||
import { hectarCollection } from './hectarCollection'; | ||
import addHectar from './addHectar'; | ||
import { addTestUsers, addTestGenome } from '../../../startup/server/fixtures/addTestData'; | ||
import '../../jobqueue/process-hectar'; | ||
|
||
// Integration test of the addHectar method: checks the authorization guards
// and that one line of the Bnigra_hectar.tab fixture ends up in the hectar
// collection with the expected field values.
describe('hectar', function testHectar() {
  let adminId;
  let newUserId;
  let adminContext;
  let userContext;

  logger.log('Testing Hectar methods');

  // Fresh users for every test; the contexts mimic `this` inside the method.
  beforeEach(() => {
    ({ adminId, newUserId } = addTestUsers());
    adminContext = { userId: adminId };
    userContext = { userId: newUserId };
  });

  afterEach(() => {
    resetDatabase();
  });

  it('Should add Hectar tab file', function importhectar() {
    // Increase timeout
    this.timeout(20000);

    // Pass the flag positionally: `annot = true` is an assignment expression
    // that creates an implicit global instead of naming the argument.
    addTestGenome(true);

    const hectarParams = {
      fileName: 'assets/app/data/Bnigra_hectar.tab',
    };

    // Should fail for non-logged in
    chai.expect(() => {
      addHectar._execute({}, hectarParams);
    }).to.throw('[not-authorized]');

    // Should fail for non admin user
    chai.expect(() => {
      addHectar._execute(userContext, hectarParams);
    }).to.throw('[not-authorized]');

    const { result } = addHectar._execute(adminContext, hectarParams);

    chai.assert.equal(result.nInserted, 1);

    const hecs = hectarCollection.find({ protein_id: 'BniB01g000010.2N.1' }).fetch();

    chai.assert.lengthOf(hecs, 1, 'No hectar data found');

    const hec = hecs[0];

    chai.assert.equal(hec.predicted_targeting_category, 'other localisation');
    chai.assert.equal(hec.signal_peptide_score, '0.0583');
    chai.assert.equal(hec.typeII_signal_anchor_score, '0.0228');
    chai.assert.equal(hec.mitochondrion_score, '0.1032');
    chai.assert.equal(hec.other_score, '0.8968');
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import SimpleSchema from 'simpl-schema'; | ||
import { Mongo } from 'meteor/mongo'; | ||
|
||
// Builds a String field definition carrying the given label.
const labelledString = (label) => ({ type: String, label });

// Description of one hectar annotation: every field is declared as a String
// with a human-readable label. The schema is only exported from this module;
// it is not attached to the collection here.
const hectarSchema = new SimpleSchema({
  protein_id: labelledString('Query sequence name and type.'),
  predicted_targeting_category: labelledString('Predicted sub-cellular localization.'),
  signal_peptide_score: labelledString('Probability (score) to be a signal peptide.'),
  signal_peptide_cleavage_site: labelledString('Predicted cleavage site of signal peptide.'),
  typeII_signal_anchor_score: labelledString('Probability (score) to be a type II signal anchor.'),
  chloroplast_score: labelledString('Probability (score) to be in chloroplast.'),
  mitochondrion_score: labelledString('Probability (score) to be in mitochondrion.'),
  other_score: labelledString('Probability (score) to be elsewhere .'),
});

// Mongo collection named 'hectar' that holds the annotation documents.
const hectarCollection = new Mongo.Collection('hectar');
|
||
export { hectarCollection, hectarSchema }; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import { HectarProcessor } from '/imports/api/genes/hectar/addHectar.js'; | ||
import logger from '/imports/api/util/logger.js'; | ||
import jobQueue from './jobqueue.js'; | ||
import readline from 'readline'; | ||
import fs from 'fs'; | ||
|
||
/**
 * Worker for 'addHectar' jobs.
 *
 * Reads the file named in the job data line by line, hands every line to a
 * HectarProcessor and reports progress every 100 lines. The job is marked
 * done with `{ nInserted }` when the whole file was processed, or failed on
 * the first error; the queue callback is invoked exactly once either way.
 */
jobQueue.processJobs(
  'addHectar',
  {
    concurrency: 4,
    payload: 1,
  },
  async (job, callback) => {
    const { fileName } = job.data;
    logger.log(`Add ${fileName} hectar file.`);

    const lineProcessor = new HectarProcessor();
    let rl;

    try {
      // Stat the file BEFORE creating the readline interface: awaiting
      // between createInterface() and the for-await loop may drop lines, and
      // a missing file is reported here as a job failure.
      const { size: fileSize } = await fs.promises.stat(fileName);

      rl = readline.createInterface({
        input: fs.createReadStream(fileName, 'utf8'),
        crlfDelay: Infinity,
      });

      let processedBytes = 0;
      let processedLines = 0;

      for await (const line of rl) {
        // fileSize is in bytes, so count bytes (not characters); +1 for \n.
        processedBytes += Buffer.byteLength(line, 'utf8') + 1;
        processedLines += 1;

        if ((processedLines % 100) === 0) {
          await job.progress(
            // The estimate can overshoot by the final newline; never report
            // more than the total.
            Math.min(processedBytes, fileSize),
            fileSize,
            { echo: true },
            (err) => {
              if (err) logger.error(err);
            },
          );
        }

        await lineProcessor.parse(line);
      }

      const nHectar = lineProcessor.getNumberHectar();
      logger.log(`Inserted ${nHectar} Hectar`);
      job.done({ nInserted: nHectar });
    } catch (err) {
      // First error aborts the whole file: the job is failed once and is
      // never marked done afterwards.
      logger.error(err);
      job.fail({ err });
    } finally {
      if (rl) rl.close();
      callback();
    }
  },
);
Oops, something went wrong.