From bbd1cc72d5c402e75c15bdccbf613b0fde6011a4 Mon Sep 17 00:00:00 2001 From: Diego Poggioli Date: Mon, 10 Apr 2017 15:53:39 +0100 Subject: [PATCH 01/48] Variant annotation writers to multiple collections --- .../converters/data/AnnotationFieldNames.java | 14 +- .../DBObjectToVariantAnnotationConverter.java | 207 ---------------- .../VariantAnnotationToDBObjectConverter.java | 179 -------------- .../data/VariantToDBObjectConverter.java | 21 +- .../commons/models/data/ConsequenceType.java | 233 ++++++++++++++++++ .../ac/ebi/eva/commons/models/data/Score.java | 66 +++++ .../models/data/VariantAnnotation.java | 85 +++++-- .../ac/ebi/eva/commons/models/data/Xref.java | 57 +++++ .../configuration/MongoConfiguration.java | 2 + .../VariantAnnotationWriterConfiguration.java | 8 +- .../io/mappers/AnnotationLineMapper.java | 43 ++-- .../io/writers/VariantMongoWriter.java | 6 +- .../io/writers/VepAnnotationMongoWriter.java | 145 ++++++----- .../data/VariantToMongoDbObjectConverter.java | 87 +++++++ .../parameters/DatabaseParameters.java | 7 + .../parameters/JobParametersNames.java | 2 + ...DbCollectionsAnnotationsNameValidator.java | 36 +++ ...notationLoaderStepParametersValidator.java | 2 + .../uk/ac/ebi/eva/utils/MongoDBHelper.java | 25 +- ...bjectToVariantAnnotationConverterTest.java | 121 --------- ...iantAnnotationToDBObjectConverterTest.java | 177 ------------- .../data/VariantToDBObjectConverterTest.java | 2 +- .../io/mappers/AnnotationLineMapperTest.java | 41 ++- .../writers/VepAnnotationMongoWriterTest.java | 135 +++++----- .../eva/pipeline/jobs/AnnotationJobTest.java | 44 ++-- .../pipeline/jobs/GenotypedVcfJobTest.java | 16 +- .../jobs/GenotypedVcfJobWorkflowTest.java | 11 +- .../jobs/steps/AnnotationLoaderStepTest.java | 36 +-- ...llectionsAnnotationsNameValidatorTest.java | 65 +++++ ...gregatedVcfJobParametersValidatorTest.java | 12 +- ...enotypedVcfJobParametersValidatorTest.java | 1 + ...tionLoaderStepParametersValidatorTest.java | 8 + 
...elineJobLauncherCommandLineRunnerTest.java | 46 ++-- .../test/utils/GenotypedVcfJobTestUtils.java | 39 +-- .../ebi/eva/utils/EvaCommandLineBuilder.java | 4 + .../ebi/eva/utils/EvaJobParameterBuilder.java | 6 + .../ac/ebi/eva/utils/MongoDBHelperTest.java | 6 +- 37 files changed, 969 insertions(+), 1026 deletions(-) delete mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/converters/data/DBObjectToVariantAnnotationConverter.java delete mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantAnnotationToDBObjectConverter.java create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java create mode 100644 src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java create mode 100644 src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsAnnotationsNameValidator.java delete mode 100644 src/test/java/uk/ac/ebi/eva/commons/models/converters/data/DBObjectToVariantAnnotationConverterTest.java delete mode 100644 src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantAnnotationToDBObjectConverterTest.java create mode 100644 src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsAnnotationsNameValidatorTest.java diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationFieldNames.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationFieldNames.java index 9e38b7873..e0993131f 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationFieldNames.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationFieldNames.java @@ -16,6 +16,16 @@ package uk.ac.ebi.eva.commons.models.converters.data; public class AnnotationFieldNames { + public static final String CHROMOSOME_FIELD = "chr"; + + public static final 
String START_FIELD = "start"; + + public static final String END_FIELD = "end"; + + public static final String ENSEMBL_VERSION_FIELD = "ensemblVer"; + + public static final String VEP_CACHE_VERSION_FIELD = "cacheVer"; + public static final String CONSEQUENCE_TYPE_FIELD = "ct"; public static final String GENE_NAME_FIELD = "gn"; @@ -42,8 +52,6 @@ public class AnnotationFieldNames { public static final String SO_ACCESSION_FIELD = "so"; - public static final String PROTEIN_SUBSTITUTION_SCORE_FIELD = "ps_score"; - public static final String POLYPHEN_FIELD = "polyphen"; public static final String SIFT_FIELD = "sift"; @@ -56,7 +64,5 @@ public class AnnotationFieldNames { public final static String SCORE_SCORE_FIELD = "sc"; - public final static String SCORE_SOURCE_FIELD = "src"; - public final static String SCORE_DESCRIPTION_FIELD = "desc"; } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/DBObjectToVariantAnnotationConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/DBObjectToVariantAnnotationConverter.java deleted file mode 100644 index bd03a7e94..000000000 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/DBObjectToVariantAnnotationConverter.java +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright 2017 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package uk.ac.ebi.eva.commons.models.converters.data; - -import com.mongodb.BasicDBList; -import com.mongodb.DBObject; -import org.opencb.biodata.models.variant.annotation.ConsequenceType; -import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings; -import org.opencb.biodata.models.variant.annotation.Score; -import org.opencb.biodata.models.variant.annotation.Xref; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.core.convert.converter.Converter; -import org.springframework.util.Assert; - -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; - -import java.util.LinkedList; -import java.util.List; - -/** - * Converts a mongoDb {@link DBObject} into {@link VariantAnnotation} - *

- * Slim version of {@link org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantAnnotationConverter} - * Unused fields and methods removed. - *

- * The other way converter is {@link VariantAnnotationToDBObjectConverter} - */ -public class DBObjectToVariantAnnotationConverter implements Converter { - private static final Logger logger = LoggerFactory.getLogger(DBObjectToVariantAnnotationConverter.class); - - @Override - public VariantAnnotation convert(DBObject object) { - Assert.notNull(object, "Variant annotation DBObject should not be null"); - logger.trace("Convert mongo object into variant annotation {} ", object); - - VariantAnnotation variantAnnotation = new VariantAnnotation(); - - //ConsequenceType - List consequenceTypes = new LinkedList<>(); - Object cts = object.get(AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD); - if (cts != null) { - if (cts instanceof BasicDBList) { - for (Object o : ((BasicDBList) cts)) { - if (o instanceof DBObject) { - DBObject ct = (DBObject) o; - - List soAccessionNames = convertSoField(ct); - List proteinSubstitutionScores = convertProteinSubstitutionScoresField(ct); - - consequenceTypes.add(new ConsequenceType( - getDefault(ct, AnnotationFieldNames.GENE_NAME_FIELD, ""), - getDefault(ct, AnnotationFieldNames.ENSEMBL_GENE_ID_FIELD, ""), - getDefault(ct, AnnotationFieldNames.ENSEMBL_TRANSCRIPT_ID_FIELD, ""), - getDefault(ct, AnnotationFieldNames.STRAND_FIELD, ""), - getDefault(ct, AnnotationFieldNames.BIOTYPE_FIELD, ""), - getDefault(ct, AnnotationFieldNames.C_DNA_POSITION_FIELD, 0), - getDefault(ct, AnnotationFieldNames.CDS_POSITION_FIELD, 0), - getDefault(ct, AnnotationFieldNames.AA_POSITION_FIELD, 0), - getDefault(ct, AnnotationFieldNames.AA_CHANGE_FIELD, ""), - getDefault(ct, AnnotationFieldNames.CODON_FIELD, ""), - proteinSubstitutionScores, - soAccessionNames)); - } else { - throw new ClassCastException("Object was not of type DBObject"); - } - } - } else { - throw new ClassCastException("Object was not of type BasicDBList"); - } - } - - variantAnnotation.setConsequenceTypes(consequenceTypes); - - //XREfs - List xrefs = new LinkedList<>(); - Object xrs = 
object.get(AnnotationFieldNames.XREFS_FIELD); - if (xrs != null) { - if (xrs instanceof BasicDBList) { - for (Object o : (BasicDBList) xrs) { - if (o instanceof DBObject) { - DBObject xref = (DBObject) o; - - xrefs.add(new Xref( - (String) xref.get(AnnotationFieldNames.XREF_ID_FIELD), - (String) xref.get(AnnotationFieldNames.XREF_SOURCE_FIELD)) - ); - } else { - throw new ClassCastException("Object was not of type DBObject"); - } - } - } else { - throw new ClassCastException("Object was not of type BasicDBList"); - } - } - - variantAnnotation.setXrefs(xrefs); - - return variantAnnotation; - } - - private List convertSoField(DBObject ct) { - List soAccessionNames = new LinkedList<>(); - if (ct.containsField(AnnotationFieldNames.SO_ACCESSION_FIELD)) { - if (ct.get(AnnotationFieldNames.SO_ACCESSION_FIELD) instanceof List) { - List list = (List) ct.get(AnnotationFieldNames.SO_ACCESSION_FIELD); - for (Integer so : list) { - soAccessionNames.add(ConsequenceTypeMappings.accessionToTerm.get(so)); - } - } else { - soAccessionNames - .add(ConsequenceTypeMappings.accessionToTerm.get(ct.get( - AnnotationFieldNames.SO_ACCESSION_FIELD))); - } - } - return soAccessionNames; - } - - private List convertProteinSubstitutionScoresField(DBObject ct) { - List proteinSubstitutionScores = new LinkedList<>(); - if (ct.containsField(AnnotationFieldNames.PROTEIN_SUBSTITUTION_SCORE_FIELD)) { - List list = (List) ct.get(AnnotationFieldNames.PROTEIN_SUBSTITUTION_SCORE_FIELD); - for (DBObject dbObject : list) { - proteinSubstitutionScores.add(new Score( - getDefault(dbObject, AnnotationFieldNames.SCORE_SCORE_FIELD, 0.0), - getDefault(dbObject, AnnotationFieldNames.SCORE_SOURCE_FIELD, ""), - getDefault(dbObject, AnnotationFieldNames.SCORE_DESCRIPTION_FIELD, "") - )); - } - } - - if (ct.containsField(AnnotationFieldNames.POLYPHEN_FIELD)) { - DBObject dbObject = (DBObject) ct.get(AnnotationFieldNames.POLYPHEN_FIELD); - proteinSubstitutionScores - .add(new Score(getDefault(dbObject, 
AnnotationFieldNames.SCORE_SCORE_FIELD, 0.0), - "Polyphen", - getDefault(dbObject, AnnotationFieldNames.SCORE_DESCRIPTION_FIELD, - ""))); - } - - if (ct.containsField(AnnotationFieldNames.SIFT_FIELD)) { - DBObject dbObject = (DBObject) ct.get(AnnotationFieldNames.SIFT_FIELD); - proteinSubstitutionScores - .add(new Score(getDefault(dbObject, AnnotationFieldNames.SCORE_SCORE_FIELD, 0.0), - "Sift", - getDefault(dbObject, AnnotationFieldNames.SCORE_DESCRIPTION_FIELD, - ""))); - } - return proteinSubstitutionScores; - } - - private String getDefault(DBObject object, String key, String defaultValue) { - Object o = object.get(key); - if (o != null) { - return o.toString(); - } else { - return defaultValue; - } - } - - private int getDefault(DBObject object, String key, int defaultValue) { - Object o = object.get(key); - if (o != null) { - if (o instanceof Integer) { - return (Integer) o; - } else { - try { - return Integer.parseInt(o.toString()); - } catch (Exception e) { - return defaultValue; - } - } - } else { - return defaultValue; - } - } - - private double getDefault(DBObject object, String key, double defaultValue) { - Object o = object.get(key); - if (o != null) { - if (o instanceof Double) { - return (Double) o; - } else { - try { - return Double.parseDouble(o.toString()); - } catch (Exception e) { - return defaultValue; - } - } - } else { - return defaultValue; - } - } -} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantAnnotationToDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantAnnotationToDBObjectConverter.java deleted file mode 100644 index 86c3b3677..000000000 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantAnnotationToDBObjectConverter.java +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright 2017 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with 
the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.commons.models.converters.data; - -import com.mongodb.BasicDBObject; -import com.mongodb.DBObject; -import org.opencb.biodata.models.variant.annotation.ConsequenceType; -import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings; -import org.opencb.biodata.models.variant.annotation.Score; -import org.opencb.biodata.models.variant.annotation.Xref; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.core.convert.converter.Converter; -import org.springframework.util.Assert; - -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; - -import java.util.Collection; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; - -/** - * Converts a {@link VariantAnnotation} into mongoDb {@link DBObject} - *

- * Slim version of {@link org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantAnnotationConverter} - * Unused fields and methods removed. - *

- * The other way converter is {@link DBObjectToVariantAnnotationConverter} - */ -public class VariantAnnotationToDBObjectConverter implements Converter { - private static final Logger logger = LoggerFactory.getLogger(VariantAnnotationToDBObjectConverter.class); - - @Override - public DBObject convert(VariantAnnotation variantAnnotation) { - Assert.notNull(variantAnnotation, - "Variant should not be null. Please provide a valid VariantAnnotation object"); - logger.trace("Convert variant annotation into mongo object {} ", variantAnnotation); - - DBObject dbObject = new BasicDBObject(); - Set xrefs = new HashSet<>(); - List cts = new LinkedList<>(); - - //ID - if (variantAnnotation.getId() != null && !variantAnnotation.getId().isEmpty()) { - xrefs.add(convertXrefToStorage(variantAnnotation.getId(), "dbSNP")); - } - - //ConsequenceType - if (variantAnnotation.getConsequenceTypes() != null) { - List consequenceTypes = variantAnnotation.getConsequenceTypes(); - for (ConsequenceType consequenceType : consequenceTypes) { - DBObject ct = new BasicDBObject(); - - putNotNull(ct, AnnotationFieldNames.GENE_NAME_FIELD, consequenceType.getGeneName()); - putNotNull(ct, AnnotationFieldNames.ENSEMBL_GENE_ID_FIELD, consequenceType.getEnsemblGeneId()); - putNotNull(ct, AnnotationFieldNames.ENSEMBL_TRANSCRIPT_ID_FIELD, consequenceType.getEnsemblTranscriptId()); - putNotNull(ct, AnnotationFieldNames.RELATIVE_POS_FIELD, consequenceType.getRelativePosition()); - putNotNull(ct, AnnotationFieldNames.CODON_FIELD, consequenceType.getCodon()); - putNotNull(ct, AnnotationFieldNames.STRAND_FIELD, consequenceType.getStrand()); - putNotNull(ct, AnnotationFieldNames.BIOTYPE_FIELD, consequenceType.getBiotype()); - putNotNull(ct, AnnotationFieldNames.C_DNA_POSITION_FIELD, consequenceType.getcDnaPosition()); - putNotNull(ct, AnnotationFieldNames.CDS_POSITION_FIELD, consequenceType.getCdsPosition()); - putNotNull(ct, AnnotationFieldNames.AA_POSITION_FIELD, consequenceType.getAaPosition()); - 
putNotNull(ct, AnnotationFieldNames.AA_CHANGE_FIELD, consequenceType.getAaChange()); - - if (consequenceType.getSoTerms() != null) { - List soAccession = new LinkedList<>(); - for (ConsequenceType.ConsequenceTypeEntry entry : consequenceType.getSoTerms()) { - soAccession.add(ConsequenceTypeMappings.termToAccession.get(entry.getSoName())); - } - putNotNull(ct, AnnotationFieldNames.SO_ACCESSION_FIELD, soAccession); - } - - //Protein substitution region score - if (consequenceType.getProteinSubstitutionScores() != null) { - List proteinSubstitutionScores = new LinkedList<>(); - for (Score score : consequenceType.getProteinSubstitutionScores()) { - if (score != null) { - if (score.getSource().equals("Polyphen")) { - putNotNull(ct, AnnotationFieldNames.POLYPHEN_FIELD, - convertScoreToStorage(score.getScore(), null, score.getDescription())); - } else if (score.getSource().equals("Sift")) { - putNotNull(ct, AnnotationFieldNames.SIFT_FIELD, - convertScoreToStorage(score.getScore(), null, score.getDescription())); - } else { - proteinSubstitutionScores.add(convertScoreToStorage(score)); - } - } - } - putNotNull(ct, AnnotationFieldNames.PROTEIN_SUBSTITUTION_SCORE_FIELD, proteinSubstitutionScores); - } - - - cts.add(ct); - - if (consequenceType.getGeneName() != null && !consequenceType.getGeneName().isEmpty()) { - xrefs.add(convertXrefToStorage(consequenceType.getGeneName(), "HGNC")); - } - if (consequenceType.getEnsemblGeneId() != null && !consequenceType.getEnsemblGeneId().isEmpty()) { - xrefs.add(convertXrefToStorage(consequenceType.getEnsemblGeneId(), "ensemblGene")); - } - if (consequenceType.getEnsemblTranscriptId() != null && !consequenceType.getEnsemblTranscriptId() - .isEmpty()) { - xrefs.add(convertXrefToStorage(consequenceType.getEnsemblTranscriptId(), "ensemblTranscript")); - } - - } - putNotNull(dbObject, AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD, cts); - } - - //XREFs - if (variantAnnotation.getXrefs() != null) { - for (Xref xref : 
variantAnnotation.getXrefs()) { - xrefs.add(convertXrefToStorage(xref.getId(), xref.getSrc())); - } - } - putNotNull(dbObject, AnnotationFieldNames.XREFS_FIELD, xrefs); - - return dbObject; - } - - private DBObject convertScoreToStorage(Score score) { - return convertScoreToStorage(score.getScore(), score.getSource(), score.getDescription()); - } - - private DBObject convertScoreToStorage(double score, String source, String description) { - DBObject dbObject = new BasicDBObject(AnnotationFieldNames.SCORE_SCORE_FIELD, score); - putNotNull(dbObject, AnnotationFieldNames.SCORE_SOURCE_FIELD, source); - putNotNull(dbObject, AnnotationFieldNames.SCORE_DESCRIPTION_FIELD, description); - return dbObject; - } - - private DBObject convertXrefToStorage(String id, String source) { - DBObject dbObject = new BasicDBObject(AnnotationFieldNames.XREF_ID_FIELD, id); - dbObject.put(AnnotationFieldNames.XREF_SOURCE_FIELD, source); - return dbObject; - } - - - private void putNotNull(DBObject dbObject, String key, Object obj) { - if (obj != null) { - dbObject.put(key, obj); - } - } - - private void putNotNull(DBObject dbObject, String key, Collection obj) { - if (obj != null && !obj.isEmpty()) { - dbObject.put(key, obj); - } - } - - private void putNotNull(DBObject dbObject, String key, String obj) { - if (obj != null && !obj.isEmpty()) { - dbObject.put(key, obj); - } - } - - private void putNotNull(DBObject dbObject, String key, Integer obj) { - if (obj != null && obj != 0) { - dbObject.put(key, obj); - } - } - -} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java index 8dc284fcc..f1e56fa79 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java @@ -83,8 +83,6 @@ public class 
VariantToDBObjectConverter implements Converter private VariantSourceEntryToDBObjectConverter variantSourceEntryConverter; - private VariantAnnotationToDBObjectConverter variantAnnotationConverter; - private VariantStatsToDBObjectConverter statsConverter; /** @@ -92,7 +90,7 @@ public class VariantToDBObjectConverter implements Converter * VariantAnnotation and VariantStats should not be written. */ public VariantToDBObjectConverter() { - this(null, null, null); + this(null, null); } /** @@ -101,22 +99,19 @@ public VariantToDBObjectConverter() { * should not be written. * * @param variantSourceEntryConverter Nullable - * @param variantAnnotationConverter Nullable * @param VariantStatsConverter Nullable */ public VariantToDBObjectConverter( VariantSourceEntryToDBObjectConverter variantSourceEntryConverter, - VariantAnnotationToDBObjectConverter variantAnnotationConverter, VariantStatsToDBObjectConverter VariantStatsConverter) { this.variantSourceEntryConverter = variantSourceEntryConverter; - this.variantAnnotationConverter = variantAnnotationConverter; this.statsConverter = VariantStatsConverter; } @Override public DBObject convert(Variant object) { - String id = MongoDBHelper.buildStorageId(object.getChromosome(), object.getStart(), object.getReference(), - object.getAlternate()); + String id = MongoDBHelper.buildVariantStorageId(object.getChromosome(), object.getStart(), object.getReference(), + object.getAlternate()); BasicDBObject mongoVariant = new BasicDBObject("_id", id) // Do not include IDs: the MongoWriter will take care in the query using an $addToSet @@ -132,7 +127,6 @@ public DBObject convert(Variant object) { appendAt(object, mongoVariant); appendHgvs(object, mongoVariant); appendFiles(object, mongoVariant); - appendAnnotations(object, mongoVariant); appendStatistics(object, mongoVariant); return mongoVariant; @@ -188,15 +182,6 @@ private void appendFiles(Variant object, BasicDBObject mongoVariant) { } } - private void appendAnnotations(Variant object, 
BasicDBObject mongoVariant) { - if (variantAnnotationConverter != null) { - if (object.getAnnotation() != null) { - DBObject annotation = variantAnnotationConverter.convert(object.getAnnotation()); - mongoVariant.append(ANNOTATION_FIELD, annotation); - } - } - } - private void appendStatistics(Variant object, BasicDBObject mongoVariant) { if (statsConverter != null) { List mongoStats = new ArrayList<>(); diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java new file mode 100644 index 000000000..3eeebf692 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java @@ -0,0 +1,233 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package uk.ac.ebi.eva.commons.models.data; + +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; + +import uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames; + +import java.util.List; + +/** + * org.opencb.biodata.models.variant.annotation.ConsequenceType + */ +@Document +public class ConsequenceType { + + @Field(value = AnnotationFieldNames.GENE_NAME_FIELD) + private String geneName; + + @Field(value = AnnotationFieldNames.ENSEMBL_GENE_ID_FIELD) + private String ensemblGeneId; + + @Field(value = AnnotationFieldNames.ENSEMBL_TRANSCRIPT_ID_FIELD) + private String ensemblTranscriptId; + + @Field(value = AnnotationFieldNames.STRAND_FIELD) + private String strand; + + @Field(value = AnnotationFieldNames.BIOTYPE_FIELD) + private String biotype; + + @Field(value = AnnotationFieldNames.C_DNA_POSITION_FIELD) + private Integer cDnaPosition; + + @Field(value = AnnotationFieldNames.CDS_POSITION_FIELD) + private Integer cdsPosition; + + @Field(value = AnnotationFieldNames.AA_POSITION_FIELD) + private Integer aaPosition; + + @Field(value = AnnotationFieldNames.AA_CHANGE_FIELD) + private String aaChange; + + @Field(value = AnnotationFieldNames.CODON_FIELD) + private String codon; + + @Field(value = AnnotationFieldNames.SIFT_FIELD) + private Score sifts; + + @Field(value = AnnotationFieldNames.POLYPHEN_FIELD) + private Score polyphen; + + @Field(value = AnnotationFieldNames.SO_ACCESSION_FIELD) + private List soAccessions; + + @Field(value = AnnotationFieldNames.RELATIVE_POS_FIELD) + private Integer relativePosition; + + public ConsequenceType() { + } + + public void setEnsemblTranscriptId(String ensemblTranscriptId) { + this.ensemblTranscriptId = ensemblTranscriptId; + } + + public void setGeneName(String geneName) { + this.geneName = geneName; + } + + public void setEnsemblGeneId(String ensemblGeneId) { + this.ensemblGeneId = ensemblGeneId; + } + + public void 
setRelativePosition(Integer relativePosition) { + this.relativePosition = relativePosition; + } + + public void setCodon(String codon) { + this.codon = codon; + } + + public void setStrand(String strand) { + this.strand = strand; + } + + public void setBiotype(String biotype) { + this.biotype = biotype; + } + + public void setcDnaPosition(Integer cDnaPosition) { + this.cDnaPosition = cDnaPosition; + } + + public void setCdsPosition(Integer cdsPosition) { + this.cdsPosition = cdsPosition; + } + + public void setAaPosition(Integer aaPosition) { + this.aaPosition = aaPosition; + } + + public void setAaChange(String aaChange) { + this.aaChange = aaChange; + } + + public String getGeneName() { + return geneName; + } + + public String getEnsemblGeneId() { + return ensemblGeneId; + } + + public String getEnsemblTranscriptId() { + return ensemblTranscriptId; + } + + public Integer getRelativePosition() { + return relativePosition; + } + + public String getCodon() { + return codon; + } + + public String getStrand() { + return strand; + } + + public String getBiotype() { + return biotype; + } + + public Integer getcDnaPosition() { + return cDnaPosition; + } + + public Integer getCdsPosition() { + return cdsPosition; + } + + public Integer getAaPosition() { + return aaPosition; + } + + public String getAaChange() { + return aaChange; + } + + public List getSoAccessions() { + return soAccessions; + } + + public void setSoAccessions(List soAccessions) { + this.soAccessions = soAccessions; + } + + public Score getSifts() { + return sifts; + } + + public void setSifts(Score sifts) { + this.sifts = sifts; + } + + public Score getPolyphen() { + return polyphen; + } + + public void setPolyphen(Score polyphen) { + this.polyphen = polyphen; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ConsequenceType that = (ConsequenceType) o; + + if (geneName != null ? 
!geneName.equals(that.geneName) : that.geneName != null) return false; + if (ensemblGeneId != null ? !ensemblGeneId.equals(that.ensemblGeneId) : that.ensemblGeneId != null) + return false; + if (ensemblTranscriptId != null ? !ensemblTranscriptId + .equals(that.ensemblTranscriptId) : that.ensemblTranscriptId != null) return false; + if (strand != null ? !strand.equals(that.strand) : that.strand != null) return false; + if (biotype != null ? !biotype.equals(that.biotype) : that.biotype != null) return false; + if (cDnaPosition != null ? !cDnaPosition.equals(that.cDnaPosition) : that.cDnaPosition != null) return false; + if (cdsPosition != null ? !cdsPosition.equals(that.cdsPosition) : that.cdsPosition != null) return false; + if (aaPosition != null ? !aaPosition.equals(that.aaPosition) : that.aaPosition != null) return false; + if (aaChange != null ? !aaChange.equals(that.aaChange) : that.aaChange != null) return false; + if (codon != null ? !codon.equals(that.codon) : that.codon != null) return false; + if (sifts != null ? !sifts.equals(that.sifts) : that.sifts != null) return false; + if (polyphen != null ? !polyphen.equals(that.polyphen) : that.polyphen != null) return false; + if (soAccessions != null ? !soAccessions.equals(that.soAccessions) : that.soAccessions != null) return false; + return relativePosition != null ? relativePosition + .equals(that.relativePosition) : that.relativePosition == null; + } + + @Override + public int hashCode() { + int result = geneName != null ? geneName.hashCode() : 0; + result = 31 * result + (ensemblGeneId != null ? ensemblGeneId.hashCode() : 0); + result = 31 * result + (ensemblTranscriptId != null ? ensemblTranscriptId.hashCode() : 0); + result = 31 * result + (strand != null ? strand.hashCode() : 0); + result = 31 * result + (biotype != null ? biotype.hashCode() : 0); + result = 31 * result + (cDnaPosition != null ? cDnaPosition.hashCode() : 0); + result = 31 * result + (cdsPosition != null ? 
cdsPosition.hashCode() : 0); + result = 31 * result + (aaPosition != null ? aaPosition.hashCode() : 0); + result = 31 * result + (aaChange != null ? aaChange.hashCode() : 0); + result = 31 * result + (codon != null ? codon.hashCode() : 0); + result = 31 * result + (sifts != null ? sifts.hashCode() : 0); + result = 31 * result + (polyphen != null ? polyphen.hashCode() : 0); + result = 31 * result + (soAccessions != null ? soAccessions.hashCode() : 0); + result = 31 * result + (relativePosition != null ? relativePosition.hashCode() : 0); + return result; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java new file mode 100644 index 000000000..da1868164 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java @@ -0,0 +1,66 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package uk.ac.ebi.eva.commons.models.data; + +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; + +import uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames; + +/** + * From {@link org.opencb.biodata.models.variant.annotation.Score} + */ +@Document +public class Score { + + @Field(value = AnnotationFieldNames.SCORE_SCORE_FIELD) + private Double score; + + @Field(value = AnnotationFieldNames.SCORE_DESCRIPTION_FIELD) + private String description; + + public Score(Double score, String description) { + this.score = score; + this.description = description; + } + + public Double getScore() { + return score; + } + + public String getDescription() { + return description; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + Score score1 = (Score) o; + + if (score != null ? !score.equals(score1.score) : score1.score != null) return false; + return description != null ? description.equals(score1.description) : score1.description == null; + } + + @Override + public int hashCode() { + int result = score != null ? score.hashCode() : 0; + result = 31 * result + (description != null ? 
description.hashCode() : 0); + return result; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java index 2e7e7b232..0f2228dc5 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java @@ -15,44 +15,59 @@ */ package uk.ac.ebi.eva.commons.models.data; -import org.opencb.biodata.models.variant.annotation.ConsequenceType; -import org.opencb.biodata.models.variant.annotation.Xref; +import com.google.common.base.Strings; +import org.springframework.data.annotation.Id; +import org.springframework.data.annotation.Transient; +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; + +import uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; +import java.util.HashSet; import java.util.Map; +import java.util.Set; /** * Slim version of {@link org.opencb.biodata.models.variant.annotation.VariantAnnotation} * Unused fields removed. 
* */ +@Document public class VariantAnnotation { + @Field(value = AnnotationFieldNames.CHROMOSOME_FIELD) private String chromosome; + @Field(value = AnnotationFieldNames.START_FIELD) private int start; + @Field(value = AnnotationFieldNames.END_FIELD) private int end; + @Transient private String referenceAllele; + @Transient private String alternativeAllele; + @Id private String id; - private List xrefs; + @Field(value = AnnotationFieldNames.ENSEMBL_VERSION_FIELD) + private String ensmblVersion; - private List hgvs; + @Field(value = AnnotationFieldNames.VEP_CACHE_VERSION_FIELD) + private String vepCacheVersion; - private List consequenceTypes; + @Field(value = AnnotationFieldNames.XREFS_FIELD) + private Set xrefs; - private Map additionalAttributes; + @Field(value = AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD) + private Set consequenceTypes; - public VariantAnnotation() { - this("", -1, -1, ""); - } + @Transient + private Map additionalAttributes; public VariantAnnotation(String chromosome, int start, int end, String referenceAllele) { this(chromosome, start, end, referenceAllele, ""); @@ -66,9 +81,8 @@ public VariantAnnotation(String chromosome, int start, int end, String reference this.alternativeAllele = alternativeAllele; this.id = ""; - this.xrefs = new ArrayList<>(); - this.hgvs = new ArrayList<>(); - this.consequenceTypes = new ArrayList<>(); + this.xrefs = new HashSet<>(); + this.consequenceTypes = new HashSet<>(); this.additionalAttributes = new HashMap<>(); } @@ -112,27 +126,50 @@ public void setId(String id) { this.id = id; } - public List getXrefs() { + public Set getXrefs() { return xrefs; } - public void setXrefs(List xrefs) { + public void setXrefs(Set xrefs) { this.xrefs = xrefs; } - public List getHgvs() { - return hgvs; + public Set getConsequenceTypes() { + return consequenceTypes; } - public void setHgvs(List hgvs) { - this.hgvs = hgvs; + public void setConsequenceTypes(Set consequenceTypes) { + this.consequenceTypes = consequenceTypes; } - 
public List getConsequenceTypes() { - return consequenceTypes; + public String getEnsmblVersion() { + return ensmblVersion; } - public void setConsequenceTypes(List consequenceTypes) { - this.consequenceTypes = consequenceTypes; + public void setEnsmblVersion(String ensmblVersion) { + this.ensmblVersion = ensmblVersion; } + + public String getVepCacheVersion() { + return vepCacheVersion; + } + + public void setVepCacheVersion(String vepCacheVersion) { + this.vepCacheVersion = vepCacheVersion; + } + + public void extractXrefsFromConsequenceTypes(){ + for (ConsequenceType consequenceType : consequenceTypes) { + if (!Strings.isNullOrEmpty(consequenceType.getGeneName())) { + xrefs.add(new Xref(consequenceType.getGeneName(), "HGNC")); + } + if (!Strings.isNullOrEmpty(consequenceType.getEnsemblGeneId())) { + xrefs.add(new Xref(consequenceType.getEnsemblGeneId(), "ensemblGene")); + } + if (!Strings.isNullOrEmpty(consequenceType.getEnsemblTranscriptId())) { + xrefs.add(new Xref(consequenceType.getEnsemblTranscriptId(), "ensemblTranscript")); + } + } + } + } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java new file mode 100644 index 000000000..5487494f0 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java @@ -0,0 +1,57 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.commons.models.data; + +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; + +import uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames; + +/** + * From org.opencb.biodata.models.variant.annotation.Xref + */ +@Document +public class Xref { + + @Field(value = AnnotationFieldNames.XREF_ID_FIELD) + private String id; + + @Field(value = AnnotationFieldNames.XREF_SOURCE_FIELD) + private String src; + + public Xref(String id, String src) { + this.id = id; + this.src = src; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + Xref xref = (Xref) o; + + if (id != null ? !id.equals(xref.id) : xref.id != null) return false; + return src != null ? src.equals(xref.src) : xref.src == null; + } + + @Override + public int hashCode() { + int result = id != null ? id.hashCode() : 0; + result = 31 * result + (src != null ? 
src.hashCode() : 0); + return result; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/MongoConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/MongoConfiguration.java index 0446b538d..86e603991 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/MongoConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/MongoConfiguration.java @@ -26,6 +26,7 @@ import org.springframework.data.mongodb.core.SimpleMongoDbFactory; import org.springframework.data.mongodb.core.convert.DbRefResolver; import org.springframework.data.mongodb.core.convert.DefaultDbRefResolver; +import org.springframework.data.mongodb.core.convert.DefaultMongoTypeMapper; import org.springframework.data.mongodb.core.convert.MappingMongoConverter; import org.springframework.data.mongodb.core.mapping.MongoMappingContext; @@ -103,6 +104,7 @@ private static MappingMongoConverter getMappingMongoConverter(MongoDbFactory mon MongoMappingContext mongoMappingContext) { DbRefResolver dbRefResolver = new DefaultDbRefResolver(mongoFactory); MappingMongoConverter mongoConverter = new MappingMongoConverter(dbRefResolver, mongoMappingContext); + mongoConverter.setTypeMapper(new DefaultMongoTypeMapper(null)); // Customization: replace dots with pound sign mongoConverter.setMapKeyDotReplacement("£"); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java index db07645b3..bba043aa3 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java @@ -25,6 +25,7 @@ import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.io.writers.VepAnnotationMongoWriter; +import 
uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_ANNOTATION_WRITER; @@ -36,7 +37,10 @@ public class VariantAnnotationWriterConfiguration { @StepScope @Profile(Application.VARIANT_ANNOTATION_MONGO_PROFILE) public ItemWriter variantAnnotationItemWriter(MongoOperations mongoOperations, - DatabaseParameters databaseParameters) { - return new VepAnnotationMongoWriter(mongoOperations, databaseParameters.getCollectionVariantsName()); + DatabaseParameters databaseParameters, + AnnotationParameters annotationParameters) { + return new VepAnnotationMongoWriter(mongoOperations, databaseParameters.getCollectionAnnotationsName(), + annotationParameters.getVepVersion(), + annotationParameters.getVepCacheVersion()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java index 7138b236b..250f57617 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java @@ -17,17 +17,20 @@ package uk.ac.ebi.eva.pipeline.io.mappers; import org.apache.commons.lang.ArrayUtils; -import org.opencb.biodata.models.variant.annotation.ConsequenceType; -import org.opencb.biodata.models.variant.annotation.Score; +import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.item.file.LineMapper; +import uk.ac.ebi.eva.commons.models.data.ConsequenceType; +import uk.ac.ebi.eva.commons.models.data.Score; import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * Map a line in VEP output file to {@link 
VariantAnnotation} @@ -87,7 +90,7 @@ public VariantAnnotation mapLine(String line, int lineNumber) { parseTranscriptFields(consequenceType, lineFields); // Otherwise just set SO terms } else { - consequenceType.setSoTermsFromSoNames(Arrays.asList(lineFields[6].split(","))); // fill so terms + consequenceType.setSoAccessions(mapSoTermsToSoAccessions(lineFields[6].split(","))); } currentAnnotation.getConsequenceTypes().add(consequenceType); @@ -99,22 +102,26 @@ public VariantAnnotation mapLine(String line, int lineNumber) { * #parseRemainingFields(org.opencb.biodata.models.variant.annotation.ConsequenceType, java.lang.String[]) */ private void parseTranscriptFields(ConsequenceType consequenceType, String[] lineFields) { - consequenceType.setEnsemblGeneId(lineFields[3]); // fill Ensembl gene id - consequenceType.setEnsemblTranscriptId(lineFields[4]); // fill Ensembl transcript id + consequenceType.setEnsemblGeneId(lineFields[3]); + consequenceType.setEnsemblTranscriptId(lineFields[4]); if(!lineFields[6].equals("") && !lineFields[6].equals("-")) { // VEP may leave this field empty - consequenceType.setSoTermsFromSoNames(Arrays.asList(lineFields[6].split(","))); // fill so terms + consequenceType.setSoAccessions(mapSoTermsToSoAccessions(lineFields[6].split(","))); } if(!lineFields[7].equals("-")) { - consequenceType.setcDnaPosition(parseStringInterval(lineFields[7])); // fill cdna position + consequenceType.setcDnaPosition(parseStringInterval(lineFields[7])); } if(!lineFields[8].equals("-")) { - consequenceType.setCdsPosition(parseStringInterval(lineFields[8])); // fill cds position + consequenceType.setCdsPosition(parseStringInterval(lineFields[8])); } if(!lineFields[9].equals("-")) { - consequenceType.setAaPosition(parseStringInterval(lineFields[9])); // fill aa position + consequenceType.setAaPosition(parseStringInterval(lineFields[9])); } - consequenceType.setAaChange(lineFields[10]); // fill aa change - consequenceType.setCodon(lineFields[11]); // fill codon change 
+ consequenceType.setAaChange(lineFields[10]); + consequenceType.setCodon(lineFields[11]); + } + + private List mapSoTermsToSoAccessions(String[] soTerms){ + return Arrays.stream(soTerms).map(ConsequenceTypeMappings.termToAccession::get).collect(Collectors.toList()); } /** @@ -191,17 +198,11 @@ private void parseExtraField(ConsequenceType consequenceType, String extraField, case "biotype": consequenceType.setBiotype(keyValue[1]); break; - case "hgvsc": - currentAnnotation.getHgvs().add(keyValue[1]); - break; - case "hgvsp": - currentAnnotation.getHgvs().add(keyValue[1]); - break; case "polyphen": // Format is PolyPhen=possibly_damaging(0.859) - consequenceType.addProteinSubstitutionScore(parseProteinSubstitutionScore("Polyphen", keyValue[1])); + consequenceType.setPolyphen(parseProteinSubstitutionScore(keyValue[1])); break; case "sift": // Format is SIFT=tolerated(0.07) - consequenceType.addProteinSubstitutionScore(parseProteinSubstitutionScore("Sift", keyValue[1])); + consequenceType.setSifts(parseProteinSubstitutionScore(keyValue[1])); break; case "strand": consequenceType.setStrand(keyValue[1].equals("1")?"+":"-"); @@ -220,8 +221,8 @@ private void parseExtraField(ConsequenceType consequenceType, String extraField, * From org.opencb.biodata.formats.annotation.io.VepFormatReader * #parseProteinSubstitutionScore(java.lang.String, java.lang.String) */ - private Score parseProteinSubstitutionScore(String predictorName, String scoreString) { + private Score parseProteinSubstitutionScore(String scoreString) { String[] scoreFields = scoreString.split("[\\(\\)]"); - return new Score(Double.valueOf(scoreFields[1]), predictorName, scoreFields[0]); + return new Score(Double.valueOf(scoreFields[1]), scoreFields[0]); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java index e01b49229..a8683a320 100644 --- 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java @@ -70,15 +70,15 @@ private void initializeConverters(boolean includeStats, boolean includeSamples) this.statsConverter = includeStats ? new VariantStatsToDBObjectConverter() : null; SamplesToDBObjectConverter sampleConverter = includeSamples ? new SamplesToDBObjectConverter() : null; this.sourceEntryConverter = new VariantSourceEntryToDBObjectConverter(sampleConverter); - this.variantConverter = new VariantToDBObjectConverter(null, null, null); + this.variantConverter = new VariantToDBObjectConverter(); } @Override protected void doWrite(List variants) { BulkWriteOperation bulk = mongoOperations.getCollection(collection).initializeUnorderedBulkOperation(); for (Variant variant : variants) { - String id = MongoDBHelper.buildStorageId(variant.getChromosome(), variant.getStart(), - variant.getReference(), variant.getAlternate()); + String id = MongoDBHelper.buildVariantStorageId(variant.getChromosome(), variant.getStart(), + variant.getReference(), variant.getAlternate()); // the chromosome and start appear just as shard keys, in an unsharded cluster they wouldn't be needed BasicDBObject query = new BasicDBObject("_id", id) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriter.java index 093040201..bf2edd9e1 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriter.java @@ -17,13 +17,13 @@ package uk.ac.ebi.eva.pipeline.io.writers; import com.mongodb.BasicDBObject; -import com.mongodb.DBObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.item.data.MongoItemWriter; import org.springframework.data.mongodb.core.MongoOperations; - -import 
uk.ac.ebi.eva.commons.models.converters.data.VariantAnnotationToDBObjectConverter; +import org.springframework.data.mongodb.core.query.BasicQuery; +import org.springframework.data.mongodb.core.query.BasicUpdate; +import org.springframework.util.Assert; import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.utils.MongoDBHelper; @@ -32,62 +32,63 @@ import java.util.List; import java.util.Map; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.XREFS_FIELD; + /** * Write a list of {@link VariantAnnotation} into MongoDB - * + *

* A new annotation is added in the existing document. * In case of two annotations (or more) in the same variant the other annotations are appended: - * + *

* 20_63963_G/A 20:63963 A ENSG00000178591 ENST00000382410 Transcript upstream_gene_variant - - - - - - DISTANCE=4388;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS12989.2;ENSP=ENSP00000371847;SWISSPROT=DB125_HUMAN;TREMBL=B2R4E8_HUMAN;UNIPARC=UPI00001A36DE * 20_63963_G/A 20:63963 A ENSG00000178591 ENST00000608838 Transcript upstream_gene_variant - - - - - - DISTANCE=3928;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript - * + *

* Will be: - * + *

* "annot" : { - * "ct" : [ - * { "gn" : "DEFB125", "ensg" : "ENSG00000178591", "enst" : "ENST00000382410", "codon" : "-", "strand" : "+", "bt" : "protein_coding", "aaChange" : "-", "so" : [ 1631 ] }, - * { "gn" : "DEFB125", "ensg" : "ENSG00000178591", "enst" : "ENST00000608838", "codon" : "-", "strand" : "+", "bt" : "processed_transcript", "aaChange" : "-", - * "so" : [ 1631 ] } ], - * "xrefs" : [ - * { "id" : "DEFB125", "src" : "HGNC" }, - * { "id" : "ENST00000382410", "src" : "ensemblTranscript" }, - * { "id" : "ENST00000608838", "src" : "ensemblTranscript" }, - * { "id" : "ENSG00000178591", "src" : "ensemblGene" + * "ct" : [ + * { "gn" : "DEFB125", "ensg" : "ENSG00000178591", "enst" : "ENST00000382410", "codon" : "-", "strand" : "+", "bt" : "protein_coding", "aaChange" : "-", "so" : [ 1631 ] }, + * { "gn" : "DEFB125", "ensg" : "ENSG00000178591", "enst" : "ENST00000608838", "codon" : "-", "strand" : "+", "bt" : "processed_transcript", "aaChange" : "-", + * "so" : [ 1631 ] } ], + * "xrefs" : [ + * { "id" : "DEFB125", "src" : "HGNC" }, + * { "id" : "ENST00000382410", "src" : "ensemblTranscript" }, + * { "id" : "ENST00000608838", "src" : "ensemblTranscript" }, + * { "id" : "ENSG00000178591", "src" : "ensemblGene" */ public class VepAnnotationMongoWriter extends MongoItemWriter { private static final Logger logger = LoggerFactory.getLogger(VepAnnotationMongoWriter.class); private MongoOperations mongoOperations; + private String collection; - private VariantAnnotationToDBObjectConverter converter; - public VepAnnotationMongoWriter(MongoOperations mongoOperations) { - this.mongoOperations = mongoOperations; - this.converter = new VariantAnnotationToDBObjectConverter(); - } + private String vepVersion; + + private String vepCacheVersion; + + public VepAnnotationMongoWriter(MongoOperations mongoOperations, + String collection, + String vepVersion, + String vepCacheVersion) { + super(); + Assert.notNull(mongoOperations, "A Mongo instance is required"); + 
Assert.hasText(collection, "A collection name is required"); - public VepAnnotationMongoWriter(MongoOperations mongoOperations, String collection){ - this(mongoOperations); setCollection(collection); setTemplate(mongoOperations); - } - @Override - public void setCollection(String collection) { - super.setCollection(collection); + this.mongoOperations = mongoOperations; this.collection = collection; + this.vepVersion = vepVersion; + this.vepCacheVersion = vepCacheVersion; } - private Map> groupVariantAnnotationById(List variantAnnotations){ - // The following method is not working with java8 .<40. Should be resuscitated when travis is updated to a - // more recent java version (1.8.0_31 atm) - // http://stackoverflow.com/questions/37368060/why-this-code-compiles-with-jdk8u45-and-above-but-not-with-jdk8u25 - //Map> variantAnnotationsByStorageId = variantAnnotations.stream() - // .collect(Collectors.groupingBy(this::buildStorageIdFromVariantAnnotation)); - + private Map> groupVariantAnnotationById(List variantAnnotations) { Map> variantAnnotationsByStorageId = new HashMap<>(); - for (VariantAnnotation variantAnnotation: variantAnnotations) { - String id = buildStorageIdFromVariantAnnotation(variantAnnotation); + for (VariantAnnotation variantAnnotation : variantAnnotations) { + String id = buildAnnotationtorageId(variantAnnotation); variantAnnotationsByStorageId.putIfAbsent(id, new ArrayList<>()); variantAnnotationsByStorageId.get(id).add(variantAnnotation); @@ -98,82 +99,74 @@ private Map> groupVariantAnnotationById(List variantAnnotations) { + Map> variantAnnotationsByStorageId = groupVariantAnnotationById( + variantAnnotations); - Map> variantAnnotationsByStorageId = groupVariantAnnotationById(variantAnnotations); - - for (Map.Entry> annotationsIn : variantAnnotationsByStorageId.entrySet()){ - String storageId = annotationsIn.getKey(); - List annotations = annotationsIn.getValue(); + for (Map.Entry> annotationsIdEntry : variantAnnotationsByStorageId.entrySet()) { + 
String storageId = annotationsIdEntry.getKey(); + List annotations = annotationsIdEntry.getValue(); VariantAnnotation variantAnnotation = annotations.get(0); - if(annotations.size()>1){ + if (annotations.size() > 1) { variantAnnotation = concatenateOtherAnnotations( variantAnnotation, annotations.subList(1, annotations.size())); } + variantAnnotation.setId(storageId); + variantAnnotation.setEnsmblVersion(vepVersion); + variantAnnotation.setVepCacheVersion(vepCacheVersion); + + variantAnnotation.extractXrefsFromConsequenceTypes(); + writeVariantAnnotationInMongoDb(storageId, variantAnnotation); } - } /** * Append multiple annotation into a single {@link VariantAnnotation} * Updated fields are ConsequenceTypes and Hgvs * - * @param variantAnnotation annotation where other annotations will be appended + * @param variantAnnotation annotation where other annotations will be appended * @param otherAnnotationsToConcatenate annotations to be appended * @return a single {@link VariantAnnotation} ready to be persisted */ private VariantAnnotation concatenateOtherAnnotations(VariantAnnotation variantAnnotation, - List otherAnnotationsToConcatenate){ + List otherAnnotationsToConcatenate) { for (VariantAnnotation annotationToAppend : otherAnnotationsToConcatenate) { - - //update ConsequenceTypes - if(annotationToAppend.getConsequenceTypes() != null){ + if (annotationToAppend.getConsequenceTypes() != null) { variantAnnotation.getConsequenceTypes().addAll(annotationToAppend.getConsequenceTypes()); } - - //update Hgvs - if(annotationToAppend.getHgvs() != null){ - if(variantAnnotation.getHgvs() == null){ - variantAnnotation.setHgvs(new ArrayList<>()); - } - variantAnnotation.getHgvs().addAll(annotationToAppend.getHgvs()); - } } return variantAnnotation; } - private void writeVariantAnnotationInMongoDb(String storageId, VariantAnnotation variantAnnotation){ + private void writeVariantAnnotationInMongoDb(String storageId, VariantAnnotation variantAnnotation) { logger.trace("Writing 
annotations into mongo id: {}", storageId); - DBObject storageVariantAnnotation = converter.convert(variantAnnotation); + BasicDBObject id = new BasicDBObject("_id", storageId); - BasicDBObject find = new BasicDBObject("_id", storageId); + if (mongoOperations.exists(new BasicQuery(id), collection)) { + BasicDBObject updateConsequenceTypes = new BasicDBObject("$addToSet", + new BasicDBObject(CONSEQUENCE_TYPE_FIELD, + new BasicDBObject("$each", variantAnnotation.getConsequenceTypes()))); + BasicDBObject updateXrefs = new BasicDBObject("$addToSet", + new BasicDBObject(XREFS_FIELD, new BasicDBObject("$each", variantAnnotation.getXrefs()))); - if(storageVariantAnnotation.get("ct") != null){ - BasicDBObject updateCt = new BasicDBObject("$addToSet", new BasicDBObject("annot.ct", - new BasicDBObject("$each", storageVariantAnnotation.get("ct")) )); - mongoOperations.getCollection(collection).update(find, updateCt); + mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateConsequenceTypes), collection); + mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateXrefs), collection); + } else { + mongoOperations.save(variantAnnotation, collection); } - - if(storageVariantAnnotation.get("xrefs") != null){ - BasicDBObject updateXrefs = new BasicDBObject("$addToSet", new BasicDBObject("annot.xrefs", - new BasicDBObject("$each", storageVariantAnnotation.get("xrefs")))); - mongoOperations.getCollection(collection).update(find, updateXrefs); - } - } - private String buildStorageIdFromVariantAnnotation(VariantAnnotation variantAnnotation){ - return MongoDBHelper.buildStorageId( - variantAnnotation.getChromosome(), - variantAnnotation.getStart(), - variantAnnotation.getReferenceAllele(), - variantAnnotation.getAlternativeAllele()); + private String buildAnnotationtorageId(VariantAnnotation variantAnnotation) { + return MongoDBHelper.buildAnnotationStorageId(variantAnnotation.getChromosome(), variantAnnotation.getStart(), + variantAnnotation.getReferenceAllele(), + 
variantAnnotation.getAlternativeAllele(), vepVersion, + vepCacheVersion); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java b/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java new file mode 100644 index 000000000..d0189499a --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java @@ -0,0 +1,87 @@ +/* + * Copyright 2016 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.model.converters.data; + +import com.mongodb.BasicDBObject; +import com.mongodb.DBObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.core.convert.converter.Converter; +import org.springframework.util.Assert; + +import uk.ac.ebi.eva.commons.models.converters.data.SamplesToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.converters.data.VariantSourceEntryToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.converters.data.VariantStatsToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.data.Variant; +import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; + +import java.util.List; + +/** + * Converts a {@link Variant} into mongoDb {@link DBObject} + */ +public class VariantToMongoDbObjectConverter implements Converter { + private static final Logger logger = LoggerFactory.getLogger(VariantToMongoDbObjectConverter.class); + + private VariantToDBObjectConverter variantConverter; + private VariantStatsToDBObjectConverter statsConverter; + private VariantSourceEntryToDBObjectConverter sourceEntryConverter; + + private boolean includeStats; + + public VariantToMongoDbObjectConverter(boolean includeStats, boolean includeSample) { + this(includeStats, includeStats, includeSample); + } + + public VariantToMongoDbObjectConverter(boolean includeStats, boolean calculateStats, boolean includeSample) { + + this.includeStats = includeStats; + this.statsConverter = calculateStats ? new VariantStatsToDBObjectConverter() : null; + + + SamplesToDBObjectConverter sampleConverter = includeSample ? 
new SamplesToDBObjectConverter() : null; + this.sourceEntryConverter = new VariantSourceEntryToDBObjectConverter(sampleConverter); + this.variantConverter = new VariantToDBObjectConverter(null, null); + } + + @Override + public DBObject convert(Variant variant) { + Assert.notNull(variant, "Variant should not be null. Please provide a valid Variant object"); + logger.trace("Convert variant {} into mongo object", variant); + + variant.setAnnotation(null); + + VariantSourceEntry variantSourceEntry = variant.getSourceEntries().values().iterator().next(); + + BasicDBObject addToSet = new BasicDBObject().append(VariantToDBObjectConverter.FILES_FIELD, + sourceEntryConverter.convert(variantSourceEntry)); + + if (includeStats) { + List sourceEntryStats = statsConverter.convert(variantSourceEntry); + addToSet.put(VariantToDBObjectConverter.STATS_FIELD, new BasicDBObject("$each", sourceEntryStats)); + } + + if (variant.getIds() != null && !variant.getIds().isEmpty()) { + addToSet.put(VariantToDBObjectConverter.IDS_FIELD, new BasicDBObject("$each", variant.getIds())); + } + + BasicDBObject update = new BasicDBObject(); + update.append("$addToSet", addToSet).append("$setOnInsert", variantConverter.convert(variant)); + + return update; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/DatabaseParameters.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/DatabaseParameters.java index 210085fc0..cc6a6e826 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/DatabaseParameters.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/DatabaseParameters.java @@ -46,6 +46,9 @@ public class DatabaseParameters { @Value(PARAMETER + JobParametersNames.DB_COLLECTIONS_ANNOTATION_METADATA_NAME + END) private String collectionAnnotationMetadataName; + @Value(PARAMETER + JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME + END) + private String collectionAnnotationsName; + @Autowired private MongoConnection mongoConnection; @@ -72,4 +75,8 @@ public String 
getCollectionFeaturesName() { public String getCollectionAnnotationMetadataName() { return collectionAnnotationMetadataName; } + + public String getCollectionAnnotationsName() { + return collectionAnnotationsName; + } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java index e04162cd1..a85bd5591 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java @@ -85,6 +85,8 @@ public class JobParametersNames { public static final String DB_COLLECTIONS_ANNOTATION_METADATA_NAME = "db.collections.annotation.metadata.name"; + public static final String DB_COLLECTIONS_ANNOTATIONS_NAME = "db.collections.annotations.name"; + /* * Skip and overwrite steps diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsAnnotationsNameValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsAnnotationsNameValidator.java new file mode 100644 index 000000000..ed1fe659b --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsAnnotationsNameValidator.java @@ -0,0 +1,36 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.parameters.validation; + +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersInvalidException; +import org.springframework.batch.core.JobParametersValidator; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +/** + * Checks that the name of the annotations collection has been filled in. + * + * @throws JobParametersInvalidException If the annotation collection name is null or empty + */ +public class DbCollectionsAnnotationsNameValidator implements JobParametersValidator { + @Override + public void validate(JobParameters parameters) throws JobParametersInvalidException { + ParametersValidatorUtil + .checkIsValidString(parameters.getString(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME), + JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java index 6f083fd60..3c20ce194 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java @@ -24,6 +24,7 @@ import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigChunkSizeValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsAnnotationsNameValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsVariantsNameValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.DbNameValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.InputStudyIdValidator; @@ -59,6 +60,7 @@ public void validate(JobParameters parameters) 
throws JobParametersInvalidExcept private CompositeJobParametersValidator compositeJobParametersValidator() { List jobParametersValidators = new ArrayList<>(); Collections.addAll(jobParametersValidators, + new DbCollectionsAnnotationsNameValidator(), new DbCollectionsVariantsNameValidator(), new DbNameValidator(), new OutputDirAnnotationValidator(), diff --git a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java index f95567bfb..2bb457e32 100644 --- a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java +++ b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java @@ -51,17 +51,15 @@ public static List parseServerAddresses(String hosts) throws Unkn return serverAddresses; } - public static String buildStorageId(Variant v) { - return buildStorageId(v.getChromosome(), v.getStart(), v.getReference(), v.getAlternate()); + public static String buildVariantStorageId(Variant v) { + return buildVariantStorageId(v.getChromosome(), v.getStart(), v.getReference(), v.getAlternate()); } /** * From org.opencb.opencga.storage.mongodb.variant.VariantToDBObjectConverter - * #buildStorageId(java.lang.String, int, java.lang.String, java.lang.String) - *

- * To avoid the initialization of VariantSourceEntryToDBObjectConverter and VariantToDBObjectConverter + * #buildStorageId(java.lang.String, int, java.lang.String, java.lang.String) */ - public static String buildStorageId(String chromosome, int start, String reference, String alternate) { + public static String buildVariantStorageId(String chromosome, int start, String reference, String alternate) { StringBuilder builder = new StringBuilder(chromosome); builder.append("_"); builder.append(start); @@ -86,4 +84,19 @@ public static String buildStorageId(String chromosome, int start, String referen return builder.toString(); } + public static String buildAnnotationStorageId(String chromosome, + int start, + String reference, + String alternate, + String vepVersion, + String vepCacheVersion) { + StringBuilder builder = new StringBuilder(buildVariantStorageId(chromosome, start, reference, alternate)); + builder.append("_"); + builder.append(vepVersion); + builder.append("_"); + builder.append(vepCacheVersion); + + return builder.toString(); + } + } diff --git a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/DBObjectToVariantAnnotationConverterTest.java b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/DBObjectToVariantAnnotationConverterTest.java deleted file mode 100644 index 5ea91a35f..000000000 --- a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/DBObjectToVariantAnnotationConverterTest.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright 2017 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.commons.models.converters.data; - -import com.mongodb.DBObject; -import com.mongodb.util.JSON; -import org.junit.Before; -import org.junit.Test; -import org.opencb.biodata.models.variant.annotation.ConsequenceType; -import org.opencb.biodata.models.variant.annotation.Score; -import org.opencb.biodata.models.variant.annotation.Xref; -import org.springframework.core.convert.converter.Converter; - -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; -import uk.ac.ebi.eva.test.data.VariantAnnotationData; - -import java.util.Collections; -import java.util.Comparator; -import java.util.List; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -/** - * Test {@link DBObjectToVariantAnnotationConverter} - */ -public class DBObjectToVariantAnnotationConverterTest { - private Converter converter; - - @Before - public void setUp() throws Exception { - converter = new DBObjectToVariantAnnotationConverter(); - } - - @Test(expected = IllegalArgumentException.class) - public void convertNullVariantAnnotationShouldThrowAnException() { - converter.convert(null); - } - - @Test - public void allFieldsOfVariantAnnotationShouldBeConverted() { - DBObject dbObject = (DBObject) JSON.parse(VariantAnnotationData.VARIANT_ANNOTATION_JSON); - - VariantAnnotation annotation = converter.convert(dbObject); - - //Consequence types - List consequenceTypeList = annotation.getConsequenceTypes(); - - assertEquals(1, consequenceTypeList.size()); - ConsequenceType consequenceType = consequenceTypeList.get(0); - 
- assertEquals("geneName", consequenceType.getGeneName()); - assertEquals("ensemblGeneId", consequenceType.getEnsemblGeneId()); - assertEquals("ensemblTranscriptId", consequenceType.getEnsemblTranscriptId()); - assertEquals("strand", consequenceType.getStrand()); - assertEquals("biotype", consequenceType.getBiotype()); - assertEquals("aaChange", consequenceType.getAaChange()); - assertEquals("codon", consequenceType.getCodon()); - - List actualScores = consequenceType.getProteinSubstitutionScores(); - - Score expectedScore1 = new Score(1.0, "Polyphen", "Polyphen description"); - Score expectedScore2 = new Score(1.0, "Sift", "Sift description"); - - Comparator scoreComparator = Comparator.comparing(Score::getSource).thenComparing(Score::getDescription) - .thenComparing(Score::getScore); - actualScores.sort(scoreComparator); - - assertTrue(Collections.binarySearch(actualScores, expectedScore1, scoreComparator) >= 0); - assertTrue(Collections.binarySearch(actualScores, expectedScore2, scoreComparator) >= 0); - - List consequenceTypeEntries = consequenceType.getSoTerms(); - - Comparator consequenceTypeEntryComparator = Comparator.comparing( - ConsequenceType.ConsequenceTypeEntry::getSoAccession).thenComparing( - ConsequenceType.ConsequenceTypeEntry::getSoName); - consequenceTypeEntries.sort(consequenceTypeEntryComparator); - - ConsequenceType.ConsequenceTypeEntry expectedConsequenceTypeEntry1 = new ConsequenceType.ConsequenceTypeEntry( - "transcript_ablation", "SO:0001893"); - ConsequenceType.ConsequenceTypeEntry expectedConsequenceTypeEntry2 = new ConsequenceType.ConsequenceTypeEntry( - "splice_donor_variant", "SO:0001575"); - - assertTrue(Collections.binarySearch(consequenceTypeEntries, expectedConsequenceTypeEntry1, - consequenceTypeEntryComparator) >= 0); - assertTrue(Collections.binarySearch(consequenceTypeEntries, expectedConsequenceTypeEntry2, - consequenceTypeEntryComparator) >= 0); - - //Xrefs - List xrefList = annotation.getXrefs(); - assertEquals(4, 
xrefList.size()); - - Xref xref1 = new Xref("OS01G0112100", "ensemblGene"); - Xref xref2 = new Xref("ensemblGeneId", "ensemblGene"); - Xref xref3 = new Xref("geneName", "HGNC"); - Xref xref4 = new Xref("ensemblTranscriptId", "ensemblTranscript"); - - Comparator xrefComparator = Comparator.comparing(Xref::getId).thenComparing(Xref::getSrc); - - xrefList.sort(xrefComparator); - - assertTrue(Collections.binarySearch(xrefList, xref1, xrefComparator) >= 0); - assertTrue(Collections.binarySearch(xrefList, xref2, xrefComparator) >= 0); - assertTrue(Collections.binarySearch(xrefList, xref3, xrefComparator) >= 0); - assertTrue(Collections.binarySearch(xrefList, xref4, xrefComparator) >= 0); - } - -} diff --git a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantAnnotationToDBObjectConverterTest.java b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantAnnotationToDBObjectConverterTest.java deleted file mode 100644 index 6b6de9cef..000000000 --- a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantAnnotationToDBObjectConverterTest.java +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright 2017 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package uk.ac.ebi.eva.commons.models.converters.data; - -import com.mongodb.BasicDBObject; -import com.mongodb.DBObject; -import org.junit.Before; -import org.junit.Test; -import org.opencb.biodata.models.variant.annotation.ConsequenceType; -import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings; -import org.opencb.biodata.models.variant.annotation.Score; -import org.opencb.biodata.models.variant.annotation.Xref; -import org.springframework.core.convert.converter.Converter; - -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -/** - * Test {@link VariantAnnotationToDBObjectConverter} - */ -public class VariantAnnotationToDBObjectConverterTest { - private Converter converter; - - @Before - public void setUp() throws Exception { - converter = new VariantAnnotationToDBObjectConverter(); - } - - @Test(expected = IllegalArgumentException.class) - public void convertNullVariantAnnotationShouldThrowAnException() { - converter.convert(null); - } - - @Test - public void allFieldsOfVariantAnnotationShouldBeConverted() { - List soAccessionsValues = Arrays.asList("transcript_ablation", "splice_donor_variant"); - - Score polyphenScore = new Score(1.0, "Polyphen", "Polyphen description"); - Score siftScore = new Score(1.0, "Sift", "Sift description"); - - List scores = Arrays.asList(polyphenScore, siftScore); - - ConsequenceType consequenceType = new ConsequenceType("geneName", "ensemblGeneId", "ensemblTranscriptId", - "strand", "biotype", 0, 0, 0, "aaChange", "codon", - scores, 
soAccessionsValues); - - List consequenceTypes = Collections.singletonList(consequenceType); - - VariantAnnotation annotation = new VariantAnnotation("1", 1, 2, "A", "T"); - annotation.setConsequenceTypes(consequenceTypes); - annotation.setHgvs(Arrays.asList("A", "B")); - - annotation.setXrefs(Collections.singletonList(new Xref("OS01G0112100", "ensemblGene"))); - - DBObject dbObject = converter.convert(annotation); - assertNotNull(dbObject); - - //Consequence types - LinkedList cts = (LinkedList) dbObject.get(AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD); - assertEquals(1, cts.size()); - - BasicDBObject ct = (BasicDBObject) cts.get(0); - assertEquals(consequenceType.getGeneName(), ct.getString(AnnotationFieldNames.GENE_NAME_FIELD)); - assertEquals(consequenceType.getEnsemblGeneId(), ct.getString(AnnotationFieldNames.ENSEMBL_GENE_ID_FIELD)); - assertEquals(consequenceType.getEnsemblTranscriptId(), - ct.getString(AnnotationFieldNames.ENSEMBL_TRANSCRIPT_ID_FIELD)); - assertEquals(consequenceType.getCodon(), ct.getString(AnnotationFieldNames.CODON_FIELD)); - assertEquals(consequenceType.getStrand(), ct.getString(AnnotationFieldNames.STRAND_FIELD)); - assertEquals(consequenceType.getBiotype(), ct.getString(AnnotationFieldNames.BIOTYPE_FIELD)); - assertEquals(consequenceType.getAaChange(), ct.getString(AnnotationFieldNames.AA_CHANGE_FIELD)); - - HashSet sos = new HashSet<>((LinkedList) ct.get(AnnotationFieldNames.SO_ACCESSION_FIELD)); - - assertEquals(2, sos.size()); - - Set expectedSoAccessionsNumbers = soAccessionsValues.stream().map( - ConsequenceTypeMappings.termToAccession::get).collect(Collectors.toSet()); - - assertEquals(expectedSoAccessionsNumbers, sos); - - BasicDBObject polyphenField = (BasicDBObject) ct.get(AnnotationFieldNames.POLYPHEN_FIELD); - BasicDBObject siftField = (BasicDBObject) ct.get(AnnotationFieldNames.SIFT_FIELD); - - assertEquals(polyphenScore.getScore(), polyphenField.getDouble("sc"), 0.001); - assertEquals(polyphenScore.getDescription(), 
polyphenField.getString("desc")); - assertEquals(siftScore.getScore(), siftField.getDouble("sc"), 0.001); - assertEquals(siftScore.getDescription(), siftField.getString("desc")); - - //Xrefs - HashSet xrefs = (HashSet) dbObject.get(AnnotationFieldNames.XREFS_FIELD); - - assertTrue(isXrefPresent(xrefs, "OS01G0112100", "ensemblGene")); //from xrefs in VariantAnnotation obj - assertTrue(isXrefPresent(xrefs, "ensemblGeneId", "ensemblGene")); //from ct - assertTrue(isXrefPresent(xrefs, "geneName", "HGNC")); //from ct - assertTrue(isXrefPresent(xrefs, "ensemblTranscriptId", "ensemblTranscript")); //from ct - } - - @Test - public void onlyConsequenceTypesShouldBeConverted() { - ConsequenceType consequenceType = new ConsequenceType("geneName", "ensemblGeneId", "ensemblTranscriptId", - "strand", "biotype", 0, 0, 0, "aaChange", "codon", - new ArrayList<>(), new ArrayList<>()); - - List consequenceTypes = Collections.singletonList(consequenceType); - - VariantAnnotation annotation = new VariantAnnotation("1", 1, 2, "A", "T"); - annotation.setConsequenceTypes(consequenceTypes); - - DBObject dbObject = converter.convert(annotation); - assertNotNull(dbObject); - - //Consequence types - LinkedList cts = (LinkedList) dbObject.get(AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD); - assertEquals(1, cts.size()); - - //Xrefs - HashSet xrefs = (HashSet) dbObject.get(AnnotationFieldNames.XREFS_FIELD); - - assertFalse(isXrefPresent(xrefs, "OS01G0112100", "ensemblGene")); //from xrefs in VariantAnnotation obj - assertTrue(isXrefPresent(xrefs, "ensemblGeneId", "ensemblGene")); //from ct - assertTrue(isXrefPresent(xrefs, "geneName", "HGNC")); //from ct - assertTrue(isXrefPresent(xrefs, "ensemblTranscriptId", "ensemblTranscript")); //from ct - } - - @Test - public void onlyXrefsShouldBeConverted() { - VariantAnnotation annotation = new VariantAnnotation("1", 1, 2, "A", "T"); - annotation.setXrefs(Collections.singletonList(new Xref("OS01G0112100", "ensemblGene"))); - - DBObject dbObject = 
converter.convert(annotation); - - //Consequence types - assertNull(dbObject.get(AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD)); - - //Xrefs - HashSet xrefs = (HashSet) dbObject.get(AnnotationFieldNames.XREFS_FIELD); - - assertEquals(1, xrefs.size()); - assertTrue(isXrefPresent(xrefs, "OS01G0112100", "ensemblGene")); //from xrefs in VariantAnnotation obj - } - - private boolean isXrefPresent(HashSet xrefs, String id, String src) { - for (BasicDBObject xref : xrefs) { - if (xref.getString("id").equals(id) && xref.getString("src").equals(src)) { - return true; - } - } - return false; - } -} diff --git a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java index 4a060798f..f38f1d4f5 100644 --- a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java +++ b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java @@ -110,7 +110,7 @@ public void testConvertToStorageTypeWithFiles() { mongoVariant.append("files", files); VariantToDBObjectConverter converter = new VariantToDBObjectConverter( - new VariantSourceEntryToDBObjectConverter(new SamplesToDBObjectConverter()), null, null); + new VariantSourceEntryToDBObjectConverter(new SamplesToDBObjectConverter()), null); DBObject converted = converter.convert(variant); assertFalse(converted.containsField(VariantToDBObjectConverter.IDS_FIELD)); //IDs must be added manually. 
converted.put(VariantToDBObjectConverter.IDS_FIELD, variant.getIds()); //Add IDs diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java index 077feb5aa..3ce9abb0d 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java @@ -16,20 +16,17 @@ package uk.ac.ebi.eva.pipeline.io.mappers; import org.junit.Test; -import org.opencb.biodata.models.variant.annotation.ConsequenceType; -import org.opencb.biodata.models.variant.annotation.Score; +import uk.ac.ebi.eva.commons.models.data.ConsequenceType; +import uk.ac.ebi.eva.commons.models.data.Score; import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.test.data.VepOutputContent; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; +import java.util.Set; import static junit.framework.TestCase.assertNull; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; /** * {@link AnnotationLineMapper} @@ -51,12 +48,12 @@ public void shouldParseAllDefaultFieldsInVepOutput() throws Exception { public void shouldParseAllTranscriptFieldsInVepOutput() { AnnotationLineMapper lineMapper = new AnnotationLineMapper(); VariantAnnotation variantAnnotation = lineMapper.mapLine(VepOutputContent.vepOutputContentTranscriptFields, 0); - List consequenceTypes = variantAnnotation.getConsequenceTypes(); + Set consequenceTypes = variantAnnotation.getConsequenceTypes(); assertNotNull(consequenceTypes); assertEquals(1, consequenceTypes.size()); - ConsequenceType consequenceType = consequenceTypes.get(0); + ConsequenceType consequenceType = consequenceTypes.iterator().next(); assertEquals(Integer.valueOf(1), consequenceType.getcDnaPosition()); assertEquals(Integer.valueOf(4), 
consequenceType.getCdsPosition()); @@ -69,14 +66,14 @@ public void shouldParseAllTranscriptFieldsInVepOutput() { public void shouldParseVepOutputWithoutTranscript() { AnnotationLineMapper lineMapper = new AnnotationLineMapper(); VariantAnnotation variantAnnotation = lineMapper.mapLine(VepOutputContent.vepOutputContentWithOutTranscript, 0); - List consequenceTypes = variantAnnotation.getConsequenceTypes(); + Set consequenceTypes = variantAnnotation.getConsequenceTypes(); assertNotNull(consequenceTypes); assertEquals(1, consequenceTypes.size()); - ConsequenceType consequenceType = consequenceTypes.get(0); + ConsequenceType consequenceType = consequenceTypes.iterator().next(); - assertNotNull(consequenceType.getSoTerms()); + assertNotNull(consequenceType.getSoAccessions()); assertNull(consequenceType.getcDnaPosition()); assertNull(consequenceType.getCdsPosition()); assertNull(consequenceType.getAaPosition()); @@ -110,25 +107,23 @@ public void shouldParseVepOutputWithExtraFields() { AnnotationLineMapper lineMapper = new AnnotationLineMapper(); VariantAnnotation variantAnnotation = lineMapper.mapLine(VepOutputContent.vepOutputContentWithExtraFields, 0); - List consequenceTypes = variantAnnotation.getConsequenceTypes(); + Set consequenceTypes = variantAnnotation.getConsequenceTypes(); assertNotNull(consequenceTypes); assertEquals(1, consequenceTypes.size()); - ConsequenceType consequenceType = consequenceTypes.get(0); + ConsequenceType consequenceType = consequenceTypes.iterator().next(); - List actualScores = consequenceType.getProteinSubstitutionScores(); - assertNotNull(actualScores); - assertEquals(2, actualScores.size()); + Score polyphen = consequenceType.getPolyphen(); + Score sifts = consequenceType.getSifts(); - Score expectedSift = new Score(0.07, "Sift", "tolerated"); - Score expectedPolyphen = new Score(0.859, "Polyphen", "possibly_damaging"); + assertNotNull(polyphen); + assertNotNull(sifts); - Comparator scoreComparator = 
Comparator.comparing(Score::getSource).thenComparing(Score::getDescription) - .thenComparing(Score::getScore); - actualScores.sort(scoreComparator); + Score expectedSift = new Score(0.07, "tolerated"); + Score expectedPolyphen = new Score(0.859, "possibly_damaging"); - assertTrue(Collections.binarySearch(actualScores, expectedSift, scoreComparator) >= 0); - assertTrue(Collections.binarySearch(actualScores, expectedPolyphen, scoreComparator) >= 0); + assertEquals(sifts, expectedSift); + assertEquals(polyphen, expectedPolyphen); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java index fa6310d8d..c2c085d60 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java @@ -15,8 +15,8 @@ */ package uk.ac.ebi.eva.pipeline.io.writers; +import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; -import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.DBObject; import org.junit.Before; @@ -30,26 +30,31 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - -import uk.ac.ebi.eva.commons.models.converters.data.DBObjectToVariantAnnotationConverter; -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.data.ConsequenceType; +import uk.ac.ebi.eva.commons.models.data.Score; import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; 
-import uk.ac.ebi.eva.utils.MongoDBHelper; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; -import java.util.Set; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.SCORE_DESCRIPTION_FIELD; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.SCORE_SCORE_FIELD; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.XREFS_FIELD; import static uk.ac.ebi.eva.test.data.VepOutputContent.vepOutputContent; /** @@ -63,8 +68,11 @@ @TestPropertySource({"classpath:test-mongo.properties"}) @ContextConfiguration(classes = {MongoConnection.class, MongoMappingContext.class}) public class VepAnnotationMongoWriterTest { + private static final String COLLECTION_ANNOTATIONS_NAME = "annotations"; + + private static final String VEP_VERSION = "1"; - private static final String COLLECTION_VARIANTS_NAME = "variants"; + private static final String VEP_CACHE_VERSION = "1"; @Autowired private MongoConnection mongoConnection; @@ -75,10 +83,15 @@ public class VepAnnotationMongoWriterTest { @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - private DBObjectToVariantAnnotationConverter converter; private VepAnnotationMongoWriter annotationWriter; + private AnnotationLineMapper AnnotationLineMapper; + @Before + public void setUp() throws Exception { + AnnotationLineMapper = new AnnotationLineMapper(); + } + @Test public void 
shouldWriteAllFieldsIntoMongoDb() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); @@ -88,29 +101,26 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { annotations.add(AnnotationLineMapper.mapLine(annotLine, 0)); } - DBCollection variants = mongoRule.getCollection(databaseName, COLLECTION_VARIANTS_NAME); - - // first do a mock of a "variants" collection, with just the _id - writeIdsIntoMongo(annotations, variants); - - // now, load the annotation + // load the annotation MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, - mongoMappingContext); - annotationWriter = new VepAnnotationMongoWriter(operations, COLLECTION_VARIANTS_NAME); + mongoMappingContext); + annotationWriter = new VepAnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, + VEP_CACHE_VERSION); annotationWriter.write(annotations); - // and finally check that documents in DB have annotation (only consequence type) - DBCursor cursor = variants.find(); + // and finally check that documents in annotation collection have annotations + DBCursor cursor = mongoRule.getCollection(databaseName, COLLECTION_ANNOTATIONS_NAME).find(); int count = 0; int consequenceTypeCount = 0; while (cursor.hasNext()) { count++; - VariantAnnotation annot = converter.convert( - (DBObject) cursor.next().get(VariantToDBObjectConverter.ANNOTATION_FIELD)); - assertNotNull(annot.getConsequenceTypes()); - consequenceTypeCount += annot.getConsequenceTypes().size(); + DBObject annotation = cursor.next(); + BasicDBList consequenceTypes = (BasicDBList) annotation.get(CONSEQUENCE_TYPE_FIELD); + assertNotNull(consequenceTypes); + consequenceTypeCount += consequenceTypes.size(); } + assertTrue(count > 0); assertEquals(annotations.size(), consequenceTypeCount); } @@ -128,11 +138,6 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce for (String annotLine : vepOutputContent.split("\n")) { 
annotations.add(AnnotationLineMapper.mapLine(annotLine, 0)); } - String dbCollectionVariantsName = COLLECTION_VARIANTS_NAME; - DBCollection variants = mongoRule.getCollection(databaseName, dbCollectionVariantsName); - - // first do a mock of a "variants" collection, with just the _id - writeIdsIntoMongo(annotations, variants); //prepare annotation sets List annotationSet1 = new ArrayList<>(); @@ -153,51 +158,69 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce annotationSet3.add(AnnotationLineMapper.mapLine(annotLine, 0)); } - // now, load the annotation + // load the annotation MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, - mongoMappingContext); - annotationWriter = new VepAnnotationMongoWriter(operations, dbCollectionVariantsName); + mongoMappingContext); + annotationWriter = new VepAnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, + VEP_CACHE_VERSION); annotationWriter.write(annotationSet1); annotationWriter.write(annotationSet2); annotationWriter.write(annotationSet3); // and finally check that documents in DB have the correct number of annotation - DBCursor cursor = variants.find(); + DBCursor cursor = mongoRule.getCollection(databaseName, COLLECTION_ANNOTATIONS_NAME).find(); while (cursor.hasNext()) { - DBObject dbObject = cursor.next(); - String id = dbObject.get("_id").toString(); - - VariantAnnotation annot = converter.convert( - (DBObject) dbObject.get(VariantToDBObjectConverter.ANNOTATION_FIELD)); + DBObject annotation = cursor.next(); + String id = annotation.get("_id").toString(); if (id.equals("20_63360_C_T") || id.equals("20_63399_G_A") || id.equals("20_63426_G_T")) { - assertEquals(2, annot.getConsequenceTypes().size()); - assertEquals(4, annot.getXrefs().size()); + assertEquals(2, ((BasicDBList) annotation.get(CONSEQUENCE_TYPE_FIELD)).size()); + assertEquals(4, ((BasicDBList) annotation.get(XREFS_FIELD)).size()); } } } - @Before - public 
void setUp() throws Exception { - converter = new DBObjectToVariantAnnotationConverter(); - AnnotationLineMapper = new AnnotationLineMapper(); - } + @Test + public void shouldWriteSubstitutionScoresIntoMongoDb() throws Exception { + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - private void writeIdsIntoMongo(List annotations, DBCollection variants) { - Set uniqueIdsLoaded = new HashSet<>(); - for (VariantAnnotation annotation : annotations) { - String id = MongoDBHelper.buildStorageId( - annotation.getChromosome(), - annotation.getStart(), - annotation.getReferenceAllele(), - annotation.getAlternativeAllele()); - - if (!uniqueIdsLoaded.contains(id)) { - variants.insert(new BasicDBObject("_id", id)); - uniqueIdsLoaded.add(id); - } + VariantAnnotation variantAnnotation = new VariantAnnotation("X", 1, 10, "A", "T"); + + Score siftScore = new Score(0.02, "deleterious"); + Score polyphenScore = new Score(0.846, "possibly_damaging"); + + ConsequenceType consequenceType = new ConsequenceType(); + consequenceType.setSifts(siftScore); + consequenceType.setPolyphen(polyphenScore); + + variantAnnotation.setConsequenceTypes(new HashSet<>(Collections.singletonList(consequenceType))); + + MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + annotationWriter = new VepAnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, + VEP_CACHE_VERSION); + + annotationWriter.write(Collections.singletonList(variantAnnotation)); + + DBCursor cursor = mongoRule.getCollection(databaseName, COLLECTION_ANNOTATIONS_NAME).find(); + while (cursor.hasNext()) { + DBObject annotation = cursor.next(); + BasicDBList consequenceTypes = (BasicDBList) annotation.get(CONSEQUENCE_TYPE_FIELD); + + assertNotNull(consequenceTypes); + + LinkedHashMap consequenceTypeMap = (LinkedHashMap) consequenceTypes.get(0); + + BasicDBObject sift = (BasicDBObject) consequenceTypeMap.get(SIFT_FIELD); + BasicDBObject 
polyphen = (BasicDBObject) consequenceTypeMap.get(POLYPHEN_FIELD); + + assertEquals(sift.getString(SCORE_DESCRIPTION_FIELD), siftScore.getDescription()); + assertEquals(sift.get(SCORE_SCORE_FIELD), siftScore.getScore()); + + assertEquals(polyphen.getString(SCORE_DESCRIPTION_FIELD), polyphenScore.getDescription()); + assertEquals(polyphen.get(SCORE_SCORE_FIELD), polyphenScore.getScore()); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java index 857152d2f..610164851 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java @@ -16,16 +16,12 @@ package uk.ac.ebi.eva.pipeline.jobs; +import com.mongodb.BasicDBList; import com.mongodb.DBCursor; import com.mongodb.DBObject; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.annotation.VariantAnnotation; -import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantAnnotationConverter; -import org.springframework.batch.core.BatchStatus; -import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.StepExecution; @@ -35,33 +31,28 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; -import uk.ac.ebi.eva.test.utils.JobTestUtils; import 
uk.ac.ebi.eva.utils.EvaJobParameterBuilder; import uk.ac.ebi.eva.utils.URLHelper; import java.io.File; -import java.nio.file.Files; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * Test for {@link AnnotationJob} - * + *

* TODO The test should fail when we will integrate the JobParameter validation since there are empty parameters for VEP */ @RunWith(SpringRunner.class) @@ -73,10 +64,10 @@ public class AnnotationJobTest { private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; private static final String INPUT_STUDY_ID = "1"; private static final String INPUT_VCF_ID = "1"; + private static final String COLLECTION_ANNOTATIONS_NAME = "annotations"; private static final String COLLECTION_ANNOTATION_METADATA_NAME = "annotationMetadata"; private static final String COLLECTION_VARIANTS_NAME = "variants"; - @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); @@ -86,8 +77,6 @@ public class AnnotationJobTest { @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - private DBObjectToVariantAnnotationConverter converter; - @Test public void allAnnotationStepsShouldBeExecuted() throws Exception { String dbName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); @@ -101,6 +90,7 @@ public void allAnnotationStepsShouldBeExecuted() throws Exception { JobParameters jobParameters = new EvaJobParameterBuilder() .annotationOverwrite("false") .collectionAnnotationMetadataName(COLLECTION_ANNOTATION_METADATA_NAME) + .collectionAnnotationsName(COLLECTION_ANNOTATIONS_NAME) .collectionVariantsName(COLLECTION_VARIANTS_NAME) .databaseName(dbName) .inputFasta(fasta.getAbsolutePath()) @@ -131,21 +121,19 @@ public void allAnnotationStepsShouldBeExecuted() throws Exception { assertEquals(BeanNames.LOAD_ANNOTATION_METADATA_STEP, loadAnnotationMetadataStep.getStepName()); //check that documents have the annotation - DBCursor cursor = mongoRule.getCollection(dbName, COLLECTION_VARIANTS_NAME).find(); + DBCursor cursor = mongoRule.getCollection(dbName, COLLECTION_ANNOTATIONS_NAME).find(); - int count = 0; + int annotationCount = 0; int consequenceTypeCount = 0; while (cursor.hasNext()) { - count++; - DBObject dbObject = (DBObject) 
cursor.next().get(VariantToDBObjectConverter.ANNOTATION_FIELD); - if (dbObject != null) { - VariantAnnotation annot = converter.convertToDataModelType(dbObject); - assertNotNull(annot.getConsequenceTypes()); - consequenceTypeCount += annot.getConsequenceTypes().size(); - } + annotationCount++; + DBObject annotation = cursor.next(); + BasicDBList consequenceTypes = (BasicDBList) annotation.get(CONSEQUENCE_TYPE_FIELD); + assertNotNull(consequenceTypes); + consequenceTypeCount += consequenceTypes.size(); } - assertEquals(300, count); + assertEquals(299, annotationCount); assertEquals(536, consequenceTypeCount); //check that one line is skipped because malformed @@ -164,6 +152,7 @@ public void noVariantsToAnnotateOnlyGenerateAnnotationStepShouldRun() throws Exc JobParameters jobParameters = new EvaJobParameterBuilder() .annotationOverwrite("false") .collectionAnnotationMetadataName(COLLECTION_ANNOTATION_METADATA_NAME) + .collectionAnnotationsName(COLLECTION_ANNOTATIONS_NAME) .collectionVariantsName(COLLECTION_VARIANTS_NAME) .databaseName(dbName) .inputFasta(fasta.getAbsolutePath()) @@ -189,9 +178,4 @@ public void noVariantsToAnnotateOnlyGenerateAnnotationStepShouldRun() throws Exc assertEquals(BeanNames.GENERATE_VEP_ANNOTATION_STEP, findVariantsToAnnotateStep.getStepName()); } - @Before - public void setUp() throws Exception { - converter = new DBObjectToVariantAnnotationConverter(); - } - } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java index e19885df0..e6da4729f 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java @@ -21,8 +21,6 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.opencb.opencga.lib.common.Config; -import org.springframework.batch.core.BatchStatus; -import org.springframework.batch.core.ExitStatus; import 
org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; @@ -31,18 +29,16 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.GenotypedVcfJobTestUtils; -import uk.ac.ebi.eva.test.utils.JobTestUtils; import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; import java.io.File; -import static org.junit.Assert.assertEquals; +import static uk.ac.ebi.eva.test.utils.GenotypedVcfJobTestUtils.COLLECTION_ANNOTATIONS_NAME; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertFailed; @@ -52,7 +48,7 @@ * TODO: FILE_WRONG_NO_ALT should be renamed because the alt allele is not missing but is the same as the reference */ @RunWith(SpringRunner.class) -@ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE,Application.VARIANT_ANNOTATION_MONGO_PROFILE}) +@ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) public class GenotypedVcfJobTest { @@ -91,6 +87,7 @@ public void fullGenotypedVcfJob() throws Exception { JobParameters jobParameters = new EvaJobParameterBuilder() .annotationOverwrite("false") .collectionAnnotationMetadataName(GenotypedVcfJobTestUtils.COLLECTION_ANNOTATION_METADATA_NAME) + .collectionAnnotationsName(COLLECTION_ANNOTATIONS_NAME) 
.collectionFilesName(GenotypedVcfJobTestUtils.COLLECTION_FILES_NAME) .collectionVariantsName(GenotypedVcfJobTestUtils.COLLECTION_VARIANTS_NAME) .databaseName(databaseName) @@ -126,7 +123,7 @@ public void fullGenotypedVcfJob() throws Exception { GenotypedVcfJobTestUtils.checkOutputFileLength(vepOutputFile); - GenotypedVcfJobTestUtils.checkLoadedAnnotation(databaseName); + GenotypedVcfJobTestUtils.checkLoadedAnnotation(mongoRule, databaseName); GenotypedVcfJobTestUtils.checkSkippedOneMalformedLine(jobExecution); @@ -145,16 +142,17 @@ public void aggregationIsNotAllowed() throws Exception { JobParameters jobParameters = new EvaJobParameterBuilder() .annotationOverwrite("false") .collectionAnnotationMetadataName(GenotypedVcfJobTestUtils.COLLECTION_ANNOTATION_METADATA_NAME) + .collectionAnnotationsName(COLLECTION_ANNOTATIONS_NAME) .collectionFilesName(GenotypedVcfJobTestUtils.COLLECTION_FILES_NAME) .collectionVariantsName(GenotypedVcfJobTestUtils.COLLECTION_VARIANTS_NAME) .databaseName(databaseName) .inputFasta(fasta.getAbsolutePath()) - .inputVcf(GenotypedVcfJobTestUtils.getInputFile().getAbsolutePath()) - .inputVcfId(GenotypedVcfJobTestUtils.INPUT_VCF_ID) .inputStudyId(GenotypedVcfJobTestUtils.INPUT_STUDY_ID) .inputStudyName("inputStudyName") .inputStudyType("COLLECTION") + .inputVcf(GenotypedVcfJobTestUtils.getInputFile().getAbsolutePath()) .inputVcfAggregation("BASIC") + .inputVcfId(GenotypedVcfJobTestUtils.INPUT_VCF_ID) .outputDirAnnotation(outputDirAnnotation) .outputDirStats(outputDirStats) .vepCachePath("") diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java index 4cfc09a24..d1dc1883d 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java @@ -30,7 +30,6 @@ import org.springframework.test.context.ContextConfiguration; import 
org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; @@ -53,6 +52,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.test.utils.GenotypedVcfJobTestUtils.COLLECTION_ANNOTATIONS_NAME; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; import static uk.ac.ebi.eva.utils.FileUtils.getResource; @@ -90,9 +90,9 @@ public class GenotypedVcfJobWorkflowTest { Arrays.asList(BeanNames.CALCULATE_STATISTICS_STEP, BeanNames.LOAD_STATISTICS_STEP)); public static final Set EXPECTED_ANNOTATION_STEP_NAMES = new TreeSet<>(Arrays.asList( - BeanNames.GENERATE_VEP_ANNOTATION_STEP, - BeanNames.LOAD_VEP_ANNOTATION_STEP, - BeanNames.LOAD_ANNOTATION_METADATA_STEP)); + BeanNames.GENERATE_VEP_ANNOTATION_STEP, + BeanNames.LOAD_VEP_ANNOTATION_STEP, + BeanNames.LOAD_ANNOTATION_METADATA_STEP)); @Before public void setUp() throws Exception { @@ -220,9 +220,10 @@ private EvaJobParameterBuilder initVariantConfigurationJob() throws IOException EvaJobParameterBuilder evaJobParameterBuilder = new EvaJobParameterBuilder() .annotationOverwrite("false") + .collectionAnnotationMetadataName("annotationMetadata") + .collectionAnnotationsName(COLLECTION_ANNOTATIONS_NAME) .collectionFilesName("files") .collectionVariantsName("variants") - .collectionAnnotationMetadataName("annotationMetadata") .databaseName(dbName) .inputFasta(fasta.getAbsolutePath()) .inputStudyId("genotyped-job-workflow") diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java index e037916a6..97cc31b8d 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java +++ 
b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java @@ -34,7 +34,6 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.AnnotationJob; @@ -65,7 +64,10 @@ @ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) public class AnnotationLoaderStepTest { private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; - + private static final String COLLECTION_ANNOTATIONS_NAME = "annotations"; + private static final String COLLECTION_VARIANTS_NAME = "variants"; + private static final String INPUT_STUDY_ID = "1"; + private static final String INPUT_VCF_ID = "1"; @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); @@ -79,18 +81,16 @@ public class AnnotationLoaderStepTest { public void shouldLoadAllAnnotations() throws Exception { String annotationFolder = temporaryFolderRule.getRoot().getAbsolutePath(); String dbName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); - String collectionVariantsName = "variants"; - String studyId = "1"; - String fileId = "1"; - String vepOutput = URLHelper.resolveVepOutput(annotationFolder, studyId, fileId); + String vepOutput = URLHelper.resolveVepOutput(annotationFolder, INPUT_STUDY_ID, INPUT_VCF_ID); String vepOutputName = Paths.get(vepOutput).getFileName().toString(); temporaryFolderRule.newGzipFile(VepOutputContent.vepOutputContent, vepOutputName); JobParameters jobParameters = new EvaJobParameterBuilder() - .collectionVariantsName(collectionVariantsName) + .collectionVariantsName(COLLECTION_VARIANTS_NAME) + .collectionAnnotationsName("annotations") .databaseName(dbName) - .inputStudyId(studyId) - .inputVcfId(fileId) + 
.inputStudyId(INPUT_STUDY_ID) + .inputVcfId(INPUT_VCF_ID) .outputDirAnnotation(annotationFolder) .toJobParameters(); @@ -99,24 +99,24 @@ public void shouldLoadAllAnnotations() throws Exception { assertCompleted(jobExecution); //check that documents have the annotation - DBCursor cursor = mongoRule.getCollection(dbName, collectionVariantsName).find(); + DBCursor cursor = mongoRule.getCollection(dbName, COLLECTION_ANNOTATIONS_NAME).find(); DBObjectToVariantAnnotationConverter converter = new DBObjectToVariantAnnotationConverter(); - int count = 0; + int annotationCount = 0; int consequenceTypeCount = 0; while (cursor.hasNext()) { - count++; - DBObject dbObject = (DBObject) cursor.next().get(VariantToDBObjectConverter.ANNOTATION_FIELD); + annotationCount++; + DBObject dbObject = cursor.next(); if (dbObject != null) { - VariantAnnotation annot = converter.convertToDataModelType(dbObject); - Assert.assertNotNull(annot.getConsequenceTypes()); - consequenceTypeCount += annot.getConsequenceTypes().size(); + VariantAnnotation annotation = converter.convertToDataModelType(dbObject); + Assert.assertNotNull(annotation.getConsequenceTypes()); + consequenceTypeCount += annotation.getConsequenceTypes().size(); } } - assertEquals(300, count); - assertTrue("Annotations not found", consequenceTypeCount > 0); + assertTrue("Annotations not found", annotationCount == 4); + assertTrue("ConsequenceType not found", consequenceTypeCount == 7); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsAnnotationsNameValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsAnnotationsNameValidatorTest.java new file mode 100644 index 000000000..6fe4e18ef --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsAnnotationsNameValidatorTest.java @@ -0,0 +1,65 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters.validation; + +import org.junit.Before; +import org.junit.Test; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.JobParametersInvalidException; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +public class DbCollectionsAnnotationsNameValidatorTest { + + private DbCollectionsAnnotationsNameValidator validator; + + private JobParametersBuilder jobParametersBuilder; + + @Before + public void setUp() throws Exception { + validator = new DbCollectionsAnnotationsNameValidator(); + } + + @Test + public void collectionsAnnotationsNameIsValid() throws JobParametersInvalidException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, + "collectionsAnnotationsName"); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void collectionsAnnotationsNameIsEmpty() throws JobParametersInvalidException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, ""); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void collectionsAnnotationsNameIsWhitespace() throws JobParametersInvalidException { + jobParametersBuilder = new 
JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, " "); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void collectionsAnnotationsNameIsNull() throws JobParametersInvalidException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, null); + validator.validate(jobParametersBuilder.toJobParameters()); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java index b35ff3a38..89f13f67c 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java @@ -59,7 +59,7 @@ public void setUp() throws Exception { requiredParameters.put(JobParametersNames.INPUT_VCF_ID, new JobParameter("inputVcfId")); requiredParameters.put(JobParametersNames.INPUT_VCF_AGGREGATION, new JobParameter("NONE")); requiredParameters.put(JobParametersNames.INPUT_VCF, - new JobParameter(temporaryFolder.newFile().getCanonicalPath())); + new JobParameter(temporaryFolder.newFile().getCanonicalPath())); // file load step requiredParameters.put(JobParametersNames.DB_COLLECTIONS_FILES_NAME, new JobParameter("collectionsFilesName")); @@ -77,14 +77,15 @@ public void setUp() throws Exception { annotationParameters.put(JobParametersNames.APP_VEP_NUMFORKS, new JobParameter("6")); annotationParameters.put(JobParametersNames.APP_VEP_TIMEOUT, new JobParameter("600")); annotationParameters.put(JobParametersNames.ANNOTATION_OVERWRITE, new JobParameter("false")); + annotationParameters.put(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, new 
JobParameter("annotations")); annotationParameters.put(JobParametersNames.DB_COLLECTIONS_ANNOTATION_METADATA_NAME, - new JobParameter("annotationMetadata")); + new JobParameter("annotationMetadata")); annotationParameters.put(JobParametersNames.APP_VEP_CACHE_PATH, - new JobParameter(temporaryFolder.getRoot().getCanonicalPath())); + new JobParameter(temporaryFolder.getRoot().getCanonicalPath())); annotationParameters.put(JobParametersNames.APP_VEP_PATH, - new JobParameter(temporaryFolder.newFile().getCanonicalPath())); + new JobParameter(temporaryFolder.newFile().getCanonicalPath())); annotationParameters.put(JobParametersNames.INPUT_FASTA, - new JobParameter(temporaryFolder.newFile().getCanonicalPath())); + new JobParameter(temporaryFolder.newFile().getCanonicalPath())); // optionals @@ -103,6 +104,7 @@ public void allJobParametersAreValid() throws JobParametersInvalidException { parameters.putAll(optionalParameters); validator.validate(new JobParameters(parameters)); } + @Test public void allRequiredJobParametersAreValid() throws JobParametersInvalidException { Map parameters = new TreeMap<>(); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java index 46be6d691..5ec32295a 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java @@ -81,6 +81,7 @@ public void setUp() throws Exception { annotationParameters.put(JobParametersNames.APP_VEP_NUMFORKS, new JobParameter("6")); annotationParameters.put(JobParametersNames.APP_VEP_TIMEOUT, new JobParameter("600")); annotationParameters.put(JobParametersNames.ANNOTATION_OVERWRITE, new JobParameter("false")); + 
annotationParameters.put(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, new JobParameter("annotations")); annotationParameters.put(JobParametersNames.DB_COLLECTIONS_ANNOTATION_METADATA_NAME, new JobParameter("annotationMetadata")); annotationParameters.put(JobParametersNames.APP_VEP_CACHE_PATH, diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java index b66ad5d46..922ee5e71 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java @@ -50,6 +50,8 @@ public void setUp() throws Exception { final String dir = temporaryFolder.getRoot().getCanonicalPath(); requiredParameters = new TreeMap<>(); + requiredParameters.put(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, + new JobParameter("dbCollectionsAnnotationName")); requiredParameters.put(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, new JobParameter("dbCollectionsVariantName")); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("dbName")); @@ -81,6 +83,12 @@ public void dbCollectionsVariantsNameIsRequired() throws JobParametersInvalidExc validator.validate(new JobParameters(requiredParameters)); } + @Test(expected = JobParametersInvalidException.class) + public void dbCollectionsAnnotationsNameIsRequired() throws JobParametersInvalidException, IOException { + requiredParameters.remove(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME); + validator.validate(new JobParameters(requiredParameters)); + } + @Test(expected = JobParametersInvalidException.class) public void dbNameIsRequired() throws JobParametersInvalidException, IOException { requiredParameters.remove(JobParametersNames.DB_NAME); diff --git 
a/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java index 4361767f0..ad50f2e3d 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java @@ -31,12 +31,10 @@ import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.GenotypedVcfJobTestUtils; -import uk.ac.ebi.eva.test.utils.JobTestUtils; import uk.ac.ebi.eva.utils.EvaCommandLineBuilder; import java.io.File; @@ -67,7 +65,9 @@ public class EvaPipelineJobLauncherCommandLineRunnerTest { private static final String GENOTYPED_PROPERTIES_FILE = "/genotype-test.properties"; + private static final String NO_JOB_NAME_HAS_BEEN_PROVIDED = "No job name has been provided"; + private static final String NO_JOB_PARAMETERS_HAVE_BEEN_PROVIDED = "No job parameters have been provided"; @Autowired @@ -126,30 +126,31 @@ public void genotypedVcfJobTest() throws JobExecutionException, IOException, URI evaPipelineJobLauncherCommandLineRunner.setJobNames(GENOTYPED_VCF_JOB); evaPipelineJobLauncherCommandLineRunner.run(new EvaCommandLineBuilder() - .inputVcf(inputFile.getAbsolutePath()) - .inputVcfId(GenotypedVcfJobTestUtils.INPUT_VCF_ID) - .inputVcfAggregation("NONE") - .inputStudyName("small vcf") + .annotationOverwrite("false") + .appVepPath(GenotypedVcfJobTestUtils.getMockVep().getPath()) + .appVepTimeout("60") + .configDbReadPreference("secondary") + .databaseName(databaseName) + .dbCollectionsAnnotationMetadataName("annotationMetadata") 
+ .dbCollectionsAnnotationsName(GenotypedVcfJobTestUtils.COLLECTION_ANNOTATIONS_NAME) + .dbCollectionsFeaturesName("features") + .dbCollectionsFilesName("files") + .dbCollectionsStatisticsName("populationStatistics") + .dbCollectionsVariantsName("variants") + .inputFasta(fasta.getAbsolutePath()) .inputStudyId(GenotypedVcfJobTestUtils.INPUT_STUDY_ID) + .inputStudyName("small vcf") .inputStudyType("COLLECTION") + .inputVcf(inputFile.getAbsolutePath()) + .inputVcfAggregation("NONE") + .inputVcfId(GenotypedVcfJobTestUtils.INPUT_VCF_ID) .outputDirAnnotation(outputDirAnnotation) .outputDirStatistics(outputDirStats) - .databaseName(databaseName) - .appVepPath(GenotypedVcfJobTestUtils.getMockVep().getPath()) .vepCachePath("") .vepCacheSpecies("human") .vepCacheVersion("1") .vepNumForks("1") - .appVepTimeout("60") .vepVersion("1") - .annotationOverwrite("false") - .inputFasta(fasta.getAbsolutePath()) - .configDbReadPreference("secondary") - .dbCollectionsVariantsName("variants") - .dbCollectionsFilesName("files") - .dbCollectionsFeaturesName("features") - .dbCollectionsStatisticsName("populationStatistics") - .dbCollectionsAnnotationMetadataName("annotationMetadata") .build() ); @@ -165,7 +166,9 @@ public void genotypedVcfJobTest() throws JobExecutionException, IOException, URI GenotypedVcfJobTestUtils.checkAnnotationCreateStep(vepOutputFile); GenotypedVcfJobTestUtils.checkOutputFileLength(vepOutputFile); - GenotypedVcfJobTestUtils.checkLoadedAnnotation(databaseName); + + GenotypedVcfJobTestUtils.checkLoadedAnnotation(mongoRule, databaseName); + GenotypedVcfJobTestUtils.checkSkippedOneMalformedLine(jobExecution); } @@ -254,6 +257,7 @@ public void genotypedVcfJobTestWithParametersFileAndCommandLineParameters() thro .outputDirAnnotation(outputDirAnnotation) .outputDirStatistics(outputDirStats) .databaseName(databaseName) + .dbCollectionsAnnotationsName(GenotypedVcfJobTestUtils.COLLECTION_ANNOTATIONS_NAME) .appVepPath(GenotypedVcfJobTestUtils.getMockVep().getPath()) 
.appVepTimeout("60") .inputFasta(fasta.getAbsolutePath()) @@ -272,7 +276,9 @@ public void genotypedVcfJobTestWithParametersFileAndCommandLineParameters() thro GenotypedVcfJobTestUtils.checkAnnotationCreateStep(vepOutputFile); GenotypedVcfJobTestUtils.checkOutputFileLength(vepOutputFile); - GenotypedVcfJobTestUtils.checkLoadedAnnotation(databaseName); + + GenotypedVcfJobTestUtils.checkLoadedAnnotation(mongoRule, databaseName); + GenotypedVcfJobTestUtils.checkSkippedOneMalformedLine(jobExecution); } @@ -289,6 +295,6 @@ public void onlyFileWithoutParametersFailsValidation() throws JobExecutionExcept evaPipelineJobLauncherCommandLineRunner.run(new EvaCommandLineBuilder().build()); assertEquals(EvaPipelineJobLauncherCommandLineRunner.EXIT_WITH_ERRORS, - evaPipelineJobLauncherCommandLineRunner.getExitCode()); + evaPipelineJobLauncherCommandLineRunner.getExitCode()); } } diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java index c26dec720..9edfd9e1c 100644 --- a/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java @@ -1,6 +1,8 @@ package uk.ac.ebi.eva.test.utils; -import org.opencb.biodata.models.variant.Variant; +import com.mongodb.BasicDBList; +import com.mongodb.DBCursor; +import com.mongodb.DBObject; import org.opencb.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.StorageManagerException; import org.opencb.opencga.storage.core.StorageManagerFactory; @@ -11,7 +13,7 @@ import org.springframework.batch.core.StepExecution; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.utils.FileUtils; +import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.utils.URLHelper; import java.io.BufferedReader; @@ -28,8 +30,10 @@ import java.util.zip.GZIPInputStream; import static org.junit.Assert.assertEquals; +import static 
org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; import static uk.ac.ebi.eva.test.utils.JobTestUtils.getLines; import static uk.ac.ebi.eva.utils.FileUtils.getResource; @@ -51,6 +55,8 @@ public class GenotypedVcfJobTestUtils { public static final String COLLECTION_VARIANTS_NAME = "variants"; + public static final String COLLECTION_ANNOTATIONS_NAME = "annotations"; + public static final String COLLECTION_ANNOTATION_METADATA_NAME = "annotationMetadata"; private static final int EXPECTED_ANNOTATIONS = 537; @@ -66,6 +72,10 @@ public static VariantDBIterator getVariantDBIterator(String dbName) throws Illeg return variantDBAdaptor.iterator(new QueryOptions()); } + public static DBCursor getAnnotationDBCursor(TemporaryMongoRule mongoRule, String databaseName){ + return mongoRule.getCollection(databaseName, COLLECTION_ANNOTATIONS_NAME).find(); + } + /** * 4 annotation flow annotation input vep generate step * @@ -97,30 +107,23 @@ public static void checkAnnotationInput(File vepInputFile) throws IOException { /** * Annotation load step: check documents in DB have annotation (only consequence type) - * - * @param dbName - * @throws IllegalAccessException - * @throws ClassNotFoundException - * @throws InstantiationException - * @throws StorageManagerException */ - public static void checkLoadedAnnotation(String dbName) throws IllegalAccessException, ClassNotFoundException, - InstantiationException, StorageManagerException { - VariantDBIterator iterator; - iterator = getVariantDBIterator(dbName); + public static void checkLoadedAnnotation(TemporaryMongoRule mongoRule, String databaseName) { + DBCursor cursor = getAnnotationDBCursor(mongoRule, databaseName); int count = 0; int consequenceTypeCount = 0; - while (iterator.hasNext()) { + while 
(cursor.hasNext()) { count++; - Variant next = iterator.next(); - if (next.getAnnotation().getConsequenceTypes() != null) { - consequenceTypeCount += next.getAnnotation().getConsequenceTypes().size(); - } + DBObject annotation = cursor.next(); + BasicDBList consequenceTypes = (BasicDBList) annotation.get(CONSEQUENCE_TYPE_FIELD); + assertNotNull(consequenceTypes); + consequenceTypeCount += consequenceTypes.size(); } - assertEquals(EXPECTED_VARIANTS, count); + assertTrue(count > 0); assertEquals(EXPECTED_VALID_ANNOTATIONS, consequenceTypeCount); + } public static void checkOutputFileLength(File vepOutputFile) throws IOException { diff --git a/src/test/java/uk/ac/ebi/eva/utils/EvaCommandLineBuilder.java b/src/test/java/uk/ac/ebi/eva/utils/EvaCommandLineBuilder.java index cb3ef6aa7..1bf8d7216 100644 --- a/src/test/java/uk/ac/ebi/eva/utils/EvaCommandLineBuilder.java +++ b/src/test/java/uk/ac/ebi/eva/utils/EvaCommandLineBuilder.java @@ -179,4 +179,8 @@ public EvaCommandLineBuilder dbCollectionsAnnotationMetadataName(String name) { public EvaCommandLineBuilder chunksize(String chunksize) { return addString(JobParametersNames.CONFIG_CHUNK_SIZE, chunksize); } + + public EvaCommandLineBuilder dbCollectionsAnnotationsName(String name) { + return addString(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, name); + } } diff --git a/src/test/java/uk/ac/ebi/eva/utils/EvaJobParameterBuilder.java b/src/test/java/uk/ac/ebi/eva/utils/EvaJobParameterBuilder.java index 8975c1893..e76a42839 100644 --- a/src/test/java/uk/ac/ebi/eva/utils/EvaJobParameterBuilder.java +++ b/src/test/java/uk/ac/ebi/eva/utils/EvaJobParameterBuilder.java @@ -90,6 +90,12 @@ public EvaJobParameterBuilder collectionAnnotationMetadataName(String collection return this; } + public EvaJobParameterBuilder collectionAnnotationsName(String collectionAnnotationsName) { + addParameter(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, + new JobParameter(collectionAnnotationsName)); + return this; + } + public 
EvaJobParameterBuilder vepPath(String vepPath) { addParameter(JobParametersNames.APP_VEP_PATH, new JobParameter(vepPath)); return this; diff --git a/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java b/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java index 1abd454f6..17ea4c57a 100644 --- a/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java +++ b/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java @@ -12,19 +12,19 @@ public class MongoDBHelperTest { @Test public void testBuildStorageIdSnv() { Variant variant = new Variant("1", 1000, 1000, "A", "C"); - assertEquals("1_1000_A_C", MongoDBHelper.buildStorageId(variant)); + assertEquals("1_1000_A_C", MongoDBHelper.buildVariantStorageId(variant)); } @Test public void testBuildStorageIdIndel() { Variant variant = new Variant("1", 1000, 1002, "", "CA"); - assertEquals("1_1000__CA", MongoDBHelper.buildStorageId(variant)); + assertEquals("1_1000__CA", MongoDBHelper.buildVariantStorageId(variant)); } @Test public void testBuildStorageIdStructural() { String alt = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT"; Variant variant = new Variant("1", 1000, 1002, "TAG", alt); - assertEquals("1_1000_TAG_" + new String(CryptoUtils.encryptSha1(alt)), MongoDBHelper.buildStorageId(variant)); + assertEquals("1_1000_TAG_" + new String(CryptoUtils.encryptSha1(alt)), MongoDBHelper.buildVariantStorageId(variant)); } } From 8b623c9a6dc21296ece4d4e7bf7002a206b49b4c Mon Sep 17 00:00:00 2001 From: Diego Poggioli Date: Mon, 10 Apr 2017 16:02:09 +0100 Subject: [PATCH 02/48] DB_COLLECTIONS_ANNOTATIONS_NAME added as mandatory parameter --- ...notationLoaderStepParametersValidator.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java index 
3c20ce194..00d3b7441 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java @@ -44,10 +44,10 @@ public class AnnotationLoaderStepParametersValidator extends DefaultJobParameter private boolean isStudyIdRequired; public AnnotationLoaderStepParametersValidator(boolean isStudyIdRequired) { - super(new String[]{JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, - JobParametersNames.DB_NAME, - JobParametersNames.OUTPUT_DIR_ANNOTATION}, - new String[]{}); + super(new String[]{JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, + JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, JobParametersNames.DB_NAME, + JobParametersNames.OUTPUT_DIR_ANNOTATION}, + new String[]{}); this.isStudyIdRequired = isStudyIdRequired; } @@ -61,12 +61,12 @@ private CompositeJobParametersValidator compositeJobParametersValidator() { List jobParametersValidators = new ArrayList<>(); Collections.addAll(jobParametersValidators, new DbCollectionsAnnotationsNameValidator(), - new DbCollectionsVariantsNameValidator(), - new DbNameValidator(), - new OutputDirAnnotationValidator(), - new OptionalValidator(new ConfigRestartabilityAllowValidator(), - JobParametersNames.CONFIG_RESTARTABILITY_ALLOW), - new OptionalValidator(new ConfigChunkSizeValidator(), JobParametersNames.CONFIG_CHUNK_SIZE) + new DbCollectionsVariantsNameValidator(), + new DbNameValidator(), + new OutputDirAnnotationValidator(), + new OptionalValidator(new ConfigRestartabilityAllowValidator(), + JobParametersNames.CONFIG_RESTARTABILITY_ALLOW), + new OptionalValidator(new ConfigChunkSizeValidator(), JobParametersNames.CONFIG_CHUNK_SIZE) ); if (isStudyIdRequired) { From d0146d67ae0f3750fc0dbec5d1cead4ed2c15c62 Mon Sep 17 00:00:00 2001 From: Diego Poggioli Date: Wed, 19 Apr 2017 14:26:21 +0100 Subject: [PATCH 03/48] New writer added to write some annotation fields 
into Variant collection --- .../data/VariantToDBObjectConverter.java | 4 +- .../eva/commons/models/data/Annotation.java | 173 +++++++++++++++ .../data/AnnotationFieldNames.java | 2 +- .../commons/models/data/ConsequenceType.java | 23 +- .../ac/ebi/eva/commons/models/data/Score.java | 2 - .../ebi/eva/commons/models/data/Variant.java | 10 +- .../models/data/VariantAnnotation.java | 155 +++---------- .../ac/ebi/eva/commons/models/data/Xref.java | 6 +- .../eva/pipeline/configuration/BeanNames.java | 4 +- ...ava => AnnotationReaderConfiguration.java} | 6 +- ...nnotationCompositeWriterConfiguration.java | 57 +++++ ...nnotationInVariantWriterConfiguration.java | 43 ++++ ...ava => AnnotationWriterConfiguration.java} | 22 +- .../io/mappers/AnnotationLineMapper.java | 33 ++- .../io/readers/AnnotationFlatFileReader.java | 4 +- .../AnnotationInVariantMongoWriter.java | 206 ++++++++++++++++++ ...Writer.java => AnnotationMongoWriter.java} | 116 +++++----- .../io/writers/VariantMongoWriter.java | 9 +- .../jobs/steps/AnnotationLoaderStep.java | 32 ++- .../uk/ac/ebi/eva/utils/MongoDBHelper.java | 5 + .../io/mappers/AnnotationLineMapperTest.java | 26 +-- .../readers/AnnotationFlatFileReaderTest.java | 10 +- .../AnnotationInVariantMongoWriterTest.java | 162 ++++++++++++++ ...st.java => AnnotationMongoWriterTest.java} | 86 +++++--- .../io/writers/VariantMongoWriterTest.java | 16 +- .../writers/VepAnnotationFileWriterTest.java | 2 +- .../eva/pipeline/jobs/AnnotationJobTest.java | 7 +- .../jobs/steps/AnnotationLoaderStepTest.java | 50 +++-- ...nnotationData.java => AnnotationData.java} | 2 +- .../ebi/eva/test/data/VepOutputContent.java | 13 +- .../test/utils/GenotypedVcfJobTestUtils.java | 8 +- 31 files changed, 945 insertions(+), 349 deletions(-) create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java rename src/main/java/uk/ac/ebi/eva/commons/models/{converters => }/data/AnnotationFieldNames.java (97%) rename 
src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/{VariantAnnotationReaderConfiguration.java => AnnotationReaderConfiguration.java} (86%) create mode 100644 src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java create mode 100644 src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/{VariantAnnotationWriterConfiguration.java => AnnotationWriterConfiguration.java} (58%) create mode 100644 src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java rename src/main/java/uk/ac/ebi/eva/pipeline/io/writers/{VepAnnotationMongoWriter.java => AnnotationMongoWriter.java} (53%) create mode 100644 src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java rename src/test/java/uk/ac/ebi/eva/pipeline/io/writers/{VepAnnotationMongoWriterTest.java => AnnotationMongoWriterTest.java} (69%) rename src/test/java/uk/ac/ebi/eva/test/data/{VariantAnnotationData.java => AnnotationData.java} (98%) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java index f1e56fa79..279d49e7c 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java @@ -87,7 +87,7 @@ public class VariantToDBObjectConverter implements Converter /** * Create a converter between Variant and DBObject entities when the fields of VariantSourceEntry, - * VariantAnnotation and VariantStats should not be written. + * Annotation and VariantStats should not be written. 
*/ public VariantToDBObjectConverter() { this(null, null); @@ -95,7 +95,7 @@ public VariantToDBObjectConverter() { /** * Create a converter between Variant and DBObject entities. For complex inner fields (VariantSourceEntry, - * VariantStats, VariantAnnotation), converters must be provided. If they are null, it is assumed that the field + * VariantStats, Annotation), converters must be provided. If they are null, it is assumed that the field * should not be written. * * @param variantSourceEntryConverter Nullable diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java new file mode 100644 index 000000000..3af9d49a5 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java @@ -0,0 +1,173 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.commons.models.data; + +import com.google.common.base.Strings; +import org.springframework.data.annotation.Id; +import org.springframework.data.annotation.Transient; +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Slim version of {@link org.opencb.biodata.models.variant.annotation.VariantAnnotation} + * Unused fields removed. 
+ * + */ +@Document +public class Annotation { + + @Field(value = AnnotationFieldNames.CHROMOSOME_FIELD) + private String chromosome; + + @Field(value = AnnotationFieldNames.START_FIELD) + private int start; + + @Field(value = AnnotationFieldNames.END_FIELD) + private int end; + + @Transient + private String referenceAllele; + + @Transient + private String alternativeAllele; + + @Id + private String id; + + @Field(value = AnnotationFieldNames.ENSEMBL_VERSION_FIELD) + private String ensmblVersion; + + @Field(value = AnnotationFieldNames.VEP_CACHE_VERSION_FIELD) + private String vepCacheVersion; + + @Field(value = AnnotationFieldNames.XREFS_FIELD) + private Set xrefs; + + @Field(value = AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD) + private Set consequenceTypes; + + @Transient + private Map additionalAttributes; + + public Annotation(String chromosome, int start, int end, String referenceAllele) { + this(chromosome, start, end, referenceAllele, ""); + } + + public Annotation(String chromosome, int start, int end, String referenceAllele, String alternativeAllele) { + this.chromosome = chromosome; + this.start = start; + this.end = end; + this.referenceAllele = referenceAllele; + this.alternativeAllele = alternativeAllele; + + this.id = ""; + this.xrefs = new HashSet<>(); + this.consequenceTypes = new HashSet<>(); + this.additionalAttributes = new HashMap<>(); + } + + public String getChromosome() { + return chromosome; + } + + public void setChromosome(String chromosome) { + this.chromosome = chromosome; + } + + public int getStart() { + return start; + } + + public void setStart(int start) { + this.start = start; + } + + public int getEnd() { + return end; + } + + public void setEnd(int end) { + this.end = end; + } + + public String getReferenceAllele() { + return referenceAllele; + } + + public String getAlternativeAllele() { + return alternativeAllele; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public 
Set getXrefs() { + return xrefs; + } + + public void setXrefs(Set xrefs) { + this.xrefs = xrefs; + } + + public Set getConsequenceTypes() { + return consequenceTypes; + } + + public void setConsequenceTypes(Set consequenceTypes) { + this.consequenceTypes = consequenceTypes; + } + + public String getEnsmblVersion() { + return ensmblVersion; + } + + public void setEnsmblVersion(String ensmblVersion) { + this.ensmblVersion = ensmblVersion; + } + + public String getVepCacheVersion() { + return vepCacheVersion; + } + + public void setVepCacheVersion(String vepCacheVersion) { + this.vepCacheVersion = vepCacheVersion; + } + + public void generateXrefsFromConsequenceTypes(){ + for (ConsequenceType consequenceType : consequenceTypes) { + if (!Strings.isNullOrEmpty(consequenceType.getGeneName())) { + xrefs.add(new Xref(consequenceType.getGeneName(), "HGNC")); + } + if (!Strings.isNullOrEmpty(consequenceType.getEnsemblGeneId())) { + xrefs.add(new Xref(consequenceType.getEnsemblGeneId(), "ensemblGene")); + } + if (!Strings.isNullOrEmpty(consequenceType.getEnsemblTranscriptId())) { + xrefs.add(new Xref(consequenceType.getEnsemblTranscriptId(), "ensemblTranscript")); + } + } + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationFieldNames.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java similarity index 97% rename from src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationFieldNames.java rename to src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java index e0993131f..b505892a8 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationFieldNames.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.converters.data; +package uk.ac.ebi.eva.commons.models.data; public class AnnotationFieldNames { public static final String CHROMOSOME_FIELD = "chr"; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java index 3eeebf692..6cba7d3c0 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java @@ -19,9 +19,8 @@ import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; -import uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames; - import java.util.List; +import java.util.Set; /** * org.opencb.biodata.models.variant.annotation.ConsequenceType @@ -60,13 +59,13 @@ public class ConsequenceType { private String codon; @Field(value = AnnotationFieldNames.SIFT_FIELD) - private Score sifts; + private Score sift; @Field(value = AnnotationFieldNames.POLYPHEN_FIELD) private Score polyphen; @Field(value = AnnotationFieldNames.SO_ACCESSION_FIELD) - private List soAccessions; + private Set soAccessions; @Field(value = AnnotationFieldNames.RELATIVE_POS_FIELD) private Integer relativePosition; @@ -162,20 +161,20 @@ public String getAaChange() { return aaChange; } - public List getSoAccessions() { + public Set getSoAccessions() { return soAccessions; } - public void setSoAccessions(List soAccessions) { + public void setSoAccessions(Set soAccessions) { this.soAccessions = soAccessions; } - public Score getSifts() { - return sifts; + public Score getSift() { + return sift; } - public void setSifts(Score sifts) { - this.sifts = sifts; + public void setSift(Score sift) { + this.sift = sift; } public Score getPolyphen() { @@ -205,7 +204,7 @@ public boolean equals(Object o) { if (aaPosition != null ? 
!aaPosition.equals(that.aaPosition) : that.aaPosition != null) return false; if (aaChange != null ? !aaChange.equals(that.aaChange) : that.aaChange != null) return false; if (codon != null ? !codon.equals(that.codon) : that.codon != null) return false; - if (sifts != null ? !sifts.equals(that.sifts) : that.sifts != null) return false; + if (sift != null ? !sift.equals(that.sift) : that.sift != null) return false; if (polyphen != null ? !polyphen.equals(that.polyphen) : that.polyphen != null) return false; if (soAccessions != null ? !soAccessions.equals(that.soAccessions) : that.soAccessions != null) return false; return relativePosition != null ? relativePosition @@ -224,7 +223,7 @@ public int hashCode() { result = 31 * result + (aaPosition != null ? aaPosition.hashCode() : 0); result = 31 * result + (aaChange != null ? aaChange.hashCode() : 0); result = 31 * result + (codon != null ? codon.hashCode() : 0); - result = 31 * result + (sifts != null ? sifts.hashCode() : 0); + result = 31 * result + (sift != null ? sift.hashCode() : 0); result = 31 * result + (polyphen != null ? polyphen.hashCode() : 0); result = 31 * result + (soAccessions != null ? soAccessions.hashCode() : 0); result = 31 * result + (relativePosition != null ? 
relativePosition.hashCode() : 0); diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java index da1868164..5fe241a1b 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java @@ -19,8 +19,6 @@ import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; -import uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames; - /** * From {@link org.opencb.biodata.models.variant.annotation.Score} */ diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java index f24de34cc..6ceef9db2 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java @@ -117,7 +117,7 @@ public enum VariantType { /** * Annotations of the genomic variation. */ - private VariantAnnotation annotation; + private Annotation annotation; public Variant() { @@ -147,7 +147,7 @@ public Variant(String chromosome, int start, int end, String reference, String a } this.sourceEntries = new HashMap<>(); - this.annotation = new VariantAnnotation(this.chromosome, this.start, this.end, this.reference); + this.annotation = new Annotation(this.chromosome, this.start, this.end, this.reference); } public VariantType getType() { @@ -287,11 +287,11 @@ public VariantStats getStats(String studyId, String fileId) { return file.getStats(); } - public VariantAnnotation getAnnotation() { + public Annotation getAnnotation() { return annotation; } - public void setAnnotation(VariantAnnotation annotation) { + public void setAnnotation(Annotation annotation) { this.annotation = annotation; } @@ -391,7 +391,7 @@ public boolean equals(Object obj) { } /** - * As the clone in the classes Map, Set and VariantAnnotation doesn't exist, this is a shallow clone. 
+ * As the clone in the classes Map, Set and Annotation doesn't exist, this is a shallow clone. * * @return a shallow copy of this variant. */ diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java index 0f2228dc5..c1947881a 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java @@ -15,161 +15,56 @@ */ package uk.ac.ebi.eva.commons.models.data; -import com.google.common.base.Strings; -import org.springframework.data.annotation.Id; -import org.springframework.data.annotation.Transient; -import org.springframework.data.mongodb.core.mapping.Document; -import org.springframework.data.mongodb.core.mapping.Field; - -import uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames; - -import java.util.HashMap; +import java.util.Collection; import java.util.HashSet; -import java.util.Map; import java.util.Set; /** - * Slim version of {@link org.opencb.biodata.models.variant.annotation.VariantAnnotation} - * Unused fields removed. 
* */ -@Document public class VariantAnnotation { + private Set sifts = new HashSet<>(); + private Set polyphens = new HashSet<>(); + private Set soAccessions = new HashSet<>(); + private Set xrefIds = new HashSet<>(); - @Field(value = AnnotationFieldNames.CHROMOSOME_FIELD) - private String chromosome; - - @Field(value = AnnotationFieldNames.START_FIELD) - private int start; - - @Field(value = AnnotationFieldNames.END_FIELD) - private int end; - - @Transient - private String referenceAllele; - - @Transient - private String alternativeAllele; - - @Id - private String id; - - @Field(value = AnnotationFieldNames.ENSEMBL_VERSION_FIELD) - private String ensmblVersion; - - @Field(value = AnnotationFieldNames.VEP_CACHE_VERSION_FIELD) - private String vepCacheVersion; - - @Field(value = AnnotationFieldNames.XREFS_FIELD) - private Set xrefs; - - @Field(value = AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD) - private Set consequenceTypes; - - @Transient - private Map additionalAttributes; - - public VariantAnnotation(String chromosome, int start, int end, String referenceAllele) { - this(chromosome, start, end, referenceAllele, ""); + public void addSift(Double sift) { + this.sifts.add(sift); } - public VariantAnnotation(String chromosome, int start, int end, String referenceAllele, String alternativeAllele) { - this.chromosome = chromosome; - this.start = start; - this.end = end; - this.referenceAllele = referenceAllele; - this.alternativeAllele = alternativeAllele; - - this.id = ""; - this.xrefs = new HashSet<>(); - this.consequenceTypes = new HashSet<>(); - this.additionalAttributes = new HashMap<>(); + public void addSifts(Collection sifts) { + this.sifts.addAll(sifts); } - public String getChromosome() { - return chromosome; + public void addPolyphen(Double polyphen) { + this.polyphens.add(polyphen); } - public void setChromosome(String chromosome) { - this.chromosome = chromosome; + public void addPolyphens(Collection polyphens) { + this.polyphens.addAll(polyphens); } - 
public int getStart() { - return start; + public void addXrefIds(Set xrefIds) { + this.xrefIds.addAll(xrefIds); } - public void setStart(int start) { - this.start = start; + public void addsoAccessions(Set soAccessions) { + this.soAccessions.addAll(soAccessions); } - public int getEnd() { - return end; + public Set getSifts() { + return sifts; } - public void setEnd(int end) { - this.end = end; + public Set getPolyphens() { + return polyphens; } - public String getReferenceAllele() { - return referenceAllele; + public Set getSoAccessions() { + return soAccessions; } - public String getAlternativeAllele() { - return alternativeAllele; + public Set getXrefIds() { + return xrefIds; } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public Set getXrefs() { - return xrefs; - } - - public void setXrefs(Set xrefs) { - this.xrefs = xrefs; - } - - public Set getConsequenceTypes() { - return consequenceTypes; - } - - public void setConsequenceTypes(Set consequenceTypes) { - this.consequenceTypes = consequenceTypes; - } - - public String getEnsmblVersion() { - return ensmblVersion; - } - - public void setEnsmblVersion(String ensmblVersion) { - this.ensmblVersion = ensmblVersion; - } - - public String getVepCacheVersion() { - return vepCacheVersion; - } - - public void setVepCacheVersion(String vepCacheVersion) { - this.vepCacheVersion = vepCacheVersion; - } - - public void extractXrefsFromConsequenceTypes(){ - for (ConsequenceType consequenceType : consequenceTypes) { - if (!Strings.isNullOrEmpty(consequenceType.getGeneName())) { - xrefs.add(new Xref(consequenceType.getGeneName(), "HGNC")); - } - if (!Strings.isNullOrEmpty(consequenceType.getEnsemblGeneId())) { - xrefs.add(new Xref(consequenceType.getEnsemblGeneId(), "ensemblGene")); - } - if (!Strings.isNullOrEmpty(consequenceType.getEnsemblTranscriptId())) { - xrefs.add(new Xref(consequenceType.getEnsemblTranscriptId(), "ensemblTranscript")); - } - } - } - } diff --git 
a/src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java index 5487494f0..ca59986a9 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java @@ -18,8 +18,6 @@ import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; -import uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames; - /** * From org.opencb.biodata.models.variant.annotation.Xref */ @@ -37,6 +35,10 @@ public Xref(String id, String src) { this.src = src; } + public String getId() { + return id; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java index 87badb9ad..fee4beb7a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java @@ -27,7 +27,9 @@ public class BeanNames { public static final String GENE_WRITER = "gene-writer"; public static final String VEP_ANNOTATION_WRITER = "vep-annotation-writer"; - public static final String VARIANT_ANNOTATION_WRITER = "variant-annotation-writer"; + public static final String ANNOTATION_WRITER = "annotation-writer"; + public static final String ANNOTATION_IN_VARIANT_WRITER = "annotation-in-variant-writer"; + public static final String COMPOSITE_ANNOTATION_VARIANT_WRITER = "composite-annotation-variant-writer"; public static final String VARIANT_WRITER = "variant-writer"; public static final String ANNOTATION_SKIP_STEP_DECIDER = "annotation-skip-step-decider"; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantAnnotationReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java similarity index 86% rename 
from src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantAnnotationReaderConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java index d355c6e49..1e4173adb 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantAnnotationReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java @@ -20,7 +20,7 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; +import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.pipeline.io.readers.AnnotationFlatFileReader; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; @@ -30,11 +30,11 @@ * Configuration to inject a AnnotationFlatFileReader as a Variant Annotation Reader in the pipeline. */ @Configuration -public class VariantAnnotationReaderConfiguration { +public class AnnotationReaderConfiguration { @Bean(VARIANT_ANNOTATION_READER) @StepScope - public ItemStreamReader variantAnnotationReader(AnnotationParameters annotationParameters) { + public ItemStreamReader annotationReader(AnnotationParameters annotationParameters) { return new AnnotationFlatFileReader(annotationParameters.getVepOutput()); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java new file mode 100644 index 000000000..ace3dcdd7 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java @@ -0,0 +1,57 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.configuration.writers; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.item.ItemWriter; +import org.springframework.batch.item.support.CompositeItemWriter; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Profile; + +import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.pipeline.Application; + +import java.util.Arrays; + +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.COMPOSITE_ANNOTATION_VARIANT_WRITER; +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.ANNOTATION_IN_VARIANT_WRITER; +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.ANNOTATION_WRITER; + +@Configuration +@Import({AnnotationWriterConfiguration.class, AnnotationInVariantWriterConfiguration.class}) +public class AnnotationCompositeWriterConfiguration { + + @Autowired + @Qualifier(ANNOTATION_WRITER) + private ItemWriter annotationItemWriter; + + @Autowired + @Qualifier(ANNOTATION_IN_VARIANT_WRITER) + private ItemWriter variantAnnotationItemWriter; + + @Bean(COMPOSITE_ANNOTATION_VARIANT_WRITER) + @StepScope + @Profile(Application.VARIANT_ANNOTATION_MONGO_PROFILE) + public CompositeItemWriter compositeAnnotationItemWriter(){ + 
CompositeItemWriter writer = new CompositeItemWriter(); + writer.setDelegates(Arrays.asList(annotationItemWriter, variantAnnotationItemWriter)); + return writer; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java new file mode 100644 index 000000000..ae91d81bc --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java @@ -0,0 +1,43 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.configuration.writers; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.item.ItemWriter; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Profile; +import org.springframework.data.mongodb.core.MongoOperations; + +import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.pipeline.Application; +import uk.ac.ebi.eva.pipeline.io.writers.AnnotationInVariantMongoWriter; +import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; + +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.ANNOTATION_IN_VARIANT_WRITER; + +@Configuration +public class AnnotationInVariantWriterConfiguration { + + @Bean(ANNOTATION_IN_VARIANT_WRITER) + @StepScope + @Profile(Application.VARIANT_ANNOTATION_MONGO_PROFILE) + public ItemWriter variantAnnotationItemWriter(MongoOperations mongoOperations, + DatabaseParameters databaseParameters) { + return new AnnotationInVariantMongoWriter(mongoOperations, databaseParameters.getCollectionVariantsName()); + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java similarity index 58% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java index bba043aa3..eb80bdfcf 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java @@ -22,25 +22,25 @@ import org.springframework.context.annotation.Profile; import 
org.springframework.data.mongodb.core.MongoOperations; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; +import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.pipeline.Application; -import uk.ac.ebi.eva.pipeline.io.writers.VepAnnotationMongoWriter; +import uk.ac.ebi.eva.pipeline.io.writers.AnnotationMongoWriter; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; -import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_ANNOTATION_WRITER; +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.ANNOTATION_WRITER; @Configuration -public class VariantAnnotationWriterConfiguration { +public class AnnotationWriterConfiguration { - @Bean(VARIANT_ANNOTATION_WRITER) + @Bean(ANNOTATION_WRITER) @StepScope @Profile(Application.VARIANT_ANNOTATION_MONGO_PROFILE) - public ItemWriter variantAnnotationItemWriter(MongoOperations mongoOperations, - DatabaseParameters databaseParameters, - AnnotationParameters annotationParameters) { - return new VepAnnotationMongoWriter(mongoOperations, databaseParameters.getCollectionAnnotationsName(), - annotationParameters.getVepVersion(), - annotationParameters.getVepCacheVersion()); + public ItemWriter annotationItemWriter(MongoOperations mongoOperations, + DatabaseParameters databaseParameters, + AnnotationParameters annotationParameters) { + return new AnnotationMongoWriter(mongoOperations, databaseParameters.getCollectionAnnotationsName(), + annotationParameters.getVepVersion(), + annotationParameters.getVepCacheVersion()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java index 250f57617..cd4143cc6 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java @@ -22,18 +22,18 @@ import 
org.slf4j.LoggerFactory; import org.springframework.batch.item.file.LineMapper; +import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.commons.models.data.ConsequenceType; import uk.ac.ebi.eva.commons.models.data.Score; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import java.util.Arrays; import java.util.HashMap; -import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** - * Map a line in VEP output file to {@link VariantAnnotation} + * Map a line in VEP output file to {@link Annotation} * * Example of VEP output line * 20_60343_G/A 20:60343 A - - - intergenic_variant - - - - - - @@ -45,19 +45,19 @@ * public methods in VepFormatReader can't be reused because there is a reference to the previous line (currentVariantString) * that prevent each line to be independent * - * Here each line is mapped to {@link VariantAnnotation}; in case of two annotations for the same variant, a new - * {@link VariantAnnotation} object is created containing only the fields that will be appended: + * Here each line is mapped to {@link Annotation}; in case of two annotations for the same variant, a new + * {@link Annotation} object is created containing only the fields that will be appended: * - ConsequenceTypes * - Hgvs */ -public class AnnotationLineMapper implements LineMapper { +public class AnnotationLineMapper implements LineMapper { private static final Logger logger = LoggerFactory.getLogger(AnnotationLineMapper.class); /** - * Map a line in VEP output file to {@link VariantAnnotation} + * Map a line in VEP output file to {@link Annotation} * @param line in VEP output * @param lineNumber - * @return a {@link VariantAnnotation} + * @return a {@link Annotation} * * Most of the code is from org.opencb.biodata.formats.annotation.io.VepFormatReader#read() with few differences: * - An empty array is initialized for Hgvs (like ConsequenceTypes); @@ -65,13 +65,13 @@ public class AnnotationLineMapper 
implements LineMapper { * - The logic to move around the file (read line) and reference to previous line (currentVariantString) are removed; */ @Override - public VariantAnnotation mapLine(String line, int lineNumber) { - //logger.debug("Mapping line {} to VariantAnnotation", line); + public Annotation mapLine(String line, int lineNumber) { + //logger.debug("Mapping line {} to Annotation", line); ConsequenceType consequenceType = new ConsequenceType(); String[] lineFields = line.split("\t"); Map variantMap = parseVariant(lineFields[0], lineFields[1]); // coordinates and alternative are only parsed once - VariantAnnotation currentAnnotation = new VariantAnnotation( + Annotation currentAnnotation = new Annotation( variantMap.get("chromosome"), Integer.valueOf(variantMap.get("start")), Integer.valueOf(variantMap.get("end")), variantMap.get("reference"), @@ -82,7 +82,7 @@ public VariantAnnotation mapLine(String line, int lineNumber) { * Some lines do not have extra field and end with a \t: the split function above does not return that field */ if(lineFields.length == 14) { - parseExtraField(consequenceType, lineFields[13], currentAnnotation); + parseExtraField(consequenceType, lineFields[13]); } // Remaining fields only of interest if the feature is a transcript @@ -120,8 +120,8 @@ private void parseTranscriptFields(ConsequenceType consequenceType, String[] lin consequenceType.setCodon(lineFields[11]); } - private List mapSoTermsToSoAccessions(String[] soTerms){ - return Arrays.stream(soTerms).map(ConsequenceTypeMappings.termToAccession::get).collect(Collectors.toList()); + private Set mapSoTermsToSoAccessions(String[] soTerms){ + return Arrays.stream(soTerms).map(ConsequenceTypeMappings.termToAccession::get).collect(Collectors.toSet()); } /** @@ -189,8 +189,7 @@ private Map parseVariant(String variantString, String coordinates * * The parseFrequencies option has been removed */ - private void parseExtraField(ConsequenceType consequenceType, String extraField, 
VariantAnnotation currentAnnotation) { - + private void parseExtraField(ConsequenceType consequenceType, String extraField) { for (String field : extraField.split(";")) { String[] keyValue = field.split("="); @@ -202,7 +201,7 @@ private void parseExtraField(ConsequenceType consequenceType, String extraField, consequenceType.setPolyphen(parseProteinSubstitutionScore(keyValue[1])); break; case "sift": // Format is SIFT=tolerated(0.07) - consequenceType.setSifts(parseProteinSubstitutionScore(keyValue[1])); + consequenceType.setSift(parseProteinSubstitutionScore(keyValue[1])); break; case "strand": consequenceType.setStrand(keyValue[1].equals("1")?"+":"-"); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java index a0d2c1d4e..f436fa1ec 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java @@ -18,7 +18,7 @@ import org.springframework.batch.item.file.FlatFileItemReader; import org.springframework.core.io.Resource; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; +import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.pipeline.io.GzipLazyResource; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; @@ -34,7 +34,7 @@ * 20_60479_C/T 20:60479 T - - - intergenic_variant - - - - - rs149529999 GMAF=T:0.0018;AFR_MAF=T:0.01;AMR_MAF=T:0.0028 * ... 
*/ -public class AnnotationFlatFileReader extends FlatFileItemReader { +public class AnnotationFlatFileReader extends FlatFileItemReader { public AnnotationFlatFileReader(File file) { Resource resource = new GzipLazyResource(file); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java new file mode 100644 index 000000000..a6b011fdb --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -0,0 +1,206 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.io.writers; + +import com.mongodb.BasicDBList; +import com.mongodb.BasicDBObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.batch.item.data.MongoItemWriter; +import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.data.mongodb.core.query.BasicQuery; +import org.springframework.data.mongodb.core.query.BasicUpdate; +import org.springframework.util.Assert; + +import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.data.ConsequenceType; +import uk.ac.ebi.eva.commons.models.data.Score; +import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; +import uk.ac.ebi.eva.commons.models.data.Xref; +import uk.ac.ebi.eva.utils.MongoDBHelper; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.GENE_NAME_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SO_ACCESSION_FIELD; + +/** + * Update the {@link uk.ac.ebi.eva.commons.models.data.Variant} mongo document with some fields from {@link Annotation} + * and {@link ConsequenceType} + *

+ * The fields are: + * - sifts + * - polyphens + * - soAccessions + * - Xref Ids + */ +public class AnnotationInVariantMongoWriter extends MongoItemWriter { + private static final Logger logger = LoggerFactory.getLogger(AnnotationInVariantMongoWriter.class); + + private final MongoOperations mongoOperations; + + private final String collection; + + public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, String collection) { + super(); + Assert.notNull(mongoOperations, "A Mongo instance is required"); + Assert.hasText(collection, "A collection name is required"); + + setCollection(collection); + setTemplate(mongoOperations); + + this.mongoOperations = mongoOperations; + this.collection = collection; + } + + @Override + protected void doWrite(List annotations) { + Map> annotationsByStorageId = annotations.stream() + .collect(Collectors.groupingBy(MongoDBHelper::buildVariantStorageId)); + + for (Map.Entry> annotationsIdEntry : annotationsByStorageId.entrySet()) { + VariantAnnotation variantAnnotation = extractFieldsFromAnnotations(annotationsIdEntry.getValue()); + + String storageId = annotationsIdEntry.getKey(); + BasicDBObject id = new BasicDBObject("_id", storageId); + + if (mongoOperations.exists(new BasicQuery(id), collection)) { + logger.trace("Writing annotations fields into mongo id: {}, collection: {}", storageId, collection); + + Set xrefs = variantAnnotation.getXrefIds(); + BasicDBObject updateGeneNames = new BasicDBObject("$addToSet", new BasicDBObject(GENE_NAME_FIELD, + new BasicDBObject( + "$each", + xrefs))); + mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateGeneNames), collection); + + Set soAccessions = variantAnnotation.getSoAccessions(); + BasicDBObject updateConsequenceTypes = new BasicDBObject("$addToSet", + new BasicDBObject(SO_ACCESSION_FIELD, + new BasicDBObject("$each", + soAccessions))); + mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateConsequenceTypes), collection); + + 
variantAnnotation.addSifts(lookupExistingSubstitutionScore(SIFT_FIELD, storageId)); + Set sifts = calculateRangeOfScores(variantAnnotation.getSifts()); + updateSubstitutionScore(sifts, SIFT_FIELD, id); + + variantAnnotation.addPolyphens(lookupExistingSubstitutionScore(POLYPHEN_FIELD, storageId)); + Set polyphens = calculateRangeOfScores(variantAnnotation.getPolyphens()); + updateSubstitutionScore(polyphens, POLYPHEN_FIELD, id); + + } else { + logger.info("Unable to update annotation fields into variant {} because it doesn't exist", storageId); + } + + } + } + + private void updateSubstitutionScore(Set substitutionScores, + String substitutionScoreName, + BasicDBObject id) { + if (!substitutionScores.isEmpty()) { + BasicDBObject updateSubstitutionScore = new BasicDBObject("$set", new BasicDBObject(substitutionScoreName, + substitutionScores)); + mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateSubstitutionScore), collection); + } + } + + /** + * Checks for a given variant if a protein substitution score is already present + * + * @param substitutionScoreField substitution score name like POLYPHEN_FIELD, SIFT_FIELD etc. 
+ * @param storageId variant ID + * @return a set containing all the substitution scores if any already loaded, or an empty set otherwise + */ + private Set lookupExistingSubstitutionScore(String substitutionScoreField, String storageId) { + Set substitutionScores = new HashSet<>(); + + BasicDBObject field = new BasicDBObject(substitutionScoreField, new BasicDBObject("$exists", true)) + .append("_id", storageId); + + BasicQuery fieldQuery = new BasicQuery(field); + fieldQuery.fields().include(substitutionScoreField); + fieldQuery.fields().exclude("_id"); + + BasicDBObject existingFields = mongoOperations.findOne(fieldQuery, BasicDBObject.class, collection); + + if (existingFields != null) { + BasicDBList scores = (BasicDBList) existingFields.getOrDefault(substitutionScoreField, new BasicDBList()); + substitutionScores.addAll(scores.stream().map(score -> (Double) score).collect(Collectors.toSet())); + } + + return substitutionScores; + } + + + /** + * Extract Xrefs, so terms and protein substitution score from {@link Annotation} + */ + private VariantAnnotation extractFieldsFromAnnotations(List annotations) { + VariantAnnotation variantAnnotation = new VariantAnnotation(); + + for (Annotation annotation : annotations) { + annotation.generateXrefsFromConsequenceTypes(); + Set xrefs = annotation.getXrefs(); + if (xrefs != null) { + variantAnnotation.addXrefIds(xrefs.stream().map(Xref::getId).collect(Collectors.toSet())); + } + + extractSubstitutionScores(variantAnnotation, annotation.getConsequenceTypes()); + } + + return variantAnnotation; + } + + private void extractSubstitutionScores(VariantAnnotation variantAnnotation, Set consequenceTypes) { + if (consequenceTypes != null) { + for (ConsequenceType consequenceType : consequenceTypes) { + Score sift = consequenceType.getSift(); + if (sift != null) { + variantAnnotation.addSift(sift.getScore()); + } + + Score polyphen = consequenceType.getPolyphen(); + if (polyphen != null) { + 
variantAnnotation.addPolyphen(polyphen.getScore()); + } + + variantAnnotation.addsoAccessions(consequenceType.getSoAccessions()); + } + } + } + + /** + * Return the min and max in case of multiple ProteinSubstitutionScores (sift/polyphen...) + */ + private Set calculateRangeOfScores(Set scores) { + if (scores.size() <= 1) { + return scores; + } else { + return new HashSet<>(Arrays.asList(Collections.min(scores), Collections.max(scores))); + } + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java similarity index 53% rename from src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriter.java rename to src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index bf2edd9e1..cd1488719 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -25,6 +25,9 @@ import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; +import uk.ac.ebi.eva.commons.models.converters.data.VariantSourceEntryToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.utils.MongoDBHelper; import java.util.ArrayList; @@ -32,11 +35,11 @@ import java.util.List; import java.util.Map; -import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; -import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.XREFS_FIELD; /** - * Write a list of {@link VariantAnnotation} into 
MongoDB + * Write a list of {@link Annotation} into MongoDB *

* A new annotation is added in the existing document. * In case of two annotations (or more) in the same variant the other annotations are appended: @@ -57,8 +60,12 @@ * { "id" : "ENST00000608838", "src" : "ensemblTranscript" }, * { "id" : "ENSG00000178591", "src" : "ensemblGene" */ -public class VepAnnotationMongoWriter extends MongoItemWriter { - private static final Logger logger = LoggerFactory.getLogger(VepAnnotationMongoWriter.class); +public class AnnotationMongoWriter extends MongoItemWriter { + private static final Logger logger = LoggerFactory.getLogger(AnnotationMongoWriter.class); + + private static final String ANNOTATION_XREF_ID_FIELD = "xrefs.id"; + + private static final String ANNOTATION_CT_SO_FIELD = "ct.so"; private MongoOperations mongoOperations; @@ -68,10 +75,10 @@ public class VepAnnotationMongoWriter extends MongoItemWriter private String vepCacheVersion; - public VepAnnotationMongoWriter(MongoOperations mongoOperations, - String collection, - String vepVersion, - String vepCacheVersion) { + public AnnotationMongoWriter(MongoOperations mongoOperations, + String collection, + String vepVersion, + String vepCacheVersion) { super(); Assert.notNull(mongoOperations, "A Mongo instance is required"); Assert.hasText(collection, "A collection name is required"); @@ -83,67 +90,68 @@ public VepAnnotationMongoWriter(MongoOperations mongoOperations, this.collection = collection; this.vepVersion = vepVersion; this.vepCacheVersion = vepCacheVersion; - } - - private Map> groupVariantAnnotationById(List variantAnnotations) { - Map> variantAnnotationsByStorageId = new HashMap<>(); - for (VariantAnnotation variantAnnotation : variantAnnotations) { - String id = buildAnnotationtorageId(variantAnnotation); - - variantAnnotationsByStorageId.putIfAbsent(id, new ArrayList<>()); - variantAnnotationsByStorageId.get(id).add(variantAnnotation); - } - return variantAnnotationsByStorageId; + createIndexes(); } @Override - protected void doWrite(List variantAnnotations) { 
- Map> variantAnnotationsByStorageId = groupVariantAnnotationById( - variantAnnotations); + protected void doWrite(List annotations) { + Map> annotationsByStorageId = groupAnnotationById(annotations); - for (Map.Entry> annotationsIdEntry : variantAnnotationsByStorageId.entrySet()) { + for (Map.Entry> annotationsIdEntry : annotationsByStorageId.entrySet()) { String storageId = annotationsIdEntry.getKey(); - List annotations = annotationsIdEntry.getValue(); + List annotationsById = annotationsIdEntry.getValue(); - VariantAnnotation variantAnnotation = annotations.get(0); + Annotation annotation = annotationsById.get(0); - if (annotations.size() > 1) { - variantAnnotation = concatenateOtherAnnotations( - variantAnnotation, annotations.subList(1, annotations.size())); + if (annotationsById.size() > 1) { + annotation = concatenateOtherAnnotations( + annotation, annotationsById.subList(1, annotationsById.size())); } - variantAnnotation.setId(storageId); - variantAnnotation.setEnsmblVersion(vepVersion); - variantAnnotation.setVepCacheVersion(vepCacheVersion); + annotation.setId(storageId); + annotation.setEnsmblVersion(vepVersion); + annotation.setVepCacheVersion(vepCacheVersion); - variantAnnotation.extractXrefsFromConsequenceTypes(); + annotation.generateXrefsFromConsequenceTypes(); - writeVariantAnnotationInMongoDb(storageId, variantAnnotation); + writeAnnotationInMongoDb(storageId, annotation); } } + private Map> groupAnnotationById(List annotations) { + Map> annotationsByStorageId = new HashMap<>(); + for (Annotation annotation : annotations) { + String id = buildAnnotationtorageId(annotation); + + annotationsByStorageId.putIfAbsent(id, new ArrayList<>()); + annotationsByStorageId.get(id).add(annotation); + } + + return annotationsByStorageId; + } + /** - * Append multiple annotation into a single {@link VariantAnnotation} + * Append multiple annotation into a single {@link Annotation} * Updated fields are ConsequenceTypes and Hgvs * - * @param variantAnnotation 
annotation where other annotations will be appended + * @param annotation annotation where other annotations will be appended * @param otherAnnotationsToConcatenate annotations to be appended - * @return a single {@link VariantAnnotation} ready to be persisted + * @return a single {@link Annotation} ready to be persisted */ - private VariantAnnotation concatenateOtherAnnotations(VariantAnnotation variantAnnotation, - List otherAnnotationsToConcatenate) { + private Annotation concatenateOtherAnnotations(Annotation annotation, + List otherAnnotationsToConcatenate) { - for (VariantAnnotation annotationToAppend : otherAnnotationsToConcatenate) { + for (Annotation annotationToAppend : otherAnnotationsToConcatenate) { if (annotationToAppend.getConsequenceTypes() != null) { - variantAnnotation.getConsequenceTypes().addAll(annotationToAppend.getConsequenceTypes()); + annotation.getConsequenceTypes().addAll(annotationToAppend.getConsequenceTypes()); } } - return variantAnnotation; + return annotation; } - private void writeVariantAnnotationInMongoDb(String storageId, VariantAnnotation variantAnnotation) { + private void writeAnnotationInMongoDb(String storageId, Annotation annotation) { logger.trace("Writing annotations into mongo id: {}", storageId); BasicDBObject id = new BasicDBObject("_id", storageId); @@ -151,22 +159,30 @@ private void writeVariantAnnotationInMongoDb(String storageId, VariantAnnotation if (mongoOperations.exists(new BasicQuery(id), collection)) { BasicDBObject updateConsequenceTypes = new BasicDBObject("$addToSet", new BasicDBObject(CONSEQUENCE_TYPE_FIELD, - new BasicDBObject("$each", variantAnnotation.getConsequenceTypes()))); + new BasicDBObject("$each",annotation.getConsequenceTypes()))); BasicDBObject updateXrefs = new BasicDBObject("$addToSet", - new BasicDBObject(XREFS_FIELD, new BasicDBObject("$each", variantAnnotation.getXrefs()))); + new BasicDBObject(XREFS_FIELD, new BasicDBObject("$each", annotation.getXrefs()))); mongoOperations.upsert(new 
BasicQuery(id), new BasicUpdate(updateConsequenceTypes), collection); mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateXrefs), collection); } else { - mongoOperations.save(variantAnnotation, collection); + mongoOperations.save(annotation, collection); } } - private String buildAnnotationtorageId(VariantAnnotation variantAnnotation) { - return MongoDBHelper.buildAnnotationStorageId(variantAnnotation.getChromosome(), variantAnnotation.getStart(), - variantAnnotation.getReferenceAllele(), - variantAnnotation.getAlternativeAllele(), vepVersion, + private String buildAnnotationtorageId(Annotation annotation) { + return MongoDBHelper.buildAnnotationStorageId(annotation.getChromosome(), annotation.getStart(), + annotation.getReferenceAllele(), + annotation.getAlternativeAllele(), vepVersion, vepCacheVersion); } + private void createIndexes() { + mongoOperations.getCollection(collection).createIndex( + new BasicDBObject(ANNOTATION_XREF_ID_FIELD, 1), + new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); + mongoOperations.getCollection(collection).createIndex( + new BasicDBObject(ANNOTATION_CT_SO_FIELD, 1), + new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); + } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java index a8683a320..1778e8311 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java @@ -41,9 +41,7 @@ public class VariantMongoWriter extends MongoItemWriter { private static final Logger logger = LoggerFactory.getLogger(VariantMongoWriter.class); - private static final String ANNOTATION_CT_SO_FIELD = "annot.ct.so"; - - private static final String ANNOTATION_XREF_ID_FIELD = "annot.xrefs.id"; + private static final String VARIANT_ANNOTATION_SO_FIELD = "so"; private final MongoOperations mongoOperations; @@ -118,10 +116,7 @@ 
private void createIndexes() { new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); mongoOperations.getCollection(collection).createIndex( - new BasicDBObject(ANNOTATION_XREF_ID_FIELD, 1), - new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); - mongoOperations.getCollection(collection).createIndex( - new BasicDBObject(ANNOTATION_CT_SO_FIELD, 1), + new BasicDBObject(VARIANT_ANNOTATION_SO_FIELD, 1), new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java index 372d622be..6c16c097e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java @@ -22,29 +22,28 @@ import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.core.configuration.annotation.StepBuilderFactory; import org.springframework.batch.item.ItemStreamReader; -import org.springframework.batch.item.ItemWriter; import org.springframework.batch.item.file.FlatFileParseException; +import org.springframework.batch.item.support.CompositeItemWriter; import org.springframework.batch.repeat.policy.SimpleCompletionPolicy; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; - -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; +import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.readers.VariantAnnotationReaderConfiguration; -import 
uk.ac.ebi.eva.pipeline.configuration.writers.VariantAnnotationWriterConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.readers.AnnotationReaderConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.writers.AnnotationCompositeWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.AnnotationFlatFileReader; -import uk.ac.ebi.eva.pipeline.io.writers.VepAnnotationMongoWriter; +import uk.ac.ebi.eva.pipeline.io.writers.AnnotationMongoWriter; import uk.ac.ebi.eva.pipeline.listeners.AnnotationLoaderStepStatisticsListener; import uk.ac.ebi.eva.pipeline.listeners.SkippedItemListener; import uk.ac.ebi.eva.pipeline.listeners.StepProgressListener; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.COMPOSITE_ANNOTATION_VARIANT_WRITER; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_VEP_ANNOTATION_STEP; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_ANNOTATION_READER; -import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_ANNOTATION_WRITER; /** * This step loads annotations into MongoDB. @@ -57,24 +56,23 @@ * 20_60419_A/G 20:60419 G - - - intergenic_variant - - - - - - * 20_60479_C/T 20:60479 T - - - intergenic_variant - - - - - rs149529999 GMAF=T:0.0018;AFR_MAF=T:0.01;AMR_MAF=T:0.0028 *

- * each line of the file is loaded with {@link AnnotationFlatFileReader} into a {@link VariantAnnotation} and then sent - * to mongo with {@link VepAnnotationMongoWriter}. + * each line of the file is loaded with {@link AnnotationFlatFileReader} into a {@link Annotation} and then sent + * to mongo with {@link AnnotationMongoWriter}. */ @Configuration @EnableBatchProcessing -@Import({VariantAnnotationReaderConfiguration.class, VariantAnnotationWriterConfiguration.class, - ChunkSizeCompletionPolicyConfiguration.class}) +@Import({AnnotationReaderConfiguration.class, AnnotationCompositeWriterConfiguration.class, ChunkSizeCompletionPolicyConfiguration.class}) public class AnnotationLoaderStep { private static final Logger logger = LoggerFactory.getLogger(AnnotationLoaderStep.class); @Autowired @Qualifier(VARIANT_ANNOTATION_READER) - private ItemStreamReader variantAnnotationReader; + private ItemStreamReader annotationReader; @Autowired - @Qualifier(VARIANT_ANNOTATION_WRITER) - private ItemWriter variantAnnotationItemWriter; + @Qualifier(COMPOSITE_ANNOTATION_VARIANT_WRITER) + private CompositeItemWriter compositeAnnotationVariantItemWriter; @Bean(LOAD_VEP_ANNOTATION_STEP) public Step loadVepAnnotationStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, @@ -82,9 +80,9 @@ public Step loadVepAnnotationStep(StepBuilderFactory stepBuilderFactory, JobOpti logger.debug("Building '" + LOAD_VEP_ANNOTATION_STEP + "'"); return stepBuilderFactory.get(LOAD_VEP_ANNOTATION_STEP) - .chunk(chunkSizeCompletionPolicy) - .reader(variantAnnotationReader) - .writer(variantAnnotationItemWriter) + .chunk(chunkSizeCompletionPolicy) + .reader(annotationReader) + .writer(compositeAnnotationVariantItemWriter) .faultTolerant().skipLimit(50).skip(FlatFileParseException.class) .allowStartIfComplete(jobOptions.isAllowStartIfComplete()) .listener(new SkippedItemListener()) diff --git a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java 
b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java index 2bb457e32..14829ef6e 100644 --- a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java +++ b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java @@ -19,6 +19,7 @@ import org.opencb.commons.utils.CryptoUtils; import uk.ac.ebi.eva.commons.models.data.Variant; +import uk.ac.ebi.eva.commons.models.data.Annotation; import java.net.UnknownHostException; import java.util.LinkedList; @@ -55,6 +56,10 @@ public static String buildVariantStorageId(Variant v) { return buildVariantStorageId(v.getChromosome(), v.getStart(), v.getReference(), v.getAlternate()); } + public static String buildVariantStorageId(Annotation va) { + return buildVariantStorageId(va.getChromosome(), va.getStart(), va.getReferenceAllele(), va.getAlternativeAllele()); + } + /** * From org.opencb.opencga.storage.mongodb.variant.VariantToDBObjectConverter * #buildVariantStorageId(java.lang.String, int, java.lang.String, java.lang.String) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java index 3ce9abb0d..e49835fd3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java @@ -17,9 +17,9 @@ import org.junit.Test; +import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.commons.models.data.ConsequenceType; import uk.ac.ebi.eva.commons.models.data.Score; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.test.data.VepOutputContent; import java.util.Set; @@ -31,7 +31,7 @@ /** * {@link AnnotationLineMapper} * input: an annotation line from VEP - * output: a VariantAnnotation with at least: consequence types + * output: a Annotation with at least: consequence types */ public class AnnotationLineMapperTest { @@ -39,16 +39,16 @@ public class AnnotationLineMapperTest { public 
void shouldParseAllDefaultFieldsInVepOutput() throws Exception { AnnotationLineMapper lineMapper = new AnnotationLineMapper(); for (String annotLine : VepOutputContent.vepOutputContent.split("\n")) { - VariantAnnotation variantAnnotation = lineMapper.mapLine(annotLine, 0); - assertNotNull(variantAnnotation.getConsequenceTypes()); + Annotation annotation = lineMapper.mapLine(annotLine, 0); + assertNotNull(annotation.getConsequenceTypes()); } } @Test public void shouldParseAllTranscriptFieldsInVepOutput() { AnnotationLineMapper lineMapper = new AnnotationLineMapper(); - VariantAnnotation variantAnnotation = lineMapper.mapLine(VepOutputContent.vepOutputContentTranscriptFields, 0); - Set consequenceTypes = variantAnnotation.getConsequenceTypes(); + Annotation annotation = lineMapper.mapLine(VepOutputContent.vepOutputContentTranscriptFields, 0); + Set consequenceTypes = annotation.getConsequenceTypes(); assertNotNull(consequenceTypes); assertEquals(1, consequenceTypes.size()); @@ -65,8 +65,8 @@ public void shouldParseAllTranscriptFieldsInVepOutput() { @Test public void shouldParseVepOutputWithoutTranscript() { AnnotationLineMapper lineMapper = new AnnotationLineMapper(); - VariantAnnotation variantAnnotation = lineMapper.mapLine(VepOutputContent.vepOutputContentWithOutTranscript, 0); - Set consequenceTypes = variantAnnotation.getConsequenceTypes(); + Annotation annotation = lineMapper.mapLine(VepOutputContent.vepOutputContentWithOutTranscript, 0); + Set consequenceTypes = annotation.getConsequenceTypes(); assertNotNull(consequenceTypes); assertEquals(1, consequenceTypes.size()); @@ -90,10 +90,10 @@ public void shouldNotParseVepOutputWithMalformedCoordinates() { @Test public void shouldParseVepOutputWithChromosomeIdWithUnderscore() { AnnotationLineMapper lineMapper = new AnnotationLineMapper(); - VariantAnnotation variantAnnotation = lineMapper + Annotation annotation = lineMapper .mapLine(VepOutputContent.vepOutputContentChromosomeIdWithUnderscore, 0); - 
assertEquals("20_1", variantAnnotation.getChromosome()); + assertEquals("20_1", annotation.getChromosome()); } @Test(expected = ArrayIndexOutOfBoundsException.class) @@ -105,9 +105,9 @@ public void shouldNotParseVepOutputWithMalformedVariantFields() { @Test public void shouldParseVepOutputWithExtraFields() { AnnotationLineMapper lineMapper = new AnnotationLineMapper(); - VariantAnnotation variantAnnotation = lineMapper.mapLine(VepOutputContent.vepOutputContentWithExtraFields, 0); + Annotation annotation = lineMapper.mapLine(VepOutputContent.vepOutputContentWithExtraFieldsSingleAnnotation, 0); - Set consequenceTypes = variantAnnotation.getConsequenceTypes(); + Set consequenceTypes = annotation.getConsequenceTypes(); assertNotNull(consequenceTypes); assertEquals(1, consequenceTypes.size()); @@ -115,7 +115,7 @@ public void shouldParseVepOutputWithExtraFields() { ConsequenceType consequenceType = consequenceTypes.iterator().next(); Score polyphen = consequenceType.getPolyphen(); - Score sifts = consequenceType.getSifts(); + Score sifts = consequenceType.getSift(); assertNotNull(polyphen); assertNotNull(sifts); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java index 46aff63d9..efa4aad1b 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java @@ -21,7 +21,7 @@ import org.springframework.batch.item.file.FlatFileParseException; import org.springframework.batch.test.MetaDataInstanceFactory; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; +import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.test.data.VepOutputContent; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; @@ -35,7 +35,7 @@ /** * {@link AnnotationFlatFileReader} * input: a 
File written by VEP - * output: a VariantAnnotation each time its `.read()` is called + * output: a Annotation each time its `.read()` is called *

* incorrect input lines should not make the reader fail. */ @@ -55,12 +55,12 @@ public void shouldReadAllLinesInVepOutput() throws Exception { annotationFlatFileReader.setSaveState(false); annotationFlatFileReader.open(executionContext); - VariantAnnotation variantAnnotation; + Annotation annotation; int consequenceTypeCount = 0; int count = 0; - while ((variantAnnotation = annotationFlatFileReader.read()) != null) { + while ((annotation = annotationFlatFileReader.read()) != null) { count++; - if (variantAnnotation.getConsequenceTypes() != null && !variantAnnotation.getConsequenceTypes().isEmpty()) { + if (annotation.getConsequenceTypes() != null && !annotation.getConsequenceTypes().isEmpty()) { consequenceTypeCount++; } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java new file mode 100644 index 000000000..56bb8fac1 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -0,0 +1,162 @@ +/* + * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.io.writers; + +import com.mongodb.BasicDBList; +import com.mongodb.DBCursor; +import com.mongodb.DBObject; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.data.mongodb.core.mapping.MongoMappingContext; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.junit4.SpringRunner; + +import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.pipeline.Application; +import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; +import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; +import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.GENE_NAME_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.test.data.VepOutputContent.vepOutputContentWithExtraFields; +import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; + +/** + * {@link AnnotationInVariantMongoWriter} + */ +@RunWith(SpringRunner.class) +@ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) +@TestPropertySource({"classpath:test-mongo.properties"}) +@ContextConfiguration(classes = {MongoConnection.class, 
MongoMappingContext.class}) +public class AnnotationInVariantMongoWriterTest { + private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; + + private static final String COLLECTION_VARIANTS_NAME = "variants"; + + @Autowired + private MongoConnection mongoConnection; + + @Autowired + private MongoMappingContext mongoMappingContext; + + @Rule + public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); + + private AnnotationInVariantMongoWriter annotationInVariantMongoWriter; + + private uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper AnnotationLineMapper; + + @Before + public void setUp() throws Exception { + AnnotationLineMapper = new AnnotationLineMapper(); + } + + @Test + public void shouldWriteAllFieldsIntoMongoDb() throws Exception { + String databaseName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + + //prepare annotation sets + List annotationSet1 = new ArrayList<>(); + List annotationSet2 = new ArrayList<>(); + List annotationSet3 = new ArrayList<>(); + + String[] vepOutputLines = vepOutputContentWithExtraFields.split("\n"); + + for (String annotLine : Arrays.copyOfRange(vepOutputLines, 0, 2)) { + annotationSet1.add(AnnotationLineMapper.mapLine(annotLine, 0)); + } + + for (String annotLine : Arrays.copyOfRange(vepOutputLines, 2, 4)) { + annotationSet2.add(AnnotationLineMapper.mapLine(annotLine, 0)); + } + + for (String annotLine : Arrays.copyOfRange(vepOutputLines, 4, 7)) { + annotationSet3.add(AnnotationLineMapper.mapLine(annotLine, 0)); + } + + // load the annotation + MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + annotationInVariantMongoWriter = new AnnotationInVariantMongoWriter(operations, COLLECTION_VARIANTS_NAME); + + annotationInVariantMongoWriter.write(annotationSet1); + annotationInVariantMongoWriter.write(annotationSet2); + annotationInVariantMongoWriter.write(annotationSet3); + + // and finally check that 
variant documents have the annotations fields + DBCursor cursor = mongoRule.getCollection(databaseName, COLLECTION_VARIANTS_NAME).find(); + + while (cursor.hasNext()) { + DBObject variant = cursor.next(); + String id = (String) variant.get("_id"); + + if (id.equals("20_63360_C_T")) { + BasicDBList sifts = (BasicDBList) variant.get(SIFT_FIELD); + assertNotNull(sifts); + assertTrue(sifts.containsAll(Arrays.asList(0.1, 0.2))); + + BasicDBList so = (BasicDBList) variant.get(SO_ACCESSION_FIELD); + assertNotNull(so); + assertTrue(so.contains(1631)); + + BasicDBList polyphen = (BasicDBList) variant.get(POLYPHEN_FIELD); + assertNotNull(polyphen); + assertTrue(polyphen.containsAll(Arrays.asList(0.1, 0.2))); + + BasicDBList geneNames = (BasicDBList) variant.get(GENE_NAME_FIELD); + assertNotNull(geneNames); + assertTrue(geneNames.containsAll( + Arrays.asList("ENST00000382410", "DEFB125", "ENST00000608838", "ENSG00000178591"))); + } + + if (id.equals("20_63399_G_A")){ + BasicDBList sifts = (BasicDBList) variant.get(SIFT_FIELD); + assertNotNull(sifts); + assertTrue(sifts.size() == 1); + + BasicDBList so = (BasicDBList) variant.get(SO_ACCESSION_FIELD); + assertNotNull(so); + assertTrue(so.size() == 1); + + BasicDBList polyphen = (BasicDBList) variant.get(POLYPHEN_FIELD); + assertNotNull(polyphen); + assertTrue(polyphen.size() == 1); + + BasicDBList geneNames = (BasicDBList) variant.get(GENE_NAME_FIELD); + assertNotNull(geneNames); + assertTrue(geneNames.size() == 4); + } + } + cursor.close(); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java similarity index 69% rename from src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index c2c085d60..ccd585b62 100644 --- 
a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -17,6 +17,7 @@ import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; +import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.DBObject; import org.junit.Before; @@ -30,44 +31,48 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; +import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.commons.models.data.ConsequenceType; import uk.ac.ebi.eva.commons.models.data.Score; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.MongoDBHelper; +import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; -import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.SCORE_DESCRIPTION_FIELD; -import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.SCORE_SCORE_FIELD; -import static 
uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.SIFT_FIELD; -import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SCORE_DESCRIPTION_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SCORE_SCORE_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.XREFS_FIELD; import static uk.ac.ebi.eva.test.data.VepOutputContent.vepOutputContent; /** - * {@link VepAnnotationMongoWriter} - * input: a List of VariantAnnotation to each call of `.write()` - * output: all the VariantAnnotations get written in mongo, with at least the + * {@link AnnotationMongoWriter} + * input: a List of Annotation to each call of `.write()` + * output: all the Annotations get written in mongo, with at least the * "consequence types" annotations set */ @RunWith(SpringRunner.class) @ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) @TestPropertySource({"classpath:test-mongo.properties"}) @ContextConfiguration(classes = {MongoConnection.class, MongoMappingContext.class}) -public class VepAnnotationMongoWriterTest { +public class AnnotationMongoWriterTest { private static final String COLLECTION_ANNOTATIONS_NAME = "annotations"; private static final String VEP_VERSION = "1"; @@ -83,7 +88,7 @@ public class VepAnnotationMongoWriterTest { @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - private VepAnnotationMongoWriter annotationWriter; + private AnnotationMongoWriter annotationWriter; private AnnotationLineMapper AnnotationLineMapper; @@ -96,7 +101,7 @@ public void setUp() throws Exception { public void shouldWriteAllFieldsIntoMongoDb() 
throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - List annotations = new ArrayList<>(); + List annotations = new ArrayList<>(); for (String annotLine : vepOutputContent.split("\n")) { annotations.add(AnnotationLineMapper.mapLine(annotLine, 0)); } @@ -104,8 +109,8 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { // load the annotation MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, mongoMappingContext); - annotationWriter = new VepAnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, - VEP_CACHE_VERSION); + annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, + VEP_CACHE_VERSION); annotationWriter.write(annotations); // and finally check that documents in annotation collection have annotations @@ -126,7 +131,7 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { } /** - * Test that every VariantAnnotation gets written, even if the same variant receives different annotation from + * Test that every Annotation gets written, even if the same variant receives different annotation from * different batches. 
* * @throws Exception if the annotationWriter.write fails, or the DBs cleaning fails @@ -134,15 +139,11 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { @Test public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - List annotations = new ArrayList<>(); - for (String annotLine : vepOutputContent.split("\n")) { - annotations.add(AnnotationLineMapper.mapLine(annotLine, 0)); - } //prepare annotation sets - List annotationSet1 = new ArrayList<>(); - List annotationSet2 = new ArrayList<>(); - List annotationSet3 = new ArrayList<>(); + List annotationSet1 = new ArrayList<>(); + List annotationSet2 = new ArrayList<>(); + List annotationSet3 = new ArrayList<>(); String[] vepOutputLines = vepOutputContent.split("\n"); @@ -161,8 +162,8 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce // load the annotation MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, mongoMappingContext); - annotationWriter = new VepAnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, - VEP_CACHE_VERSION); + annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, + VEP_CACHE_VERSION); annotationWriter.write(annotationSet1); annotationWriter.write(annotationSet2); @@ -186,28 +187,28 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce public void shouldWriteSubstitutionScoresIntoMongoDb() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - VariantAnnotation variantAnnotation = new VariantAnnotation("X", 1, 10, "A", "T"); + Annotation annotation = new Annotation("X", 1, 10, "A", "T"); Score siftScore = new Score(0.02, "deleterious"); Score polyphenScore = new Score(0.846, "possibly_damaging"); ConsequenceType consequenceType = new ConsequenceType(); - 
consequenceType.setSifts(siftScore); + consequenceType.setSift(siftScore); consequenceType.setPolyphen(polyphenScore); - variantAnnotation.setConsequenceTypes(new HashSet<>(Collections.singletonList(consequenceType))); + annotation.setConsequenceTypes(new HashSet<>(Collections.singletonList(consequenceType))); MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, mongoMappingContext); - annotationWriter = new VepAnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, - VEP_CACHE_VERSION); + annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, + VEP_CACHE_VERSION); - annotationWriter.write(Collections.singletonList(variantAnnotation)); + annotationWriter.write(Collections.singletonList(annotation)); DBCursor cursor = mongoRule.getCollection(databaseName, COLLECTION_ANNOTATIONS_NAME).find(); while (cursor.hasNext()) { - DBObject annotation = cursor.next(); - BasicDBList consequenceTypes = (BasicDBList) annotation.get(CONSEQUENCE_TYPE_FIELD); + DBObject annotationField = cursor.next(); + BasicDBList consequenceTypes = (BasicDBList) annotationField.get(CONSEQUENCE_TYPE_FIELD); assertNotNull(consequenceTypes); @@ -225,4 +226,23 @@ public void shouldWriteSubstitutionScoresIntoMongoDb() throws Exception { } } + @Test + public void indexesShouldBeCreatedInBackground() throws UnknownHostException { + String dbName = mongoRule.getRandomTemporaryDatabaseName(); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, mongoMappingContext); + DBCollection dbCollection = mongoOperations.getCollection(COLLECTION_ANNOTATIONS_NAME); + + AnnotationMongoWriter writer = new AnnotationMongoWriter(mongoOperations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, VEP_CACHE_VERSION); + + List indexInfo = dbCollection.getIndexInfo(); + + Set createdIndexes = indexInfo.stream().map(index -> index.get("name").toString()).collect(Collectors.toSet()); 
+ Set expectedIndexes = new HashSet<>(); + expectedIndexes.addAll(Arrays.asList("ct.so_1", "xrefs.id_1", "_id_")); + + assertEquals(expectedIndexes, createdIndexes); + + indexInfo.stream().filter(index -> !("_id_".equals(index.get("name").toString()))).forEach(index -> assertEquals("true", index.get(MongoDBHelper.BACKGROUND_INDEX).toString())); + } + } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java index ae2218886..e313fe092 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java @@ -77,7 +77,7 @@ public class VariantMongoWriterTest { public void noVariantsNothingShouldBeWritten() throws UnknownHostException { String dbName = mongoRule.getRandomTemporaryDatabaseName(); MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, - mongoMappingContext); + mongoMappingContext); DBCollection dbCollection = mongoOperations.getCollection(collectionName); VariantMongoWriter variantMongoWriter = new VariantMongoWriter(collectionName, mongoOperations, false, false); @@ -93,7 +93,7 @@ public void variantsShouldBeWrittenIntoMongoDb() throws Exception { String dbName = mongoRule.getRandomTemporaryDatabaseName(); MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, - mongoMappingContext); + mongoMappingContext); DBCollection dbCollection = mongoOperations.getCollection(collectionName); BasicDBObject dbObject = new BasicDBObject(); @@ -109,7 +109,7 @@ public void variantsShouldBeWrittenIntoMongoDb() throws Exception { public void indexesShouldBeCreatedInBackground() throws UnknownHostException { String dbName = mongoRule.getRandomTemporaryDatabaseName(); MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, - 
mongoMappingContext); + mongoMappingContext); DBCollection dbCollection = mongoOperations.getCollection(collectionName); VariantMongoWriter variantMongoWriter = new VariantMongoWriter(collectionName, mongoOperations, false, false); @@ -119,13 +119,13 @@ public void indexesShouldBeCreatedInBackground() throws UnknownHostException { Set createdIndexes = indexInfo.stream().map(index -> index.get("name").toString()) .collect(Collectors.toSet()); Set expectedIndexes = new HashSet<>(); - expectedIndexes.addAll(Arrays.asList("annot.ct.so_1", "annot.xrefs.id_1", "chr_1_start_1_end_1", - "files.sid_1_files.fid_1", "_id_", "ids_1")); + expectedIndexes.addAll(Arrays.asList("so_1", "chr_1_start_1_end_1", "files.sid_1_files.fid_1", "_id_", + "ids_1")); assertEquals(expectedIndexes, createdIndexes); indexInfo.stream().filter(index -> !("_id_".equals(index.get("name").toString()))) - .forEach(index -> assertEquals("true", - index.get(MongoDBHelper.BACKGROUND_INDEX).toString())); + .forEach(index -> assertEquals("true", index.get(MongoDBHelper.BACKGROUND_INDEX).toString())); + } @Test @@ -135,7 +135,7 @@ public void writeTwiceSameVariantShouldUpdate() throws Exception { String dbName = mongoRule.getRandomTemporaryDatabaseName(); MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, - mongoMappingContext); + mongoMappingContext); VariantMongoWriter variantMongoWriter = new VariantMongoWriter(collectionName, mongoOperations, false, false); variantMongoWriter.write(Collections.singletonList(variant1)); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriterTest.java index 4a5ec6fd5..e416df9e3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriterTest.java @@ -48,7 +48,7 @@ public class VepAnnotationFileWriterTest { 
/** * the mockvep writes an extra line as if some variant had two annotations, to check that the writer is not assuming - * that the count of variants to annotate is the same as variantAnnotations to write in the file. + * that the count of variants to annotate is the same as annotations to write in the file. */ private static final int EXTRA_ANNOTATIONS = 1; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java index 610164851..9d99d0489 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java @@ -45,7 +45,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; import static uk.ac.ebi.eva.utils.FileUtils.getResource; @@ -132,15 +132,16 @@ public void allAnnotationStepsShouldBeExecuted() throws Exception { assertNotNull(consequenceTypes); consequenceTypeCount += consequenceTypes.size(); } + cursor.close(); assertEquals(299, annotationCount); assertEquals(536, consequenceTypeCount); //check that one line is skipped because malformed - List variantAnnotationLoadStepExecution = jobExecution.getStepExecutions().stream() + List annotationLoadStepExecution = jobExecution.getStepExecutions().stream() .filter(stepExecution -> stepExecution.getStepName().equals(BeanNames.LOAD_VEP_ANNOTATION_STEP)) .collect(Collectors.toList()); - assertEquals(1, variantAnnotationLoadStepExecution.get(0).getReadSkipCount()); + assertEquals(1, annotationLoadStepExecution.get(0).getReadSkipCount()); } @Test diff --git 
a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java index 97cc31b8d..89bd92d3d 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java @@ -15,16 +15,13 @@ */ package uk.ac.ebi.eva.pipeline.jobs.steps; +import com.mongodb.BasicDBList; import com.mongodb.DBCursor; import com.mongodb.DBObject; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.annotation.VariantAnnotation; -import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantAnnotationConverter; -import org.springframework.batch.core.BatchStatus; -import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; @@ -33,7 +30,6 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.AnnotationJob; @@ -41,18 +37,21 @@ import uk.ac.ebi.eva.test.data.VepOutputContent; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; -import uk.ac.ebi.eva.test.utils.JobTestUtils; import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; import uk.ac.ebi.eva.utils.URLHelper; import java.nio.file.Paths; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; +import static 
uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.GENE_NAME_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SO_ACCESSION_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; - /** * Test for {@link AnnotationLoaderStep}. In the context it is loaded {@link AnnotationJob} * because {@link JobLauncherTestUtils} require one {@link org.springframework.batch.core.Job} to be present in order @@ -68,6 +67,9 @@ public class AnnotationLoaderStepTest { private static final String COLLECTION_VARIANTS_NAME = "variants"; private static final String INPUT_STUDY_ID = "1"; private static final String INPUT_VCF_ID = "1"; + private static final String VEP_VERSION = "1"; + private static final String VEP_CACHE_VERSION = "1"; + @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); @@ -92,31 +94,45 @@ public void shouldLoadAllAnnotations() throws Exception { .inputStudyId(INPUT_STUDY_ID) .inputVcfId(INPUT_VCF_ID) .outputDirAnnotation(annotationFolder) + .vepCacheVersion(VEP_CACHE_VERSION) + .vepVersion(VEP_VERSION) .toJobParameters(); JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_VEP_ANNOTATION_STEP, jobParameters); assertCompleted(jobExecution); - //check that documents have the annotation - DBCursor cursor = mongoRule.getCollection(dbName, COLLECTION_ANNOTATIONS_NAME).find(); - - DBObjectToVariantAnnotationConverter converter = new DBObjectToVariantAnnotationConverter(); + //check that the annotation collection has been populated properly + DBCursor annotationCursor = mongoRule.getCollection(dbName, COLLECTION_ANNOTATIONS_NAME).find(); int annotationCount = 0; int consequenceTypeCount = 0; - while (cursor.hasNext()) { + while 
(annotationCursor.hasNext()) { annotationCount++; - DBObject dbObject = cursor.next(); + DBObject dbObject = annotationCursor.next(); if (dbObject != null) { - VariantAnnotation annotation = converter.convertToDataModelType(dbObject); - Assert.assertNotNull(annotation.getConsequenceTypes()); - consequenceTypeCount += annotation.getConsequenceTypes().size(); + BasicDBList consequenceTypes = ((BasicDBList) dbObject.get(CONSEQUENCE_TYPE_FIELD)); + Assert.assertNotNull(consequenceTypes); + consequenceTypeCount += consequenceTypes.size(); } } + annotationCursor.close(); assertTrue("Annotations not found", annotationCount == 4); assertTrue("ConsequenceType not found", consequenceTypeCount == 7); + + //check that the annotation fields are present in the variant + DBCursor variantCursor = mongoRule.getCollection(dbName, COLLECTION_VARIANTS_NAME).find(); + while (variantCursor.hasNext()) { + DBObject variant = variantCursor.next(); + if (variant.get("_id").equals("20_63351_A_G")) { + assertNotNull(variant.get(SIFT_FIELD)); + assertNotNull(variant.get(SO_ACCESSION_FIELD)); + assertNotNull(variant.get(POLYPHEN_FIELD)); + assertNotNull(variant.get(GENE_NAME_FIELD)); + } + } + variantCursor.close(); } } diff --git a/src/test/java/uk/ac/ebi/eva/test/data/VariantAnnotationData.java b/src/test/java/uk/ac/ebi/eva/test/data/AnnotationData.java similarity index 98% rename from src/test/java/uk/ac/ebi/eva/test/data/VariantAnnotationData.java rename to src/test/java/uk/ac/ebi/eva/test/data/AnnotationData.java index a4b397a3c..87fa82657 100644 --- a/src/test/java/uk/ac/ebi/eva/test/data/VariantAnnotationData.java +++ b/src/test/java/uk/ac/ebi/eva/test/data/AnnotationData.java @@ -15,7 +15,7 @@ */ package uk.ac.ebi.eva.test.data; -public class VariantAnnotationData { +public class AnnotationData { public final static String VARIANT_ANNOTATION_JSON = "{ \n" + " \"ct\":[ \n" + " { \n" + diff --git a/src/test/java/uk/ac/ebi/eva/test/data/VepOutputContent.java 
b/src/test/java/uk/ac/ebi/eva/test/data/VepOutputContent.java index 1867739c6..e60aa7e5e 100644 --- a/src/test/java/uk/ac/ebi/eva/test/data/VepOutputContent.java +++ b/src/test/java/uk/ac/ebi/eva/test/data/VepOutputContent.java @@ -18,7 +18,7 @@ public class VepOutputContent { public static final String vepOutputContent = "" + - "20_63351_A/G\t20:63351\tG\tENSG00000178591\tENST00000608838\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs181305519\tDISTANCE=4540;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript;GMAF=G:0.0005;AFR_MAF=G:0.0020\n" + + "20_63351_A/G\t20:63351\tG\tENSG00000178591\tENST00000608838\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs181305519\tDISTANCE=4540;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript;GMAF=G:0.0005;AFR_MAF=G:0.0020;polyphen=possibly_damaging(0.859);sift=tolerated(0.07);\n" + "20_63360_C/T\t20:63360\tT\tENSG00000178591\tENST00000382410\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs186156309\tDISTANCE=4991;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS12989.2;ENSP=ENSP00000371847;SWISSPROT=DB125_HUMAN;TREMBL=B2R4E8_HUMAN;UNIPARC=UPI00001A36DE;GMAF=T:0.0014;AMR_MAF=T:0.01\n" + "20_63360_C/T\t20:63360\tT\tENSG00000178591\tENST00000608838\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs186156309\tDISTANCE=4531;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript;GMAF=T:0.0014;AMR_MAF=T:0.01\n" + "20_63399_G/A\t20:63399\tA\tENSG00000178591\tENST00000382410\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\t-\tDISTANCE=4952;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS12989.2;ENSP=ENSP00000371847;SWISSPROT=DB125_HUMAN;TREMBL=B2R4E8_HUMAN;UNIPARC=UPI00001A36DE\n" + @@ -41,7 +41,16 @@ public class VepOutputContent { public static final String vepOutputContentChromosomeIdWithUnderscore = "" + 
"20_1_63351_A/G\t20:63351\tG\tENSG00000178591\tENST00000608838\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs181305519\tDISTANCE=4540;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript;GMAF=G:0.0005;AFR_MAF=G:0.0020\n"; - public static final String vepOutputContentWithExtraFields = "" + + public static final String vepOutputContentWithExtraFieldsSingleAnnotation = "" + "20_63351_A/G\t20:63351\tG\tENSG00000178591\tENST00000608838\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs181305519\tDISTANCE=4540;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript;GMAF=G:0.0005;AFR_MAF=G:0.0020;HGVSC=hgvsc_value;HGVSP=hgvsp_value;polyphen=possibly_damaging(0.859);sift=tolerated(0.07);\n"; + public static final String vepOutputContentWithExtraFields = "" + + "20_63351_A/G\t20:63351\tG\tENSG00000178591\tENST00000608838\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs181305519\tDISTANCE=4540;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript;GMAF=G:0.0005;AFR_MAF=G:0.0020;polyphen=possibly_damaging(0.859);sift=tolerated(0.07);\n" + + "20_63360_C/T\t20:63360\tT\tENSG00000178591\tENST00000382410\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs186156309\tDISTANCE=4991;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS12989.2;ENSP=ENSP00000371847;SWISSPROT=DB125_HUMAN;TREMBL=B2R4E8_HUMAN;UNIPARC=UPI00001A36DE;GMAF=T:0.0014;AMR_MAF=T:0.01;polyphen=possibly_damaging(0.1);sift=tolerated(0.1);\n" + + "20_63360_C/T\t20:63360\tT\tENSG00000178591\tENST00000608838\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs186156309\tDISTANCE=4531;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript;GMAF=T:0.0014;AMR_MAF=T:0.01;polyphen=possibly_damaging(0.2);sift=tolerated(0.2);\n" + + 
"20_63399_G/A\t20:63399\tA\tENSG00000178591\tENST00000382410\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\t-\tDISTANCE=4952;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS12989.2;ENSP=ENSP00000371847;SWISSPROT=DB125_HUMAN;TREMBL=B2R4E8_HUMAN;UNIPARC=UPI00001A36DE;polyphen=possibly_damaging(0.859);sift=tolerated(0.07);\n" + + "20_63399_G/A\t20:63399\tA\tENSG00000178591\tENST00000608838\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\t-\tDISTANCE=4492;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript;polyphen=possibly_damaging(0.859);sift=tolerated(0.07);\n" + + "20_63426_G/T\t20:63426\tT\tENSG00000178591\tENST00000382410\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs147063585\tDISTANCE=4925;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS12989.2;ENSP=ENSP00000371847;SWISSPROT=DB125_HUMAN;TREMBL=B2R4E8_HUMAN;UNIPARC=UPI00001A36DE;GMAF=T:0.0028;AFR_MAF=T:0.01;polyphen=possibly_damaging(0.3);sift=tolerated(0.3);\n" + + "20_63426_G/T\t20:63426\tT\tENSG00000178591\tENST00000608838\tTranscript\tupstream_gene_variant\t-\t-\t-\t-\t-\trs147063585\tDISTANCE=4465;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript;GMAF=T:0.0028;AFR_MAF=T:0.01;polyphen=possibly_damaging(0.4);sift=tolerated(0.4);\n"; + } diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java index 9edfd9e1c..060534aeb 100644 --- a/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java @@ -33,7 +33,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.converters.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; 
+import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; import static uk.ac.ebi.eva.test.utils.JobTestUtils.getLines; import static uk.ac.ebi.eva.utils.FileUtils.getResource; @@ -120,10 +120,10 @@ public static void checkLoadedAnnotation(TemporaryMongoRule mongoRule, String da assertNotNull(consequenceTypes); consequenceTypeCount += consequenceTypes.size(); } + cursor.close(); assertTrue(count > 0); assertEquals(EXPECTED_VALID_ANNOTATIONS, consequenceTypeCount); - } public static void checkOutputFileLength(File vepOutputFile) throws IOException { @@ -171,10 +171,10 @@ public static void checkCreateStatsStep(File variantsStatsFile, File sourceStats public static void checkSkippedOneMalformedLine(JobExecution jobExecution) { //check that one line is skipped because malformed - List variantAnnotationLoadStepExecution = jobExecution.getStepExecutions().stream() + List annotationLoadStepExecution = jobExecution.getStepExecutions().stream() .filter(stepExecution -> stepExecution.getStepName().equals(BeanNames.LOAD_VEP_ANNOTATION_STEP)) .collect(Collectors.toList()); - assertEquals(1, variantAnnotationLoadStepExecution.get(0).getReadSkipCount()); + assertEquals(1, annotationLoadStepExecution.get(0).getReadSkipCount()); } public static File getVariantsStatsFile(String outputDirStats) throws URISyntaxException { From c312016ae9b2c93295b8200d37f4f3b61b7340a2 Mon Sep 17 00:00:00 2001 From: Diego Poggioli Date: Fri, 21 Apr 2017 15:44:32 +0100 Subject: [PATCH 04/48] Grouping mechanism of annotations moved from stream to old style method --- .../writers/AnnotationInVariantMongoWriter.java | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index a6b011fdb..5a69156bf 100644 --- 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -32,8 +32,10 @@ import uk.ac.ebi.eva.commons.models.data.Xref; import uk.ac.ebi.eva.utils.MongoDBHelper; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -76,8 +78,7 @@ public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, String co @Override protected void doWrite(List annotations) { - Map> annotationsByStorageId = annotations.stream() - .collect(Collectors.groupingBy(MongoDBHelper::buildVariantStorageId)); + Map> annotationsByStorageId = groupAnnotationById(annotations); for (Map.Entry> annotationsIdEntry : annotationsByStorageId.entrySet()) { VariantAnnotation variantAnnotation = extractFieldsFromAnnotations(annotationsIdEntry.getValue()); @@ -203,4 +204,16 @@ private Set calculateRangeOfScores(Set scores) { } } + private Map> groupAnnotationById(List annotations) { + Map> annotationsByStorageId = new HashMap<>(); + for (Annotation annotation : annotations) { + String id = MongoDBHelper.buildVariantStorageId(annotation); + + annotationsByStorageId.putIfAbsent(id, new ArrayList<>()); + annotationsByStorageId.get(id).add(annotation); + } + + return annotationsByStorageId; + } + } From f74df0ffd0ce6689605b032b0aa6801012a0a299 Mon Sep 17 00:00:00 2001 From: Diego Poggioli Date: Fri, 28 Apr 2017 16:23:47 +0100 Subject: [PATCH 05/48] Number of queries to mongo reduced --- .../eva/commons/models/data/Annotation.java | 14 +- .../models/data/VariantAnnotation.java | 40 ++++- ...nnotationInVariantWriterConfiguration.java | 8 +- .../AnnotationInVariantMongoWriter.java | 141 +++++++++++------- .../io/writers/AnnotationMongoWriter.java | 9 +- .../AnnotationInVariantMongoWriterTest.java | 35 +++-- .../pipeline/jobs/GenotypedVcfJobTest.java | 4 +- 
.../jobs/steps/AnnotationLoaderStepTest.java | 20 ++- ...elineJobLauncherCommandLineRunnerTest.java | 12 +- .../test/utils/GenotypedVcfJobTestUtils.java | 65 ++------ 10 files changed, 207 insertions(+), 141 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java index 3af9d49a5..67ac1ac75 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java @@ -29,11 +29,9 @@ /** * Slim version of {@link org.opencb.biodata.models.variant.annotation.VariantAnnotation} * Unused fields removed. - * */ @Document public class Annotation { - @Field(value = AnnotationFieldNames.CHROMOSOME_FIELD) private String chromosome; @@ -53,7 +51,7 @@ public class Annotation { private String id; @Field(value = AnnotationFieldNames.ENSEMBL_VERSION_FIELD) - private String ensmblVersion; + private String ensemblVersion; @Field(value = AnnotationFieldNames.VEP_CACHE_VERSION_FIELD) private String vepCacheVersion; @@ -140,12 +138,12 @@ public void setConsequenceTypes(Set consequenceTypes) { this.consequenceTypes = consequenceTypes; } - public String getEnsmblVersion() { - return ensmblVersion; + public String getEnsemblVersion() { + return ensemblVersion; } - public void setEnsmblVersion(String ensmblVersion) { - this.ensmblVersion = ensmblVersion; + public void setEnsemblVersion(String ensemblVersion) { + this.ensemblVersion = ensemblVersion; } public String getVepCacheVersion() { @@ -156,7 +154,7 @@ public void setVepCacheVersion(String vepCacheVersion) { this.vepCacheVersion = vepCacheVersion; } - public void generateXrefsFromConsequenceTypes(){ + public void generateXrefsFromConsequenceTypes() { for (ConsequenceType consequenceType : consequenceTypes) { if (!Strings.isNullOrEmpty(consequenceType.getGeneName())) { xrefs.add(new Xref(consequenceType.getGeneName(), "HGNC")); diff --git 
a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java index c1947881a..ae17cd6a4 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java @@ -15,19 +15,49 @@ */ package uk.ac.ebi.eva.commons.models.data; +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; +import org.springframework.util.Assert; + import java.util.Collection; import java.util.HashSet; import java.util.Set; /** - * + * Annotations of the genomic variation */ +@Document public class VariantAnnotation { + @Field(value = AnnotationFieldNames.ENSEMBL_VERSION_FIELD) + private String ensemblVersion; + + @Field(value = AnnotationFieldNames.VEP_CACHE_VERSION_FIELD) + private String vepCacheVersion; + + @Field(value = AnnotationFieldNames.SIFT_FIELD) private Set sifts = new HashSet<>(); + + @Field(value = AnnotationFieldNames.POLYPHEN_FIELD) private Set polyphens = new HashSet<>(); + + @Field(value = AnnotationFieldNames.SO_ACCESSION_FIELD) private Set soAccessions = new HashSet<>(); + + @Field(value = AnnotationFieldNames.XREFS_FIELD) private Set xrefIds = new HashSet<>(); + /** + * Make sure to specify the ensemblVersion and vepCacheVersion + * @param ensemblVersion + * @param vepCacheVersion + */ + public VariantAnnotation(String ensemblVersion, String vepCacheVersion) { + Assert.notNull(ensemblVersion); + Assert.notNull(vepCacheVersion); + this.ensemblVersion = ensemblVersion; + this.vepCacheVersion = vepCacheVersion; + } + public void addSift(Double sift) { this.sifts.add(sift); } @@ -67,4 +97,12 @@ public Set getSoAccessions() { public Set getXrefIds() { return xrefIds; } + + public String getEnsemblVersion() { + return ensemblVersion; + } + + public String getVepCacheVersion() { + return vepCacheVersion; + } } diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java index ae91d81bc..6536cdb5a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java @@ -25,6 +25,7 @@ import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.io.writers.AnnotationInVariantMongoWriter; +import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.ANNOTATION_IN_VARIANT_WRITER; @@ -36,8 +37,11 @@ public class AnnotationInVariantWriterConfiguration { @StepScope @Profile(Application.VARIANT_ANNOTATION_MONGO_PROFILE) public ItemWriter variantAnnotationItemWriter(MongoOperations mongoOperations, - DatabaseParameters databaseParameters) { - return new AnnotationInVariantMongoWriter(mongoOperations, databaseParameters.getCollectionVariantsName()); + DatabaseParameters databaseParameters, + AnnotationParameters annotationParameters) { + return new AnnotationInVariantMongoWriter(mongoOperations, databaseParameters.getCollectionVariantsName(), + annotationParameters.getVepVersion(), + annotationParameters.getVepCacheVersion()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 5a69156bf..5c60104b3 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -17,6 +17,7 @@ import com.mongodb.BasicDBList; import 
com.mongodb.BasicDBObject; +import com.mongodb.DBObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.item.data.MongoItemWriter; @@ -25,7 +26,9 @@ import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; +import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames; import uk.ac.ebi.eva.commons.models.data.ConsequenceType; import uk.ac.ebi.eva.commons.models.data.Score; import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; @@ -42,16 +45,15 @@ import java.util.Set; import java.util.stream.Collectors; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.GENE_NAME_FIELD; import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.POLYPHEN_FIELD; import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SIFT_FIELD; import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.XREFS_FIELD; /** - * Update the {@link uk.ac.ebi.eva.commons.models.data.Variant} mongo document with some fields from {@link Annotation} - * and {@link ConsequenceType} + * Update the {@link uk.ac.ebi.eva.commons.models.data.Variant} mongo document with {@link VariantAnnotation} *

- * The fields are: + * The fields updated are: * - sifts * - polyphens * - soAccessions @@ -64,7 +66,26 @@ public class AnnotationInVariantMongoWriter extends MongoItemWriter private final String collection; - public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, String collection) { + private String vepVersion; + + private String vepCacheVersion; + + private final String ANNOTATION_XREFS = VariantToDBObjectConverter.ANNOTATION_FIELD + ".$." + XREFS_FIELD; + + private final String ANNOTATION_SO = VariantToDBObjectConverter.ANNOTATION_FIELD + ".$." + SO_ACCESSION_FIELD; + + private final String ANNOTATION_SIFT = VariantToDBObjectConverter.ANNOTATION_FIELD + ".$." + SIFT_FIELD; + + private final String ANNOTATION_POLYPHEN = VariantToDBObjectConverter.ANNOTATION_FIELD + ".$." + POLYPHEN_FIELD; + + private final String ANNOTATION_ENSEMBL_VERSION = VariantToDBObjectConverter.ANNOTATION_FIELD + "." + AnnotationFieldNames.ENSEMBL_VERSION_FIELD; + + private final String ANNOTATION_VEP_CACHE_VERSION = VariantToDBObjectConverter.ANNOTATION_FIELD + "." 
+ AnnotationFieldNames.VEP_CACHE_VERSION_FIELD; + + public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, + String collection, + String vepVersion, + String vepCacheVersion) { super(); Assert.notNull(mongoOperations, "A Mongo instance is required"); Assert.hasText(collection, "A collection name is required"); @@ -74,6 +95,8 @@ public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, String co this.mongoOperations = mongoOperations; this.collection = collection; + this.vepVersion = vepVersion; + this.vepCacheVersion = vepCacheVersion; } @Override @@ -86,81 +109,97 @@ protected void doWrite(List annotations) { String storageId = annotationsIdEntry.getKey(); BasicDBObject id = new BasicDBObject("_id", storageId); - if (mongoOperations.exists(new BasicQuery(id), collection)) { - logger.trace("Writing annotations fields into mongo id: {}, collection: {}", storageId, collection); + DBObject dbObject = mongoOperations.getCollection(collection).findOne(id); + + if (dbObject != null) { + logger.trace("Writing annotations into variant : {}, collection: {}", storageId, collection); + + List existingAnnotations = (List) dbObject + .get(VariantToDBObjectConverter.ANNOTATION_FIELD); + + if (existingAnnotations != null) { + updateExistingVariantAnnotation(variantAnnotation, storageId, existingAnnotations); + } else { + addNewVariantAnnotation(variantAnnotation, id); + } + + } + } + } + + /** + * Update {@link VariantAnnotation} fields if some are already present in the {@link uk.ac.ebi.eva.commons.models.data.Variant} + * Just make sure to update the specific version of annotation! 
+ * + * @param variantAnnotation the current annotation to append + * @param storageId + * @param existingAnnotations already in {@link uk.ac.ebi.eva.commons.models.data.Variant} + */ + private void updateExistingVariantAnnotation(VariantAnnotation variantAnnotation, + String storageId, + List existingAnnotations) { + for (BasicDBObject existingAnnotation : existingAnnotations) { + if (existingAnnotation.getString(AnnotationFieldNames.ENSEMBL_VERSION_FIELD) + .equals(vepVersion) && existingAnnotation.getString(AnnotationFieldNames.VEP_CACHE_VERSION_FIELD) + .equals(vepCacheVersion)) { Set xrefs = variantAnnotation.getXrefIds(); - BasicDBObject updateGeneNames = new BasicDBObject("$addToSet", new BasicDBObject(GENE_NAME_FIELD, - new BasicDBObject( - "$each", - xrefs))); - mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateGeneNames), collection); + BasicDBObject addToSetValue = new BasicDBObject(ANNOTATION_XREFS, new BasicDBObject("$each", xrefs)); Set soAccessions = variantAnnotation.getSoAccessions(); - BasicDBObject updateConsequenceTypes = new BasicDBObject("$addToSet", - new BasicDBObject(SO_ACCESSION_FIELD, - new BasicDBObject("$each", - soAccessions))); - mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateConsequenceTypes), collection); + addToSetValue.append(ANNOTATION_SO, new BasicDBObject("$each", soAccessions)); - variantAnnotation.addSifts(lookupExistingSubstitutionScore(SIFT_FIELD, storageId)); + BasicDBObject update = new BasicDBObject("$addToSet", addToSetValue); + + variantAnnotation + .addSifts(lookupExistingSubstitutionScore((BasicDBList) existingAnnotation.get(SIFT_FIELD))); Set sifts = calculateRangeOfScores(variantAnnotation.getSifts()); - updateSubstitutionScore(sifts, SIFT_FIELD, id); + BasicDBObject setValue = new BasicDBObject(ANNOTATION_SIFT, sifts); - variantAnnotation.addPolyphens(lookupExistingSubstitutionScore(POLYPHEN_FIELD, storageId)); + variantAnnotation.addPolyphens( + 
lookupExistingSubstitutionScore((BasicDBList) existingAnnotation.get(POLYPHEN_FIELD))); Set polyphens = calculateRangeOfScores(variantAnnotation.getPolyphens()); - updateSubstitutionScore(polyphens, POLYPHEN_FIELD, id); + setValue.append(ANNOTATION_POLYPHEN, polyphens); - } else { - logger.info("Unable to update annotation fields into variant {} because it doesn't exist", storageId); - } + update.append("$set", setValue); - } - } + BasicDBObject versionedId = new BasicDBObject("_id", storageId); + versionedId.append(ANNOTATION_ENSEMBL_VERSION, vepVersion); + versionedId.append(ANNOTATION_VEP_CACHE_VERSION, vepCacheVersion); - private void updateSubstitutionScore(Set substitutionScores, - String substitutionScoreName, - BasicDBObject id) { - if (!substitutionScores.isEmpty()) { - BasicDBObject updateSubstitutionScore = new BasicDBObject("$set", new BasicDBObject(substitutionScoreName, - substitutionScores)); - mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateSubstitutionScore), collection); + mongoOperations.updateFirst(new BasicQuery(versionedId), new BasicUpdate(update), collection); + } } } /** - * Checks for a given variant if a protein substitution score is already present + * Append a new {@link VariantAnnotation} field into {@link uk.ac.ebi.eva.commons.models.data.Variant} * - * @param substitutionScoreField substitution score name like POLYPHEN_FIELD, SIFT_FIELD etc. 
- * @param storageId variant ID - * @return a set containing all the substitution scores if any already loaded, or an empty set otherwise + * @param variantAnnotation + * @param id */ - private Set lookupExistingSubstitutionScore(String substitutionScoreField, String storageId) { - Set substitutionScores = new HashSet<>(); - - BasicDBObject field = new BasicDBObject(substitutionScoreField, new BasicDBObject("$exists", true)) - .append("_id", storageId); - - BasicQuery fieldQuery = new BasicQuery(field); - fieldQuery.fields().include(substitutionScoreField); - fieldQuery.fields().exclude("_id"); + private void addNewVariantAnnotation(VariantAnnotation variantAnnotation, BasicDBObject id) { + Set variantAnnotations = new HashSet<>(Collections.singletonList(variantAnnotation)); + BasicDBObject updateAnnotation = new BasicDBObject("$set", new BasicDBObject( + VariantToDBObjectConverter.ANNOTATION_FIELD, variantAnnotations)); + mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateAnnotation), collection); + } - BasicDBObject existingFields = mongoOperations.findOne(fieldQuery, BasicDBObject.class, collection); + private Set lookupExistingSubstitutionScore(BasicDBList scores) { + Set substitutionScores = new HashSet<>(); - if (existingFields != null) { - BasicDBList scores = (BasicDBList) existingFields.getOrDefault(substitutionScoreField, new BasicDBList()); + if (scores != null) { substitutionScores.addAll(scores.stream().map(score -> (Double) score).collect(Collectors.toSet())); } return substitutionScores; } - /** * Extract Xrefs, so terms and protein substitution score from {@link Annotation} */ private VariantAnnotation extractFieldsFromAnnotations(List annotations) { - VariantAnnotation variantAnnotation = new VariantAnnotation(); + VariantAnnotation variantAnnotation = new VariantAnnotation(vepVersion, vepCacheVersion); for (Annotation annotation : annotations) { annotation.generateXrefsFromConsequenceTypes(); diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index cd1488719..d33d3eff5 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -24,9 +24,6 @@ import org.springframework.data.mongodb.core.query.BasicQuery; import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; -import uk.ac.ebi.eva.commons.models.converters.data.VariantSourceEntryToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; import uk.ac.ebi.eva.commons.models.data.Annotation; import uk.ac.ebi.eva.utils.MongoDBHelper; @@ -110,7 +107,7 @@ protected void doWrite(List annotations) { } annotation.setId(storageId); - annotation.setEnsmblVersion(vepVersion); + annotation.setEnsemblVersion(vepVersion); annotation.setVepCacheVersion(vepCacheVersion); annotation.generateXrefsFromConsequenceTypes(); @@ -135,7 +132,7 @@ private Map> groupAnnotationById(List rows = new ArrayList<>(); - - String s; - while ((s = actualReader.readLine()) != null) { - rows.add(s); - } - Collections.sort(rows); - - String testLine = testReader.readLine(); - for (String row : rows) { - assertEquals(testLine, row); - testLine = testReader.readLine(); - } - assertNull(testLine); // if both files have the same length testReader should be after the last line - } - - /** * Annotation load step: check documents in DB have annotation (only consequence type) */ @@ -136,26 +99,28 @@ public static void checkAnnotationCreateStep(File vepOutputFile) { /** * load stats step: check the DB docs have the field "st" - * - * @param dbName */ - public static void checkLoadStatsStep(String dbName) throws ClassNotFoundException, StorageManagerException, - InstantiationException, 
IllegalAccessException { - VariantDBIterator iterator = GenotypedVcfJobTestUtils.getVariantDBIterator(dbName); - assertEquals(1, iterator.next().getSourceEntries().values().iterator().next().getCohortStats().size()); + public static void checkLoadStatsStep(TemporaryMongoRule mongoRule, + String databaseName) throws ClassNotFoundException, StorageManagerException, InstantiationException, IllegalAccessException { + DBCursor iterator = getVariantDBCursor(mongoRule, databaseName); + DBObject stField = ((DBObject) iterator.next().get("st")); + + assertNotNull(stField); + iterator.close(); } /** * 1 load step: check ((documents in DB) == (lines in transformed file)) * variantStorageManager = StorageManagerFactory.getVariantStorageManager(); * variantDBAdaptor = variantStorageManager.getDBAdaptor(dbName, null); - * - * @param dbName */ - public static void checkLoadStep(String dbName) throws ClassNotFoundException, StorageManagerException, + public static void checkLoadStep(TemporaryMongoRule mongoRule, + String databaseName) throws ClassNotFoundException, StorageManagerException, InstantiationException, IllegalAccessException { - VariantDBIterator iterator = GenotypedVcfJobTestUtils.getVariantDBIterator(dbName); + DBCursor iterator = getVariantDBCursor(mongoRule, databaseName); + assertEquals(EXPECTED_VARIANTS, count(iterator)); + iterator.close(); } /** From 3930775e0f054558fd3753298759dfd967883ce2 Mon Sep 17 00:00:00 2001 From: jorizci Date: Wed, 3 May 2017 16:20:38 +0100 Subject: [PATCH 06/48] Simple cosmetic changes. 
Enforced valid strings in VariantAnnotation --- .../eva/commons/models/data/Annotation.java | 12 +++++++++--- .../models/data/AnnotationFieldNames.java | 2 ++ .../commons/models/data/VariantAnnotation.java | 12 +++++++----- .../models/metadata/AnnotationMetadata.java | 18 +++++------------- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java index 67ac1ac75..5474580d1 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java @@ -17,6 +17,7 @@ import com.google.common.base.Strings; import org.springframework.data.annotation.Id; +import org.springframework.data.annotation.PersistenceConstructor; import org.springframework.data.annotation.Transient; import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; @@ -32,6 +33,10 @@ */ @Document public class Annotation { + + @Id + private String id; + @Field(value = AnnotationFieldNames.CHROMOSOME_FIELD) private String chromosome; @@ -47,9 +52,6 @@ public class Annotation { @Transient private String alternativeAllele; - @Id - private String id; - @Field(value = AnnotationFieldNames.ENSEMBL_VERSION_FIELD) private String ensemblVersion; @@ -65,6 +67,10 @@ public class Annotation { @Transient private Map additionalAttributes; + Annotation(){ + // Empty document constructor for spring-data + } + public Annotation(String chromosome, int start, int end, String referenceAllele) { this(chromosome, start, end, referenceAllele, ""); } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java index b505892a8..2141cbde0 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java +++ 
b/src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java @@ -16,6 +16,7 @@ package uk.ac.ebi.eva.commons.models.data; public class AnnotationFieldNames { + public static final String CHROMOSOME_FIELD = "chr"; public static final String START_FIELD = "start"; @@ -65,4 +66,5 @@ public class AnnotationFieldNames { public final static String SCORE_SCORE_FIELD = "sc"; public final static String SCORE_DESCRIPTION_FIELD = "desc"; + } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java index ae17cd6a4..9e57c6d93 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java @@ -28,6 +28,7 @@ */ @Document public class VariantAnnotation { + @Field(value = AnnotationFieldNames.ENSEMBL_VERSION_FIELD) private String ensemblVersion; @@ -47,13 +48,14 @@ public class VariantAnnotation { private Set xrefIds = new HashSet<>(); /** - * Make sure to specify the ensemblVersion and vepCacheVersion - * @param ensemblVersion - * @param vepCacheVersion + * Variant annotation constructor. 
Requires non empty values, otherwise throws {@link IllegalArgumentException} + * + * @param ensemblVersion non empty value required, otherwise throws {@link IllegalArgumentException} + * @param vepCacheVersion non empty value required, otherwise throws {@link IllegalArgumentException} */ public VariantAnnotation(String ensemblVersion, String vepCacheVersion) { - Assert.notNull(ensemblVersion); - Assert.notNull(vepCacheVersion); + Assert.hasText(ensemblVersion, "A non empty ensemblVersion is required"); + Assert.hasText(vepCacheVersion, "A non empty vepCacheVersion is required"); this.ensemblVersion = ensemblVersion; this.vepCacheVersion = vepCacheVersion; } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/metadata/AnnotationMetadata.java b/src/main/java/uk/ac/ebi/eva/commons/models/metadata/AnnotationMetadata.java index 11b90a607..52e95cf9c 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/metadata/AnnotationMetadata.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/metadata/AnnotationMetadata.java @@ -17,6 +17,7 @@ package uk.ac.ebi.eva.commons.models.metadata; import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.util.Assert; @Document public class AnnotationMetadata { @@ -28,16 +29,14 @@ public class AnnotationMetadata { private String cacheVersion; AnnotationMetadata() { + // Empty document constructor for spring-data } public AnnotationMetadata(String vepVersion, String cacheVersion) { - this.id = vepVersion + "_" + cacheVersion; - this.vepVersion = vepVersion; - this.cacheVersion = cacheVersion; - } + Assert.hasText(vepVersion, "A non empty vepVerion is required"); + Assert.hasText(vepVersion, "A non empty cacheVersion is required"); - public AnnotationMetadata(String id, String vepVersion, String cacheVersion) { - this.id = id; + this.id = vepVersion + "_" + cacheVersion; this.vepVersion = vepVersion; this.cacheVersion = cacheVersion; } @@ -46,15 +45,8 @@ public String getVepVersion() { return vepVersion; 
} - public void setVepVersion(String vepVersion) { - this.vepVersion = vepVersion; - } - public String getCacheVersion() { return cacheVersion; } - public void setCacheVersion(String cacheVersion) { - this.cacheVersion = cacheVersion; - } } From 0f464fec9fd7e1a8179ee3441f5a263522bd829a Mon Sep 17 00:00:00 2001 From: jorizci Date: Mon, 8 May 2017 11:20:46 +0100 Subject: [PATCH 07/48] Moved Annotation document to a different package with its subdocuments. --- .../java/uk/ac/ebi/eva/commons/models/data/Variant.java | 2 ++ .../models/{data => mongo/documents}/Annotation.java | 6 ++++-- .../documents/subdocuments}/ConsequenceType.java | 5 +++-- .../models/{data => mongo/documents/subdocuments}/Xref.java | 3 ++- .../readers/AnnotationReaderConfiguration.java | 2 +- .../writers/AnnotationCompositeWriterConfiguration.java | 2 +- .../writers/AnnotationInVariantWriterConfiguration.java | 2 +- .../writers/AnnotationWriterConfiguration.java | 2 +- .../ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java | 4 ++-- .../eva/pipeline/io/readers/AnnotationFlatFileReader.java | 2 +- .../pipeline/io/writers/AnnotationInVariantMongoWriter.java | 6 +++--- .../ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java | 2 +- .../ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java | 2 +- src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java | 2 +- .../eva/pipeline/io/mappers/AnnotationLineMapperTest.java | 4 ++-- .../pipeline/io/readers/AnnotationFlatFileReaderTest.java | 2 +- .../io/writers/AnnotationInVariantMongoWriterTest.java | 2 +- .../eva/pipeline/io/writers/AnnotationMongoWriterTest.java | 4 ++-- 18 files changed, 30 insertions(+), 24 deletions(-) rename src/main/java/uk/ac/ebi/eva/commons/models/{data => mongo/documents}/Annotation.java (94%) rename src/main/java/uk/ac/ebi/eva/commons/models/{data => mongo/documents/subdocuments}/ConsequenceType.java (97%) rename src/main/java/uk/ac/ebi/eva/commons/models/{data => mongo/documents/subdocuments}/Xref.java (92%) diff --git 
a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java index 6ceef9db2..3e9c12636 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java @@ -16,6 +16,8 @@ */ package uk.ac.ebi.eva.commons.models.data; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; + import java.util.HashMap; import java.util.HashSet; import java.util.Map; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java similarity index 94% rename from src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java index 5474580d1..895e433af 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Annotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java @@ -13,14 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.data; +package uk.ac.ebi.eva.commons.models.mongo.documents; import com.google.common.base.Strings; import org.springframework.data.annotation.Id; -import org.springframework.data.annotation.PersistenceConstructor; import org.springframework.data.annotation.Transient; import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; +import uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; import java.util.HashMap; import java.util.HashSet; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java similarity index 97% rename from src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java index 6cba7d3c0..350f9e918 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/ConsequenceType.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java @@ -14,12 +14,13 @@ * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.data; +package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; +import uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames; +import uk.ac.ebi.eva.commons.models.data.Score; -import java.util.List; import java.util.Set; /** diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java similarity index 92% rename from src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java index ca59986a9..3972afc53 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Xref.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java @@ -13,10 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.data; +package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; +import uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames; /** * From org.opencb.biodata.models.variant.annotation.Xref diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java index 1e4173adb..533d657a9 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java @@ -20,7 +20,7 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.pipeline.io.readers.AnnotationFlatFileReader; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java index ace3dcdd7..565db3c95 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java @@ -25,7 +25,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Profile; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.pipeline.Application; import java.util.Arrays; diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java index 6536cdb5a..321712cc6 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java @@ -22,7 +22,7 @@ import org.springframework.context.annotation.Profile; import org.springframework.data.mongodb.core.MongoOperations; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.io.writers.AnnotationInVariantMongoWriter; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java index eb80bdfcf..707da36b6 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java @@ -22,7 +22,7 @@ import org.springframework.context.annotation.Profile; import org.springframework.data.mongodb.core.MongoOperations; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.io.writers.AnnotationMongoWriter; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java index cd4143cc6..8396fa4a4 100644 --- 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java @@ -22,8 +22,8 @@ import org.slf4j.LoggerFactory; import org.springframework.batch.item.file.LineMapper; -import uk.ac.ebi.eva.commons.models.data.Annotation; -import uk.ac.ebi.eva.commons.models.data.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; import uk.ac.ebi.eva.commons.models.data.Score; import java.util.Arrays; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java index f436fa1ec..a12bfb6a1 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java @@ -18,7 +18,7 @@ import org.springframework.batch.item.file.FlatFileItemReader; import org.springframework.core.io.Resource; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.pipeline.io.GzipLazyResource; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 5c60104b3..6bc8b9438 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -27,12 +27,12 @@ import org.springframework.util.Assert; import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import 
uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames; -import uk.ac.ebi.eva.commons.models.data.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; import uk.ac.ebi.eva.commons.models.data.Score; import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; -import uk.ac.ebi.eva.commons.models.data.Xref; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; import uk.ac.ebi.eva.utils.MongoDBHelper; import java.util.ArrayList; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index d33d3eff5..5a25571c2 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -24,7 +24,7 @@ import org.springframework.data.mongodb.core.query.BasicQuery; import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.utils.MongoDBHelper; import java.util.ArrayList; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java index 6c16c097e..d4f3930af 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java @@ -30,7 +30,7 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; import 
uk.ac.ebi.eva.pipeline.configuration.readers.AnnotationReaderConfiguration; import uk.ac.ebi.eva.pipeline.configuration.writers.AnnotationCompositeWriterConfiguration; diff --git a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java index 14829ef6e..205d8117e 100644 --- a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java +++ b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java @@ -19,7 +19,7 @@ import org.opencb.commons.utils.CryptoUtils; import uk.ac.ebi.eva.commons.models.data.Variant; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import java.net.UnknownHostException; import java.util.LinkedList; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java index e49835fd3..c31321bb0 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java @@ -17,8 +17,8 @@ import org.junit.Test; -import uk.ac.ebi.eva.commons.models.data.Annotation; -import uk.ac.ebi.eva.commons.models.data.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; import uk.ac.ebi.eva.commons.models.data.Score; import uk.ac.ebi.eva.test.data.VepOutputContent; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java index efa4aad1b..06176ab07 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java @@ -21,7 +21,7 @@ import org.springframework.batch.item.file.FlatFileParseException; import 
org.springframework.batch.test.MetaDataInstanceFactory; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.test.data.VepOutputContent; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index d3c31f5a3..beaa97f3f 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -32,7 +32,7 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.data.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index ccd585b62..32c65d4b8 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -31,8 +31,8 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.eva.commons.models.data.Annotation; -import uk.ac.ebi.eva.commons.models.data.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import 
uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; import uk.ac.ebi.eva.commons.models.data.Score; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; From ee386dc72011be3c72c991ddbb130833b42f897f Mon Sep 17 00:00:00 2001 From: jorizci Date: Mon, 8 May 2017 12:13:41 +0100 Subject: [PATCH 08/48] Splitted field names through the documents that uses them. --- .../models/data/AnnotationFieldNames.java | 70 ------------------- .../models/data/VariantAnnotation.java | 38 ++++++---- .../models/mongo/documents/Annotation.java | 39 +++++++---- .../subdocuments/ConsequenceType.java | 58 ++++++++++----- .../documents/subdocuments}/Score.java | 10 ++- .../mongo/documents/subdocuments/Xref.java | 9 ++- .../io/mappers/AnnotationLineMapper.java | 2 +- .../AnnotationInVariantMongoWriter.java | 24 +++---- .../io/writers/AnnotationMongoWriter.java | 6 +- .../io/mappers/AnnotationLineMapperTest.java | 2 +- .../AnnotationInVariantMongoWriterTest.java | 8 +-- .../io/writers/AnnotationMongoWriterTest.java | 15 ++-- .../eva/pipeline/jobs/AnnotationJobTest.java | 2 +- .../jobs/steps/AnnotationLoaderStepTest.java | 10 +-- .../test/utils/GenotypedVcfJobTestUtils.java | 2 +- 15 files changed, 142 insertions(+), 153 deletions(-) delete mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java rename src/main/java/uk/ac/ebi/eva/commons/models/{data => mongo/documents/subdocuments}/Score.java (86%) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java deleted file mode 100644 index 2141cbde0..000000000 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/AnnotationFieldNames.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2017 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except 
in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.commons.models.data; - -public class AnnotationFieldNames { - - public static final String CHROMOSOME_FIELD = "chr"; - - public static final String START_FIELD = "start"; - - public static final String END_FIELD = "end"; - - public static final String ENSEMBL_VERSION_FIELD = "ensemblVer"; - - public static final String VEP_CACHE_VERSION_FIELD = "cacheVer"; - - public static final String CONSEQUENCE_TYPE_FIELD = "ct"; - - public static final String GENE_NAME_FIELD = "gn"; - - public static final String ENSEMBL_GENE_ID_FIELD = "ensg"; - - public static final String ENSEMBL_TRANSCRIPT_ID_FIELD = "enst"; - - public static final String RELATIVE_POS_FIELD = "relPos"; - - public static final String CODON_FIELD = "codon"; - - public static final String STRAND_FIELD = "strand"; - - public static final String BIOTYPE_FIELD = "bt"; - - public static final String C_DNA_POSITION_FIELD = "cDnaPos"; - - public static final String CDS_POSITION_FIELD = "cdsPos"; - - public static final String AA_POSITION_FIELD = "aaPos"; - - public static final String AA_CHANGE_FIELD = "aaChange"; - - public static final String SO_ACCESSION_FIELD = "so"; - - public static final String POLYPHEN_FIELD = "polyphen"; - - public static final String SIFT_FIELD = "sift"; - - public static final String XREFS_FIELD = "xrefs"; - - public final static String XREF_ID_FIELD = "id"; - - public final static String XREF_SOURCE_FIELD = "src"; - - public final static String SCORE_SCORE_FIELD = "sc"; - - public final static String 
SCORE_DESCRIPTION_FIELD = "desc"; - -} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java index 9e57c6d93..41b5d7c81 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java @@ -29,34 +29,46 @@ @Document public class VariantAnnotation { - @Field(value = AnnotationFieldNames.ENSEMBL_VERSION_FIELD) - private String ensemblVersion; + public static final String VEP_VERSION_FIELD = "vepVer"; - @Field(value = AnnotationFieldNames.VEP_CACHE_VERSION_FIELD) + public static final String VEP_CACHE_VERSION_FIELD = "cacheVer"; + + public static final String SIFT_FIELD = "sift"; + + public static final String POLYPHEN_FIELD = "polyphen"; + + public static final String SO_ACCESSION_FIELD = "so"; + + public static final String XREFS_FIELD = "xrefs"; + + @Field(value = VEP_VERSION_FIELD) + private String vepVersion; + + @Field(value = VEP_CACHE_VERSION_FIELD) private String vepCacheVersion; - @Field(value = AnnotationFieldNames.SIFT_FIELD) + @Field(value = SIFT_FIELD) private Set sifts = new HashSet<>(); - @Field(value = AnnotationFieldNames.POLYPHEN_FIELD) + @Field(value = POLYPHEN_FIELD) private Set polyphens = new HashSet<>(); - @Field(value = AnnotationFieldNames.SO_ACCESSION_FIELD) + @Field(value = SO_ACCESSION_FIELD) private Set soAccessions = new HashSet<>(); - @Field(value = AnnotationFieldNames.XREFS_FIELD) + @Field(value = XREFS_FIELD) private Set xrefIds = new HashSet<>(); /** * Variant annotation constructor. 
Requires non empty values, otherwise throws {@link IllegalArgumentException} * - * @param ensemblVersion non empty value required, otherwise throws {@link IllegalArgumentException} + * @param vepVersion non empty value required, otherwise throws {@link IllegalArgumentException} * @param vepCacheVersion non empty value required, otherwise throws {@link IllegalArgumentException} */ - public VariantAnnotation(String ensemblVersion, String vepCacheVersion) { - Assert.hasText(ensemblVersion, "A non empty ensemblVersion is required"); + public VariantAnnotation(String vepVersion, String vepCacheVersion) { + Assert.hasText(vepVersion, "A non empty vepVersion is required"); Assert.hasText(vepCacheVersion, "A non empty vepCacheVersion is required"); - this.ensemblVersion = ensemblVersion; + this.vepVersion = vepVersion; this.vepCacheVersion = vepCacheVersion; } @@ -100,8 +112,8 @@ public Set getXrefIds() { return xrefIds; } - public String getEnsemblVersion() { - return ensemblVersion; + public String getVepVersion() { + return vepVersion; } public String getVepCacheVersion() { diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java index 895e433af..a2d0e3d2d 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java @@ -20,7 +20,6 @@ import org.springframework.data.annotation.Transient; import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; -import uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; @@ -36,16 +35,30 @@ @Document public class Annotation { + private static final String CHROMOSOME_FIELD = "chr"; + + private static final 
String START_FIELD = "start"; + + private static final String END_FIELD = "end"; + + public static final String XREFS_FIELD = "xrefs"; + + private static final String VEP_VERSION_FIELD = "vepVer"; + + public static final String VEP_CACHE_VERSION_FIELD = "cacheVer"; + + public static final String CONSEQUENCE_TYPE_FIELD = "ct"; + @Id private String id; - @Field(value = AnnotationFieldNames.CHROMOSOME_FIELD) + @Field(value = CHROMOSOME_FIELD) private String chromosome; - @Field(value = AnnotationFieldNames.START_FIELD) + @Field(value = START_FIELD) private int start; - @Field(value = AnnotationFieldNames.END_FIELD) + @Field(value = END_FIELD) private int end; @Transient @@ -54,16 +67,16 @@ public class Annotation { @Transient private String alternativeAllele; - @Field(value = AnnotationFieldNames.ENSEMBL_VERSION_FIELD) - private String ensemblVersion; + @Field(value = VEP_VERSION_FIELD) + private String vepVersion; - @Field(value = AnnotationFieldNames.VEP_CACHE_VERSION_FIELD) + @Field(value = VEP_CACHE_VERSION_FIELD) private String vepCacheVersion; - @Field(value = AnnotationFieldNames.XREFS_FIELD) + @Field(value = XREFS_FIELD) private Set xrefs; - @Field(value = AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD) + @Field(value = CONSEQUENCE_TYPE_FIELD) private Set consequenceTypes; @Transient @@ -146,12 +159,12 @@ public void setConsequenceTypes(Set consequenceTypes) { this.consequenceTypes = consequenceTypes; } - public String getEnsemblVersion() { - return ensemblVersion; + public String getVepVersion() { + return vepVersion; } - public void setEnsemblVersion(String ensemblVersion) { - this.ensemblVersion = ensemblVersion; + public void setVepVersion(String vepVersion) { + this.vepVersion = vepVersion; } public String getVepCacheVersion() { diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java index 350f9e918..dd2a84f06 
100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java @@ -18,8 +18,6 @@ import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; -import uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames; -import uk.ac.ebi.eva.commons.models.data.Score; import java.util.Set; @@ -29,46 +27,74 @@ @Document public class ConsequenceType { - @Field(value = AnnotationFieldNames.GENE_NAME_FIELD) + private static final String GENE_NAME_FIELD = "gn"; + + private static final String ENSEMBL_GENE_ID_FIELD = "ensg"; + + private static final String ENSEMBL_TRANSCRIPT_ID_FIELD = "enst"; + + private static final String STRAND_FIELD = "strand"; + + private static final String BIOTYPE_FIELD = "bt"; + + private static final String C_DNA_POSITION_FIELD = "cDnaPos"; + + private static final String CDS_POSITION_FIELD = "cdsPos"; + + private static final String AA_POSITION_FIELD = "aaPos"; + + private static final String AA_CHANGE_FIELD = "aaChange"; + + private static final String CODON_FIELD = "codon"; + + public static final String SIFT_FIELD = "sift"; + + public static final String POLYPHEN_FIELD = "polyphen"; + + public static final String SO_ACCESSION_FIELD = "so"; + + private static final String RELATIVE_POS_FIELD = "relPos"; + + @Field(value = GENE_NAME_FIELD) private String geneName; - @Field(value = AnnotationFieldNames.ENSEMBL_GENE_ID_FIELD) + @Field(value = ENSEMBL_GENE_ID_FIELD) private String ensemblGeneId; - @Field(value = AnnotationFieldNames.ENSEMBL_TRANSCRIPT_ID_FIELD) + @Field(value = ENSEMBL_TRANSCRIPT_ID_FIELD) private String ensemblTranscriptId; - @Field(value = AnnotationFieldNames.STRAND_FIELD) + @Field(value = STRAND_FIELD) private String strand; - @Field(value = AnnotationFieldNames.BIOTYPE_FIELD) + @Field(value = BIOTYPE_FIELD) private String biotype; - 
@Field(value = AnnotationFieldNames.C_DNA_POSITION_FIELD) + @Field(value = C_DNA_POSITION_FIELD) private Integer cDnaPosition; - @Field(value = AnnotationFieldNames.CDS_POSITION_FIELD) + @Field(value = CDS_POSITION_FIELD) private Integer cdsPosition; - @Field(value = AnnotationFieldNames.AA_POSITION_FIELD) + @Field(value = AA_POSITION_FIELD) private Integer aaPosition; - @Field(value = AnnotationFieldNames.AA_CHANGE_FIELD) + @Field(value = AA_CHANGE_FIELD) private String aaChange; - @Field(value = AnnotationFieldNames.CODON_FIELD) + @Field(value = CODON_FIELD) private String codon; - @Field(value = AnnotationFieldNames.SIFT_FIELD) + @Field(value = SIFT_FIELD) private Score sift; - @Field(value = AnnotationFieldNames.POLYPHEN_FIELD) + @Field(value = POLYPHEN_FIELD) private Score polyphen; - @Field(value = AnnotationFieldNames.SO_ACCESSION_FIELD) + @Field(value = SO_ACCESSION_FIELD) private Set soAccessions; - @Field(value = AnnotationFieldNames.RELATIVE_POS_FIELD) + @Field(value = RELATIVE_POS_FIELD) private Integer relativePosition; public ConsequenceType() { diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java similarity index 86% rename from src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java index 5fe241a1b..34a72fcac 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Score.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.data; +package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; @@ -25,10 +25,14 @@ @Document public class Score { - @Field(value = AnnotationFieldNames.SCORE_SCORE_FIELD) + public final static String SCORE_SCORE_FIELD = "sc"; + + public final static String SCORE_DESCRIPTION_FIELD = "desc"; + + @Field(value = SCORE_SCORE_FIELD) private Double score; - @Field(value = AnnotationFieldNames.SCORE_DESCRIPTION_FIELD) + @Field(value = SCORE_DESCRIPTION_FIELD) private String description; public Score(Double score, String description) { diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java index 3972afc53..ffbf3c650 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java @@ -17,7 +17,6 @@ import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; -import uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames; /** * From org.opencb.biodata.models.variant.annotation.Xref @@ -25,10 +24,14 @@ @Document public class Xref { - @Field(value = AnnotationFieldNames.XREF_ID_FIELD) + private final static String XREF_ID_FIELD = "id"; + + private final static String XREF_SOURCE_FIELD = "src"; + + @Field(value = XREF_ID_FIELD) private String id; - @Field(value = AnnotationFieldNames.XREF_SOURCE_FIELD) + @Field(value = XREF_SOURCE_FIELD) private String src; public Xref(String id, String src) { diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java index 8396fa4a4..afed5b0a2 100644 --- 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java @@ -24,7 +24,7 @@ import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.data.Score; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; import java.util.Arrays; import java.util.HashMap; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 6bc8b9438..6eaf85a24 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -25,13 +25,11 @@ import org.springframework.data.mongodb.core.query.BasicQuery; import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; - import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; +import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; -import uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.data.Score; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; import uk.ac.ebi.eva.utils.MongoDBHelper; @@ -45,10 +43,12 @@ import java.util.Set; import java.util.stream.Collectors; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SIFT_FIELD; -import static 
uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SO_ACCESSION_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.VEP_VERSION_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.VEP_CACHE_VERSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType.SO_ACCESSION_FIELD; /** * Update the {@link uk.ac.ebi.eva.commons.models.data.Variant} mongo document with {@link VariantAnnotation} @@ -78,9 +78,9 @@ public class AnnotationInVariantMongoWriter extends MongoItemWriter private final String ANNOTATION_POLYPHEN = VariantToDBObjectConverter.ANNOTATION_FIELD + ".$." + POLYPHEN_FIELD; - private final String ANNOTATION_ENSEMBL_VERSION = VariantToDBObjectConverter.ANNOTATION_FIELD + "." + AnnotationFieldNames.ENSEMBL_VERSION_FIELD; + private final String ANNOTATION_ENSEMBL_VERSION = VariantToDBObjectConverter.ANNOTATION_FIELD + "." + VEP_VERSION_FIELD; - private final String ANNOTATION_VEP_CACHE_VERSION = VariantToDBObjectConverter.ANNOTATION_FIELD + "." + AnnotationFieldNames.VEP_CACHE_VERSION_FIELD; + private final String ANNOTATION_VEP_CACHE_VERSION = VariantToDBObjectConverter.ANNOTATION_FIELD + "." 
+ VEP_CACHE_VERSION_FIELD; public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, String collection, @@ -139,8 +139,8 @@ private void updateExistingVariantAnnotation(VariantAnnotation variantAnnotation String storageId, List existingAnnotations) { for (BasicDBObject existingAnnotation : existingAnnotations) { - if (existingAnnotation.getString(AnnotationFieldNames.ENSEMBL_VERSION_FIELD) - .equals(vepVersion) && existingAnnotation.getString(AnnotationFieldNames.VEP_CACHE_VERSION_FIELD) + if (existingAnnotation.getString(VEP_VERSION_FIELD) + .equals(vepVersion) && existingAnnotation.getString(VEP_CACHE_VERSION_FIELD) .equals(vepCacheVersion)) { Set xrefs = variantAnnotation.getXrefIds(); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index 5a25571c2..499cf9e89 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -32,8 +32,8 @@ import java.util.List; import java.util.Map; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.XREFS_FIELD; /** * Write a list of {@link Annotation} into MongoDB @@ -107,7 +107,7 @@ protected void doWrite(List annotations) { } annotation.setId(storageId); - annotation.setEnsemblVersion(vepVersion); + annotation.setVepVersion(vepVersion); annotation.setVepCacheVersion(vepCacheVersion); annotation.generateXrefsFromConsequenceTypes(); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java index 
c31321bb0..d5d2f1ab8 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java @@ -19,7 +19,7 @@ import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.data.Score; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; import uk.ac.ebi.eva.test.data.VepOutputContent; import java.util.Set; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index beaa97f3f..42ee28ca4 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -45,10 +45,10 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SIFT_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SO_ACCESSION_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.XREFS_FIELD; import static uk.ac.ebi.eva.test.data.VepOutputContent.vepOutputContentWithExtraFields; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java 
b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index 32c65d4b8..47a15477a 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -33,7 +33,7 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.data.Score; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; @@ -54,12 +54,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SCORE_DESCRIPTION_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SCORE_SCORE_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SIFT_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.XREFS_FIELD; + +import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score.SCORE_DESCRIPTION_FIELD; +import static 
uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score.SCORE_SCORE_FIELD; import static uk.ac.ebi.eva.test.data.VepOutputContent.vepOutputContent; /** diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java index 9d99d0489..57b0fe9c9 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java @@ -45,7 +45,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; import static uk.ac.ebi.eva.utils.FileUtils.getResource; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java index 1e0b5f7ee..561c658bf 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java @@ -46,12 +46,12 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SIFT_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.SO_ACCESSION_FIELD; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.XREFS_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; +import static 
uk.ac.ebi.eva.commons.models.data.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType.SIFT_FIELD; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; /** diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java index b53152fa9..f52740509 100644 --- a/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java @@ -28,7 +28,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.data.AnnotationFieldNames.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; import static uk.ac.ebi.eva.test.utils.JobTestUtils.getLines; import static uk.ac.ebi.eva.utils.FileUtils.getResource; From a29b89d9e89c0d96754b466590a537ab7529c07f Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 9 May 2017 11:09:07 +0100 Subject: [PATCH 09/48] Multiple changes to mongo database documents. -Isolate id generation to their respective classes. -Modified Annotation to require the vep / cache version. -Removed transient parameters. 
--- .../data/VariantToDBObjectConverter.java | 4 +- .../ebi/eva/commons/models/data/Variant.java | 41 ++++-- .../models/mongo/documents/Annotation.java | 117 +++++++----------- .../AnnotationReaderConfiguration.java | 3 +- .../io/mappers/AnnotationLineMapper.java | 14 ++- .../io/readers/AnnotationFlatFileReader.java | 8 +- .../AnnotationInVariantMongoWriter.java | 18 ++- .../io/writers/AnnotationMongoWriter.java | 20 +-- .../io/writers/VariantMongoWriter.java | 6 +- .../data/VariantToMongoDbObjectConverter.java | 2 - .../uk/ac/ebi/eva/utils/MongoDBHelper.java | 52 -------- .../io/mappers/AnnotationLineMapperTest.java | 18 +-- .../readers/AnnotationFlatFileReaderTest.java | 13 +- .../AnnotationInVariantMongoWriterTest.java | 2 +- .../io/writers/AnnotationMongoWriterTest.java | 7 +- .../ac/ebi/eva/utils/MongoDBHelperTest.java | 6 +- 16 files changed, 136 insertions(+), 195 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java index 279d49e7c..d785faf98 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java @@ -110,8 +110,8 @@ public VariantToDBObjectConverter( @Override public DBObject convert(Variant object) { - String id = MongoDBHelper.buildVariantStorageId(object.getChromosome(), object.getStart(), object.getReference(), - object.getAlternate()); + String id = Variant.buildVariantId(object.getChromosome(), object.getStart(), object.getReference(), + object.getAlternate()); BasicDBObject mongoVariant = new BasicDBObject("_id", id) // Do not include IDs: the MongoWriter will take care in the query using an $addToSet diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java index 
3e9c12636..a8d449727 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java @@ -16,6 +16,7 @@ */ package uk.ac.ebi.eva.commons.models.data; +import org.opencb.commons.utils.CryptoUtils; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import java.util.HashMap; @@ -119,7 +120,7 @@ public enum VariantType { /** * Annotations of the genomic variation. */ - private Annotation annotation; + private Set annotations; public Variant() { @@ -149,7 +150,6 @@ public Variant(String chromosome, int start, int end, String reference, String a } this.sourceEntries = new HashMap<>(); - this.annotation = new Annotation(this.chromosome, this.start, this.end, this.reference); } public VariantType getType() { @@ -289,14 +289,6 @@ public VariantStats getStats(String studyId, String fileId) { return file.getStats(); } - public Annotation getAnnotation() { - return annotation; - } - - public void setAnnotation(Annotation annotation) { - this.annotation = annotation; - } - /** * Copies the current variant and returns the copy in Ensembl format. 
* see http://www.ensembl.org/info/docs/tools/vep/vep_formats.html @@ -399,7 +391,6 @@ public boolean equals(Object obj) { */ public Variant clone() { Variant variant = new Variant(chromosome, start, end, reference, alternate); - variant.setAnnotation(this.getAnnotation()); variant.setIds(this.getIds()); variant.setSourceEntries(this.getSourceEntries()); variant.setType(this.getType()); @@ -412,4 +403,32 @@ private String composeId(String studyId, String fileId) { return studyId + "_" + fileId; } + public String buildVariantId(){ + return buildVariantId(chromosome, start, reference, alternate); + } + + public static String buildVariantId(String chromosome, int start, String reference, String alternate) { + StringBuilder builder = new StringBuilder(chromosome); + builder.append("_"); + builder.append(start); + builder.append("_"); + if (!reference.equals("-")) { + if (reference.length() < 50) { + builder.append(reference); + } else { + builder.append(new String(CryptoUtils.encryptSha1(reference))); + } + } + + builder.append("_"); + if (!alternate.equals("-")) { + if (alternate.length() < 50) { + builder.append(alternate); + } else { + builder.append(new String(CryptoUtils.encryptSha1(alternate))); + } + } + + return builder.toString(); + } } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java index a2d0e3d2d..6c50f08cd 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java @@ -17,15 +17,14 @@ import com.google.common.base.Strings; import org.springframework.data.annotation.Id; -import org.springframework.data.annotation.Transient; import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; +import uk.ac.ebi.eva.commons.models.data.Variant; import 
uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; -import java.util.HashMap; +import java.util.Collections; import java.util.HashSet; -import java.util.Map; import java.util.Set; /** @@ -61,132 +60,106 @@ public class Annotation { @Field(value = END_FIELD) private int end; - @Transient - private String referenceAllele; - - @Transient - private String alternativeAllele; - @Field(value = VEP_VERSION_FIELD) private String vepVersion; @Field(value = VEP_CACHE_VERSION_FIELD) private String vepCacheVersion; - @Field(value = XREFS_FIELD) - private Set xrefs; - @Field(value = CONSEQUENCE_TYPE_FIELD) private Set consequenceTypes; - @Transient - private Map additionalAttributes; + @Field(value = XREFS_FIELD) + private Set xrefs; - Annotation(){ + Annotation() { // Empty document constructor for spring-data } - public Annotation(String chromosome, int start, int end, String referenceAllele) { - this(chromosome, start, end, referenceAllele, ""); - } - - public Annotation(String chromosome, int start, int end, String referenceAllele, String alternativeAllele) { + public Annotation(String chromosome, int start, int end, String referenceAllele, String alternativeAllele, + String vepVersion, String vepCacheVersion) { this.chromosome = chromosome; this.start = start; this.end = end; - this.referenceAllele = referenceAllele; - this.alternativeAllele = alternativeAllele; + this.vepVersion = vepVersion; + this.vepCacheVersion = vepCacheVersion; - this.id = ""; + this.id = buildAnnotationId(chromosome, start, referenceAllele, alternativeAllele, vepVersion, vepCacheVersion); this.xrefs = new HashSet<>(); this.consequenceTypes = new HashSet<>(); - this.additionalAttributes = new HashMap<>(); } public String getChromosome() { return chromosome; } - public void setChromosome(String chromosome) { - this.chromosome = chromosome; - } - public int getStart() { return start; } - public void 
setStart(int start) { - this.start = start; - } - public int getEnd() { return end; } - public void setEnd(int end) { - this.end = end; - } - - public String getReferenceAllele() { - return referenceAllele; - } - - public String getAlternativeAllele() { - return alternativeAllele; - } - public String getId() { return id; } - public void setId(String id) { - this.id = id; - } - public Set getXrefs() { return xrefs; } - public void setXrefs(Set xrefs) { - this.xrefs = xrefs; + public Set getConsequenceTypes() { + return Collections.unmodifiableSet(consequenceTypes); } - public Set getConsequenceTypes() { - return consequenceTypes; + public void addConsequenceType(ConsequenceType consequenceType) { + consequenceTypes.add(consequenceType); + generateXrefsFromConsequenceType(consequenceType); } - public void setConsequenceTypes(Set consequenceTypes) { - this.consequenceTypes = consequenceTypes; + public void addConsequenceTypes(Set consequenceTypes) { + for(ConsequenceType consequenceType: consequenceTypes){ + addConsequenceType(consequenceType); + } } public String getVepVersion() { return vepVersion; } - public void setVepVersion(String vepVersion) { - this.vepVersion = vepVersion; - } - public String getVepCacheVersion() { return vepCacheVersion; } - public void setVepCacheVersion(String vepCacheVersion) { - this.vepCacheVersion = vepCacheVersion; + private void generateXrefsFromConsequenceType(ConsequenceType consequenceType) { + if (!Strings.isNullOrEmpty(consequenceType.getGeneName())) { + xrefs.add(new Xref(consequenceType.getGeneName(), "HGNC")); + } + if (!Strings.isNullOrEmpty(consequenceType.getEnsemblGeneId())) { + xrefs.add(new Xref(consequenceType.getEnsemblGeneId(), "ensemblGene")); + } + if (!Strings.isNullOrEmpty(consequenceType.getEnsemblTranscriptId())) { + xrefs.add(new Xref(consequenceType.getEnsemblTranscriptId(), "ensemblTranscript")); + } } - public void generateXrefsFromConsequenceTypes() { - for (ConsequenceType consequenceType : 
consequenceTypes) { - if (!Strings.isNullOrEmpty(consequenceType.getGeneName())) { - xrefs.add(new Xref(consequenceType.getGeneName(), "HGNC")); - } - if (!Strings.isNullOrEmpty(consequenceType.getEnsemblGeneId())) { - xrefs.add(new Xref(consequenceType.getEnsemblGeneId(), "ensemblGene")); - } - if (!Strings.isNullOrEmpty(consequenceType.getEnsemblTranscriptId())) { - xrefs.add(new Xref(consequenceType.getEnsemblTranscriptId(), "ensemblTranscript")); - } - } + public static String buildAnnotationId(String chromosome, int start, String reference, String alternate, + String vepVersion, String vepCacheVersion) { + StringBuilder builder = new StringBuilder(Variant.buildVariantId(chromosome, start, reference, alternate)); + builder.append("_"); + builder.append(vepVersion); + builder.append("_"); + builder.append(vepCacheVersion); + return builder.toString(); } + /** + * Builds the variant id from the current annotation id. In essence we remove the two extra fields added at the end + * of the id and the underscores. 
+ * @return + */ + public String buildVariantId() { + return getId().substring(0, getId().length() - vepVersion.length() -vepCacheVersion.length() -2); + } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java index 533d657a9..2db5b51a9 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java @@ -35,7 +35,8 @@ public class AnnotationReaderConfiguration { @Bean(VARIANT_ANNOTATION_READER) @StepScope public ItemStreamReader annotationReader(AnnotationParameters annotationParameters) { - return new AnnotationFlatFileReader(annotationParameters.getVepOutput()); + return new AnnotationFlatFileReader(annotationParameters.getVepOutput(), annotationParameters.getVepVersion(), + annotationParameters.getVepCacheVersion()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java index afed5b0a2..28b27ff67 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java @@ -53,6 +53,14 @@ public class AnnotationLineMapper implements LineMapper { private static final Logger logger = LoggerFactory.getLogger(AnnotationLineMapper.class); + private final String vepVersion; + private final String vepCacheVersion; + + public AnnotationLineMapper(String vepVersion, String vepCacheVersion) { + this.vepVersion = vepVersion; + this.vepCacheVersion = vepCacheVersion; + } + /** * Map a line in VEP output file to {@link Annotation} * @param line in VEP output @@ -75,7 +83,9 @@ public Annotation mapLine(String line, int lineNumber) { variantMap.get("chromosome"), 
Integer.valueOf(variantMap.get("start")), Integer.valueOf(variantMap.get("end")), variantMap.get("reference"), - variantMap.get("alternative")); + variantMap.get("alternative"), + vepVersion, + vepCacheVersion); /** * parses extra column and populates fields as required. @@ -92,7 +102,7 @@ public Annotation mapLine(String line, int lineNumber) { } else { consequenceType.setSoAccessions(mapSoTermsToSoAccessions(lineFields[6].split(","))); } - currentAnnotation.getConsequenceTypes().add(consequenceType); + currentAnnotation.addConsequenceType(consequenceType); return currentAnnotation; } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java index a12bfb6a1..d28ef5739 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java @@ -36,13 +36,13 @@ */ public class AnnotationFlatFileReader extends FlatFileItemReader { - public AnnotationFlatFileReader(File file) { + public AnnotationFlatFileReader(File file, String vepVersion, String vepCacheVersion) { Resource resource = new GzipLazyResource(file); setResource(resource); - setLineMapper(new AnnotationLineMapper()); + setLineMapper(new AnnotationLineMapper(vepVersion, vepCacheVersion)); } - public AnnotationFlatFileReader(String string) { - this(new File(string)); + public AnnotationFlatFileReader(String string, String vepVersion, String vepCacheVersion) { + this(new File(string), vepVersion, vepCacheVersion); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 6eaf85a24..698faff89 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ 
b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -31,7 +31,6 @@ import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; -import uk.ac.ebi.eva.utils.MongoDBHelper; import java.util.ArrayList; import java.util.Arrays; @@ -99,9 +98,10 @@ public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, this.vepCacheVersion = vepCacheVersion; } + // TODO rewrite this to be bulk-friendly @Override protected void doWrite(List annotations) { - Map> annotationsByStorageId = groupAnnotationById(annotations); + Map> annotationsByStorageId = groupAnnotationByVariantId(annotations); for (Map.Entry> annotationsIdEntry : annotationsByStorageId.entrySet()) { VariantAnnotation variantAnnotation = extractFieldsFromAnnotations(annotationsIdEntry.getValue()); @@ -202,7 +202,6 @@ private VariantAnnotation extractFieldsFromAnnotations(List annotati VariantAnnotation variantAnnotation = new VariantAnnotation(vepVersion, vepCacheVersion); for (Annotation annotation : annotations) { - annotation.generateXrefsFromConsequenceTypes(); Set xrefs = annotation.getXrefs(); if (xrefs != null) { variantAnnotation.addXrefIds(xrefs.stream().map(Xref::getId).collect(Collectors.toSet())); @@ -243,16 +242,15 @@ private Set calculateRangeOfScores(Set scores) { } } - private Map> groupAnnotationById(List annotations) { - Map> annotationsByStorageId = new HashMap<>(); + private Map> groupAnnotationByVariantId(List annotations) { + Map> annotationsByVariantId = new HashMap<>(); for (Annotation annotation : annotations) { - String id = MongoDBHelper.buildVariantStorageId(annotation); - - annotationsByStorageId.putIfAbsent(id, new ArrayList<>()); - annotationsByStorageId.get(id).add(annotation); + String id = annotation.buildVariantId(); + annotationsByVariantId.putIfAbsent(id, new ArrayList<>()); + 
annotationsByVariantId.get(id).add(annotation); } - return annotationsByStorageId; + return annotationsByVariantId; } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index 499cf9e89..8aa7871ca 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -105,13 +105,6 @@ protected void doWrite(List annotations) { annotation = concatenateOtherAnnotations( annotation, annotationsById.subList(1, annotationsById.size())); } - - annotation.setId(storageId); - annotation.setVepVersion(vepVersion); - annotation.setVepCacheVersion(vepCacheVersion); - - annotation.generateXrefsFromConsequenceTypes(); - writeAnnotationInMongoDb(storageId, annotation); } } @@ -119,8 +112,7 @@ protected void doWrite(List annotations) { private Map> groupAnnotationById(List annotations) { Map> annotationsByStorageId = new HashMap<>(); for (Annotation annotation : annotations) { - String id = buildAnnotationtorageId(annotation); - + String id = annotation.getId(); annotationsByStorageId.putIfAbsent(id, new ArrayList<>()); annotationsByStorageId.get(id).add(annotation); } @@ -138,10 +130,9 @@ private Map> groupAnnotationById(List otherAnnotationsToConcatenate) { - for (Annotation annotationToAppend : otherAnnotationsToConcatenate) { if (annotationToAppend.getConsequenceTypes() != null) { - annotation.getConsequenceTypes().addAll(annotationToAppend.getConsequenceTypes()); + annotation.addConsequenceTypes(annotationToAppend.getConsequenceTypes()); } } @@ -167,13 +158,6 @@ private void writeAnnotationInMongoDb(String storageId, Annotation annotation) { } } - private String buildAnnotationtorageId(Annotation annotation) { - return MongoDBHelper.buildAnnotationStorageId(annotation.getChromosome(), annotation.getStart(), - annotation.getReferenceAllele(), - 
annotation.getAlternativeAllele(), vepVersion, - vepCacheVersion); - } - private void createIndexes() { mongoOperations.getCollection(collection).createIndex( new BasicDBObject(ANNOTATION_XREF_ID_FIELD, 1), diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java index 1778e8311..95bb9050e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java @@ -75,8 +75,8 @@ private void initializeConverters(boolean includeStats, boolean includeSamples) protected void doWrite(List variants) { BulkWriteOperation bulk = mongoOperations.getCollection(collection).initializeUnorderedBulkOperation(); for (Variant variant : variants) { - String id = MongoDBHelper.buildVariantStorageId(variant.getChromosome(), variant.getStart(), - variant.getReference(), variant.getAlternate()); + String id = Variant.buildVariantId(variant.getChromosome(), variant.getStart(), + variant.getReference(), variant.getAlternate()); // the chromosome and start appear just as shard keys, in an unsharded cluster they wouldn't be needed BasicDBObject query = new BasicDBObject("_id", id) @@ -124,8 +124,6 @@ private DBObject generateUpdate(Variant variant) { Assert.notNull(variant, "Variant should not be null. 
Please provide a valid Variant object"); logger.trace("Convert variant {} into mongo object", variant); - variant.setAnnotation(null); - BasicDBObject addToSet = new BasicDBObject(); if (!variant.getSourceEntries().isEmpty()) { diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java b/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java index d0189499a..1b07c4aeb 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java @@ -63,8 +63,6 @@ public DBObject convert(Variant variant) { Assert.notNull(variant, "Variant should not be null. Please provide a valid Variant object"); logger.trace("Convert variant {} into mongo object", variant); - variant.setAnnotation(null); - VariantSourceEntry variantSourceEntry = variant.getSourceEntries().values().iterator().next(); BasicDBObject addToSet = new BasicDBObject().append(VariantToDBObjectConverter.FILES_FIELD, diff --git a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java index 205d8117e..c205d50b9 100644 --- a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java +++ b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java @@ -52,56 +52,4 @@ public static List parseServerAddresses(String hosts) throws Unkn return serverAddresses; } - public static String buildVariantStorageId(Variant v) { - return buildVariantStorageId(v.getChromosome(), v.getStart(), v.getReference(), v.getAlternate()); - } - - public static String buildVariantStorageId(Annotation va) { - return buildVariantStorageId(va.getChromosome(), va.getStart(), va.getReferenceAllele(), va.getAlternativeAllele()); - } - - /** - * From org.opencb.opencga.storage.mongodb.variant.VariantToDBObjectConverter - * #buildVariantStorageId(java.lang.String, int, java.lang.String, 
java.lang.String) - */ - public static String buildVariantStorageId(String chromosome, int start, String reference, String alternate) { - StringBuilder builder = new StringBuilder(chromosome); - builder.append("_"); - builder.append(start); - builder.append("_"); - if (!reference.equals("-")) { - if (reference.length() < 50) { - builder.append(reference); - } else { - builder.append(new String(CryptoUtils.encryptSha1(reference))); - } - } - - builder.append("_"); - if (!alternate.equals("-")) { - if (alternate.length() < 50) { - builder.append(alternate); - } else { - builder.append(new String(CryptoUtils.encryptSha1(alternate))); - } - } - - return builder.toString(); - } - - public static String buildAnnotationStorageId(String chromosome, - int start, - String reference, - String alternate, - String vepVersion, - String vepCacheVersion) { - StringBuilder builder = new StringBuilder(buildVariantStorageId(chromosome, start, reference, alternate)); - builder.append("_"); - builder.append(vepVersion); - builder.append("_"); - builder.append(vepCacheVersion); - - return builder.toString(); - } - } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java index d5d2f1ab8..55dca8f54 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java @@ -35,9 +35,13 @@ */ public class AnnotationLineMapperTest { + private static final String VEP_VERSION = "1"; + + private static final String VEP_CACHE_VERSION = "1"; + @Test public void shouldParseAllDefaultFieldsInVepOutput() throws Exception { - AnnotationLineMapper lineMapper = new AnnotationLineMapper(); + AnnotationLineMapper lineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); for (String annotLine : VepOutputContent.vepOutputContent.split("\n")) { Annotation annotation = 
lineMapper.mapLine(annotLine, 0); assertNotNull(annotation.getConsequenceTypes()); @@ -46,7 +50,7 @@ public void shouldParseAllDefaultFieldsInVepOutput() throws Exception { @Test public void shouldParseAllTranscriptFieldsInVepOutput() { - AnnotationLineMapper lineMapper = new AnnotationLineMapper(); + AnnotationLineMapper lineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); Annotation annotation = lineMapper.mapLine(VepOutputContent.vepOutputContentTranscriptFields, 0); Set consequenceTypes = annotation.getConsequenceTypes(); @@ -64,7 +68,7 @@ public void shouldParseAllTranscriptFieldsInVepOutput() { @Test public void shouldParseVepOutputWithoutTranscript() { - AnnotationLineMapper lineMapper = new AnnotationLineMapper(); + AnnotationLineMapper lineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); Annotation annotation = lineMapper.mapLine(VepOutputContent.vepOutputContentWithOutTranscript, 0); Set consequenceTypes = annotation.getConsequenceTypes(); @@ -83,13 +87,13 @@ public void shouldParseVepOutputWithoutTranscript() { @Test(expected = ArrayIndexOutOfBoundsException.class) public void shouldNotParseVepOutputWithMalformedCoordinates() { - AnnotationLineMapper lineMapper = new AnnotationLineMapper(); + AnnotationLineMapper lineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); lineMapper.mapLine(VepOutputContent.vepOutputContentMalformedCoordinates, 0); } @Test public void shouldParseVepOutputWithChromosomeIdWithUnderscore() { - AnnotationLineMapper lineMapper = new AnnotationLineMapper(); + AnnotationLineMapper lineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); Annotation annotation = lineMapper .mapLine(VepOutputContent.vepOutputContentChromosomeIdWithUnderscore, 0); @@ -98,13 +102,13 @@ public void shouldParseVepOutputWithChromosomeIdWithUnderscore() { @Test(expected = ArrayIndexOutOfBoundsException.class) public void shouldNotParseVepOutputWithMalformedVariantFields() { - 
AnnotationLineMapper lineMapper = new AnnotationLineMapper(); + AnnotationLineMapper lineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); lineMapper.mapLine(VepOutputContent.vepOutputContentMalformedVariantFields, 0); } @Test public void shouldParseVepOutputWithExtraFields() { - AnnotationLineMapper lineMapper = new AnnotationLineMapper(); + AnnotationLineMapper lineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); Annotation annotation = lineMapper.mapLine(VepOutputContent.vepOutputContentWithExtraFieldsSingleAnnotation, 0); Set consequenceTypes = annotation.getConsequenceTypes(); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java index 06176ab07..ae68277e3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java @@ -41,6 +41,10 @@ */ public class AnnotationFlatFileReaderTest { + private static final String VEP_VERSION = "1"; + + private static final String VEP_CACHE_VERSION = "1"; + @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -51,7 +55,8 @@ public void shouldReadAllLinesInVepOutput() throws Exception { //simulate VEP output file File file = temporaryFolderRule.newGzipFile(VepOutputContent.vepOutputContent); - AnnotationFlatFileReader annotationFlatFileReader = new AnnotationFlatFileReader(file); + AnnotationFlatFileReader annotationFlatFileReader = new AnnotationFlatFileReader(file, VEP_VERSION, + VEP_CACHE_VERSION); annotationFlatFileReader.setSaveState(false); annotationFlatFileReader.open(executionContext); @@ -78,7 +83,8 @@ public void malformedCoordinatesAnnotationLinesShouldBeSkipped() throws Exceptio ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); File file = 
temporaryFolderRule.newGzipFile(VepOutputContent.vepOutputContentMalformedCoordinates); - AnnotationFlatFileReader annotationFlatFileReader = new AnnotationFlatFileReader(file); + AnnotationFlatFileReader annotationFlatFileReader = new AnnotationFlatFileReader(file, VEP_VERSION, + VEP_CACHE_VERSION); annotationFlatFileReader.open(executionContext); annotationFlatFileReader.read(); } @@ -89,7 +95,8 @@ public void malformedVariantFieldsAnnotationLinesShouldBeSkipped() throws Except ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); File file = temporaryFolderRule.newGzipFile(VepOutputContent.vepOutputContentMalformedVariantFields); - AnnotationFlatFileReader annotationFlatFileReader = new AnnotationFlatFileReader(file); + AnnotationFlatFileReader annotationFlatFileReader = new AnnotationFlatFileReader(file, VEP_VERSION, + VEP_CACHE_VERSION); annotationFlatFileReader.open(executionContext); annotationFlatFileReader.read(); } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index 42ee28ca4..862e0f6f3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -83,7 +83,7 @@ public class AnnotationInVariantMongoWriterTest { @Before public void setUp() throws Exception { - AnnotationLineMapper = new AnnotationLineMapper(); + AnnotationLineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); } @Test diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index 47a15477a..01cf2ef6b 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ 
b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -95,7 +95,7 @@ public class AnnotationMongoWriterTest { @Before public void setUp() throws Exception { - AnnotationLineMapper = new AnnotationLineMapper(); + AnnotationLineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); } @Test @@ -188,7 +188,8 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce public void shouldWriteSubstitutionScoresIntoMongoDb() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - Annotation annotation = new Annotation("X", 1, 10, "A", "T"); + Annotation annotation = new Annotation("X", 1, 10, "A", "T", + VEP_VERSION, VEP_CACHE_VERSION); Score siftScore = new Score(0.02, "deleterious"); Score polyphenScore = new Score(0.846, "possibly_damaging"); @@ -197,7 +198,7 @@ public void shouldWriteSubstitutionScoresIntoMongoDb() throws Exception { consequenceType.setSift(siftScore); consequenceType.setPolyphen(polyphenScore); - annotation.setConsequenceTypes(new HashSet<>(Collections.singletonList(consequenceType))); + annotation.addConsequenceType(consequenceType); MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, mongoMappingContext); diff --git a/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java b/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java index 17ea4c57a..debbadc93 100644 --- a/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java +++ b/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java @@ -12,19 +12,19 @@ public class MongoDBHelperTest { @Test public void testBuildStorageIdSnv() { Variant variant = new Variant("1", 1000, 1000, "A", "C"); - assertEquals("1_1000_A_C", MongoDBHelper.buildVariantStorageId(variant)); + assertEquals("1_1000_A_C", variant.buildVariantId()); } @Test public void testBuildStorageIdIndel() { Variant variant = new Variant("1", 1000, 1002, "", "CA"); - assertEquals("1_1000__CA", 
MongoDBHelper.buildVariantStorageId(variant)); + assertEquals("1_1000__CA", variant.buildVariantId()); } @Test public void testBuildStorageIdStructural() { String alt = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT"; Variant variant = new Variant("1", 1000, 1002, "TAG", alt); - assertEquals("1_1000_TAG_" + new String(CryptoUtils.encryptSha1(alt)), MongoDBHelper.buildVariantStorageId(variant)); + assertEquals("1_1000_TAG_" + new String(CryptoUtils.encryptSha1(alt)), variant.buildVariantId()); } } From 7366188e34db4c9727c53ab3f92c5431dc6199b3 Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 9 May 2017 11:11:35 +0100 Subject: [PATCH 10/48] Remove document annotation from subdocuments. --- .../models/mongo/documents/subdocuments/ConsequenceType.java | 2 -- .../eva/commons/models/mongo/documents/subdocuments/Score.java | 2 -- .../eva/commons/models/mongo/documents/subdocuments/Xref.java | 2 -- 3 files changed, 6 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java index dd2a84f06..10c3659ae 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java @@ -16,7 +16,6 @@ package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; -import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; import java.util.Set; @@ -24,7 +23,6 @@ /** * org.opencb.biodata.models.variant.annotation.ConsequenceType */ -@Document public class ConsequenceType { private static final String GENE_NAME_FIELD = "gn"; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java index 
34a72fcac..131ac787f 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java @@ -16,13 +16,11 @@ package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; -import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; /** * From {@link org.opencb.biodata.models.variant.annotation.Score} */ -@Document public class Score { public final static String SCORE_SCORE_FIELD = "sc"; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java index ffbf3c650..360c72d9c 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java @@ -15,13 +15,11 @@ */ package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; -import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; /** * From org.opencb.biodata.models.variant.annotation.Xref */ -@Document public class Xref { private final static String XREF_ID_FIELD = "id"; From cf1e9e230be1e6274ea770affeb6975d5d41f4f0 Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 9 May 2017 11:20:34 +0100 Subject: [PATCH 11/48] Changed interface required for injection in annotation loader step. 
--- .../ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java index d4f3930af..efbc23650 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java @@ -22,8 +22,8 @@ import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.core.configuration.annotation.StepBuilderFactory; import org.springframework.batch.item.ItemStreamReader; +import org.springframework.batch.item.ItemWriter; import org.springframework.batch.item.file.FlatFileParseException; -import org.springframework.batch.item.support.CompositeItemWriter; import org.springframework.batch.repeat.policy.SimpleCompletionPolicy; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; @@ -72,7 +72,7 @@ public class AnnotationLoaderStep { @Autowired @Qualifier(COMPOSITE_ANNOTATION_VARIANT_WRITER) - private CompositeItemWriter compositeAnnotationVariantItemWriter; + private ItemWriter compositeAnnotationVariantItemWriter; @Bean(LOAD_VEP_ANNOTATION_STEP) public Step loadVepAnnotationStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, From 8fe1e19da95906a66d7524e0b4162e6063e7ab49 Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 9 May 2017 11:40:27 +0100 Subject: [PATCH 12/48] Removed unused parameters in writer. 
--- .../ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index 8aa7871ca..3e5373489 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -68,10 +68,6 @@ public class AnnotationMongoWriter extends MongoItemWriter { private String collection; - private String vepVersion; - - private String vepCacheVersion; - public AnnotationMongoWriter(MongoOperations mongoOperations, String collection, String vepVersion, @@ -85,8 +81,6 @@ public AnnotationMongoWriter(MongoOperations mongoOperations, this.mongoOperations = mongoOperations; this.collection = collection; - this.vepVersion = vepVersion; - this.vepCacheVersion = vepCacheVersion; createIndexes(); } From 6b5b5b35331e6135c03a3fc137aab26ca22e6980 Mon Sep 17 00:00:00 2001 From: jorizci Date: Wed, 10 May 2017 11:10:10 +0100 Subject: [PATCH 13/48] Refactored writer to do operations in bulk. 
--- ...notationToSimplifiedDBObjectConverter.java | 42 +++++++++ .../models/mongo/documents/Annotation.java | 19 ++-- .../AnnotationWriterConfiguration.java | 2 +- .../io/writers/AnnotationMongoWriter.java | 89 ++++++++----------- 4 files changed, 93 insertions(+), 59 deletions(-) create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java new file mode 100644 index 000000000..badce9334 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java @@ -0,0 +1,42 @@ +/* + * Copyright 2014-2016 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.commons.models.converters.data; + +import com.mongodb.BasicDBObject; +import com.mongodb.DBObject; +import org.springframework.core.convert.converter.Converter; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; + +/** + * Class to convert a Annotation document to a DBObject that contains a simplified version of the document. + * This is done to aggregate the different consequences of the same Annotation in the database itself. It is not a + * complete converter of the Annotation object. 
+ */ +public class AnnotationToSimplifiedDBObjectConverter implements Converter { + + @Override + public DBObject convert(Annotation source) { + DBObject dbObject = new BasicDBObject(); + dbObject.put("_id",source.getId()); + dbObject.put(Annotation.CHROMOSOME_FIELD,source.getChromosome()); + dbObject.put(Annotation.START_FIELD,source.getStart()); + dbObject.put(Annotation.END_FIELD,source.getEnd()); + dbObject.put(Annotation.VEP_VERSION_FIELD,source.getVepVersion()); + dbObject.put(Annotation.VEP_CACHE_VERSION_FIELD,source.getVepCacheVersion()); + return dbObject; + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java index 6c50f08cd..7fd808431 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java @@ -34,20 +34,20 @@ @Document public class Annotation { - private static final String CHROMOSOME_FIELD = "chr"; + public static final String CHROMOSOME_FIELD = "chr"; - private static final String START_FIELD = "start"; + public static final String START_FIELD = "start"; - private static final String END_FIELD = "end"; + public static final String END_FIELD = "end"; - public static final String XREFS_FIELD = "xrefs"; - - private static final String VEP_VERSION_FIELD = "vepVer"; + public static final String VEP_VERSION_FIELD = "vepVer"; public static final String VEP_CACHE_VERSION_FIELD = "cacheVer"; public static final String CONSEQUENCE_TYPE_FIELD = "ct"; + public static final String XREFS_FIELD = "xrefs"; + @Id private String id; @@ -162,4 +162,11 @@ public static String buildAnnotationId(String chromosome, int start, String refe public String buildVariantId() { return getId().substring(0, getId().length() - vepVersion.length() -vepCacheVersion.length() -2); } + + public Annotation concatenate(Annotation annotation) { + 
if(annotation.getConsequenceTypes()!=null){ + addConsequenceTypes(annotation.getConsequenceTypes()); + } + return this; + } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java index 707da36b6..7f4d9d2ef 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java @@ -21,7 +21,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Profile; import org.springframework.data.mongodb.core.MongoOperations; - +import org.springframework.data.mongodb.core.MongoTemplate; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.io.writers.AnnotationMongoWriter; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index 3e5373489..d3fbb11d4 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -17,17 +17,22 @@ package uk.ac.ebi.eva.pipeline.io.writers; import com.mongodb.BasicDBObject; +import com.mongodb.BulkWriteOperation; +import com.mongodb.DBObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.item.data.MongoItemWriter; +import org.springframework.data.mongodb.core.BulkOperations; import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.data.mongodb.core.query.BasicQuery; import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; +import 
uk.ac.ebi.eva.commons.models.converters.data.AnnotationToSimplifiedDBObjectConverter; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; import uk.ac.ebi.eva.utils.MongoDBHelper; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -68,6 +73,8 @@ public class AnnotationMongoWriter extends MongoItemWriter { private String collection; + private final AnnotationToSimplifiedDBObjectConverter converter; + public AnnotationMongoWriter(MongoOperations mongoOperations, String collection, String vepVersion, @@ -75,80 +82,58 @@ public AnnotationMongoWriter(MongoOperations mongoOperations, super(); Assert.notNull(mongoOperations, "A Mongo instance is required"); Assert.hasText(collection, "A collection name is required"); - - setCollection(collection); - setTemplate(mongoOperations); + converter = new AnnotationToSimplifiedDBObjectConverter(); this.mongoOperations = mongoOperations; this.collection = collection; + setCollection(collection); + setTemplate(mongoOperations); createIndexes(); } @Override protected void doWrite(List annotations) { - Map> annotationsByStorageId = groupAnnotationById(annotations); + BulkOperations bulk = mongoOperations.bulkOps(BulkOperations.BulkMode.UNORDERED, collection); + System.out.println(mongoOperations.getConverter().getClass()); - for (Map.Entry> annotationsIdEntry : annotationsByStorageId.entrySet()) { - String storageId = annotationsIdEntry.getKey(); - List annotationsById = annotationsIdEntry.getValue(); - - Annotation annotation = annotationsById.get(0); - - if (annotationsById.size() > 1) { - annotation = concatenateOtherAnnotations( - annotation, annotationsById.subList(1, annotationsById.size())); - } - writeAnnotationInMongoDb(storageId, annotation); + Map annotationsByStorageId = groupAnnotationById(annotations); + for (Annotation annotation : annotationsByStorageId.values()) { + 
writeAnnotationInMongoDb(bulk, annotation); } + executeBulk(bulk, annotationsByStorageId.size()); } - private Map> groupAnnotationById(List annotations) { - Map> annotationsByStorageId = new HashMap<>(); + private Map groupAnnotationById(List annotations) { + Map groupedAnnotations = new HashMap<>(); for (Annotation annotation : annotations) { String id = annotation.getId(); - annotationsByStorageId.putIfAbsent(id, new ArrayList<>()); - annotationsByStorageId.get(id).add(annotation); + groupedAnnotations.computeIfPresent(id, (key, oldVar) -> oldVar.concatenate(annotation)); + groupedAnnotations.putIfAbsent(id, annotation); } - - return annotationsByStorageId; + return groupedAnnotations; } - /** - * Append multiple annotation into a single {@link Annotation} - * Updated fields are ConsequenceTypes and Hgvs - * - * @param annotation annotation where other annotations will be appended - * @param otherAnnotationsToConcatenate annotations to be appended - * @return a single {@link Annotation} ready to be persisted - */ - private Annotation concatenateOtherAnnotations(Annotation annotation, - List otherAnnotationsToConcatenate) { - for (Annotation annotationToAppend : otherAnnotationsToConcatenate) { - if (annotationToAppend.getConsequenceTypes() != null) { - annotation.addConsequenceTypes(annotationToAppend.getConsequenceTypes()); - } - } + private void writeAnnotationInMongoDb(BulkOperations bulk, Annotation annotation) { + logger.trace("Writing annotations into mongo id: {}", annotation.getId()); - return annotation; - } + DBObject convertedSimplifiedAnnotation = converter.convert(annotation); + final BasicDBObject addToSetValue = new BasicDBObject(); - private void writeAnnotationInMongoDb(String storageId, Annotation annotation) { - logger.trace("Writing annotations into mongo id: {}", storageId); + addToSetValue.append(CONSEQUENCE_TYPE_FIELD, new BasicDBObject("$each", + mongoOperations.getConverter().convertToMongoType(annotation.getConsequenceTypes()))); + 
addToSetValue.append(XREFS_FIELD, new BasicDBObject("$each", + mongoOperations.getConverter().convertToMongoType(annotation.getXrefs()))); - BasicDBObject id = new BasicDBObject("_id", storageId); + BasicDBObject update = new BasicDBObject("$addToSet", addToSetValue); - if (mongoOperations.exists(new BasicQuery(id), collection)) { - BasicDBObject updateConsequenceTypes = new BasicDBObject("$addToSet", - new BasicDBObject(CONSEQUENCE_TYPE_FIELD, - new BasicDBObject("$each",annotation.getConsequenceTypes()))); - BasicDBObject updateXrefs = new BasicDBObject("$addToSet", - new BasicDBObject(XREFS_FIELD, new BasicDBObject("$each", annotation.getXrefs()))); + bulk.upsert(new BasicQuery(convertedSimplifiedAnnotation),new BasicUpdate(update)); + } - mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateConsequenceTypes), collection); - mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateXrefs), collection); - } else { - mongoOperations.save(annotation, collection); + private void executeBulk(BulkOperations bulk, int currentBulkSize) { + if (currentBulkSize != 0) { + logger.trace("Execute mongo bulk. BulkSize : " + currentBulkSize); + bulk.execute(); } } From 2695c9ae1f080263c5c8a9c267749131490ab35c Mon Sep 17 00:00:00 2001 From: jorizci Date: Wed, 10 May 2017 11:50:07 +0100 Subject: [PATCH 14/48] Refactorization of AnnotationMongoWriter -AnnotationMongoWriter now extends ItemWriter. -Code cleaning. 
--- .../mongo/documents/subdocuments/Xref.java | 2 +- .../AnnotationWriterConfiguration.java | 7 +-- .../io/writers/AnnotationMongoWriter.java | 56 ++++++++++--------- .../io/writers/AnnotationMongoWriterTest.java | 12 ++-- 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java index 360c72d9c..b583a347c 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java @@ -22,7 +22,7 @@ */ public class Xref { - private final static String XREF_ID_FIELD = "id"; + public final static String XREF_ID_FIELD = "id"; private final static String XREF_SOURCE_FIELD = "src"; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java index 7f4d9d2ef..7d658df39 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java @@ -37,10 +37,7 @@ public class AnnotationWriterConfiguration { @StepScope @Profile(Application.VARIANT_ANNOTATION_MONGO_PROFILE) public ItemWriter annotationItemWriter(MongoOperations mongoOperations, - DatabaseParameters databaseParameters, - AnnotationParameters annotationParameters) { - return new AnnotationMongoWriter(mongoOperations, databaseParameters.getCollectionAnnotationsName(), - annotationParameters.getVepVersion(), - annotationParameters.getVepCacheVersion()); + DatabaseParameters databaseParameters) { + return new AnnotationMongoWriter(mongoOperations, databaseParameters.getCollectionAnnotationsName()); } } diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index d3fbb11d4..e4da776e4 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -17,20 +17,20 @@ package uk.ac.ebi.eva.pipeline.io.writers; import com.mongodb.BasicDBObject; -import com.mongodb.BulkWriteOperation; -import com.mongodb.DBObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.batch.item.data.MongoItemWriter; +import org.springframework.batch.item.ItemWriter; import org.springframework.data.mongodb.core.BulkOperations; import org.springframework.data.mongodb.core.MongoOperations; -import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.data.mongodb.core.query.BasicQuery; import org.springframework.data.mongodb.core.query.BasicUpdate; +import org.springframework.data.mongodb.core.query.Query; +import org.springframework.data.mongodb.core.query.Update; import org.springframework.util.Assert; import uk.ac.ebi.eva.commons.models.converters.data.AnnotationToSimplifiedDBObjectConverter; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; import uk.ac.ebi.eva.utils.MongoDBHelper; import java.util.HashMap; @@ -62,38 +62,33 @@ * { "id" : "ENST00000608838", "src" : "ensemblTranscript" }, * { "id" : "ENSG00000178591", "src" : "ensemblGene" */ -public class AnnotationMongoWriter extends MongoItemWriter { +public class AnnotationMongoWriter implements ItemWriter { private static final Logger logger = LoggerFactory.getLogger(AnnotationMongoWriter.class); - private static final String ANNOTATION_XREF_ID_FIELD = "xrefs.id"; + private static final String 
ANNOTATION_XREF_ID_FIELD = Annotation.XREFS_FIELD + "." + Xref.XREF_ID_FIELD; - private static final String ANNOTATION_CT_SO_FIELD = "ct.so"; + private static final String ANNOTATION_CT_SO_FIELD = Annotation.CONSEQUENCE_TYPE_FIELD + "." + + ConsequenceType.SO_ACCESSION_FIELD; - private MongoOperations mongoOperations; + private final MongoOperations mongoOperations; - private String collection; + private final String collection; private final AnnotationToSimplifiedDBObjectConverter converter; - public AnnotationMongoWriter(MongoOperations mongoOperations, - String collection, - String vepVersion, - String vepCacheVersion) { + public AnnotationMongoWriter(MongoOperations mongoOperations, String collection) { super(); Assert.notNull(mongoOperations, "A Mongo instance is required"); Assert.hasText(collection, "A collection name is required"); converter = new AnnotationToSimplifiedDBObjectConverter(); - this.mongoOperations = mongoOperations; this.collection = collection; - setCollection(collection); - setTemplate(mongoOperations); createIndexes(); } @Override - protected void doWrite(List annotations) { + public void write(List annotations) throws Exception { BulkOperations bulk = mongoOperations.bulkOps(BulkOperations.BulkMode.UNORDERED, collection); System.out.println(mongoOperations.getConverter().getClass()); @@ -115,19 +110,30 @@ private Map groupAnnotationById(List a } private void writeAnnotationInMongoDb(BulkOperations bulk, Annotation annotation) { - logger.trace("Writing annotations into mongo id: {}", annotation.getId()); + logger.trace("Adding annotations into mongo bulk id: {}", annotation.getId()); + + Query upsertQuery = new BasicQuery(converter.convert(annotation)); + Update update = buildUpdateQuery(annotation); + bulk.upsert(upsertQuery, update); + } - DBObject convertedSimplifiedAnnotation = converter.convert(annotation); + private BasicUpdate buildUpdateQuery(Annotation annotation) { final BasicDBObject addToSetValue = new BasicDBObject(); + 
addToSetValue.append(CONSEQUENCE_TYPE_FIELD, buildInsertConsequenceTypeQuery(annotation)); + addToSetValue.append(XREFS_FIELD, buildInsertXrefsQuery(annotation)); + return new BasicUpdate(new BasicDBObject("$addToSet", addToSetValue)); + } - addToSetValue.append(CONSEQUENCE_TYPE_FIELD, new BasicDBObject("$each", - mongoOperations.getConverter().convertToMongoType(annotation.getConsequenceTypes()))); - addToSetValue.append(XREFS_FIELD, new BasicDBObject("$each", - mongoOperations.getConverter().convertToMongoType(annotation.getXrefs()))); + private BasicDBObject buildInsertXrefsQuery(Annotation annotation) { + return new BasicDBObject("$each", convertToMongo(annotation.getXrefs())); + } - BasicDBObject update = new BasicDBObject("$addToSet", addToSetValue); + private BasicDBObject buildInsertConsequenceTypeQuery(Annotation annotation) { + return new BasicDBObject("$each", convertToMongo(annotation.getConsequenceTypes())); + } - bulk.upsert(new BasicQuery(convertedSimplifiedAnnotation),new BasicUpdate(update)); + private Object convertToMongo(Object object) { + return mongoOperations.getConverter().convertToMongoType(object); } private void executeBulk(BulkOperations bulk, int currentBulkSize) { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index 01cf2ef6b..e4ba2ca5a 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -74,6 +74,7 @@ @TestPropertySource({"classpath:test-mongo.properties"}) @ContextConfiguration(classes = {MongoConnection.class, MongoMappingContext.class}) public class AnnotationMongoWriterTest { + private static final String COLLECTION_ANNOTATIONS_NAME = "annotations"; private static final String VEP_VERSION = "1"; @@ -110,8 +111,7 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { // 
load the annotation MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, mongoMappingContext); - annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, - VEP_CACHE_VERSION); + annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME); annotationWriter.write(annotations); // and finally check that documents in annotation collection have annotations @@ -163,8 +163,7 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce // load the annotation MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, mongoMappingContext); - annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, - VEP_CACHE_VERSION); + annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME); annotationWriter.write(annotationSet1); annotationWriter.write(annotationSet2); @@ -202,8 +201,7 @@ public void shouldWriteSubstitutionScoresIntoMongoDb() throws Exception { MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, mongoMappingContext); - annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, - VEP_CACHE_VERSION); + annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME); annotationWriter.write(Collections.singletonList(annotation)); @@ -234,7 +232,7 @@ public void indexesShouldBeCreatedInBackground() throws UnknownHostException { MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, mongoMappingContext); DBCollection dbCollection = mongoOperations.getCollection(COLLECTION_ANNOTATIONS_NAME); - AnnotationMongoWriter writer = new AnnotationMongoWriter(mongoOperations, COLLECTION_ANNOTATIONS_NAME, VEP_VERSION, VEP_CACHE_VERSION); + AnnotationMongoWriter writer = new 
AnnotationMongoWriter(mongoOperations, COLLECTION_ANNOTATIONS_NAME); List indexInfo = dbCollection.getIndexInfo(); From 77554355278ac7c546bfbb41f29187837aaae3fc Mon Sep 17 00:00:00 2001 From: jorizci Date: Wed, 10 May 2017 12:06:09 +0100 Subject: [PATCH 15/48] AnnotationInVariantMongoWriter now implements ItemWriter. --- .../io/writers/AnnotationInVariantMongoWriter.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 698faff89..2bd4cd2b8 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -20,16 +20,16 @@ import com.mongodb.DBObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.batch.item.data.MongoItemWriter; +import org.springframework.batch.item.ItemWriter; import org.springframework.data.mongodb.core.MongoOperations; import org.springframework.data.mongodb.core.query.BasicQuery; import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; import java.util.ArrayList; @@ -58,7 +58,7 @@ * - soAccessions * - Xref Ids */ -public class AnnotationInVariantMongoWriter extends MongoItemWriter { +public class AnnotationInVariantMongoWriter implements 
ItemWriter { private static final Logger logger = LoggerFactory.getLogger(AnnotationInVariantMongoWriter.class); private final MongoOperations mongoOperations; @@ -89,9 +89,6 @@ public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, Assert.notNull(mongoOperations, "A Mongo instance is required"); Assert.hasText(collection, "A collection name is required"); - setCollection(collection); - setTemplate(mongoOperations); - this.mongoOperations = mongoOperations; this.collection = collection; this.vepVersion = vepVersion; @@ -100,7 +97,7 @@ public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, // TODO rewrite this to be bulk-friendly @Override - protected void doWrite(List annotations) { + public void write(List annotations) throws Exception { Map> annotationsByStorageId = groupAnnotationByVariantId(annotations); for (Map.Entry> annotationsIdEntry : annotationsByStorageId.entrySet()) { From 5915fe1532cfba3be3a1d94f95bae66bc31b3c47 Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 16 May 2017 10:36:59 +0100 Subject: [PATCH 16/48] Modified AnnotationInVariantMongoWriter to write operations in batch. - Modified VariantAnnotation to have polyphen and sift as lists instead of sets. 
--- ...notationToSimplifiedDBObjectConverter.java | 4 +- .../models/data/VariantAnnotation.java | 116 ++++++++- .../AnnotationInVariantMongoWriter.java | 244 ++++++------------ .../io/writers/AnnotationMongoWriter.java | 30 +-- .../AnnotationInVariantMongoWriterTest.java | 4 +- 5 files changed, 208 insertions(+), 190 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java index badce9334..c828dfd37 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java @@ -27,10 +27,12 @@ */ public class AnnotationToSimplifiedDBObjectConverter implements Converter { + public static final String ID = "_id"; + @Override public DBObject convert(Annotation source) { DBObject dbObject = new BasicDBObject(); - dbObject.put("_id",source.getId()); + dbObject.put(ID,source.getId()); dbObject.put(Annotation.CHROMOSOME_FIELD,source.getChromosome()); dbObject.put(Annotation.START_FIELD,source.getStart()); dbObject.put(Annotation.END_FIELD,source.getEnd()); diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java index 41b5d7c81..07725340d 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java @@ -15,18 +15,25 @@ */ package uk.ac.ebi.eva.commons.models.data; +import org.springframework.data.annotation.Transient; import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; import org.springframework.util.Assert; +import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import 
uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; +import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Set; /** * Annotations of the genomic variation */ -@Document public class VariantAnnotation { public static final String VEP_VERSION_FIELD = "vepVer"; @@ -48,10 +55,10 @@ public class VariantAnnotation { private String vepCacheVersion; @Field(value = SIFT_FIELD) - private Set sifts = new HashSet<>(); + private List sifts = new ArrayList<>(); @Field(value = POLYPHEN_FIELD) - private Set polyphens = new HashSet<>(); + private List polyphens = new ArrayList<>(); @Field(value = SO_ACCESSION_FIELD) private Set soAccessions = new HashSet<>(); @@ -59,10 +66,14 @@ public class VariantAnnotation { @Field(value = XREFS_FIELD) private Set xrefIds = new HashSet<>(); + VariantAnnotation() { + // Spring empty constructor + } + /** * Variant annotation constructor. 
Requires non empty values, otherwise throws {@link IllegalArgumentException} * - * @param vepVersion non empty value required, otherwise throws {@link IllegalArgumentException} + * @param vepVersion non empty value required, otherwise throws {@link IllegalArgumentException} * @param vepCacheVersion non empty value required, otherwise throws {@link IllegalArgumentException} */ public VariantAnnotation(String vepVersion, String vepCacheVersion) { @@ -72,6 +83,87 @@ public VariantAnnotation(String vepVersion, String vepCacheVersion) { this.vepCacheVersion = vepCacheVersion; } + VariantAnnotation(VariantAnnotation variantAnnotation) { + this(variantAnnotation.getVepVersion(), variantAnnotation.getVepCacheVersion()); + doConcatenate(variantAnnotation); + } + + public VariantAnnotation(Annotation annotation) { + this(annotation.getVepVersion(), annotation.getVepCacheVersion()); + doConcatenate(annotation); + } + + private void doConcatenate(VariantAnnotation variantAnnotation) { + xrefIds.addAll(variantAnnotation.getXrefIds()); + for (Double siftLimit : variantAnnotation.getSifts()) { + concatenateSiftRange(siftLimit); + } + for (Double polyphenLimit : variantAnnotation.getPolyphens()) { + concatenatePolyphenRange(polyphenLimit); + } + soAccessions.addAll(variantAnnotation.getSoAccessions()); + } + + private void doConcatenate(Annotation annotation) { + for (Xref xref : annotation.getXrefs()) { + xrefIds.add(xref.getId()); + } + for (ConsequenceType consequenceType : annotation.getConsequenceTypes()) { + final Score sift = consequenceType.getSift(); + if (sift != null) { + concatenateSiftRange(sift.getScore()); + } + final Score polyphen = consequenceType.getPolyphen(); + if (polyphen != null) { + concatenatePolyphenRange(polyphen.getScore()); + } + final Set soAccessions = consequenceType.getSoAccessions(); + if (soAccessions != null) { + this.soAccessions.addAll(soAccessions); + } + } + } + + private Double maxOf(Collection collection) { + if (collection == null || 
collection.isEmpty()) { + return null; + } + return Collections.max(collection); + } + + private Double minOf(Collection collection) { + if (collection == null || collection.isEmpty()) { + return null; + } + return Collections.min(collection); + } + + private void concatenateRange(Collection collection, Double score) { + Double min = minOf(collection); + Double max = maxOf(collection); + if(min == null || max == null){ + setRange(collection, score, score); + } else if (score < min){ + setRange(collection, score, max); + } else if (score > max){ + setRange(collection, min, score); + } + } + + private void setRange(Collection collection, Double minScore, Double maxScore) { + collection.clear(); + collection.add(minScore); + collection.add(maxScore); + } + + private void concatenateSiftRange(Double score) { + concatenateRange(sifts, score); + } + + private void concatenatePolyphenRange(Double score) { + concatenateRange(polyphens, score); + } + public void addSift(Double sift) { this.sifts.add(sift); } @@ -96,11 +188,11 @@ public void addsoAccessions(Set soAccessions) { this.soAccessions.addAll(soAccessions); } - public Set getSifts() { + public List getSifts() { return sifts; } - public Set getPolyphens() { + public List getPolyphens() { return polyphens; } @@ -119,4 +211,16 @@ public String getVepVersion() { public String getVepCacheVersion() { return vepCacheVersion; } + + public VariantAnnotation concatenate(Annotation annotation) { + VariantAnnotation temp = new VariantAnnotation(this); + temp.doConcatenate(annotation); + return temp; + } + + public VariantAnnotation concatenate(VariantAnnotation annotation) { + VariantAnnotation temp = new VariantAnnotation(this); + temp.doConcatenate(annotation); + return temp; + } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 2bd4cd2b8..bd911985b 100644 --- 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -18,36 +18,23 @@ import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; import com.mongodb.DBObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.batch.item.ItemWriter; +import org.springframework.data.mongodb.core.BulkOperations; import org.springframework.data.mongodb.core.MongoOperations; import org.springframework.data.mongodb.core.query.BasicQuery; import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; +import static org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantConverter.ANNOTATION_FIELD; import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.VEP_VERSION_FIELD; -import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.XREFS_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.VEP_CACHE_VERSION_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType.SIFT_FIELD; -import static 
uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType.SO_ACCESSION_FIELD; /** * Update the {@link uk.ac.ebi.eva.commons.models.data.Variant} mongo document with {@link VariantAnnotation} @@ -59,195 +46,128 @@ * - Xref Ids */ public class AnnotationInVariantMongoWriter implements ItemWriter { - private static final Logger logger = LoggerFactory.getLogger(AnnotationInVariantMongoWriter.class); + + public static final String ID = "_id"; + public static final String SET = "$set"; + public static final String ADD_TO_SET = "$addToSet"; + public static final String IN = "$in"; + public static final String ELEM_MATCH = "$elemMatch"; + public static final String ANNOTATION_IN_LIST = ANNOTATION_FIELD + ".$"; private final MongoOperations mongoOperations; private final String collection; - private String vepVersion; - - private String vepCacheVersion; - - private final String ANNOTATION_XREFS = VariantToDBObjectConverter.ANNOTATION_FIELD + ".$." + XREFS_FIELD; - - private final String ANNOTATION_SO = VariantToDBObjectConverter.ANNOTATION_FIELD + ".$." + SO_ACCESSION_FIELD; - - private final String ANNOTATION_SIFT = VariantToDBObjectConverter.ANNOTATION_FIELD + ".$." + SIFT_FIELD; + private final String vepVersion; - private final String ANNOTATION_POLYPHEN = VariantToDBObjectConverter.ANNOTATION_FIELD + ".$." + POLYPHEN_FIELD; - - private final String ANNOTATION_ENSEMBL_VERSION = VariantToDBObjectConverter.ANNOTATION_FIELD + "." + VEP_VERSION_FIELD; - - private final String ANNOTATION_VEP_CACHE_VERSION = VariantToDBObjectConverter.ANNOTATION_FIELD + "." 
+ VEP_CACHE_VERSION_FIELD; + private final String vepCacheVersion; public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, String collection, String vepVersion, String vepCacheVersion) { super(); - Assert.notNull(mongoOperations, "A Mongo instance is required"); - Assert.hasText(collection, "A collection name is required"); + Assert.notNull(mongoOperations); + Assert.hasText(collection); + Assert.hasText(vepVersion); + Assert.hasText(vepCacheVersion); this.mongoOperations = mongoOperations; this.collection = collection; this.vepVersion = vepVersion; this.vepCacheVersion = vepCacheVersion; } + + private HashMap generateVariantAnnotations(List annotations) { + HashMap variantAnnotations = new HashMap<>(); + + for (Annotation annotation : annotations) { + String id = annotation.buildVariantId(); + variantAnnotations.putIfAbsent(id, new VariantAnnotation(annotation)); + variantAnnotations.computeIfPresent(id, (key, oldVar) -> oldVar.concatenate(annotation)); + } + return variantAnnotations; + } - // TODO rewrite this to be bulk-friendly @Override public void write(List annotations) throws Exception { - Map> annotationsByStorageId = groupAnnotationByVariantId(annotations); - - for (Map.Entry> annotationsIdEntry : annotationsByStorageId.entrySet()) { - VariantAnnotation variantAnnotation = extractFieldsFromAnnotations(annotationsIdEntry.getValue()); - - String storageId = annotationsIdEntry.getKey(); - BasicDBObject id = new BasicDBObject("_id", storageId); - - DBObject dbObject = mongoOperations.getCollection(collection).findOne(id); + Map variantAnnotations = generateVariantAnnotations(annotations); - if (dbObject != null) { - logger.trace("Writing annotations into variant : {}, collection: {}", storageId, collection); - - List existingAnnotations = (List) dbObject - .get(VariantToDBObjectConverter.ANNOTATION_FIELD); - - if (existingAnnotations != null) { - updateExistingVariantAnnotation(variantAnnotation, storageId, existingAnnotations); - } else { - 
addNewVariantAnnotation(variantAnnotation, id); - } - - } - } + BulkOperations bulkOperations = mongoOperations.bulkOps(BulkOperations.BulkMode.UNORDERED, collection); + bulkPrepare(bulkOperations, variantAnnotations); + bulkOperations.execute(); } - /** - * Update {@link VariantAnnotation} fields if some are already present in the {@link uk.ac.ebi.eva.commons.models.data.Variant} - * Just make sure to update the specific version of annotation! - * - * @param variantAnnotation the current annotation to append - * @param storageId - * @param existingAnnotations already in {@link uk.ac.ebi.eva.commons.models.data.Variant} - */ - private void updateExistingVariantAnnotation(VariantAnnotation variantAnnotation, - String storageId, - List existingAnnotations) { - for (BasicDBObject existingAnnotation : existingAnnotations) { - if (existingAnnotation.getString(VEP_VERSION_FIELD) - .equals(vepVersion) && existingAnnotation.getString(VEP_CACHE_VERSION_FIELD) - .equals(vepCacheVersion)) { - - Set xrefs = variantAnnotation.getXrefIds(); - BasicDBObject addToSetValue = new BasicDBObject(ANNOTATION_XREFS, new BasicDBObject("$each", xrefs)); - - Set soAccessions = variantAnnotation.getSoAccessions(); - addToSetValue.append(ANNOTATION_SO, new BasicDBObject("$each", soAccessions)); - - BasicDBObject update = new BasicDBObject("$addToSet", addToSetValue); - - variantAnnotation - .addSifts(lookupExistingSubstitutionScore((BasicDBList) existingAnnotation.get(SIFT_FIELD))); - Set sifts = calculateRangeOfScores(variantAnnotation.getSifts()); - BasicDBObject setValue = new BasicDBObject(ANNOTATION_SIFT, sifts); - - variantAnnotation.addPolyphens( - lookupExistingSubstitutionScore((BasicDBList) existingAnnotation.get(POLYPHEN_FIELD))); - Set polyphens = calculateRangeOfScores(variantAnnotation.getPolyphens()); - setValue.append(ANNOTATION_POLYPHEN, polyphens); - - update.append("$set", setValue); - - BasicDBObject versionedId = new BasicDBObject("_id", storageId); - 
versionedId.append(ANNOTATION_ENSEMBL_VERSION, vepVersion); - versionedId.append(ANNOTATION_VEP_CACHE_VERSION, vepCacheVersion); - - mongoOperations.updateFirst(new BasicQuery(versionedId), new BasicUpdate(update), collection); + private void bulkPrepare(BulkOperations bulkOperations, Map variantAnnotations) { + Map storedVariantAnnotations = getStoredVariantAnnotations(variantAnnotations); + + for (Map.Entry entry : variantAnnotations.entrySet()) { + final String key = entry.getKey(); + if (storedVariantAnnotations.containsKey(key)) { + bulkUpdate(bulkOperations, key, storedVariantAnnotations.get(key).concatenate(entry.getValue())); + } else { + bulkAddToSet(bulkOperations, key, entry.getValue()); } } } - /** - * Append a new {@link VariantAnnotation} field into {@link uk.ac.ebi.eva.commons.models.data.Variant} - * - * @param variantAnnotation - * @param id - */ - private void addNewVariantAnnotation(VariantAnnotation variantAnnotation, BasicDBObject id) { - Set variantAnnotations = new HashSet<>(Collections.singletonList(variantAnnotation)); - BasicDBObject updateAnnotation = new BasicDBObject("$set", new BasicDBObject( - VariantToDBObjectConverter.ANNOTATION_FIELD, variantAnnotations)); - mongoOperations.upsert(new BasicQuery(id), new BasicUpdate(updateAnnotation), collection); + private void bulkAddToSet(BulkOperations bulkOperations, String variantId, VariantAnnotation value) { + DBObject id = new BasicDBObject(ID, variantId); + DBObject variantAnnotation = convertToMongo(value); + BasicDBObject addToSet = new BasicDBObject(ADD_TO_SET, new BasicDBObject(ANNOTATION_FIELD, variantAnnotation)); + bulkOperations.updateOne(new BasicQuery(id), new BasicUpdate(addToSet)); } - private Set lookupExistingSubstitutionScore(BasicDBList scores) { - Set substitutionScores = new HashSet<>(); + private void bulkUpdate(BulkOperations bulkOperations, String variantId, VariantAnnotation value) { + BasicDBObject query = generateQueryForAnnotationInVariant(variantId); - if (scores 
!= null) { - substitutionScores.addAll(scores.stream().map(score -> (Double) score).collect(Collectors.toSet())); - } + DBObject variantAnnotation = convertToMongo(value); + final BasicDBObject annotation = new BasicDBObject(ANNOTATION_IN_LIST, variantAnnotation); + BasicDBObject setAnnotation = new BasicDBObject(SET, annotation); - return substitutionScores; + bulkOperations.updateOne(new BasicQuery(query), new BasicUpdate(setAnnotation)); } - /** - * Extract Xrefs, so terms and protein substitution score from {@link Annotation} - */ - private VariantAnnotation extractFieldsFromAnnotations(List annotations) { - VariantAnnotation variantAnnotation = new VariantAnnotation(vepVersion, vepCacheVersion); - - for (Annotation annotation : annotations) { - Set xrefs = annotation.getXrefs(); - if (xrefs != null) { - variantAnnotation.addXrefIds(xrefs.stream().map(Xref::getId).collect(Collectors.toSet())); - } - - extractSubstitutionScores(variantAnnotation, annotation.getConsequenceTypes()); + private BasicDBObject generateQueryForAnnotationInVariant(String... 
variantIds) { + BasicDBObject query = new BasicDBObject(); + if (variantIds.length == 1) { + query.append(ID, variantIds[0]); + } else { + query.append(ID, new BasicDBObject(IN, variantIds)); } - - return variantAnnotation; + query.append(ANNOTATION_FIELD, createQueryMatchForVepAndCacheVersion()); + return query; } - private void extractSubstitutionScores(VariantAnnotation variantAnnotation, Set consequenceTypes) { - if (consequenceTypes != null) { - for (ConsequenceType consequenceType : consequenceTypes) { - Score sift = consequenceType.getSift(); - if (sift != null) { - variantAnnotation.addSift(sift.getScore()); - } - - Score polyphen = consequenceType.getPolyphen(); - if (polyphen != null) { - variantAnnotation.addPolyphen(polyphen.getScore()); - } + private BasicDBObject createQueryMatchForVepAndCacheVersion() { + BasicDBObject annotationQuery = new BasicDBObject(); + annotationQuery.append(VEP_VERSION_FIELD, vepVersion); + annotationQuery.append(VEP_CACHE_VERSION_FIELD, vepCacheVersion); + return new BasicDBObject(ELEM_MATCH, annotationQuery); + } - variantAnnotation.addsoAccessions(consequenceType.getSoAccessions()); + private Map getStoredVariantAnnotations(Map variantAnnotations) { + Map storedVariantAnnotations = new HashMap<>(); + BasicDBObject query = generateQueryForAnnotationInVariant(variantAnnotations.keySet().toArray(new String[]{})); + + Iterator iterator = mongoOperations.getCollection(collection).find(query).iterator(); + while (iterator.hasNext()) { + final DBObject object = iterator.next(); + final String id = (String) object.get(ID); + final BasicDBList dbAnnotations = (BasicDBList) object.get(ANNOTATION_FIELD); + if (dbAnnotations != null && !dbAnnotations.isEmpty()) { + final DBObject dbAnnotation = (DBObject) dbAnnotations.get(0); + storedVariantAnnotations.put(id, convertToVariantAnnotation(dbAnnotation)); } } + return storedVariantAnnotations; } - /** - * Return the min and max in case of multiple ProteinSubstitutionScores 
(sift/polyphen...) - */ - private Set calculateRangeOfScores(Set scores) { - if (scores.size() <= 1) { - return scores; - } else { - return new HashSet<>(Arrays.asList(Collections.min(scores), Collections.max(scores))); - } + private VariantAnnotation convertToVariantAnnotation(DBObject dbAnnotation) { + return mongoOperations.getConverter().read(VariantAnnotation.class, dbAnnotation); } - private Map> groupAnnotationByVariantId(List annotations) { - Map> annotationsByVariantId = new HashMap<>(); - for (Annotation annotation : annotations) { - String id = annotation.buildVariantId(); - annotationsByVariantId.putIfAbsent(id, new ArrayList<>()); - annotationsByVariantId.get(id).add(annotation); - } - - return annotationsByVariantId; + private DBObject convertToMongo(VariantAnnotation value) { + return (DBObject) mongoOperations.getConverter().convertToMongoType(value); } - } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index e4da776e4..daa9897be 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -17,8 +17,6 @@ package uk.ac.ebi.eva.pipeline.io.writers; import com.mongodb.BasicDBObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.batch.item.ItemWriter; import org.springframework.data.mongodb.core.BulkOperations; import org.springframework.data.mongodb.core.MongoOperations; @@ -63,12 +61,13 @@ * { "id" : "ENSG00000178591", "src" : "ensemblGene" */ public class AnnotationMongoWriter implements ItemWriter { - private static final Logger logger = LoggerFactory.getLogger(AnnotationMongoWriter.class); private static final String ANNOTATION_XREF_ID_FIELD = Annotation.XREFS_FIELD + "." + Xref.XREF_ID_FIELD; private static final String ANNOTATION_CT_SO_FIELD = Annotation.CONSEQUENCE_TYPE_FIELD + "." 
+ ConsequenceType.SO_ACCESSION_FIELD; + public static final String EACH = "$each"; + public static final String ADD_TO_SET = "$addToSet"; private final MongoOperations mongoOperations; @@ -78,8 +77,8 @@ public class AnnotationMongoWriter implements ItemWriter { public AnnotationMongoWriter(MongoOperations mongoOperations, String collection) { super(); - Assert.notNull(mongoOperations, "A Mongo instance is required"); - Assert.hasText(collection, "A collection name is required"); + Assert.notNull(mongoOperations); + Assert.hasText(collection); converter = new AnnotationToSimplifiedDBObjectConverter(); this.mongoOperations = mongoOperations; this.collection = collection; @@ -90,13 +89,15 @@ public AnnotationMongoWriter(MongoOperations mongoOperations, String collection) @Override public void write(List annotations) throws Exception { BulkOperations bulk = mongoOperations.bulkOps(BulkOperations.BulkMode.UNORDERED, collection); - System.out.println(mongoOperations.getConverter().getClass()); + prepareBulk(annotations, bulk); + bulk.execute(); + } + private void prepareBulk(List annotations, BulkOperations bulk) { Map annotationsByStorageId = groupAnnotationById(annotations); for (Annotation annotation : annotationsByStorageId.values()) { writeAnnotationInMongoDb(bulk, annotation); } - executeBulk(bulk, annotationsByStorageId.size()); } private Map groupAnnotationById(List annotations) { @@ -110,8 +111,6 @@ private Map groupAnnotationById(List a } private void writeAnnotationInMongoDb(BulkOperations bulk, Annotation annotation) { - logger.trace("Adding annotations into mongo bulk id: {}", annotation.getId()); - Query upsertQuery = new BasicQuery(converter.convert(annotation)); Update update = buildUpdateQuery(annotation); bulk.upsert(upsertQuery, update); @@ -121,28 +120,21 @@ private BasicUpdate buildUpdateQuery(Annotation annotation) { final BasicDBObject addToSetValue = new BasicDBObject(); addToSetValue.append(CONSEQUENCE_TYPE_FIELD, 
buildInsertConsequenceTypeQuery(annotation)); addToSetValue.append(XREFS_FIELD, buildInsertXrefsQuery(annotation)); - return new BasicUpdate(new BasicDBObject("$addToSet", addToSetValue)); + return new BasicUpdate(new BasicDBObject(ADD_TO_SET, addToSetValue)); } private BasicDBObject buildInsertXrefsQuery(Annotation annotation) { - return new BasicDBObject("$each", convertToMongo(annotation.getXrefs())); + return new BasicDBObject(EACH, convertToMongo(annotation.getXrefs())); } private BasicDBObject buildInsertConsequenceTypeQuery(Annotation annotation) { - return new BasicDBObject("$each", convertToMongo(annotation.getConsequenceTypes())); + return new BasicDBObject(EACH, convertToMongo(annotation.getConsequenceTypes())); } private Object convertToMongo(Object object) { return mongoOperations.getConverter().convertToMongoType(object); } - private void executeBulk(BulkOperations bulk, int currentBulkSize) { - if (currentBulkSize != 0) { - logger.trace("Execute mongo bulk. BulkSize : " + currentBulkSize); - bulk.execute(); - } - } - private void createIndexes() { mongoOperations.getCollection(collection).createIndex( new BasicDBObject(ANNOTATION_XREF_ID_FIELD, 1), diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index 862e0f6f3..878ef3705 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -154,7 +154,7 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { BasicDBList sifts = (BasicDBList) annotationField.get(SIFT_FIELD); assertNotNull(sifts); - assertTrue(sifts.size() == 1); + assertTrue(sifts.size() == 2); BasicDBList so = (BasicDBList) annotationField.get(SO_ACCESSION_FIELD); assertNotNull(so); @@ -162,7 +162,7 @@ public void shouldWriteAllFieldsIntoMongoDb() throws 
Exception { BasicDBList polyphen = (BasicDBList) annotationField.get(POLYPHEN_FIELD); assertNotNull(polyphen); - assertTrue(polyphen.size() == 1); + assertTrue(polyphen.size() == 2); BasicDBList geneNames = (BasicDBList) annotationField.get(XREFS_FIELD); assertNotNull(geneNames); From 15de4505d73ed3676f3757c297ebdd317ee16294 Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 16 May 2017 12:58:25 +0100 Subject: [PATCH 17/48] General code cleaning. -Modified tags for vep and cache version to match documentation. --- .../models/mongo/documents/Annotation.java | 36 ++++++- .../subdocuments}/VariantAnnotation.java | 101 +++++++++++------- .../AnnotationInVariantMongoWriter.java | 10 +- .../AnnotationInVariantMongoWriterTest.java | 11 +- .../io/writers/AnnotationMongoWriterTest.java | 4 +- .../jobs/steps/AnnotationLoaderStepTest.java | 4 +- 6 files changed, 107 insertions(+), 59 deletions(-) rename src/main/java/uk/ac/ebi/eva/commons/models/{data => mongo/documents/subdocuments}/VariantAnnotation.java (69%) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java index 7fd808431..f0069c6da 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java @@ -40,9 +40,9 @@ public class Annotation { public static final String END_FIELD = "end"; - public static final String VEP_VERSION_FIELD = "vepVer"; + public static final String VEP_VERSION_FIELD = "vepv"; - public static final String VEP_CACHE_VERSION_FIELD = "cacheVer"; + public static final String VEP_CACHE_VERSION_FIELD = "cachev"; public static final String CONSEQUENCE_TYPE_FIELD = "ct"; @@ -89,6 +89,26 @@ public Annotation(String chromosome, int start, int end, String referenceAllele, this.consequenceTypes = new HashSet<>(); } + /** + * Private copy constructor + * + * @param annotation + */ + private 
Annotation(Annotation annotation) { + chromosome = annotation.chromosome; + start = annotation.start; + end = annotation.end; + vepVersion = annotation.vepVersion; + vepCacheVersion = annotation.vepCacheVersion; + + id = annotation.id; + xrefs = new HashSet<>(); + consequenceTypes = new HashSet<>(); + + xrefs.addAll(annotation.xrefs); + consequenceTypes.addAll(annotation.consequenceTypes); + } + public String getChromosome() { return chromosome; } @@ -163,10 +183,18 @@ public String buildVariantId() { return getId().substring(0, getId().length() - vepVersion.length() -vepCacheVersion.length() -2); } + /** + * Concatenate two annotations in a new one. This method returns a new instance with the concatenated array of + * consequence types and computed xrefs. + * + * @param annotation + * @return + */ public Annotation concatenate(Annotation annotation) { + Annotation temp = new Annotation(this); if(annotation.getConsequenceTypes()!=null){ - addConsequenceTypes(annotation.getConsequenceTypes()); + temp.addConsequenceTypes(annotation.getConsequenceTypes()); } - return this; + return temp; } } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java similarity index 69% rename from src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java index 07725340d..2ebd223a9 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java @@ -13,16 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.data; +package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; -import org.springframework.data.annotation.Transient; -import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; import org.springframework.util.Assert; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; import java.util.ArrayList; import java.util.Collection; @@ -36,9 +31,9 @@ */ public class VariantAnnotation { - public static final String VEP_VERSION_FIELD = "vepVer"; + public static final String VEP_VERSION_FIELD = "vepv"; - public static final String VEP_CACHE_VERSION_FIELD = "cacheVer"; + public static final String VEP_CACHE_VERSION_FIELD = "cachev"; public static final String SIFT_FIELD = "sift"; @@ -55,10 +50,10 @@ public class VariantAnnotation { private String vepCacheVersion; @Field(value = SIFT_FIELD) - private List sifts = new ArrayList<>(); + private List sifts; @Field(value = POLYPHEN_FIELD) - private List polyphens = new ArrayList<>(); + private List polyphens; @Field(value = SO_ACCESSION_FIELD) private Set soAccessions = new HashSet<>(); @@ -77,13 +72,18 @@ public class VariantAnnotation { * @param vepCacheVersion non empty value required, otherwise throws {@link IllegalArgumentException} */ public VariantAnnotation(String vepVersion, String vepCacheVersion) { - Assert.hasText(vepVersion, "A non empty vepVersion is required"); - Assert.hasText(vepCacheVersion, "A non empty vepCacheVersion is required"); + Assert.hasText(vepVersion); + Assert.hasText(vepCacheVersion); this.vepVersion = vepVersion; this.vepCacheVersion = vepCacheVersion; } - VariantAnnotation(VariantAnnotation variantAnnotation) { + /** + * Private copy constructor + * 
+ * @param variantAnnotation + */ + private VariantAnnotation(VariantAnnotation variantAnnotation) { this(variantAnnotation.getVepVersion(), variantAnnotation.getVepCacheVersion()); doConcatenate(variantAnnotation); } @@ -94,19 +94,27 @@ public VariantAnnotation(Annotation annotation) { } private void doConcatenate(VariantAnnotation variantAnnotation) { - xrefIds.addAll(variantAnnotation.getXrefIds()); - for (Double siftLimit : variantAnnotation.getSifts()) { - concatenateSiftRange(siftLimit); + if (variantAnnotation.getXrefIds() != null) { + addXrefIds(variantAnnotation.getXrefIds()); } - for (Double polyphenLimit : variantAnnotation.getPolyphens()) { - concatenatePolyphenRange(polyphenLimit); + if (variantAnnotation.getSifts() != null) { + for (Double siftLimit : variantAnnotation.getSifts()) { + concatenateSiftRange(siftLimit); + } + } + if (variantAnnotation.getPolyphens() != null) { + for (Double polyphenLimit : variantAnnotation.getPolyphens()) { + concatenatePolyphenRange(polyphenLimit); + } + } + if (variantAnnotation.getSoAccessions() != null) { + addsoAccessions(variantAnnotation.getSoAccessions()); } - soAccessions.addAll(variantAnnotation.getSoAccessions()); } private void doConcatenate(Annotation annotation) { for (Xref xref : annotation.getXrefs()) { - xrefIds.add(xref.getId()); + addXrefId(xref.getId()); } for (ConsequenceType consequenceType : annotation.getConsequenceTypes()) { final Score sift = consequenceType.getSift(); @@ -119,7 +127,7 @@ private void doConcatenate(Annotation annotation) { } final Set soAccessions = consequenceType.getSoAccessions(); if (soAccessions != null) { - this.soAccessions.addAll(soAccessions); + addsoAccessions(soAccessions); } } } @@ -141,11 +149,11 @@ private Double minOf(Collection collection) { private void concatenateRange(Collection collection, Double score) { Double min = minOf(collection); Double max = maxOf(collection); - if(min == null || max == null){ + if (min == null || max == null) { setRange(collection, 
score, score); - } else if (score < min){ + } else if (score < min) { setRange(collection, score, max); - } else if (score > max){ + } else if (score > max) { setRange(collection, min, score); } } @@ -157,34 +165,37 @@ private void setRange(Collection collection, Double minScore, Double max } private void concatenateSiftRange(Double score) { + if (sifts == null) { + sifts = new ArrayList<>(); + } concatenateRange(sifts, score); } private void concatenatePolyphenRange(Double score) { + if (polyphens == null) { + polyphens = new ArrayList<>(); + } concatenateRange(polyphens, score); } - public void addSift(Double sift) { - this.sifts.add(sift); - } - - public void addSifts(Collection sifts) { - this.sifts.addAll(sifts); - } - - public void addPolyphen(Double polyphen) { - this.polyphens.add(polyphen); - } - - public void addPolyphens(Collection polyphens) { - this.polyphens.addAll(polyphens); + private void addXrefId(String id) { + if(xrefIds==null){ + xrefIds = new HashSet<>(); + } + xrefIds.add(id); } - public void addXrefIds(Set xrefIds) { - this.xrefIds.addAll(xrefIds); + private void addXrefIds(Set ids) { + if(xrefIds==null){ + xrefIds = new HashSet<>(); + } + xrefIds.addAll(ids); } - public void addsoAccessions(Set soAccessions) { + private void addsoAccessions(Set soAccessions) { + if (this.soAccessions == null) { + this.soAccessions = new HashSet<>(); + } this.soAccessions.addAll(soAccessions); } @@ -218,6 +229,14 @@ public VariantAnnotation concatenate(Annotation annotation) { return temp; } + /** + * Concatenate two VariantAnnotations in a new one. This method returns a new instance of VariantAnnotation with + * the concatenation of xrefIds and soAccessions. This concatenation also has new values for the ranges of + * polyphen and sift values to include the values expressend in the concatenated VariantAnnotation. 
+ * + * @param annotation + * @return + */ public VariantAnnotation concatenate(VariantAnnotation annotation) { VariantAnnotation temp = new VariantAnnotation(this); temp.doConcatenate(annotation); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index bd911985b..af8eb1811 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -24,17 +24,17 @@ import org.springframework.data.mongodb.core.query.BasicQuery; import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; -import uk.ac.ebi.eva.commons.models.data.VariantAnnotation; import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import static org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantConverter.ANNOTATION_FIELD; -import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.VEP_VERSION_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.VEP_CACHE_VERSION_FIELD; +import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.ANNOTATION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.VEP_CACHE_VERSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.VEP_VERSION_FIELD; /** * Update the {@link uk.ac.ebi.eva.commons.models.data.Variant} mongo document with {@link VariantAnnotation} @@ -77,7 +77,7 @@ public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, this.vepVersion = vepVersion; this.vepCacheVersion = vepCacheVersion; } - + private HashMap 
generateVariantAnnotations(List annotations) { HashMap variantAnnotations = new HashMap<>(); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index 878ef3705..b04e6d911 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -45,10 +45,10 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.SIFT_FIELD; -import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.SO_ACCESSION_FIELD; -import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.XREFS_FIELD; import static uk.ac.ebi.eva.test.data.VepOutputContent.vepOutputContentWithExtraFields; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; @@ -60,6 +60,7 @@ @TestPropertySource({"classpath:test-mongo.properties"}) @ContextConfiguration(classes = {MongoConnection.class, MongoMappingContext.class}) public class AnnotationInVariantMongoWriterTest { + private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; private static final String COLLECTION_VARIANTS_NAME = "variants"; @@ -79,7 +80,7 @@ public class AnnotationInVariantMongoWriterTest { private AnnotationInVariantMongoWriter 
annotationInVariantMongoWriter; - private uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper AnnotationLineMapper; + private AnnotationLineMapper AnnotationLineMapper; @Before public void setUp() throws Exception { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index e4ba2ca5a..3a57319f7 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -55,8 +55,8 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.SIFT_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.XREFS_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score.SCORE_DESCRIPTION_FIELD; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java index 561c658bf..ff1aeeb9c 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java @@ -47,8 +47,8 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; -import static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.POLYPHEN_FIELD; -import 
static uk.ac.ebi.eva.commons.models.data.VariantAnnotation.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.XREFS_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType.SIFT_FIELD; From ea4f47ab8a36f481afcd796d0675bb37e7ff006c Mon Sep 17 00:00:00 2001 From: jorizci Date: Wed, 17 May 2017 15:17:37 +0100 Subject: [PATCH 18/48] Removed non used code in Variant. --- .../ebi/eva/commons/models/data/Variant.java | 46 ------------------- 1 file changed, 46 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java index a8d449727..5ff26526b 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java @@ -289,52 +289,6 @@ public VariantStats getStats(String studyId, String fileId) { return file.getStats(); } - /** - * Copies the current variant and returns the copy in Ensembl format. - * see http://www.ensembl.org/info/docs/tools/vep/vep_formats.html - *

- * This variant remains unchanged, but the copy is a shallow copy, so any changes to the copy will affect the - * original as well. - * - * @return a modified copy - */ - public Variant copyInEnsemblFormat() { - Variant variant = this.clone(); - variant.transformToEnsemblFormat(); - return variant; - } - - /** - * see http://www.ensembl.org/info/docs/tools/vep/vep_formats.html - */ - private void transformToEnsemblFormat() { - if (type == VariantType.INDEL || type == VariantType.SV || length > 1) { - if (!reference.isEmpty() && !alternate.isEmpty() && reference.charAt(0) == alternate.charAt(0)) { - reference = reference.substring(1); - alternate = alternate.substring(1); - start++; - } - - // opencb sets: end = start + max(referenceAllele.length, alternateAllele.length) -1 - // ensembl sets: end = start + reference.length -1 - end = start + reference.length() - 1; // -1 because the range is inclusive: [start, end] - - if (reference.length() < alternate.length()) { // insertion - // and ensembl in insertions sets: start = end+1 - start = end + 1; - } - - length = reference.length(); - - if (reference.equals("")) { - reference = "-"; - } - if (alternate.equals("")) { - alternate = "-"; - } - } - } - @Override public String toString() { return "Variant{" + From 8c859c7f4b14e2a96f2ede6cd5906dfb42d468d1 Mon Sep 17 00:00:00 2001 From: jorizci Date: Wed, 17 May 2017 16:13:28 +0100 Subject: [PATCH 19/48] Removed unused converter. 
--- .../data/VariantToMongoDbObjectConverter.java | 85 ------------------- 1 file changed, 85 deletions(-) delete mode 100644 src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java b/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java deleted file mode 100644 index 1b07c4aeb..000000000 --- a/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright 2016 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package uk.ac.ebi.eva.pipeline.model.converters.data; - -import com.mongodb.BasicDBObject; -import com.mongodb.DBObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.core.convert.converter.Converter; -import org.springframework.util.Assert; - -import uk.ac.ebi.eva.commons.models.converters.data.SamplesToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.converters.data.VariantSourceEntryToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.converters.data.VariantStatsToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.data.Variant; -import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; - -import java.util.List; - -/** - * Converts a {@link Variant} into mongoDb {@link DBObject} - */ -public class VariantToMongoDbObjectConverter implements Converter { - private static final Logger logger = LoggerFactory.getLogger(VariantToMongoDbObjectConverter.class); - - private VariantToDBObjectConverter variantConverter; - private VariantStatsToDBObjectConverter statsConverter; - private VariantSourceEntryToDBObjectConverter sourceEntryConverter; - - private boolean includeStats; - - public VariantToMongoDbObjectConverter(boolean includeStats, boolean includeSample) { - this(includeStats, includeStats, includeSample); - } - - public VariantToMongoDbObjectConverter(boolean includeStats, boolean calculateStats, boolean includeSample) { - - this.includeStats = includeStats; - this.statsConverter = calculateStats ? new VariantStatsToDBObjectConverter() : null; - - - SamplesToDBObjectConverter sampleConverter = includeSample ? 
new SamplesToDBObjectConverter() : null; - this.sourceEntryConverter = new VariantSourceEntryToDBObjectConverter(sampleConverter); - this.variantConverter = new VariantToDBObjectConverter(null, null); - } - - @Override - public DBObject convert(Variant variant) { - Assert.notNull(variant, "Variant should not be null. Please provide a valid Variant object"); - logger.trace("Convert variant {} into mongo object", variant); - - VariantSourceEntry variantSourceEntry = variant.getSourceEntries().values().iterator().next(); - - BasicDBObject addToSet = new BasicDBObject().append(VariantToDBObjectConverter.FILES_FIELD, - sourceEntryConverter.convert(variantSourceEntry)); - - if (includeStats) { - List sourceEntryStats = statsConverter.convert(variantSourceEntry); - addToSet.put(VariantToDBObjectConverter.STATS_FIELD, new BasicDBObject("$each", sourceEntryStats)); - } - - if (variant.getIds() != null && !variant.getIds().isEmpty()) { - addToSet.put(VariantToDBObjectConverter.IDS_FIELD, new BasicDBObject("$each", variant.getIds())); - } - - BasicDBObject update = new BasicDBObject(); - update.append("$addToSet", addToSet).append("$setOnInsert", variantConverter.convert(variant)); - - return update; - } -} From c22c46722bbe5fab80793cf25b19b51282837810 Mon Sep 17 00:00:00 2001 From: jorizci Date: Wed, 17 May 2017 16:26:46 +0100 Subject: [PATCH 20/48] Applied suggested changes. 
--- ...notationToSimplifiedDBObjectConverter.java | 12 +++++------ .../data/VariantToDBObjectConverter.java | 3 +-- .../models/mongo/documents/Annotation.java | 10 ++++----- .../subdocuments/ConsequenceType.java | 2 +- .../mongo/documents/subdocuments/Score.java | 2 +- .../subdocuments/VariantAnnotation.java | 15 ++++++------- .../mongo/documents/subdocuments/Xref.java | 2 +- .../io/mappers/AnnotationLineMapper.java | 1 - .../AnnotationInVariantMongoWriter.java | 15 +++++++------ .../io/writers/AnnotationMongoWriter.java | 3 +-- .../parameters/JobParametersNames.java | 2 +- ...notationLoaderStepParametersValidator.java | 21 ++++++++++--------- .../uk/ac/ebi/eva/utils/MongoDBHelper.java | 4 ---- .../io/mappers/AnnotationLineMapperTest.java | 4 ++-- .../AnnotationInVariantMongoWriterTest.java | 13 ++++++------ src/test/resources/genotype-test.properties | 2 +- 16 files changed, 53 insertions(+), 58 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java index c828dfd37..6dd660c45 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java @@ -32,12 +32,12 @@ public class AnnotationToSimplifiedDBObjectConverter implements Converter consequenceTypes) { - for(ConsequenceType consequenceType: consequenceTypes){ + for (ConsequenceType consequenceType : consequenceTypes) { addConsequenceType(consequenceType); } } @@ -177,10 +176,11 @@ public static String buildAnnotationId(String chromosome, int start, String refe /** * Builds the variant id from the current annotation id. In essence we remove the two extra fields added at the end * of the id and the underscores. 
+ * * @return */ public String buildVariantId() { - return getId().substring(0, getId().length() - vepVersion.length() -vepCacheVersion.length() -2); + return getId().substring(0, getId().length() - vepVersion.length() - vepCacheVersion.length() - 2); } /** @@ -192,7 +192,7 @@ public String buildVariantId() { */ public Annotation concatenate(Annotation annotation) { Annotation temp = new Annotation(this); - if(annotation.getConsequenceTypes()!=null){ + if (annotation.getConsequenceTypes() != null) { temp.addConsequenceTypes(annotation.getConsequenceTypes()); } return temp; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java index 10c3659ae..24cd7212e 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java @@ -21,7 +21,7 @@ import java.util.Set; /** - * org.opencb.biodata.models.variant.annotation.ConsequenceType + * Mongo database representation of a consequence type in a genomic variant annotation. */ public class ConsequenceType { diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java index 131ac787f..fbe1ba9ff 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java @@ -19,7 +19,7 @@ import org.springframework.data.mongodb.core.mapping.Field; /** - * From {@link org.opencb.biodata.models.variant.annotation.Score} + * Mongo database representation of a score / description pair of values. 
*/ public class Score { diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java index 2ebd223a9..571c7cd47 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java @@ -27,7 +27,7 @@ import java.util.Set; /** - * Annotations of the genomic variation + * Lite version of genomic variant annotation generated using Ensembl VEP for indexing purposes. */ public class VariantAnnotation { @@ -68,8 +68,9 @@ public class VariantAnnotation { /** * Variant annotation constructor. Requires non empty values, otherwise throws {@link IllegalArgumentException} * - * @param vepVersion non empty value required, otherwise throws {@link IllegalArgumentException} - * @param vepCacheVersion non empty value required, otherwise throws {@link IllegalArgumentException} + * @param vepVersion + * @param vepCacheVersion + * @throws IllegalArgumentException */ public VariantAnnotation(String vepVersion, String vepCacheVersion) { Assert.hasText(vepVersion); @@ -146,7 +147,7 @@ private Double minOf(Collection collection) { return Collections.min(collection); } - private void concatenateRange(Collection collection, Double score) { + private void concatenateRange(Collection collection, double score) { Double min = minOf(collection); Double max = maxOf(collection); if (min == null || max == null) { @@ -158,20 +159,20 @@ private void concatenateRange(Collection collection, Double score) { } } - private void setRange(Collection collection, Double minScore, Double maxScore) { + private void setRange(Collection collection, double minScore, double maxScore) { collection.clear(); collection.add(minScore); collection.add(maxScore); } - private void concatenateSiftRange(Double score) { + private void 
concatenateSiftRange(double score) { if (sifts == null) { sifts = new ArrayList<>(); } concatenateRange(sifts, score); } - private void concatenatePolyphenRange(Double score) { + private void concatenatePolyphenRange(double score) { if (polyphens == null) { polyphens = new ArrayList<>(); } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java index b583a347c..18d868563 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java @@ -18,7 +18,7 @@ import org.springframework.data.mongodb.core.mapping.Field; /** - * From org.opencb.biodata.models.variant.annotation.Xref + * Mongo database representation of Xref field in a genomic variant annotation */ public class Xref { diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java index 28b27ff67..dbdd5c12c 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java @@ -74,7 +74,6 @@ public AnnotationLineMapper(String vepVersion, String vepCacheVersion) { */ @Override public Annotation mapLine(String line, int lineNumber) { - //logger.debug("Mapping line {} to Annotation", line); ConsequenceType consequenceType = new ConsequenceType(); String[] lineFields = line.split("\t"); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index af8eb1811..789e3c98e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ 
-66,7 +66,6 @@ public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, String collection, String vepVersion, String vepCacheVersion) { - super(); Assert.notNull(mongoOperations); Assert.hasText(collection); Assert.hasText(vepVersion); @@ -78,7 +77,7 @@ public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, this.vepCacheVersion = vepCacheVersion; } - private HashMap generateVariantAnnotations(List annotations) { + private Map generateVariantAnnotations(List annotations) { HashMap variantAnnotations = new HashMap<>(); for (Annotation annotation : annotations) { @@ -102,11 +101,11 @@ private void bulkPrepare(BulkOperations bulkOperations, Map storedVariantAnnotations = getStoredVariantAnnotations(variantAnnotations); for (Map.Entry entry : variantAnnotations.entrySet()) { - final String key = entry.getKey(); - if (storedVariantAnnotations.containsKey(key)) { - bulkUpdate(bulkOperations, key, storedVariantAnnotations.get(key).concatenate(entry.getValue())); + final String variantId = entry.getKey(); + if (storedVariantAnnotations.containsKey(variantId)) { + bulkUpdate(bulkOperations, variantId, storedVariantAnnotations.get(variantId).concatenate(entry.getValue())); } else { - bulkAddToSet(bulkOperations, key, entry.getValue()); + bulkAddToSet(bulkOperations, variantId, entry.getValue()); } } } @@ -153,11 +152,11 @@ private Map getStoredVariantAnnotations(Map iterator = mongoOperations.getCollection(collection).find(query).iterator(); while (iterator.hasNext()) { final DBObject object = iterator.next(); - final String id = (String) object.get(ID); + final String variantId = (String) object.get(ID); final BasicDBList dbAnnotations = (BasicDBList) object.get(ANNOTATION_FIELD); if (dbAnnotations != null && !dbAnnotations.isEmpty()) { final DBObject dbAnnotation = (DBObject) dbAnnotations.get(0); - storedVariantAnnotations.put(id, convertToVariantAnnotation(dbAnnotation)); + storedVariantAnnotations.put(variantId, 
convertToVariantAnnotation(dbAnnotation)); } } return storedVariantAnnotations; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index daa9897be..d57522b99 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -76,10 +76,9 @@ public class AnnotationMongoWriter implements ItemWriter { private final AnnotationToSimplifiedDBObjectConverter converter; public AnnotationMongoWriter(MongoOperations mongoOperations, String collection) { - super(); Assert.notNull(mongoOperations); Assert.hasText(collection); - converter = new AnnotationToSimplifiedDBObjectConverter(); + this.converter = new AnnotationToSimplifiedDBObjectConverter(); this.mongoOperations = mongoOperations; this.collection = collection; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java index a85bd5591..cd0dcd015 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java @@ -83,7 +83,7 @@ public class JobParametersNames { public static final String DB_COLLECTIONS_STATISTICS_NAME = "db.collections.stats.name"; - public static final String DB_COLLECTIONS_ANNOTATION_METADATA_NAME = "db.collections.annotation.metadata.name"; + public static final String DB_COLLECTIONS_ANNOTATION_METADATA_NAME = "db.collections.annotation-metadata.name"; public static final String DB_COLLECTIONS_ANNOTATIONS_NAME = "db.collections.annotations.name"; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java index 
00d3b7441..41f9fd06a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java @@ -45,9 +45,10 @@ public class AnnotationLoaderStepParametersValidator extends DefaultJobParameter public AnnotationLoaderStepParametersValidator(boolean isStudyIdRequired) { super(new String[]{JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, - JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, JobParametersNames.DB_NAME, - JobParametersNames.OUTPUT_DIR_ANNOTATION}, - new String[]{}); + JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, + JobParametersNames.DB_NAME, + JobParametersNames.OUTPUT_DIR_ANNOTATION}, + new String[]{}); this.isStudyIdRequired = isStudyIdRequired; } @@ -60,13 +61,13 @@ public void validate(JobParameters parameters) throws JobParametersInvalidExcept private CompositeJobParametersValidator compositeJobParametersValidator() { List jobParametersValidators = new ArrayList<>(); Collections.addAll(jobParametersValidators, - new DbCollectionsAnnotationsNameValidator(), - new DbCollectionsVariantsNameValidator(), - new DbNameValidator(), - new OutputDirAnnotationValidator(), - new OptionalValidator(new ConfigRestartabilityAllowValidator(), - JobParametersNames.CONFIG_RESTARTABILITY_ALLOW), - new OptionalValidator(new ConfigChunkSizeValidator(), JobParametersNames.CONFIG_CHUNK_SIZE) + new DbCollectionsAnnotationsNameValidator(), + new DbCollectionsVariantsNameValidator(), + new DbNameValidator(), + new OutputDirAnnotationValidator(), + new OptionalValidator(new ConfigRestartabilityAllowValidator(), + JobParametersNames.CONFIG_RESTARTABILITY_ALLOW), + new OptionalValidator(new ConfigChunkSizeValidator(), JobParametersNames.CONFIG_CHUNK_SIZE) ); if (isStudyIdRequired) { diff --git a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java index 
c205d50b9..ded2f2de7 100644 --- a/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java +++ b/src/main/java/uk/ac/ebi/eva/utils/MongoDBHelper.java @@ -16,10 +16,6 @@ package uk.ac.ebi.eva.utils; import com.mongodb.ServerAddress; -import org.opencb.commons.utils.CryptoUtils; - -import uk.ac.ebi.eva.commons.models.data.Variant; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; import java.net.UnknownHostException; import java.util.LinkedList; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java index 55dca8f54..933943ea8 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java @@ -127,7 +127,7 @@ public void shouldParseVepOutputWithExtraFields() { Score expectedSift = new Score(0.07, "tolerated"); Score expectedPolyphen = new Score(0.859, "possibly_damaging"); - assertEquals(sifts, expectedSift); - assertEquals(polyphen, expectedPolyphen); + assertEquals(expectedSift, sifts); + assertEquals(expectedPolyphen, polyphen); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index b04e6d911..f8db7b5c0 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -43,6 +43,7 @@ import java.util.Arrays; import java.util.List; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.POLYPHEN_FIELD; @@ -112,9 +113,9 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { // load 
the annotation MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, - mongoMappingContext); + mongoMappingContext); annotationInVariantMongoWriter = new AnnotationInVariantMongoWriter(operations, COLLECTION_VARIANTS_NAME, - VEP_VERSION, VEP_CACHE_VERSION); + VEP_VERSION, VEP_CACHE_VERSION); annotationInVariantMongoWriter.write(annotationSet1); annotationInVariantMongoWriter.write(annotationSet2); @@ -155,19 +156,19 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { BasicDBList sifts = (BasicDBList) annotationField.get(SIFT_FIELD); assertNotNull(sifts); - assertTrue(sifts.size() == 2); + assertEquals(2, sifts.size()); BasicDBList so = (BasicDBList) annotationField.get(SO_ACCESSION_FIELD); assertNotNull(so); - assertTrue(so.size() == 1); + assertEquals(1, so.size()); BasicDBList polyphen = (BasicDBList) annotationField.get(POLYPHEN_FIELD); assertNotNull(polyphen); - assertTrue(polyphen.size() == 2); + assertEquals(2, polyphen.size()); BasicDBList geneNames = (BasicDBList) annotationField.get(XREFS_FIELD); assertNotNull(geneNames); - assertTrue(geneNames.size() == 4); + assertEquals(4, geneNames.size()); } } cursor.close(); diff --git a/src/test/resources/genotype-test.properties b/src/test/resources/genotype-test.properties index 5ba267ad5..210c0b385 100644 --- a/src/test/resources/genotype-test.properties +++ b/src/test/resources/genotype-test.properties @@ -13,4 +13,4 @@ db.collections.variants.name=variants db.collections.files.name=files db.collections.features.name=features db.collections.stats.name=populationStatistics -db.collections.annotation.metadata.name=annotationMetadata +db.collections.annotation-metadata.name=annotationMetadata From 819203432b5b00a9a4df9730b4b66289e0a08d6c Mon Sep 17 00:00:00 2001 From: jorizci Date: Wed, 17 May 2017 16:27:55 +0100 Subject: [PATCH 21/48] Applied suggested package name change from documents to entity. 
--- ...nnotationToSimplifiedDBObjectConverter.java | 2 +- .../ebi/eva/commons/models/data/Variant.java | 2 +- .../{documents => entity}/Annotation.java | 6 +++--- .../subdocuments/ConsequenceType.java | 2 +- .../subdocuments/Score.java | 2 +- .../subdocuments/VariantAnnotation.java | 4 ++-- .../subdocuments/Xref.java | 2 +- .../readers/AnnotationReaderConfiguration.java | 2 +- ...AnnotationCompositeWriterConfiguration.java | 2 +- ...AnnotationInVariantWriterConfiguration.java | 2 +- .../writers/AnnotationWriterConfiguration.java | 4 +--- .../io/mappers/AnnotationLineMapper.java | 6 +++--- .../io/readers/AnnotationFlatFileReader.java | 2 +- .../AnnotationInVariantMongoWriter.java | 8 ++++---- .../io/writers/AnnotationMongoWriter.java | 10 +++++----- .../jobs/steps/AnnotationLoaderStep.java | 2 +- .../io/mappers/AnnotationLineMapperTest.java | 6 +++--- .../readers/AnnotationFlatFileReaderTest.java | 2 +- .../AnnotationInVariantMongoWriterTest.java | 10 +++++----- .../io/writers/AnnotationMongoWriterTest.java | 18 +++++++++--------- .../eva/pipeline/jobs/AnnotationJobTest.java | 2 +- .../jobs/steps/AnnotationLoaderStepTest.java | 10 +++++----- .../test/utils/GenotypedVcfJobTestUtils.java | 7 +------ 23 files changed, 53 insertions(+), 60 deletions(-) rename src/main/java/uk/ac/ebi/eva/commons/models/mongo/{documents => entity}/Annotation.java (96%) rename src/main/java/uk/ac/ebi/eva/commons/models/mongo/{documents => entity}/subdocuments/ConsequenceType.java (99%) rename src/main/java/uk/ac/ebi/eva/commons/models/mongo/{documents => entity}/subdocuments/Score.java (96%) rename src/main/java/uk/ac/ebi/eva/commons/models/mongo/{documents => entity}/subdocuments/VariantAnnotation.java (98%) rename src/main/java/uk/ac/ebi/eva/commons/models/mongo/{documents => entity}/subdocuments/Xref.java (96%) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java 
b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java index 6dd660c45..c04835b02 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java @@ -18,7 +18,7 @@ import com.mongodb.BasicDBObject; import com.mongodb.DBObject; import org.springframework.core.convert.converter.Converter; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; /** * Class to convert a Annotation document to a DBObject that contains a simplified version of the document. diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java index 5ff26526b..34a088209 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java @@ -17,7 +17,7 @@ package uk.ac.ebi.eva.commons.models.data; import org.opencb.commons.utils.CryptoUtils; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import java.util.HashMap; import java.util.HashSet; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/Annotation.java similarity index 96% rename from src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/Annotation.java index 2220f187d..63bb48644 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/Annotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/Annotation.java @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.mongo.documents; +package uk.ac.ebi.eva.commons.models.mongo.entity; import com.google.common.base.Strings; import org.springframework.data.annotation.Id; import org.springframework.data.mongodb.core.mapping.Document; import org.springframework.data.mongodb.core.mapping.Field; import uk.ac.ebi.eva.commons.models.data.Variant; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Xref; import java.util.Collections; import java.util.HashSet; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/ConsequenceType.java similarity index 99% rename from src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/ConsequenceType.java index 24cd7212e..dcfdc4bdd 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/ConsequenceType.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/ConsequenceType.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; +package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; import org.springframework.data.mongodb.core.mapping.Field; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Score.java similarity index 96% rename from src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Score.java index fbe1ba9ff..7853131fd 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Score.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Score.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; +package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; import org.springframework.data.mongodb.core.mapping.Field; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAnnotation.java similarity index 98% rename from src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAnnotation.java index 571c7cd47..60a3e99f3 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAnnotation.java @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; +package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; import org.springframework.data.mongodb.core.mapping.Field; import org.springframework.util.Assert; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import java.util.ArrayList; import java.util.Collection; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Xref.java similarity index 96% rename from src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java rename to src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Xref.java index 18d868563..c9949915d 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/documents/subdocuments/Xref.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Xref.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments; +package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; import org.springframework.data.mongodb.core.mapping.Field; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java index 2db5b51a9..a287fd597 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java @@ -20,7 +20,7 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.pipeline.io.readers.AnnotationFlatFileReader; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java index 565db3c95..d95888b82 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java @@ -25,7 +25,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Profile; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.pipeline.Application; import java.util.Arrays; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java 
b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java index 321712cc6..b25667344 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java @@ -22,7 +22,7 @@ import org.springframework.context.annotation.Profile; import org.springframework.data.mongodb.core.MongoOperations; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.io.writers.AnnotationInVariantMongoWriter; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java index 7d658df39..0f5ddf130 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java @@ -21,11 +21,9 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Profile; import org.springframework.data.mongodb.core.MongoOperations; -import org.springframework.data.mongodb.core.MongoTemplate; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.io.writers.AnnotationMongoWriter; -import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.ANNOTATION_WRITER; diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java index dbdd5c12c..528375fae 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapper.java @@ -22,9 +22,9 @@ import org.slf4j.LoggerFactory; import org.springframework.batch.item.file.LineMapper; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Score; import java.util.Arrays; import java.util.HashMap; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java index d28ef5739..35f70f34c 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReader.java @@ -18,7 +18,7 @@ import org.springframework.batch.item.file.FlatFileItemReader; import org.springframework.core.io.Resource; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.pipeline.io.GzipLazyResource; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 789e3c98e..2f8b21b66 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ 
b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -24,8 +24,8 @@ import org.springframework.data.mongodb.core.query.BasicQuery; import org.springframework.data.mongodb.core.query.BasicUpdate; import org.springframework.util.Assert; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation; import java.util.HashMap; import java.util.Iterator; @@ -33,8 +33,8 @@ import java.util.Map; import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.ANNOTATION_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.VEP_CACHE_VERSION_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.VEP_VERSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.VEP_CACHE_VERSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.VEP_VERSION_FIELD; /** * Update the {@link uk.ac.ebi.eva.commons.models.data.Variant} mongo document with {@link VariantAnnotation} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index d57522b99..e6a34ac54 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -26,17 +26,17 @@ import org.springframework.data.mongodb.core.query.Update; import org.springframework.util.Assert; import uk.ac.ebi.eva.commons.models.converters.data.AnnotationToSimplifiedDBObjectConverter; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; -import 
uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Xref; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Xref; import uk.ac.ebi.eva.utils.MongoDBHelper; import java.util.HashMap; import java.util.List; import java.util.Map; -import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.XREFS_FIELD; /** * Write a list of {@link Annotation} into MongoDB diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java index efbc23650..05733d429 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java @@ -30,7 +30,7 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; import uk.ac.ebi.eva.pipeline.configuration.readers.AnnotationReaderConfiguration; import uk.ac.ebi.eva.pipeline.configuration.writers.AnnotationCompositeWriterConfiguration; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java index 
933943ea8..e95bf1b30 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/AnnotationLineMapperTest.java @@ -17,9 +17,9 @@ import org.junit.Test; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Score; import uk.ac.ebi.eva.test.data.VepOutputContent; import java.util.Set; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java index ae68277e3..27d6d5f0b 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java @@ -21,7 +21,7 @@ import org.springframework.batch.item.file.FlatFileParseException; import org.springframework.batch.test.MetaDataInstanceFactory; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.test.data.VepOutputContent; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index f8db7b5c0..512d2dc99 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -32,7 +32,7 @@ import 
org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; @@ -46,10 +46,10 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.SIFT_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.XREFS_FIELD; import static uk.ac.ebi.eva.test.data.VepOutputContent.vepOutputContentWithExtraFields; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index 3a57319f7..206664342 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -31,9 +31,9 @@ import 
org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.eva.commons.models.mongo.documents.Annotation; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType; -import uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.ConsequenceType; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Score; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; @@ -55,12 +55,12 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.SIFT_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.XREFS_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score.SCORE_DESCRIPTION_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.Score.SCORE_SCORE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Score.SCORE_DESCRIPTION_FIELD; +import static 
uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Score.SCORE_SCORE_FIELD; import static uk.ac.ebi.eva.test.data.VepOutputContent.vepOutputContent; /** diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java index 57b0fe9c9..d16628b21 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java @@ -45,7 +45,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; import static uk.ac.ebi.eva.utils.FileUtils.getResource; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java index ff1aeeb9c..76c2ab0a1 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java @@ -47,11 +47,11 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.POLYPHEN_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.XREFS_FIELD; -import static 
uk.ac.ebi.eva.commons.models.mongo.documents.subdocuments.ConsequenceType.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.ConsequenceType.SIFT_FIELD; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; /** diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java index f52740509..4db0b6dc9 100644 --- a/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java @@ -11,15 +11,10 @@ import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.utils.URLHelper; -import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStreamReader; import java.net.URISyntaxException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; @@ -28,7 +23,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.mongo.documents.Annotation.CONSEQUENCE_TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; import static uk.ac.ebi.eva.test.utils.JobTestUtils.getLines; import static uk.ac.ebi.eva.utils.FileUtils.getResource; From 
94fbf91dca0d41a101d9098fbbb8a73c7864ee5d Mon Sep 17 00:00:00 2001 From: jorizci Date: Thu, 18 May 2017 09:32:10 +0100 Subject: [PATCH 22/48] Modified query to use the projection of annotation field. --- .../pipeline/io/writers/AnnotationInVariantMongoWriter.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 2f8b21b66..57e6a719e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -148,8 +148,9 @@ private BasicDBObject createQueryMatchForVepAndCacheVersion() { private Map getStoredVariantAnnotations(Map variantAnnotations) { Map storedVariantAnnotations = new HashMap<>(); BasicDBObject query = generateQueryForAnnotationInVariant(variantAnnotations.keySet().toArray(new String[]{})); + BasicDBObject projection = new BasicDBObject(ANNOTATION_FIELD, 1); - Iterator iterator = mongoOperations.getCollection(collection).find(query).iterator(); + Iterator iterator = mongoOperations.getCollection(collection).find(query, projection).iterator(); while (iterator.hasNext()) { final DBObject object = iterator.next(); final String variantId = (String) object.get(ID); From 25d21c258c3d478604b93924687ee2eb4b1bd1f2 Mon Sep 17 00:00:00 2001 From: jorizci Date: Thu, 18 May 2017 12:13:50 +0100 Subject: [PATCH 23/48] Removed custom converter for SimplifiedAnnotation in favour of a new Spring generated one. 
--- ...notationToSimplifiedDBObjectConverter.java | 44 -------------- .../projections/SimplifiedAnnotation.java | 59 +++++++++++++++++++ .../io/writers/AnnotationMongoWriter.java | 8 +-- 3 files changed, 62 insertions(+), 49 deletions(-) delete mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedAnnotation.java diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java deleted file mode 100644 index c04835b02..000000000 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/AnnotationToSimplifiedDBObjectConverter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2014-2016 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.commons.models.converters.data; - -import com.mongodb.BasicDBObject; -import com.mongodb.DBObject; -import org.springframework.core.convert.converter.Converter; -import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; - -/** - * Class to convert a Annotation document to a DBObject that contains a simplified version of the document. - * This is done to aggregate the different consequences of the same Annotation in the database itself. 
It is not a - * complete converter of the Annotation object. - */ -public class AnnotationToSimplifiedDBObjectConverter implements Converter { - - public static final String ID = "_id"; - - @Override - public DBObject convert(Annotation source) { - DBObject dbObject = new BasicDBObject(); - dbObject.put(ID, source.getId()); - dbObject.put(Annotation.CHROMOSOME_FIELD, source.getChromosome()); - dbObject.put(Annotation.START_FIELD, source.getStart()); - dbObject.put(Annotation.END_FIELD, source.getEnd()); - dbObject.put(Annotation.VEP_VERSION_FIELD, source.getVepVersion()); - dbObject.put(Annotation.VEP_CACHE_VERSION_FIELD, source.getVepCacheVersion()); - return dbObject; - } - -} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedAnnotation.java new file mode 100644 index 000000000..5825acba5 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedAnnotation.java @@ -0,0 +1,59 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.commons.models.mongo.entity.projections; + +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Field; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; + +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.CHROMOSOME_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.END_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.START_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.VEP_CACHE_VERSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.VEP_VERSION_FIELD; + +/** + * Simplified form of {@link Annotation} used to improve the update of annotations in mongo. + */ +public class SimplifiedAnnotation { + + @Id + private String id; + + @Field(value = CHROMOSOME_FIELD) + private String chromosome; + + @Field(value = START_FIELD) + private int start; + + @Field(value = END_FIELD) + private int end; + + @Field(value = VEP_VERSION_FIELD) + private String vepVersion; + + @Field(value = VEP_CACHE_VERSION_FIELD) + private String vepCacheVersion; + + public SimplifiedAnnotation(Annotation annotation) { + this.id = annotation.getId(); + this.chromosome = annotation.getChromosome(); + this.start = annotation.getStart(); + this.end = annotation.getEnd(); + this.vepVersion = annotation.getVepVersion(); + this.vepCacheVersion = annotation.getVepCacheVersion(); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index e6a34ac54..2fa6bfbf1 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -17,6 +17,7 @@ package uk.ac.ebi.eva.pipeline.io.writers; import com.mongodb.BasicDBObject; +import com.mongodb.DBObject; import 
org.springframework.batch.item.ItemWriter; import org.springframework.data.mongodb.core.BulkOperations; import org.springframework.data.mongodb.core.MongoOperations; @@ -25,8 +26,8 @@ import org.springframework.data.mongodb.core.query.Query; import org.springframework.data.mongodb.core.query.Update; import org.springframework.util.Assert; -import uk.ac.ebi.eva.commons.models.converters.data.AnnotationToSimplifiedDBObjectConverter; import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.projections.SimplifiedAnnotation; import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.ConsequenceType; import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Xref; import uk.ac.ebi.eva.utils.MongoDBHelper; @@ -73,12 +74,9 @@ public class AnnotationMongoWriter implements ItemWriter { private final String collection; - private final AnnotationToSimplifiedDBObjectConverter converter; - public AnnotationMongoWriter(MongoOperations mongoOperations, String collection) { Assert.notNull(mongoOperations); Assert.hasText(collection); - this.converter = new AnnotationToSimplifiedDBObjectConverter(); this.mongoOperations = mongoOperations; this.collection = collection; @@ -110,7 +108,7 @@ private Map groupAnnotationById(List a } private void writeAnnotationInMongoDb(BulkOperations bulk, Annotation annotation) { - Query upsertQuery = new BasicQuery(converter.convert(annotation)); + Query upsertQuery = new BasicQuery((DBObject)convertToMongo(new SimplifiedAnnotation(annotation))); Update update = buildUpdateQuery(annotation); bulk.upsert(upsertQuery, update); } From 23bd8ad30252d91837574642bbfd4415270ddaed Mon Sep 17 00:00:00 2001 From: jorizci Date: Thu, 18 May 2017 13:58:50 +0100 Subject: [PATCH 24/48] Added missing mongo indexes. 
--- .../eva/pipeline/io/writers/VariantMongoWriter.java | 11 ++++++++--- .../pipeline/io/writers/VariantMongoWriterTest.java | 4 ++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java index 95bb9050e..3a8046fa3 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java @@ -33,6 +33,10 @@ import java.util.List; +import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.ANNOTATION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; + /** * Write a list of {@link Variant} into MongoDB * See also {@link org.opencb.opencga.storage.mongodb.variant.VariantMongoDBWriter} @@ -41,8 +45,6 @@ public class VariantMongoWriter extends MongoItemWriter { private static final Logger logger = LoggerFactory.getLogger(VariantMongoWriter.class); - private static final String VARIANT_ANNOTATION_SO_FIELD = "so"; - private final MongoOperations mongoOperations; private final String collection; @@ -116,7 +118,10 @@ private void createIndexes() { new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); mongoOperations.getCollection(collection).createIndex( - new BasicDBObject(VARIANT_ANNOTATION_SO_FIELD, 1), + new BasicDBObject(ANNOTATION_FIELD+"."+XREFS_FIELD, 1), + new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); + mongoOperations.getCollection(collection).createIndex( + new BasicDBObject(ANNOTATION_FIELD+"."+ SO_ACCESSION_FIELD, 1), new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java 
b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java index e313fe092..11ac06726 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java @@ -119,8 +119,8 @@ public void indexesShouldBeCreatedInBackground() throws UnknownHostException { Set createdIndexes = indexInfo.stream().map(index -> index.get("name").toString()) .collect(Collectors.toSet()); Set expectedIndexes = new HashSet<>(); - expectedIndexes.addAll(Arrays.asList("so_1", "chr_1_start_1_end_1", "files.sid_1_files.fid_1", "_id_", - "ids_1")); + expectedIndexes.addAll(Arrays.asList("annot.xrefs_1", "files.sid_1_files.fid_1", "chr_1_start_1_end_1", + "annot.so_1", "_id_", "ids_1")); assertEquals(expectedIndexes, createdIndexes); indexInfo.stream().filter(index -> !("_id_".equals(index.get("name").toString()))) From 464b9ac6d73ebb3f7024012893bea3b2963b707b Mon Sep 17 00:00:00 2001 From: jorizci Date: Fri, 19 May 2017 10:39:00 +0100 Subject: [PATCH 25/48] Modified convert function to be more strict. 
--- .../pipeline/io/writers/AnnotationMongoWriter.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java index 2fa6bfbf1..b3307be31 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriter.java @@ -16,6 +16,7 @@ package uk.ac.ebi.eva.pipeline.io.writers; +import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; import com.mongodb.DBObject; import org.springframework.batch.item.ItemWriter; @@ -32,6 +33,7 @@ import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Xref; import uk.ac.ebi.eva.utils.MongoDBHelper; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -108,7 +110,7 @@ private Map groupAnnotationById(List a } private void writeAnnotationInMongoDb(BulkOperations bulk, Annotation annotation) { - Query upsertQuery = new BasicQuery((DBObject)convertToMongo(new SimplifiedAnnotation(annotation))); + Query upsertQuery = new BasicQuery(convertToMongo(new SimplifiedAnnotation(annotation))); Update update = buildUpdateQuery(annotation); bulk.upsert(upsertQuery, update); } @@ -128,8 +130,12 @@ private BasicDBObject buildInsertConsequenceTypeQuery(Annotation annotation) { return new BasicDBObject(EACH, convertToMongo(annotation.getConsequenceTypes())); } - private Object convertToMongo(Object object) { - return mongoOperations.getConverter().convertToMongoType(object); + private DBObject convertToMongo(SimplifiedAnnotation simplifiedAnnotation) { + return (DBObject) mongoOperations.getConverter().convertToMongoType(simplifiedAnnotation); + } + + private BasicDBList convertToMongo(Collection object) { + return (BasicDBList) mongoOperations.getConverter().convertToMongoType(object); } private void createIndexes() { From 
0308faa8e1210d30940e3b1c2d1741873b88d8dc Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 23 May 2017 12:09:06 +0100 Subject: [PATCH 26/48] Modified javadoc. --- .../models/mongo/entity/subdocuments/VariantAnnotation.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAnnotation.java index 60a3e99f3..8d2b81f0e 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAnnotation.java @@ -70,7 +70,7 @@ public class VariantAnnotation { * * @param vepVersion * @param vepCacheVersion - * @throws IllegalArgumentException + * @throws IllegalArgumentException If {@param vepVersion} or {@param vepCacheVersion} are null or empty values. */ public VariantAnnotation(String vepVersion, String vepCacheVersion) { Assert.hasText(vepVersion); @@ -180,14 +180,14 @@ private void concatenatePolyphenRange(double score) { } private void addXrefId(String id) { - if(xrefIds==null){ + if (xrefIds == null) { xrefIds = new HashSet<>(); } xrefIds.add(id); } private void addXrefIds(Set ids) { - if(xrefIds==null){ + if (xrefIds == null) { xrefIds = new HashSet<>(); } xrefIds.addAll(ids); From d40aff405779192535f501a464675178bbce0984 Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 23 May 2017 15:38:00 +0100 Subject: [PATCH 27/48] Refactored converters -Generated entity models to access databases -Separated logic from converters / writers / entities / model -Modified tests to check the new converters --- .../data/SamplesToDBObjectConverter.java | 83 ------- ...VariantSourceEntryToDBObjectConverter.java | 123 ----------- .../data/VariantStatsToDBObjectConverter.java | 114 ---------- .../data/VariantToDBObjectConverter.java | 193 ---------------- 
.../models/mongo/entity/VariantDocument.java | 185 ++++++++++++++++ .../entity/projections/SimplifiedVariant.java | 72 ++++++ .../mongo/entity/subdocuments/HgvsMongo.java | 21 ++ .../mongo/entity/subdocuments/VariantAt.java | 24 ++ .../subdocuments/VariantSourceEntryMongo.java | 156 +++++++++++++ .../subdocuments/VariantStatsMongo.java | 85 ++++++++ .../io/readers/VariantsMongoReader.java | 8 +- .../AnnotationInVariantMongoWriter.java | 2 +- .../io/writers/VariantMongoWriter.java | 115 +++++++--- ...StatisticsFromVariantGivenStudyIdStep.java | 4 +- .../SingleStudyVariantsDropperStep.java | 3 +- .../ac/ebi/eva/utils/CompressionHelper.java | 18 ++ ...antSourceEntryToDBObjectConverterTest.java | 64 +++--- .../VariantStatsToDBObjectConverterTest.java | 77 ++++--- .../data/VariantToDBObjectConverterTest.java | 206 ++++++++++++------ .../AnnotationInVariantMongoWriterTest.java | 7 +- .../jobs/steps/AnnotationLoaderStepTest.java | 10 +- .../MongoOperationConfiguration.java | 37 ++++ .../eva/test/utils/DropStudyJobTestUtils.java | 5 +- .../ac/ebi/eva/test/utils/JobTestUtils.java | 4 +- 24 files changed, 916 insertions(+), 700 deletions(-) delete mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/converters/data/SamplesToDBObjectConverter.java delete mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantSourceEntryToDBObjectConverter.java delete mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantStatsToDBObjectConverter.java delete mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java create mode 100644 
src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAt.java create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java create mode 100644 src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java create mode 100644 src/test/java/uk/ac/ebi/eva/test/configuration/MongoOperationConfiguration.java diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/SamplesToDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/SamplesToDBObjectConverter.java deleted file mode 100644 index 3c3fdc280..000000000 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/SamplesToDBObjectConverter.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright 2014-2016 EMBL - European Bioinformatics Institute - * Copyright 2015 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.commons.models.converters.data; - -import com.mongodb.BasicDBObject; -import com.mongodb.DBObject; -import org.opencb.biodata.models.feature.Genotype; -import org.springframework.core.convert.converter.Converter; - -import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Simple samples converter that does not require the names of the samples, as it will compress them in mongo. - *

- * This class is based on OpenCGA MongoDB converters. - */ -public class SamplesToDBObjectConverter implements Converter { - - @Override - public DBObject convert(VariantSourceEntry object) { - Map> genotypeCodes = new HashMap<>(); - - // Classify samples by genotype - for (int i = 0; i < object.getSamplesData().size(); i++) { - String genotype = object.getSampleData(i).get("GT"); - if (genotype != null) { - Genotype g = new Genotype(genotype); - List samplesWithGenotype = genotypeCodes.get(g); - if (samplesWithGenotype == null) { - samplesWithGenotype = new ArrayList<>(); - genotypeCodes.put(g, samplesWithGenotype); - } - samplesWithGenotype.add(i); - } - } - - // Get the most common genotype - Map.Entry> longestList = null; - for (Map.Entry> entry : genotypeCodes.entrySet()) { - List genotypeList = entry.getValue(); - if (longestList == null || genotypeList.size() > longestList.getValue().size()) { - longestList = entry; - } - } - - // In Mongo, samples are stored in a map, classified by their genotype. - // The most common genotype will be marked as "default" and the specific - // positions where it is shown will not be stored. 
Example from 1000G: - // "def" : 0|0, - // "0|1" : [ 41, 311, 342, 358, 881, 898, 903 ], - // "1|0" : [ 262, 290, 300, 331, 343, 369, 374, 391, 879, 918, 930 ] - BasicDBObject mongoSamples = new BasicDBObject(); - for (Map.Entry> entry : genotypeCodes.entrySet()) { - String genotypeStr = entry.getKey().toString().replace(".", "-1"); - if (longestList != null && entry.getKey().equals(longestList.getKey())) { - mongoSamples.append("def", genotypeStr); - } else { - mongoSamples.append(genotypeStr, entry.getValue()); - } - } - - return mongoSamples; - } -} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantSourceEntryToDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantSourceEntryToDBObjectConverter.java deleted file mode 100644 index 982d50790..000000000 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantSourceEntryToDBObjectConverter.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2014-2016 EMBL - European Bioinformatics Institute - * Copyright 2015 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package uk.ac.ebi.eva.commons.models.converters.data; - -import com.mongodb.BasicDBObject; -import com.mongodb.DBObject; -import org.springframework.core.convert.converter.Converter; - -import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; - -import java.io.IOException; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Converter of VariantSourceEntry to DBObject. Implements spring's interface of converter. - *

- * This class is based on OpenCGA MongoDB converters. - */ -public class VariantSourceEntryToDBObjectConverter implements Converter { - - public final static String FILEID_FIELD = "fid"; - - public final static String STUDYID_FIELD = "sid"; - - public final static String ALTERNATES_FIELD = "alts"; - - public final static String ATTRIBUTES_FIELD = "attrs"; - - public final static String FORMAT_FIELD = "fm"; - - public final static String SAMPLES_FIELD = "samp"; - - static final char CHARACTER_TO_REPLACE_DOTS = (char) 163; // <-- £ - - - private SamplesToDBObjectConverter samplesConverter; - - /** - * Create a converter between VariantSourceEntry and DBObject entities when - * there is no need to add the samples data to the DBObject. - */ - public VariantSourceEntryToDBObjectConverter() { - this(null); - } - - /** - * Create a converter from VariantSourceEntry to DBObject entities. A samples converter may be provided in case - * those should be processed during the conversion. - * - * @param samplesConverter The object used to convert the samples. 
If null, won't convert - */ - public VariantSourceEntryToDBObjectConverter(SamplesToDBObjectConverter samplesConverter) { - this.samplesConverter = samplesConverter; - } - - @Override - public DBObject convert(VariantSourceEntry object) { - BasicDBObject mongoFile = new BasicDBObject(FILEID_FIELD, object.getFileId()) - .append(STUDYID_FIELD, object.getStudyId()); - - // Alternate alleles - // assuming secondaryAlternates doesn't contain the primary alternate - if (object.getSecondaryAlternates().length > 0) { - mongoFile.append(ALTERNATES_FIELD, object.getSecondaryAlternates()); - } - - // Attributes - if (object.getAttributes().size() > 0) { - BasicDBObject attrs = null; - for (Map.Entry entry : object.getAttributes().entrySet()) { - Object value = entry.getValue(); - if (entry.getKey().equals("src")) { - String[] fields = entry.getValue().split("\t"); - StringBuilder sb = new StringBuilder(); - sb.append(fields[0]); - for (int i = 1; i < fields.length && i < 8; i++) { - sb.append("\t").append(fields[i]); - } - try { - value = org.opencb.commons.utils.StringUtils.gzip(sb.toString()); - } catch (IOException ex) { - Logger.getLogger(VariantSourceEntryToDBObjectConverter.class.getName()) - .log(Level.SEVERE, null, ex); - } - } - - if (attrs == null) { - attrs = new BasicDBObject(entry.getKey().replace('.', CHARACTER_TO_REPLACE_DOTS), value); - } else { - attrs.append(entry.getKey().replace('.', CHARACTER_TO_REPLACE_DOTS), value); - } - } - - if (attrs != null) { - mongoFile.put(ATTRIBUTES_FIELD, attrs); - } - } - - if (samplesConverter != null) { - mongoFile.append(FORMAT_FIELD, object.getFormat()); // Useless field if genotypeCodes are not stored - mongoFile.put(SAMPLES_FIELD, samplesConverter.convert(object)); - } - - return mongoFile; - } - -} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantStatsToDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantStatsToDBObjectConverter.java deleted file mode 
100644 index 4455b2b22..000000000 --- a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantStatsToDBObjectConverter.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2014-2016 EMBL - European Bioinformatics Institute - * Copyright 2015 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.commons.models.converters.data; - -import com.mongodb.BasicDBObject; -import com.mongodb.DBObject; -import org.opencb.biodata.models.feature.Genotype; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.core.convert.converter.Converter; - -import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; -import uk.ac.ebi.eva.commons.models.data.VariantStats; - -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -/** - * Converts VariantStats into MongoDb objects. Implements spring's interface of converter. - *

- * This class is based on OpenCGA MongoDB converters. - */ -public class VariantStatsToDBObjectConverter implements Converter> { - - public final static String COHORT_ID = "cid"; - - public final static String STUDY_ID = "sid"; - - public final static String FILE_ID = "fid"; - - public final static String MAF_FIELD = "maf"; - - public final static String MGF_FIELD = "mgf"; - - public final static String MAFALLELE_FIELD = "mafAl"; - - public final static String MGFGENOTYPE_FIELD = "mgfGt"; - - public final static String MISSALLELE_FIELD = "missAl"; - - public final static String MISSGENOTYPE_FIELD = "missGt"; - - public final static String NUMGT_FIELD = "numGt"; - - protected static Logger logger = LoggerFactory.getLogger(VariantStatsToDBObjectConverter.class); - - /** - * converts all the cohortstats within the sourceEntries - * - * @param sourceEntry for instance, you can pass in variant.getSourceEntries() - * @return list of VariantStats (as DBObjects) - */ - @Override - public List convert(VariantSourceEntry sourceEntry) { - return convertCohorts(sourceEntry.getCohortStats(), sourceEntry.getStudyId(), sourceEntry.getFileId()); - } - - /** - * converts just some cohorts stats in one VariantSourceEntry. 
- * - * @param cohortStats for instance, you can pass in sourceEntry.getCohortStats() - * @param studyId of the source entry - * @param fileId of the source entry - * @return list of VariantStats (as DBObjects) - */ - private List convertCohorts(Map cohortStats, String studyId, String fileId) { - List cohortsStatsList = new LinkedList<>(); - VariantStats variantStats; - for (Map.Entry variantStatsEntry : cohortStats.entrySet()) { - variantStats = variantStatsEntry.getValue(); - DBObject variantStatsDBObject = convertStats(variantStats); - variantStatsDBObject.put(VariantStatsToDBObjectConverter.COHORT_ID, variantStatsEntry.getKey()); - variantStatsDBObject.put(VariantStatsToDBObjectConverter.STUDY_ID, studyId); - variantStatsDBObject.put(VariantStatsToDBObjectConverter.FILE_ID, fileId); - cohortsStatsList.add(variantStatsDBObject); - } - return cohortsStatsList; - } - - private DBObject convertStats(VariantStats variantStats) { - // Basic fields - BasicDBObject mongoStats = new BasicDBObject(MAF_FIELD, variantStats.getMaf()); - mongoStats.append(MGF_FIELD, variantStats.getMgf()); - mongoStats.append(MAFALLELE_FIELD, variantStats.getMafAllele()); - mongoStats.append(MGFGENOTYPE_FIELD, variantStats.getMgfGenotype()); - mongoStats.append(MISSALLELE_FIELD, variantStats.getMissingAlleles()); - mongoStats.append(MISSGENOTYPE_FIELD, variantStats.getMissingGenotypes()); - - // Genotype counts - BasicDBObject genotypes = new BasicDBObject(); - for (Map.Entry g : variantStats.getGenotypesCount().entrySet()) { - String genotypeStr = g.getKey().toString().replace(".", "-1"); - genotypes.append(genotypeStr, g.getValue()); - } - mongoStats.append(NUMGT_FIELD, genotypes); - return mongoStats; - } -} - diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java b/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java deleted file mode 100644 index 4b0692f91..000000000 --- 
a/src/main/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverter.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright 2014-2016 EMBL - European Bioinformatics Institute - * Copyright 2015 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.commons.models.converters.data; - -import com.mongodb.BasicDBList; -import com.mongodb.BasicDBObject; -import com.mongodb.DBObject; -import org.opencb.opencga.storage.mongodb.variant.VariantMongoDBWriter; -import org.springframework.core.convert.converter.Converter; - -import uk.ac.ebi.eva.commons.models.data.Variant; -import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; -import uk.ac.ebi.eva.utils.MongoDBHelper; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * Converts Variants into MongoDb objects. Implements spring's interface of converter. - *

- * Design policies: - *

- * IDS: The ids of a Variant will NOT be put in the DBObject, as using addToSet(ids) and - * setOnInsert(without ids) avoids overwriting ids. In a DBObject, both an empty ids array or no ids property, - * converts to a Variant with an empty set of ids. - *

- * This class is based on OpenCGA MongoDB converters. - */ -public class VariantToDBObjectConverter implements Converter { - - public static final String ONE_THOUSAND_STRING = VariantMongoDBWriter.CHUNK_SIZE_SMALL / 1000 + "k"; - - public static final String TEN_THOUSAND_STRING = VariantMongoDBWriter.CHUNK_SIZE_BIG / 1000 + "k"; - - public final static String CHROMOSOME_FIELD = "chr"; - - public final static String START_FIELD = "start"; - - public final static String END_FIELD = "end"; - - public final static String LENGTH_FIELD = "len"; - - public final static String REFERENCE_FIELD = "ref"; - - public final static String ALTERNATE_FIELD = "alt"; - - // public final static String ID_FIELD = "id"; - public final static String IDS_FIELD = "ids"; - - public final static String HGVS_FIELD = "hgvs"; - - public final static String TYPE_FIELD = "type"; - - public final static String NAME_FIELD = "name"; - - public final static String FILES_FIELD = "files"; - - public final static String EFFECTS_FIELD = "effs"; - - public final static String SOTERM_FIELD = "so"; - - public final static String GENE_FIELD = "gene"; - - public final static String ANNOTATION_FIELD = "annot"; - - public final static String STATS_FIELD = "st"; - - private VariantSourceEntryToDBObjectConverter variantSourceEntryConverter; - - private VariantStatsToDBObjectConverter statsConverter; - - /** - * Create a converter between Variant and DBObject entities when the fields of VariantSourceEntry, - * Annotation and VariantStats should not be written. - */ - public VariantToDBObjectConverter() { - this(null, null); - } - - /** - * Create a converter between Variant and DBObject entities. For complex inner fields (VariantSourceEntry, - * VariantStats, Annotation), converters must be provided. If they are null, it is assumed that the field - * should not be written. 
- * - * @param variantSourceEntryConverter Nullable - * @param VariantStatsConverter Nullable - */ - public VariantToDBObjectConverter( - VariantSourceEntryToDBObjectConverter variantSourceEntryConverter, - VariantStatsToDBObjectConverter VariantStatsConverter) { - this.variantSourceEntryConverter = variantSourceEntryConverter; - this.statsConverter = VariantStatsConverter; - } - - @Override - public DBObject convert(Variant object) { - String id = object.buildVariantId(); - - BasicDBObject mongoVariant = new BasicDBObject("_id", id) - // Do not include IDs: the MongoWriter will take care in the query using an $addToSet - //.append(IDS_FIELD, object.getIds()) - .append(TYPE_FIELD, object.getType().name()) - .append(CHROMOSOME_FIELD, object.getChromosome()) - .append(START_FIELD, object.getStart()) - .append(END_FIELD, object.getEnd()) - .append(LENGTH_FIELD, object.getLength()) - .append(REFERENCE_FIELD, object.getReference()) - .append(ALTERNATE_FIELD, object.getAlternate()); - - appendAt(object, mongoVariant); - appendHgvs(object, mongoVariant); - appendFiles(object, mongoVariant); - appendStatistics(object, mongoVariant); - - return mongoVariant; - } - - /** - * Internal fields used for query optimization (dictionary named "_at") - */ - private void appendAt(Variant object, BasicDBObject mongoVariant) { - BasicDBObject _at = new BasicDBObject(); - _at.append("chunkIds", getChunkIds(object)); - mongoVariant.append("_at", _at); - } - - /** - * ChunkIDs (1k and 10k) are a field that all variants within a chunk will share. This is intended for fast access - * to a specific region using a small index table (compared with and index on chr+start, for instance). - * This design should be reevaluated if we are using sharded databases. 
- */ - private BasicDBList getChunkIds(Variant object) { - int smallChunkId = object.getStart() / VariantMongoDBWriter.CHUNK_SIZE_SMALL; - int bigChunkId = object.getStart() / VariantMongoDBWriter.CHUNK_SIZE_BIG; - String chunkSmall = object.getChromosome() + "_" + smallChunkId + "_" + ONE_THOUSAND_STRING; - String chunkBig = object.getChromosome() + "_" + bigChunkId + "_" + TEN_THOUSAND_STRING; - - BasicDBList chunkIds = new BasicDBList(); - chunkIds.add(chunkSmall); - chunkIds.add(chunkBig); - return chunkIds; - } - - /** - * Transform HGVS: Map of sets -> List of map entries - * {k1 -> {v1_1, v1_2}, k2 -> {v2}} changes to [{k1, v1_1}, {k1, v1_2}, {k2, v2}] - */ - private void appendHgvs(Variant object, BasicDBObject mongoVariant) { - BasicDBList hgvs = new BasicDBList(); - for (Map.Entry> entry : object.getHgvs().entrySet()) { - for (String value : entry.getValue()) { - hgvs.add(new BasicDBObject(TYPE_FIELD, entry.getKey()).append(NAME_FIELD, value)); - } - } - mongoVariant.append(HGVS_FIELD, hgvs); - } - - private void appendFiles(Variant object, BasicDBObject mongoVariant) { - if (variantSourceEntryConverter != null) { - BasicDBList mongoFiles = new BasicDBList(); - for (VariantSourceEntry archiveFile : object.getSourceEntries().values()) { - mongoFiles.add(variantSourceEntryConverter.convert(archiveFile)); - } - mongoVariant.append(FILES_FIELD, mongoFiles); - } - } - - private void appendStatistics(Variant object, BasicDBObject mongoVariant) { - if (statsConverter != null) { - List mongoStats = new ArrayList<>(); - for (VariantSourceEntry variantSourceEntry : object.getSourceEntries().values()) { - mongoStats.addAll(statsConverter.convert(variantSourceEntry)); - } - mongoVariant.put(STATS_FIELD, mongoStats); - } - } -} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java new file mode 100644 index 000000000..d17e88250 --- /dev/null +++ 
b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java @@ -0,0 +1,185 @@ +package uk.ac.ebi.eva.commons.models.mongo.entity; + +import org.opencb.commons.utils.CryptoUtils; +import org.opencb.opencga.storage.mongodb.variant.VariantMongoDBWriter; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; +import uk.ac.ebi.eva.commons.models.data.Variant; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.HgvsMongo; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAt; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantStatsMongo; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +@Document +public class VariantDocument { + + public static final String ONE_THOUSAND_STRING = VariantMongoDBWriter.CHUNK_SIZE_SMALL / 1000 + "k"; + + public static final String TEN_THOUSAND_STRING = VariantMongoDBWriter.CHUNK_SIZE_BIG / 1000 + "k"; + + public final static String TYPE_FIELD = "type"; + + public final static String CHROMOSOME_FIELD = "chr"; + + public final static String START_FIELD = "start"; + + public final static String END_FIELD = "end"; + + public final static String LENGTH_FIELD = "len"; + + public final static String REFERENCE_FIELD = "ref"; + + public final static String ALTERNATE_FIELD = "alt"; + + public static final String AT_FIELD = "_at"; + + public final static String HGVS_FIELD = "hgvs"; + + public final static String IDS_FIELD = "ids"; + + public final static String FILES_FIELD = "files"; + + public final static String STATS_FIELD = "st"; + + public final static String ANNOTATION_FIELD = "annot"; + + @Id + private String id; + + @Field(TYPE_FIELD) + private Variant.VariantType 
variantType; + + @Field(CHROMOSOME_FIELD) + private String chromosome; + + @Field(START_FIELD) + private int start; + + @Field(END_FIELD) + private int end; + + @Field(LENGTH_FIELD) + private int length; + + @Field(REFERENCE_FIELD) + private String reference; + + @Field(ALTERNATE_FIELD) + private String alternate; + + @Field(AT_FIELD) + private VariantAt at; + + @Field(HGVS_FIELD) + private Set hgvs; + + @Field(IDS_FIELD) + private Set ids; + + @Field(FILES_FIELD) + private Set variantSources; + + @Field(STATS_FIELD) + private Set variantStatsMongo; + + @Field(ANNOTATION_FIELD) + private Set annotations; + + public VariantDocument(Variant.VariantType variantType, String chromosome, int start, int end, int length, + String reference, String alternate, Map> hgvs, Set ids, + Set variantSources) { + this.id = buildVariantId(chromosome, start, reference, alternate); + this.variantType = variantType; + this.chromosome = chromosome; + this.start = start; + this.end = end; + this.length = length; + this.reference = reference; + this.alternate = alternate; + this.at = generateAtField(chromosome, start); + if (hgvs != null) { + this.hgvs = createHgvsMongo(hgvs); + } + if (ids != null) { + this.ids = new HashSet<>(ids); + } + if (variantSources != null) { + this.variantSources = new HashSet<>(variantSources); + } + } + + public VariantDocument(Variant.VariantType variantType, String chromosome, int start, int end, int length, + String reference, String alternate, Set hgvs, Set ids, + Set variantSources) { + this.id = buildVariantId(chromosome, start, reference, alternate); + this.variantType = variantType; + this.chromosome = chromosome; + this.start = start; + this.end = end; + this.length = length; + this.reference = reference; + this.alternate = alternate; + this.at = generateAtField(chromosome, start); + if (hgvs != null && !hgvs.isEmpty()) { + this.hgvs = new HashSet<>(hgvs); + } + if (ids != null && !ids.isEmpty()) { + this.ids = new HashSet<>(ids); + } + if 
(variantSources != null && !variantSources.isEmpty()) { + this.variantSources = new HashSet<>(variantSources); + } + } + + public static String buildVariantId(String chromosome, int start, String reference, String alternate) { + StringBuilder builder = new StringBuilder(chromosome); + builder.append("_"); + builder.append(start); + builder.append("_"); + if (!reference.equals("-")) { + if (reference.length() < 50) { + builder.append(reference); + } else { + builder.append(new String(CryptoUtils.encryptSha1(reference))); + } + } + + builder.append("_"); + if (!alternate.equals("-")) { + if (alternate.length() < 50) { + builder.append(alternate); + } else { + builder.append(new String(CryptoUtils.encryptSha1(alternate))); + } + } + + return builder.toString(); + } + + public static VariantAt generateAtField(String chromosome, int start) { + int smallChunkId = start / VariantMongoDBWriter.CHUNK_SIZE_SMALL; + int bigChunkId = start / VariantMongoDBWriter.CHUNK_SIZE_BIG; + String chunkSmall = chromosome + "_" + smallChunkId + "_" + ONE_THOUSAND_STRING; + String chunkBig = chromosome + "_" + bigChunkId + "_" + TEN_THOUSAND_STRING; + + return new VariantAt(chunkSmall, chunkBig); + } + + public static Set createHgvsMongo(Map> hgvs) { + Set hgvsMongo = new HashSet<>(); + for (Map.Entry> entry : hgvs.entrySet()) { + for (String value : entry.getValue()) { + hgvsMongo.add(new HgvsMongo(entry.getKey(), value)); + } + } + return hgvsMongo; + + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java new file mode 100644 index 000000000..7f080ffb6 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java @@ -0,0 +1,72 @@ +package uk.ac.ebi.eva.commons.models.mongo.entity.projections; + +import org.springframework.data.annotation.Id; +import 
org.springframework.data.mongodb.core.mapping.Field; +import uk.ac.ebi.eva.commons.models.data.Variant; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.HgvsMongo; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAt; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.ALTERNATE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.AT_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.CHROMOSOME_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.END_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.HGVS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.LENGTH_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.REFERENCE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.START_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.TYPE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.buildVariantId; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.createHgvsMongo; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.generateAtField; + +public class SimplifiedVariant { + + @Id + private String id; + + @Field(TYPE_FIELD) + private Variant.VariantType variantType; + + @Field(CHROMOSOME_FIELD) + private String chromosome; + + @Field(START_FIELD) + private int start; + + @Field(END_FIELD) + private int end; + + @Field(LENGTH_FIELD) + private int length; + + @Field(REFERENCE_FIELD) + private String reference; + + @Field(ALTERNATE_FIELD) + private String alternate; + + @Field(AT_FIELD) + private VariantAt at; + + @Field(HGVS_FIELD) + private Set hgvs; + + public SimplifiedVariant(Variant.VariantType variantType, String chromosome, int start, int end, 
int length, + String reference, String alternate, Map> hgvs) { + this.id = buildVariantId(chromosome, start, reference, alternate); + this.variantType = variantType; + this.chromosome = chromosome; + this.start = start; + this.end = end; + this.length = length; + this.reference = reference; + this.alternate = alternate; + this.at = generateAtField(chromosome, start); + this.hgvs = createHgvsMongo(hgvs); + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java new file mode 100644 index 000000000..88fefaa53 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java @@ -0,0 +1,21 @@ +package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; + +import org.springframework.data.mongodb.core.mapping.Field; + +public class HgvsMongo { + + private static final String TYPE_FIELD = "type"; + + private static final String NAME_FIELD = "name"; + + @Field(TYPE_FIELD) + private final String type; + + @Field(NAME_FIELD) + private final String name; + + public HgvsMongo(String type, String name) { + this.type = type; + this.name = name; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAt.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAt.java new file mode 100644 index 000000000..11364c811 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAt.java @@ -0,0 +1,24 @@ +package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; + +import org.springframework.data.mongodb.core.mapping.Field; + +import java.util.HashSet; +import java.util.Set; + +public class VariantAt { + + private static final String CHUNK_IDS_FIELD = "chunkIds"; + + @Field(CHUNK_IDS_FIELD) + private Set chunkIds; + + VariantAt(){ + //Empty constructor for spring + } + + public VariantAt(String chunkSmall, 
String chunkBig) { + chunkIds = new HashSet<>(); + chunkIds.add(chunkSmall); + chunkIds.add(chunkBig); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java new file mode 100644 index 000000000..0e0d67e22 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java @@ -0,0 +1,156 @@ +package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; + +import com.mongodb.BasicDBObject; +import org.opencb.biodata.models.feature.Genotype; +import org.springframework.data.mongodb.core.mapping.Field; +import uk.ac.ebi.eva.utils.CompressionHelper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class VariantSourceEntryMongo { + + public final static char CHARACTER_TO_REPLACE_DOTS = (char) 163; // <-- £ + + public final static String FILEID_FIELD = "fid"; + + public final static String STUDYID_FIELD = "sid"; + + public final static String ALTERNATES_FIELD = "alts"; + + public final static String ATTRIBUTES_FIELD = "attrs"; + + public final static String FORMAT_FIELD = "fm"; + + public final static String SAMPLES_FIELD = "samp"; + + @Field(FILEID_FIELD) + private String fileId; + + @Field(STUDYID_FIELD) + private String studyId; + + @Field(ALTERNATES_FIELD) + private String[] alternates; + + @Field(ATTRIBUTES_FIELD) + private BasicDBObject attrs; + + @Field(FORMAT_FIELD) + private String format; + + @Field(SAMPLES_FIELD) + private BasicDBObject samp; + + + VariantSourceEntryMongo() { + // Spring empty constructor + } + + public VariantSourceEntryMongo(String fileId, String studyId, String[] alternates, Map attributes) { + this.fileId = fileId; + this.studyId = studyId; + if (alternates != null && 
alternates.length > 0) { + this.alternates = new String[alternates.length]; + System.arraycopy(alternates, 0, this.alternates, 0, alternates.length); + } + attrs = buildAttributes(attributes); + + this.format = null; + this.samp = null; + } + + public VariantSourceEntryMongo(String fileId, String studyId, String[] alternates, Map + attributes, String format, List> samplesData) { + this(fileId, studyId, alternates, attributes); + this.format = format; + this.samp = buildSampleData(samplesData); + } + + private BasicDBObject buildSampleData(List> samplesData) { + Map> genotypeCodes = classifySamplesByGenotype(samplesData); + + // Get the most common genotype + Map.Entry> longestList = getLongestGenotypeList(genotypeCodes); + + // In Mongo, samples are stored in a map, classified by their genotype. + // The most common genotype will be marked as "default" and the specific + // positions where it is shown will not be stored. Example from 1000G: + // "def" : 0|0, + // "0|1" : [ 41, 311, 342, 358, 881, 898, 903 ], + // "1|0" : [ 262, 290, 300, 331, 343, 369, 374, 391, 879, 918, 930 ] + BasicDBObject mongoSamples = new BasicDBObject(); + for (Map.Entry> entry : genotypeCodes.entrySet()) { + String genotypeStr = entry.getKey().toString().replace(".", "-1"); + if (longestList != null && entry.getKey().equals(longestList.getKey())) { + mongoSamples.append("def", genotypeStr); + } else { + mongoSamples.append(genotypeStr, entry.getValue()); + } + } + + return mongoSamples; + } + + private Map.Entry> getLongestGenotypeList(Map> genotypeCodes) { + Map.Entry> longestList = null; + for (Map.Entry> entry : genotypeCodes.entrySet()) { + List genotypeList = entry.getValue(); + if (longestList == null || genotypeList.size() > longestList.getValue().size()) { + longestList = entry; + } + } + return longestList; + } + + private Map> classifySamplesByGenotype(List> samplesData) { + Map> genotypeCodes = new HashMap<>(); + + for (int i = 0; i < samplesData.size(); i++) { + String genotype 
= samplesData.get(i).get("GT"); + if (genotype != null) { + Genotype g = new Genotype(genotype); + List samplesWithGenotype = genotypeCodes.get(g); + if (samplesWithGenotype == null) { + samplesWithGenotype = new ArrayList<>(); + genotypeCodes.put(g, samplesWithGenotype); + } + samplesWithGenotype.add(i); + } + } + return genotypeCodes; + } + + private BasicDBObject buildAttributes(Map attributes) { + BasicDBObject attrs = null; + for (Map.Entry entry : attributes.entrySet()) { + Object value = entry.getValue(); + if (entry.getKey().equals("src")) { + String[] fields = entry.getValue().split("\t"); + StringBuilder sb = new StringBuilder(); + sb.append(fields[0]); + for (int i = 1; i < fields.length && i < 8; i++) { + sb.append("\t").append(fields[i]); + } + try { + value = CompressionHelper.gzip(sb.toString()); + } catch (IOException ex) { + Logger.getLogger(VariantSourceEntryMongo.class.getName()).log(Level.SEVERE, null, ex); + } + } + + if (attrs == null) { + attrs = new BasicDBObject(entry.getKey().replace('.', CHARACTER_TO_REPLACE_DOTS), value); + } else { + attrs.append(entry.getKey().replace('.', CHARACTER_TO_REPLACE_DOTS), value); + } + } + return attrs; + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java new file mode 100644 index 000000000..35cfa0df2 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java @@ -0,0 +1,85 @@ +package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; + +import com.mongodb.BasicDBObject; +import org.opencb.biodata.models.feature.Genotype; +import org.springframework.data.mongodb.core.mapping.Field; +import uk.ac.ebi.eva.commons.models.data.VariantStats; + +import java.util.HashMap; +import java.util.Map; + +public class VariantStatsMongo { + + public final static String COHORT_ID = "cid"; + + public final static 
String STUDY_ID = "sid"; + + public final static String FILE_ID = "fid"; + + public final static String MAF_FIELD = "maf"; + + public final static String MGF_FIELD = "mgf"; + + public final static String MAFALLELE_FIELD = "mafAl"; + + public final static String MGFGENOTYPE_FIELD = "mgfGt"; + + public final static String MISSALLELE_FIELD = "missAl"; + + public final static String MISSGENOTYPE_FIELD = "missGt"; + + public final static String NUMGT_FIELD = "numGt"; + + @Field(STUDY_ID) + private String studyId; + + @Field(FILE_ID) + private String fileId; + + @Field(COHORT_ID) + private String cohortId; + + @Field(MAF_FIELD) + private float maf; + + @Field(MGF_FIELD) + private float mgf; + + @Field(MAFALLELE_FIELD) + private String mafAllele; + + @Field(MGFGENOTYPE_FIELD) + private String mgfGenotype; + + @Field(MISSALLELE_FIELD) + private int missingAlleles; + + @Field(MISSGENOTYPE_FIELD) + private int missingGenotypes; + + @Field(NUMGT_FIELD) + private Map numGt; + + public VariantStatsMongo(String studyId, String fileId, String cohortId, VariantStats stats) { + this.studyId = studyId; + this.fileId = fileId; + this.cohortId = cohortId; + this.maf = stats.getMaf(); + this.mgf = stats.getMgf(); + this.mafAllele = stats.getMafAllele(); + this.mgfGenotype = stats.getMgfGenotype(); + this.missingAlleles = stats.getMissingAlleles(); + this.missingGenotypes = stats.getMissingGenotypes(); + this.numGt = buildGenotypes(stats.getGenotypesCount()); + + } + + private Map buildGenotypes(Map genotypesCount) { + Map genotypes = new HashMap<>(); + for (Map.Entry g : genotypesCount.entrySet()) { + String genotypeStr = g.getKey().toString().replace(".", "-1"); + genotypes.put(genotypeStr, g.getValue()); + } + return genotypes; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java index 1ad3c623d..1b6d9be13 100644 --- 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java @@ -24,9 +24,8 @@ import org.springframework.beans.factory.InitializingBean; import org.springframework.data.mongodb.core.MongoOperations; import org.springframework.util.ClassUtils; - -import uk.ac.ebi.eva.commons.models.converters.data.VariantSourceEntryToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo; import uk.ac.ebi.eva.pipeline.model.VariantWrapper; import javax.annotation.PostConstruct; @@ -44,8 +43,7 @@ public class VariantsMongoReader private DBObjectToVariantConverter converter; - private static final String STUDY_KEY = VariantToDBObjectConverter.FILES_FIELD + "." - + VariantSourceEntryToDBObjectConverter.STUDYID_FIELD; + private static final String STUDY_KEY = VariantDocument.FILES_FIELD + "." + VariantSourceEntryMongo.STUDYID_FIELD; /** * @param studyId Can be the empty string or null, meaning to bring all non-annotated variants in the collection. 
diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 57e6a719e..8600bec0d 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -32,7 +32,7 @@ import java.util.List; import java.util.Map; -import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.ANNOTATION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.ANNOTATION_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.VEP_CACHE_VERSION_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.VEP_VERSION_FIELD; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java index 3a8046fa3..d8792f36d 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java @@ -15,6 +15,7 @@ */ package uk.ac.ebi.eva.pipeline.io.writers; +import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; import com.mongodb.BulkWriteOperation; import com.mongodb.DBObject; @@ -23,19 +24,23 @@ import org.springframework.batch.item.data.MongoItemWriter; import org.springframework.data.mongodb.core.MongoOperations; import org.springframework.util.Assert; -import uk.ac.ebi.eva.commons.models.converters.data.SamplesToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.converters.data.VariantSourceEntryToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.converters.data.VariantStatsToDBObjectConverter; -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; import uk.ac.ebi.eva.commons.models.data.Variant; 
import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; +import uk.ac.ebi.eva.commons.models.data.VariantStats; +import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; +import uk.ac.ebi.eva.commons.models.mongo.entity.projections.SimplifiedVariant; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantStatsMongo; import uk.ac.ebi.eva.utils.MongoDBHelper; +import java.util.ArrayList; import java.util.List; +import java.util.Map; -import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.ANNOTATION_FIELD; -import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.XREFS_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.ANNOTATION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.IDS_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.XREFS_FIELD; /** * Write a list of {@link Variant} into MongoDB @@ -48,10 +53,8 @@ public class VariantMongoWriter extends MongoItemWriter { private final MongoOperations mongoOperations; private final String collection; - - private VariantToDBObjectConverter variantConverter; - private VariantStatsToDBObjectConverter statsConverter; - private VariantSourceEntryToDBObjectConverter sourceEntryConverter; + private final boolean includeStats; + private final boolean includeSamples; public VariantMongoWriter(String collection, MongoOperations mongoOperations, boolean includeStats, boolean includeSamples) { @@ -61,18 +64,12 @@ public VariantMongoWriter(String collection, MongoOperations mongoOperations, bo this.mongoOperations = mongoOperations; this.collection = collection; setTemplate(mongoOperations); + this.includeStats = includeStats; + this.includeSamples = 
includeSamples; - initializeConverters(includeStats, includeSamples); createIndexes(); } - private void initializeConverters(boolean includeStats, boolean includeSamples) { - this.statsConverter = includeStats ? new VariantStatsToDBObjectConverter() : null; - SamplesToDBObjectConverter sampleConverter = includeSamples ? new SamplesToDBObjectConverter() : null; - this.sourceEntryConverter = new VariantSourceEntryToDBObjectConverter(sampleConverter); - this.variantConverter = new VariantToDBObjectConverter(); - } - @Override protected void doWrite(List variants) { BulkWriteOperation bulk = mongoOperations.getCollection(collection).initializeUnorderedBulkOperation(); @@ -82,8 +79,8 @@ protected void doWrite(List variants) { // the chromosome and start appear just as shard keys, in an unsharded cluster they wouldn't be needed BasicDBObject query = new BasicDBObject("_id", id) - .append(VariantToDBObjectConverter.CHROMOSOME_FIELD, variant.getChromosome()) - .append(VariantToDBObjectConverter.START_FIELD, variant.getStart()); + .append(VariantDocument.CHROMOSOME_FIELD, variant.getChromosome()) + .append(VariantDocument.START_FIELD, variant.getStart()); bulk.find(query).upsert().updateOne(generateUpdate(variant)); @@ -101,27 +98,27 @@ private void executeBulk(BulkWriteOperation bulk, int currentBulkSize) { private void createIndexes() { mongoOperations.getCollection(collection).createIndex( - new BasicDBObject(VariantToDBObjectConverter.CHROMOSOME_FIELD, 1) - .append(VariantToDBObjectConverter.START_FIELD, 1).append(VariantToDBObjectConverter.END_FIELD, 1), + new BasicDBObject(VariantDocument.CHROMOSOME_FIELD, 1) + .append(VariantDocument.START_FIELD, 1).append(VariantDocument.END_FIELD, 1), new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); mongoOperations.getCollection(collection).createIndex( - new BasicDBObject(VariantToDBObjectConverter.IDS_FIELD, 1), + new BasicDBObject(VariantDocument.IDS_FIELD, 1), new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); - 
String filesStudyIdField = String.format("%s.%s", VariantToDBObjectConverter.FILES_FIELD, - VariantSourceEntryToDBObjectConverter.STUDYID_FIELD); - String filesFileIdField = String.format("%s.%s", VariantToDBObjectConverter.FILES_FIELD, - VariantSourceEntryToDBObjectConverter.FILEID_FIELD); + String filesStudyIdField = String.format("%s.%s", VariantDocument.FILES_FIELD, + VariantSourceEntryMongo.STUDYID_FIELD); + String filesFileIdField = String.format("%s.%s", VariantDocument.FILES_FIELD, + VariantSourceEntryMongo.FILEID_FIELD); mongoOperations.getCollection(collection).createIndex( new BasicDBObject(filesStudyIdField, 1).append(filesFileIdField, 1), new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); mongoOperations.getCollection(collection).createIndex( - new BasicDBObject(ANNOTATION_FIELD+"."+XREFS_FIELD, 1), + new BasicDBObject(ANNOTATION_FIELD + "." + XREFS_FIELD, 1), new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); mongoOperations.getCollection(collection).createIndex( - new BasicDBObject(ANNOTATION_FIELD+"."+ SO_ACCESSION_FIELD, 1), + new BasicDBObject(ANNOTATION_FIELD + "." 
+ SO_ACCESSION_FIELD, 1), new BasicDBObject(MongoDBHelper.BACKGROUND_INDEX, true)); } @@ -134,24 +131,72 @@ private DBObject generateUpdate(Variant variant) { if (!variant.getSourceEntries().isEmpty()) { VariantSourceEntry variantSourceEntry = variant.getSourceEntries().values().iterator().next(); - addToSet.put(VariantToDBObjectConverter.FILES_FIELD, sourceEntryConverter.convert(variantSourceEntry)); + addToSet.put(VariantDocument.FILES_FIELD, convert(variantSourceEntry)); - if (statsConverter != null) { - List sourceEntryStats = statsConverter.convert(variantSourceEntry); - addToSet.put(VariantToDBObjectConverter.STATS_FIELD, new BasicDBObject("$each", sourceEntryStats)); + if (includeStats) { + BasicDBList basicDBList = convertStatistics(variantSourceEntry); + addToSet.put(VariantDocument.STATS_FIELD, new BasicDBObject("$each", basicDBList)); } } if (variant.getIds() != null && !variant.getIds().isEmpty()) { - addToSet.put(VariantToDBObjectConverter.IDS_FIELD, new BasicDBObject("$each", variant.getIds())); + addToSet.put(IDS_FIELD, new BasicDBObject("$each", variant.getIds())); } BasicDBObject update = new BasicDBObject(); if (!addToSet.isEmpty()) { update.put("$addToSet", addToSet); } - update.append("$setOnInsert", variantConverter.convert(variant)); + update.append("$setOnInsert", convert(variant)); return update; } + + private BasicDBList convertStatistics(VariantSourceEntry variantSourceEntry) { + List variantStats = new ArrayList<>(); + for (Map.Entry variantStatsEntry : variantSourceEntry.getCohortStats().entrySet()) { + variantStats.add(new VariantStatsMongo( + variantSourceEntry.getStudyId(), + variantSourceEntry.getFileId(), + variantStatsEntry.getKey(), + variantStatsEntry.getValue() + )); + } + return (BasicDBList) mongoOperations.getConverter().convertToMongoType(variantStats); + } + + private DBObject convert(VariantSourceEntry variantSourceEntry) { + VariantSourceEntryMongo variantSource = null; + if (includeSamples) { + variantSource = new 
VariantSourceEntryMongo( + variantSourceEntry.getFileId(), + variantSourceEntry.getStudyId(), + variantSourceEntry.getSecondaryAlternates(), + variantSourceEntry.getAttributes(), + variantSourceEntry.getFormat(), + variantSourceEntry.getSamplesData() + ); + } else { + variantSource = new VariantSourceEntryMongo( + variantSourceEntry.getFileId(), + variantSourceEntry.getStudyId(), + variantSourceEntry.getSecondaryAlternates(), + variantSourceEntry.getAttributes() + ); + } + return (DBObject) mongoOperations.getConverter().convertToMongoType(variantSource); + } + + private DBObject convert(Variant variant) { + SimplifiedVariant simplifiedVariant = new SimplifiedVariant( + variant.getType(), + variant.getChromosome(), + variant.getStart(), + variant.getEnd(), + variant.getLength(), + variant.getReference(), + variant.getAlternate(), + variant.getHgvs()); + return (DBObject) mongoOperations.getConverter().convertToMongoType(simplifiedVariant); + } } \ No newline at end of file diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsFromVariantGivenStudyIdStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsFromVariantGivenStudyIdStep.java index fc935e403..e14ee3380 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsFromVariantGivenStudyIdStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsFromVariantGivenStudyIdStep.java @@ -33,9 +33,9 @@ import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; import uk.ac.ebi.eva.pipeline.parameters.InputParameters; -import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.FILES_FIELD; -import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.STATS_FIELD; import static uk.ac.ebi.eva.commons.models.data.VariantSourceEntity.STUDYID_FIELD; +import static 
uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.FILES_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.STATS_FIELD; /** * Tasklet that removes the files and statistics in a variant given a studyId. The id is readed from the jobParameter diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/SingleStudyVariantsDropperStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/SingleStudyVariantsDropperStep.java index 00f463111..29a03f39a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/SingleStudyVariantsDropperStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/SingleStudyVariantsDropperStep.java @@ -26,12 +26,11 @@ import org.springframework.data.mongodb.core.MongoOperations; import org.springframework.data.mongodb.core.query.Criteria; import org.springframework.data.mongodb.core.query.Query; - import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; import uk.ac.ebi.eva.pipeline.parameters.InputParameters; -import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.FILES_FIELD; import static uk.ac.ebi.eva.commons.models.data.VariantSourceEntity.STUDYID_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.FILES_FIELD; /** * Tasklet that removes from mongo all the variants that have only one entry from a given study to delete. 
diff --git a/src/main/java/uk/ac/ebi/eva/utils/CompressionHelper.java b/src/main/java/uk/ac/ebi/eva/utils/CompressionHelper.java index 7b0a1dd88..bbd038f27 100644 --- a/src/main/java/uk/ac/ebi/eva/utils/CompressionHelper.java +++ b/src/main/java/uk/ac/ebi/eva/utils/CompressionHelper.java @@ -15,10 +15,13 @@ */ package uk.ac.ebi.eva.utils; +import java.io.BufferedOutputStream; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; import java.util.zip.ZipException; /** @@ -39,4 +42,19 @@ public static boolean isGzip(File file) throws IOException { } return true; } + + public static byte[] gzip(String text) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + BufferedOutputStream bufos = new BufferedOutputStream(new GZIPOutputStream(bos)); + + try { + bufos.write(text.getBytes()); + } finally { + bufos.close(); + } + + byte[] retval = bos.toByteArray(); + bos.close(); + return retval; + } } diff --git a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantSourceEntryToDBObjectConverterTest.java b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantSourceEntryToDBObjectConverterTest.java index 4bc4e6137..78b5b2e74 100644 --- a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantSourceEntryToDBObjectConverterTest.java +++ b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantSourceEntryToDBObjectConverterTest.java @@ -19,23 +19,37 @@ import com.mongodb.DBObject; import org.junit.Before; import org.junit.Test; - +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestPropertySource; +import 
org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo; +import uk.ac.ebi.eva.test.configuration.MongoOperationConfiguration; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import static org.junit.Assert.assertEquals; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo.ATTRIBUTES_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo.CHARACTER_TO_REPLACE_DOTS; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo.FILEID_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo.STUDYID_FIELD; /** - * Tests {@link VariantSourceEntryToDBObjectConverter} - *

- * Input: {@link VariantSourceEntry} - * output: DBObject representing the {@link VariantSourceEntry} + * Tests automatic conversion from {@link VariantSourceEntryMongo} to {@link DBObject} */ +@RunWith(SpringRunner.class) +@TestPropertySource({"classpath:test-mongo.properties"}) +@ContextConfiguration(classes = {MongoOperationConfiguration.class}) public class VariantSourceEntryToDBObjectConverterTest { + @Autowired + private MongoOperations mongoOperations; + private VariantSourceEntry file; private BasicDBObject mongoFile; @@ -49,7 +63,6 @@ public void setUp() { file.addAttribute("QUAL", "0.01"); file.addAttribute("AN", "2"); file.addAttribute("MAX.PROC", "2"); - file.setFormat("GT"); Map na001 = new HashMap<>(); na001.put("GT", "0/0"); @@ -62,20 +75,13 @@ public void setUp() { int indexNa003 = file.addSampleData(na003); // MongoDB object - mongoFile = new BasicDBObject(VariantSourceEntryToDBObjectConverter.FILEID_FIELD, file.getFileId()) - .append(VariantSourceEntryToDBObjectConverter.STUDYID_FIELD, file.getStudyId()) - .append(VariantSourceEntryToDBObjectConverter.FORMAT_FIELD, file.getFormat()); + mongoFile = new BasicDBObject(FILEID_FIELD, file.getFileId()) + .append(STUDYID_FIELD, file.getStudyId()); BasicDBObject attributes = new BasicDBObject("QUAL", "0.01") .append("AN", "2") - .append("MAX" + VariantSourceEntryToDBObjectConverter.CHARACTER_TO_REPLACE_DOTS + "PROC", "2"); - mongoFile.append(VariantSourceEntryToDBObjectConverter.ATTRIBUTES_FIELD, attributes); - - BasicDBObject genotypeCodes = new BasicDBObject(); - genotypeCodes.append("def", "0/0"); - genotypeCodes.append("0/1", Arrays.asList(1)); - genotypeCodes.append("1/1", Arrays.asList(2)); - mongoFile.append(VariantSourceEntryToDBObjectConverter.SAMPLES_FIELD, genotypeCodes); + .append("MAX" + CHARACTER_TO_REPLACE_DOTS + "PROC", "2"); + mongoFile.append(ATTRIBUTES_FIELD, attributes); mongoFileWithIds = new BasicDBObject((this.mongoFile.toMap())); mongoFileWithIds.put("samp", new BasicDBObject()); 
@@ -86,17 +92,25 @@ public void setUp() { @Test public void testConvertToStorageTypeWithoutSamples() { - VariantSourceEntryToDBObjectConverter converter; - converter = new VariantSourceEntryToDBObjectConverter(new SamplesToDBObjectConverter()); - DBObject converted = converter.convert(file); - assertEquals(mongoFile, converted); + VariantSourceEntryMongo variantSource = new VariantSourceEntryMongo( + file.getFileId(), + file.getStudyId(), + file.getSecondaryAlternates(), + file.getAttributes() + ); + assertEquals(mongoFile, mongoOperations.getConverter().convertToMongoType(variantSource)); } @Test public void testConvertToStorageTypeWithSamples() { - VariantSourceEntryToDBObjectConverter converter; - converter = new VariantSourceEntryToDBObjectConverter(new SamplesToDBObjectConverter()); - DBObject convertedMongo = converter.convert(file); - assertEquals(mongoFileWithIds, convertedMongo); + VariantSourceEntryMongo variantSource = new VariantSourceEntryMongo( + file.getFileId(), + file.getStudyId(), + file.getSecondaryAlternates(), + file.getAttributes(), + file.getFormat(), + file.getSamplesData() + ); + assertEquals(mongoFileWithIds, mongoOperations.getConverter().convertToMongoType(variantSource)); } } diff --git a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantStatsToDBObjectConverterTest.java b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantStatsToDBObjectConverterTest.java index 33a15e8e5..2f384c2fe 100644 --- a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantStatsToDBObjectConverterTest.java +++ b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantStatsToDBObjectConverterTest.java @@ -19,45 +19,53 @@ import com.mongodb.DBObject; import org.junit.BeforeClass; import org.junit.Test; +import org.junit.runner.RunWith; import org.opencb.biodata.models.feature.Genotype; - +import org.springframework.beans.factory.annotation.Autowired; +import 
org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; import uk.ac.ebi.eva.commons.models.data.VariantStats; - -import java.util.List; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantStatsMongo; +import uk.ac.ebi.eva.test.configuration.MongoOperationConfiguration; import static org.junit.Assert.assertEquals; /** - * Tests {@link VariantStatsToDBObjectConverter} - *

- * Input: {@link VariantSourceEntry} - * output: DBObject representing the {@link VariantStats} inside the {@link VariantSourceEntry} + * Tests automatic conversion of {@link VariantStatsMongo} to a {@link DBObject} */ +@RunWith(SpringRunner.class) +@TestPropertySource({"classpath:test-mongo.properties"}) +@ContextConfiguration(classes = {MongoOperationConfiguration.class}) public class VariantStatsToDBObjectConverterTest { + @Autowired + private MongoOperations mongoOperations; + private static BasicDBObject mongoStats; private static VariantSourceEntry sourceEntry; @BeforeClass public static void setUpClass() { - mongoStats = new BasicDBObject(VariantStatsToDBObjectConverter.MAF_FIELD, 0.1); - mongoStats.append(VariantStatsToDBObjectConverter.MGF_FIELD, 0.01); - mongoStats.append(VariantStatsToDBObjectConverter.MAFALLELE_FIELD, "A"); - mongoStats.append(VariantStatsToDBObjectConverter.MGFGENOTYPE_FIELD, "A/A"); - mongoStats.append(VariantStatsToDBObjectConverter.MISSALLELE_FIELD, 10); - mongoStats.append(VariantStatsToDBObjectConverter.MISSGENOTYPE_FIELD, 5); + mongoStats = new BasicDBObject(VariantStatsMongo.MAF_FIELD, 0.1); + mongoStats.append(VariantStatsMongo.MGF_FIELD, 0.01); + mongoStats.append(VariantStatsMongo.MAFALLELE_FIELD, "A"); + mongoStats.append(VariantStatsMongo.MGFGENOTYPE_FIELD, "A/A"); + mongoStats.append(VariantStatsMongo.MISSALLELE_FIELD, 10); + mongoStats.append(VariantStatsMongo.MISSGENOTYPE_FIELD, 5); BasicDBObject genotypes = new BasicDBObject(); genotypes.append("0/0", 100); genotypes.append("0/1", 50); genotypes.append("1/1", 10); - mongoStats.append(VariantStatsToDBObjectConverter.NUMGT_FIELD, genotypes); + mongoStats.append(VariantStatsMongo.NUMGT_FIELD, genotypes); VariantStats stats = new VariantStats(null, -1, null, null, Variant.VariantType.SNV, 0.1f, 0.01f, "A", "A/A", - 10, 5, -1, -1, -1, -1, -1); + 10, 5, -1, -1, -1, -1, -1); stats.addGenotype(new Genotype("0/0"), 100); stats.addGenotype(new Genotype("0/1"), 50); 
stats.addGenotype(new Genotype("1/1"), 10); @@ -69,23 +77,28 @@ public static void setUpClass() { @Test public void testConvertToStorageType() { - VariantStatsToDBObjectConverter converter = new VariantStatsToDBObjectConverter(); - List convertedSourceEntry = converter.convert(sourceEntry); - assertEquals(1, convertedSourceEntry.size()); - - DBObject converted = convertedSourceEntry.get(0); VariantStats stats = sourceEntry.getCohortStats("ALL"); - - assertEquals(stats.getMaf(), (float) converted.get(VariantStatsToDBObjectConverter.MAF_FIELD), 1e-6); - assertEquals(stats.getMgf(), (float) converted.get(VariantStatsToDBObjectConverter.MGF_FIELD), 1e-6); - assertEquals(stats.getMafAllele(), converted.get(VariantStatsToDBObjectConverter.MAFALLELE_FIELD)); - assertEquals(stats.getMgfGenotype(), converted.get(VariantStatsToDBObjectConverter.MGFGENOTYPE_FIELD)); - - assertEquals(stats.getMissingAlleles(), converted.get(VariantStatsToDBObjectConverter.MISSALLELE_FIELD)); - assertEquals(stats.getMissingGenotypes(), converted.get(VariantStatsToDBObjectConverter.MISSGENOTYPE_FIELD)); - - assertEquals(100, ((DBObject) converted.get(VariantStatsToDBObjectConverter.NUMGT_FIELD)).get("0/0")); - assertEquals(50, ((DBObject) converted.get(VariantStatsToDBObjectConverter.NUMGT_FIELD)).get("0/1")); - assertEquals(10, ((DBObject) converted.get(VariantStatsToDBObjectConverter.NUMGT_FIELD)).get("1/1")); + DBObject converted = (DBObject) mongoOperations.getConverter().convertToMongoType( + new VariantStatsMongo( + sourceEntry.getStudyId(), + sourceEntry.getFileId(), + "ALL", + stats + ) + ); + + //DBObject converted = convertedSourceEntry.get(0); + + assertEquals(stats.getMaf(), (float) converted.get(VariantStatsMongo.MAF_FIELD), 1e-6); + assertEquals(stats.getMgf(), (float) converted.get(VariantStatsMongo.MGF_FIELD), 1e-6); + assertEquals(stats.getMafAllele(), converted.get(VariantStatsMongo.MAFALLELE_FIELD)); + assertEquals(stats.getMgfGenotype(), 
converted.get(VariantStatsMongo.MGFGENOTYPE_FIELD)); + + assertEquals(stats.getMissingAlleles(), converted.get(VariantStatsMongo.MISSALLELE_FIELD)); + assertEquals(stats.getMissingGenotypes(), converted.get(VariantStatsMongo.MISSGENOTYPE_FIELD)); + + assertEquals(100, ((DBObject) converted.get(VariantStatsMongo.NUMGT_FIELD)).get("0/0")); + assertEquals(50, ((DBObject) converted.get(VariantStatsMongo.NUMGT_FIELD)).get("0/1")); + assertEquals(10, ((DBObject) converted.get(VariantStatsMongo.NUMGT_FIELD)).get("1/1")); } } diff --git a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java index f38f1d4f5..c57269419 100644 --- a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java +++ b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java @@ -18,41 +18,103 @@ import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; import com.mongodb.DBObject; -import org.junit.Before; import org.junit.Test; - +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; +import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.HgvsMongo; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo; +import uk.ac.ebi.eva.test.configuration.MongoOperationConfiguration; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import 
java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Set; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; /** - * Tests {@link VariantToDBObjectConverter} - *

- * Input: {@link Variant} - * output: DBObject representing the {@link Variant} + * Tests the automatic conversion of {@link VariantDocument} to {@link DBObject} */ +@RunWith(SpringRunner.class) +@TestPropertySource({"classpath:test-mongo.properties"}) +@ContextConfiguration(classes = {MongoOperationConfiguration.class}) public class VariantToDBObjectConverterTest { - private BasicDBObject mongoVariant; + @Autowired + private MongoOperations mongoOperations; + + private VariantDocument buildVariantDocument(VariantSourceEntryMongo variantSource, boolean withIds) { + Set variantSources = variantSource == null ? null : + Collections.singleton(variantSource); + return new VariantDocument( + Variant.VariantType.SNV, + "1", + 1000, + 1000, + 1, + "A", + "C", + Collections.singleton(new HgvsMongo("genomic", "1:g.1000A>C")), + withIds == true ? Collections.singleton("rs666") : null, + variantSources + ); + } + + private VariantSourceEntryMongo buildVariantSourceEntryWithSamples() { + Map na001 = new HashMap<>(); + na001.put("GT", "0/0"); + na001.put("DP", "4"); + + Map na002 = new HashMap<>(); + na002.put("GT", "0/1"); + na002.put("DP", "5"); - private Variant variant; + List> samples = new ArrayList<>(); + samples.add(na001); + samples.add(na002); + + return new VariantSourceEntryMongo( + "f1", + "s1", + null, + buildAttributes(), + "GT:DP", + samples + ); + } - private VariantSourceEntry variantSourceEntry; + private VariantSourceEntryMongo buildVariantSourceEntryWithoutSamples() { + return new VariantSourceEntryMongo( + "f1", + "s1", + null, + buildAttributes() + ); + } + + private Map buildAttributes() { + HashMap attributes = new HashMap(); + attributes.put("QUAL", "0.01"); + attributes.put("AN", "2"); + return attributes; + } - @Before - public void setUp() { + private BasicDBObject buildMongoVariant(boolean withFiles) { //Setup variant - variant = new Variant("1", 1000, 1000, "A", "C"); + Variant variant = new Variant("1", 1000, 1000, "A", "C"); 
variant.setIds(Collections.singleton("rs666")); + //Setup variantSourceEntry - variantSourceEntry = new VariantSourceEntry("f1", "s1"); + VariantSourceEntry variantSourceEntry = new VariantSourceEntry("f1", "s1"); variantSourceEntry.addAttribute("QUAL", "0.01"); variantSourceEntry.addAttribute("AN", "2"); variantSourceEntry.setFormat("GT:DP"); @@ -67,92 +129,92 @@ public void setUp() { variantSourceEntry.addSampleData(na002); variant.addSourceEntry(variantSourceEntry); + HashMap attributes = new HashMap(); + attributes.put("QUAL", "0.01"); + attributes.put("AN", "2"); + + List> samples = new ArrayList<>(); + samples.add(na001); + samples.add(na002); + //Setup mongoVariant - mongoVariant = new BasicDBObject("_id", "1_1000_A_C") - .append(VariantToDBObjectConverter.IDS_FIELD, variant.getIds()) - .append(VariantToDBObjectConverter.TYPE_FIELD, variant.getType().name()) - .append(VariantToDBObjectConverter.CHROMOSOME_FIELD, variant.getChromosome()) - .append(VariantToDBObjectConverter.START_FIELD, variant.getStart()) - .append(VariantToDBObjectConverter.END_FIELD, variant.getStart()) - .append(VariantToDBObjectConverter.LENGTH_FIELD, variant.getLength()) - .append(VariantToDBObjectConverter.REFERENCE_FIELD, variant.getReference()) - .append(VariantToDBObjectConverter.ALTERNATE_FIELD, variant.getAlternate()); + BasicDBObject mongoVariant = new BasicDBObject("_id", "1_1000_A_C") + .append(VariantDocument.IDS_FIELD, variant.getIds()) + .append(VariantDocument.TYPE_FIELD, variant.getType().name()) + .append(VariantDocument.CHROMOSOME_FIELD, variant.getChromosome()) + .append(VariantDocument.START_FIELD, variant.getStart()) + .append(VariantDocument.END_FIELD, variant.getStart()) + .append(VariantDocument.LENGTH_FIELD, variant.getLength()) + .append(VariantDocument.REFERENCE_FIELD, variant.getReference()) + .append(VariantDocument.ALTERNATE_FIELD, variant.getAlternate()); BasicDBList chunkIds = new BasicDBList(); - chunkIds.add("1_1_1k"); chunkIds.add("1_0_10k"); + 
chunkIds.add("1_1_1k"); mongoVariant.append("_at", new BasicDBObject("chunkIds", chunkIds)); BasicDBList hgvs = new BasicDBList(); hgvs.add(new BasicDBObject("type", "genomic").append("name", "1:g.1000A>C")); mongoVariant.append("hgvs", hgvs); + + if (withFiles) { + // MongoDB object + BasicDBObject mongoFile = new BasicDBObject(VariantSourceEntryMongo.FILEID_FIELD, + variantSourceEntry.getFileId()) + .append(VariantSourceEntryMongo.STUDYID_FIELD, variantSourceEntry.getStudyId()) + .append(VariantSourceEntryMongo.ATTRIBUTES_FIELD, + new BasicDBObject("QUAL", "0.01").append("AN", "2")) + .append(VariantSourceEntryMongo.FORMAT_FIELD, variantSourceEntry.getFormat()); + + BasicDBObject genotypeCodes = new BasicDBObject(); + genotypeCodes.append("def", "0/0"); + genotypeCodes.append("0/1", Collections.singletonList(1)); + mongoFile.append(VariantSourceEntryMongo.SAMPLES_FIELD, genotypeCodes); + BasicDBList files = new BasicDBList(); + files.add(mongoFile); + mongoVariant.append("files", files); + } + + return mongoVariant; } @Test public void testConvertToStorageTypeWithFiles() { + DBObject converted = (DBObject) mongoOperations.getConverter().convertToMongoType( + buildVariantDocument(buildVariantSourceEntryWithSamples(), true) + ); - variant.addSourceEntry(variantSourceEntry); - - // MongoDB object - BasicDBObject mongoFile = new BasicDBObject(VariantSourceEntryToDBObjectConverter.FILEID_FIELD, - variantSourceEntry.getFileId()) - .append(VariantSourceEntryToDBObjectConverter.STUDYID_FIELD, variantSourceEntry.getStudyId()) - .append(VariantSourceEntryToDBObjectConverter.ATTRIBUTES_FIELD, - new BasicDBObject("QUAL", "0.01").append("AN", "2")) - .append(VariantSourceEntryToDBObjectConverter.FORMAT_FIELD, variantSourceEntry.getFormat()); - - BasicDBObject genotypeCodes = new BasicDBObject(); - genotypeCodes.append("def", "0/0"); - genotypeCodes.append("0/1", Collections.singletonList(1)); - mongoFile.append(VariantSourceEntryToDBObjectConverter.SAMPLES_FIELD, 
genotypeCodes); - BasicDBList files = new BasicDBList(); - files.add(mongoFile); - mongoVariant.append("files", files); - - VariantToDBObjectConverter converter = new VariantToDBObjectConverter( - new VariantSourceEntryToDBObjectConverter(new SamplesToDBObjectConverter()), null); - DBObject converted = converter.convert(variant); - assertFalse(converted.containsField(VariantToDBObjectConverter.IDS_FIELD)); //IDs must be added manually. - converted.put(VariantToDBObjectConverter.IDS_FIELD, variant.getIds()); //Add IDs - assertEquals(mongoVariant, converted); + assertEquals(buildMongoVariant(true), converted); } @Test public void testConvertToStorageTypeWithoutFiles() { - VariantToDBObjectConverter converter = new VariantToDBObjectConverter(); - DBObject converted = converter.convert(variant); - assertFalse(converted.containsField(VariantToDBObjectConverter.IDS_FIELD)); //IDs must be added manually. - converted.put(VariantToDBObjectConverter.IDS_FIELD, variant.getIds()); //Add IDs - assertEquals(mongoVariant, converted); + DBObject converted = (DBObject) mongoOperations.getConverter().convertToMongoType( + buildVariantDocument(null, true) + ); + + assertEquals(buildMongoVariant(false), converted); } - /** - * @see VariantToDBObjectConverter ids policy - */ @Test public void testConvertToStorageTypeNullIds() { - variant.setIds(null); - - VariantToDBObjectConverter converter = new VariantToDBObjectConverter(); - DBObject converted = converter.convert(variant); - assertFalse(converted.containsField(VariantToDBObjectConverter.IDS_FIELD)); //IDs must be added manually. 
+ DBObject converted = (DBObject) mongoOperations.getConverter().convertToMongoType( + buildVariantDocument(null, false) + ); - mongoVariant.remove(VariantToDBObjectConverter.IDS_FIELD); + BasicDBObject mongoVariant = buildMongoVariant(false); + mongoVariant.remove(VariantDocument.IDS_FIELD); assertEquals(mongoVariant, converted); } - /** - * @see VariantToDBObjectConverter ids policy - */ @Test public void testConvertToStorageTypeEmptyIds() { - variant.setIds(new HashSet<>()); - - VariantToDBObjectConverter converter = new VariantToDBObjectConverter(); - DBObject converted = converter.convert(variant); - assertFalse(converted.containsField(VariantToDBObjectConverter.IDS_FIELD)); //IDs must be added manually. + DBObject converted = (DBObject) mongoOperations.getConverter().convertToMongoType( + buildVariantDocument(null, false) + ); - mongoVariant.remove(VariantToDBObjectConverter.IDS_FIELD); + BasicDBObject mongoVariant = buildMongoVariant(false); + mongoVariant.remove(VariantDocument.IDS_FIELD); assertEquals(mongoVariant, converted); } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index 512d2dc99..e9f59a8c4 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -30,9 +30,8 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; +import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import 
uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; @@ -130,7 +129,7 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { if (id.equals("20_63360_C_T")) { BasicDBObject annotationField = (BasicDBObject) ((BasicDBList) (variant).get( - VariantToDBObjectConverter.ANNOTATION_FIELD)).get(0); + VariantDocument.ANNOTATION_FIELD)).get(0); BasicDBList sifts = (BasicDBList) annotationField.get(SIFT_FIELD); assertNotNull(sifts); @@ -152,7 +151,7 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { if (id.equals("20_63399_G_A")) { BasicDBObject annotationField = (BasicDBObject) ((BasicDBList) (variant).get( - VariantToDBObjectConverter.ANNOTATION_FIELD)).get(0); + VariantDocument.ANNOTATION_FIELD)).get(0); BasicDBList sifts = (BasicDBList) annotationField.get(SIFT_FIELD); assertNotNull(sifts); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java index 76c2ab0a1..38a2428af 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java @@ -31,7 +31,7 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; +import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.AnnotationJob; @@ -46,12 +46,12 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; -import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.POLYPHEN_FIELD; -import static 
uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.CONSEQUENCE_TYPE_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.Annotation.XREFS_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.ConsequenceType.SIFT_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.POLYPHEN_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.SO_ACCESSION_FIELD; +import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; /** @@ -135,7 +135,7 @@ public void shouldLoadAllAnnotations() throws Exception { DBObject variant = variantCursor.next(); if (variant.get("_id").equals("20_63351_A_G")) { BasicDBObject annotationField = (BasicDBObject) ((BasicDBList) (variant).get( - VariantToDBObjectConverter.ANNOTATION_FIELD)).get(0); + VariantDocument.ANNOTATION_FIELD)).get(0); assertNotNull(annotationField.get(SIFT_FIELD)); assertNotNull(annotationField.get(SO_ACCESSION_FIELD)); assertNotNull(annotationField.get(POLYPHEN_FIELD)); diff --git a/src/test/java/uk/ac/ebi/eva/test/configuration/MongoOperationConfiguration.java b/src/test/java/uk/ac/ebi/eva/test/configuration/MongoOperationConfiguration.java new file mode 100644 index 000000000..76309d768 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/test/configuration/MongoOperationConfiguration.java @@ -0,0 +1,37 @@ +package uk.ac.ebi.eva.test.configuration; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.data.mongodb.core.mapping.MongoMappingContext; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; + +import java.net.UnknownHostException; + +import static 
uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration.getMongoOperations; + +/** + * Configuration to get a MongoOperations context tied to a specific static mongo database. + */ +@Configuration +public class MongoOperationConfiguration { + + private static final String DUMMY_STATIC = "dummy_test"; + + @Bean + public MongoConnection mongoConnection() { + return new MongoConnection(); + } + + @Bean + public MongoMappingContext mongoMappingContext() { + return new MongoMappingContext(); + } + + @Bean + public MongoOperations mongoTemplate(MongoConnection mongoConnection, MongoMappingContext mongoMappingContext) + throws UnknownHostException { + return getMongoOperations(DUMMY_STATIC, mongoConnection, mongoMappingContext); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/DropStudyJobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/DropStudyJobTestUtils.java index a184ba288..37e16899d 100644 --- a/src/test/java/uk/ac/ebi/eva/test/utils/DropStudyJobTestUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/DropStudyJobTestUtils.java @@ -20,9 +20,10 @@ import com.mongodb.DBCollection; import static org.junit.Assert.assertEquals; -import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.FILES_FIELD; -import static uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter.STATS_FIELD; + import static uk.ac.ebi.eva.commons.models.data.VariantSourceEntity.STUDYID_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.FILES_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.STATS_FIELD; public class DropStudyJobTestUtils { diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java index bf2da4653..70260cdd1 100644 --- a/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java @@ -45,8 +45,8 @@ import static 
org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.commons.models.converters.data.VariantSourceEntryToDBObjectConverter.FILEID_FIELD; -import static uk.ac.ebi.eva.commons.models.converters.data.VariantSourceEntryToDBObjectConverter.STUDYID_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantSourceEntity.FILEID_FIELD; +import static uk.ac.ebi.eva.commons.models.data.VariantSourceEntity.STUDYID_FIELD; public abstract class JobTestUtils { private static final Logger logger = LoggerFactory.getLogger(JobTestUtils.class); From 9669197b000022526d151dc597a3ce646e6e621e Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Fri, 26 May 2017 13:28:34 +0100 Subject: [PATCH 28/48] updated annotation-collection branch with annotation-job --- .../validation/job/AnnotationJobParametersValidatorTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidatorTest.java index eb4e585d7..20915efb3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidatorTest.java @@ -59,6 +59,7 @@ public void setUp() throws Exception { requiredParameters.put(JobParametersNames.APP_VEP_NUMFORKS, new JobParameter("6")); requiredParameters.put(JobParametersNames.APP_VEP_TIMEOUT, new JobParameter("600")); requiredParameters.put(JobParametersNames.ANNOTATION_OVERWRITE, new JobParameter("false")); + requiredParameters.put(JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, new JobParameter("annotations")); requiredParameters.put(JobParametersNames.DB_COLLECTIONS_ANNOTATION_METADATA_NAME, new JobParameter("annotationMetadata")); 
requiredParameters.put(JobParametersNames.APP_VEP_CACHE_PATH, From f83941a18249b59b343a159d2d81532eb83f4e76 Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 23 May 2017 15:44:37 +0100 Subject: [PATCH 29/48] Separated variant ID generation logic from the model to entity. --- .../ebi/eva/commons/models/data/Variant.java | 29 ------------------- .../models/mongo/entity/Annotation.java | 5 +++- .../io/writers/VariantMongoWriter.java | 2 +- .../ac/ebi/eva/utils/MongoDBHelperTest.java | 27 ++++++++++++----- 4 files changed, 24 insertions(+), 39 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java index 34a088209..ac11da547 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java @@ -356,33 +356,4 @@ public Variant clone() { private String composeId(String studyId, String fileId) { return studyId + "_" + fileId; } - - public String buildVariantId(){ - return buildVariantId(chromosome, start, reference, alternate); - } - - public static String buildVariantId(String chromosome, int start, String reference, String alternate) { - StringBuilder builder = new StringBuilder(chromosome); - builder.append("_"); - builder.append(start); - builder.append("_"); - if (!reference.equals("-")) { - if (reference.length() < 50) { - builder.append(reference); - } else { - builder.append(new String(CryptoUtils.encryptSha1(reference))); - } - } - - builder.append("_"); - if (!alternate.equals("-")) { - if (alternate.length() < 50) { - builder.append(alternate); - } else { - builder.append(new String(CryptoUtils.encryptSha1(alternate))); - } - } - - return builder.toString(); - } } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/Annotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/Annotation.java index 63bb48644..1cd191d97 100644 --- 
a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/Annotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/Annotation.java @@ -165,7 +165,10 @@ private void generateXrefsFromConsequenceType(ConsequenceType consequenceType) { public static String buildAnnotationId(String chromosome, int start, String reference, String alternate, String vepVersion, String vepCacheVersion) { - StringBuilder builder = new StringBuilder(Variant.buildVariantId(chromosome, start, reference, alternate)); + StringBuilder builder = new StringBuilder(VariantDocument.buildVariantId( + chromosome, start, + reference, + alternate)); builder.append("_"); builder.append(vepVersion); builder.append("_"); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java index d8792f36d..77dc67b60 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriter.java @@ -74,7 +74,7 @@ public VariantMongoWriter(String collection, MongoOperations mongoOperations, bo protected void doWrite(List variants) { BulkWriteOperation bulk = mongoOperations.getCollection(collection).initializeUnorderedBulkOperation(); for (Variant variant : variants) { - String id = Variant.buildVariantId(variant.getChromosome(), variant.getStart(), + String id = VariantDocument.buildVariantId(variant.getChromosome(), variant.getStart(), variant.getReference(), variant.getAlternate()); // the chromosome and start appear just as shard keys, in an unsharded cluster they wouldn't be needed diff --git a/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java b/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java index debbadc93..48f027c74 100644 --- a/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java +++ b/src/test/java/uk/ac/ebi/eva/utils/MongoDBHelperTest.java @@ -2,8 +2,7 @@ import org.junit.Test; import 
org.opencb.commons.utils.CryptoUtils; - -import uk.ac.ebi.eva.commons.models.data.Variant; +import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; import static org.junit.Assert.assertEquals; @@ -11,20 +10,32 @@ public class MongoDBHelperTest { @Test public void testBuildStorageIdSnv() { - Variant variant = new Variant("1", 1000, 1000, "A", "C"); - assertEquals("1_1000_A_C", variant.buildVariantId()); + assertEquals("1_1000_A_C", VariantDocument.buildVariantId( + "1", + 1000, + "A", + "C" + )); } @Test public void testBuildStorageIdIndel() { - Variant variant = new Variant("1", 1000, 1002, "", "CA"); - assertEquals("1_1000__CA", variant.buildVariantId()); + assertEquals("1_1000__CA", VariantDocument.buildVariantId( + "1", + 1000, + "", + "CA" + )); } @Test public void testBuildStorageIdStructural() { String alt = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT"; - Variant variant = new Variant("1", 1000, 1002, "TAG", alt); - assertEquals("1_1000_TAG_" + new String(CryptoUtils.encryptSha1(alt)), variant.buildVariantId()); + assertEquals("1_1000_TAG_" + new String(CryptoUtils.encryptSha1(alt)), VariantDocument.buildVariantId( + "1", + 1000, + "TAG", + alt + )); } } From ac7a6cafa62c6587f3d18302805d2cf697ce8271 Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Tue, 4 Apr 2017 13:12:11 +0100 Subject: [PATCH 30/48] moving configuration classes under "configuration" package --- .../eva/pipeline/configuration/BeanNames.java | 2 +- .../AnnotationReaderConfiguration.java | 4 +-- .../readers/GeneReaderConfiguration.java | 5 ++-- .../VariantsMongoReaderConfiguration.java | 4 +-- .../readers/VcfReaderConfiguration.java | 5 ++-- ...nnotationCompositeWriterConfiguration.java | 4 +-- ...nnotationInVariantWriterConfiguration.java | 4 +-- .../AnnotationWriterConfiguration.java | 4 +-- .../writers/GeneWriterConfiguration.java | 5 ++-- .../writers/VariantWriterConfiguration.java | 5 ++-- .../VepAnnotationFileWriterConfiguration.java | 5 ++-- 
.../jobs/AggregatedVcfJob.java | 12 ++++---- .../jobs/AnnotationJob.java | 6 ++-- .../jobs/DatabaseInitializationJob.java | 15 +++++----- .../jobs/DropStudyJob.java | 11 ++++--- .../jobs/GenotypedVcfJob.java | 12 ++++---- .../jobs/PopulationStatisticsJob.java | 7 ++--- .../jobs/flows/AnnotationFlow.java | 10 +++---- .../jobs/flows/AnnotationFlowOptional.java | 2 +- .../ParallelStatisticsAndAnnotationFlow.java | 2 +- .../jobs/flows/PopulationStatisticsFlow.java | 8 ++--- .../PopulationStatisticsOptionalFlow.java | 2 +- .../AnnotationMetadataStepConfiguration.java} | 6 ++-- ...CalculateStatisticsStepConfiguration.java} | 18 +++++------ ...eateDatabaseIndexesStepConfiguration.java} | 20 ++++++------- .../DropFilesByStudyStepConfiguration.java} | 18 +++++------ ...SingleStudyVariantsStepConfiguration.java} | 17 ++++++----- ...nerateVepAnnotationStepConfiguration.java} | 12 ++++---- .../steps/LoadFileStepConfiguration.java} | 18 +++++------ .../steps/LoadGenesStepConfiguration.java} | 20 ++++++------- .../LoadStatisticsStepConfiguration.java} | 18 +++++------ .../steps/LoadVariantsStepConfiguration.java} | 12 ++++---- .../LoadVepAnnotationStepConfiguration.java} | 19 +++++++----- ...ndStatisticsByStudyStepConfiguration.java} | 17 ++++++----- ...p.java => CalculateStatisticsTasklet.java} | 4 +-- ...java => CreateDatabaseIndexesTasklet.java} | 2 +- ...Step.java => DropFilesByStudyTasklet.java} | 4 +-- ...va => DropSingleStudyVariantsTasklet.java} | 4 +-- ...leLoaderStep.java => LoadFileTasklet.java} | 2 +- ...erStep.java => LoadStatisticsTasklet.java} | 6 ++-- ...PullFilesAndStatisticsByStudyTasklet.java} | 4 +-- .../parameters/AnnotationParameters.java | 4 +-- .../AggregatedVcfJobParametersValidator.java | 15 +++++----- .../job/DropStudyJobParametersValidator.java | 15 +++++----- .../GenotypedVcfJobParametersValidator.java | 23 +++++++------- ...tationMetadataStepParametersValidator.java | 3 +- ...ateStatisticsStepParametersValidator.java} | 8 ++--- 
...tabaseIndexesStepParametersValidator.java} | 7 +++-- ...pFilesByStudyStepParametersValidator.java} | 7 +++-- ...StudyVariantsStepParametersValidator.java} | 7 +++-- ...eVepAnnotationStepParametersValidator.java | 4 +-- ...a => LoadFileStepParametersValidator.java} | 7 +++-- ... => LoadGenesStepParametersValidator.java} | 7 +++-- ...oadStatisticsStepParametersValidator.java} | 8 ++--- ... LoadVariantsStepParametersValidator.java} | 7 +++-- ...VepAnnotationStepParametersValidator.java} | 13 ++++---- ...isticsByStudyStepParametersValidator.java} | 7 +++-- ...aPipelineJobLauncherCommandLineRunner.java | 1 + .../jobs/AggregatedVcfJobTest.java | 5 ++-- .../jobs/AnnotationJobTest.java | 5 ++-- .../jobs/DropStudyJobTest.java | 7 +++-- .../jobs/GenotypedVcfJobTest.java | 7 +++-- .../jobs/GenotypedVcfJobWorkflowTest.java | 7 +++-- .../jobs/PopulationStatisticsJobTest.java | 5 ++-- .../steps/AnnotationMetadataStepTest.java | 7 +++-- .../steps/CalculateStatisticsStepTest.java} | 11 ++++--- .../steps/CreateDatabaseIndexesStepTest.java} | 11 ++++--- .../jobs/steps/DropFilesByStudyStepTest.java | 6 ++-- .../DropSingleStudyVariantsStepTest.java | 6 ++-- .../steps/GenerateVepAnnotationStepTest.java | 8 ++--- .../jobs/steps/LoadFileStepTest.java | 10 +++---- .../jobs/steps/LoadStatisticsStepTest.java} | 30 ++++++++++++++----- .../jobs/steps/LoadVariantsStepTest.java} | 12 ++++---- .../steps/LoadVepAnnotationStepTest.java} | 10 +++---- ...PullFilesAndStatisticsByStudyStepTest.java | 7 +++-- .../io/readers/VariantsMongoReaderTest.java | 3 +- .../pipeline/io/writers/GeneWriterTest.java | 3 +- .../io/writers/StatisticsMongoWriterTest.java | 3 +- .../io/writers/VariantMongoWriterTest.java | 3 +- .../VariantSourceEntityMongoWriterTest.java | 7 +++-- ...gregatedVcfJobParametersValidatorTest.java | 3 +- .../DropStudyJobParametersValidatorTest.java | 3 +- ...enotypedVcfJobParametersValidatorTest.java | 3 +- ...onMetadataStepParametersValidatorTest.java | 3 +- 
...tatisticsStepParametersValidatorTest.java} | 10 +++---- ...seIndexesStepParametersValidatorTest.java} | 9 +++--- ...esByStudyStepParametersValidatorTest.java} | 9 +++--- ...yVariantsStepParametersValidatorTest.java} | 9 +++--- ...AnnotationStepParametersValidatorTest.java | 4 +-- ... LoadFileStepParametersValidatorTest.java} | 9 +++--- ...LoadGenesStepParametersValidatorTest.java} | 9 +++--- ...tatisticsStepParametersValidatorTest.java} | 10 +++---- ...dVariantsStepParametersValidatorTest.java} | 7 +++-- ...nnotationStepParametersValidatorTest.java} | 13 ++++---- ...csByStudyStepParametersValidatorTest.java} | 9 +++--- ...elineJobLauncherCommandLineRunnerTest.java | 1 + 96 files changed, 422 insertions(+), 360 deletions(-) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/readers/AnnotationReaderConfiguration.java (92%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/readers/GeneReaderConfiguration.java (91%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/readers/VariantsMongoReaderConfiguration.java (94%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/readers/VcfReaderConfiguration.java (95%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/writers/AnnotationCompositeWriterConfiguration.java (94%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/writers/AnnotationInVariantWriterConfiguration.java (94%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/writers/AnnotationWriterConfiguration.java (93%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/writers/GeneWriterConfiguration.java (92%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/writers/VariantWriterConfiguration.java (94%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/{ => io}/writers/VepAnnotationFileWriterConfiguration.java (92%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/AggregatedVcfJob.java (89%) 
rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/AnnotationJob.java (93%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/DatabaseInitializationJob.java (83%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/DropStudyJob.java (86%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/GenotypedVcfJob.java (88%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/PopulationStatisticsJob.java (91%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/flows/AnnotationFlow.java (85%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/flows/AnnotationFlowOptional.java (97%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/flows/ParallelStatisticsAndAnnotationFlow.java (97%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/flows/PopulationStatisticsFlow.java (86%) rename src/main/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/flows/PopulationStatisticsOptionalFlow.java (97%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/AnnotationMetadataStep.java => configuration/jobs/steps/AnnotationMetadataStepConfiguration.java} (93%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/CalculateStatisticsStep.java => configuration/jobs/steps/CalculateStatisticsStepConfiguration.java} (76%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/CreateDatabaseIndexesStep.java => configuration/jobs/steps/CreateDatabaseIndexesStepConfiguration.java} (74%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/DropFilesByStudyStep.java => configuration/jobs/steps/DropFilesByStudyStepConfiguration.java} (75%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/DropSingleStudyVariantsStep.java => configuration/jobs/steps/DropSingleStudyVariantsStepConfiguration.java} (79%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/GenerateVepAnnotationStep.java => 
configuration/jobs/steps/GenerateVepAnnotationStepConfiguration.java} (89%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/LoadFileStep.java => configuration/jobs/steps/LoadFileStepConfiguration.java} (80%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/GeneLoaderStep.java => configuration/jobs/steps/LoadGenesStepConfiguration.java} (86%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/LoadStatisticsStep.java => configuration/jobs/steps/LoadStatisticsStepConfiguration.java} (77%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/VariantLoaderStep.java => configuration/jobs/steps/LoadVariantsStepConfiguration.java} (90%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/AnnotationLoaderStep.java => configuration/jobs/steps/LoadVepAnnotationStepConfiguration.java} (86%) rename src/main/java/uk/ac/ebi/eva/pipeline/{jobs/steps/PullFilesAndStatisticsByStudyStep.java => configuration/jobs/steps/PullFilesAndStatisticsByStudyStepConfiguration.java} (77%) rename src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/{PopulationStatisticsGeneratorStep.java => CalculateStatisticsTasklet.java} (98%) rename src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/{IndexesGeneratorStep.java => CreateDatabaseIndexesTasklet.java} (96%) rename src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/{FilesByStudyDropperStep.java => DropFilesByStudyTasklet.java} (96%) rename src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/{SingleStudyVariantsDropperStep.java => DropSingleStudyVariantsTasklet.java} (96%) rename src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/{FileLoaderStep.java => LoadFileTasklet.java} (98%) rename src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/{PopulationStatisticsLoaderStep.java => LoadStatisticsTasklet.java} (98%) rename src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/{PullFilesAndStatisticsFromVariantGivenStudyIdStep.java => PullFilesAndStatisticsByStudyTasklet.java} (95%) rename 
src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{PopulationStatisticsGeneratorStepParametersValidator.java => CalculateStatisticsStepParametersValidator.java} (90%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{IndexesGeneratorStepParametersValidator.java => CreateDatabaseIndexesStepParametersValidator.java} (88%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{DropFilesByStudyStepValidator.java => DropFilesByStudyStepParametersValidator.java} (88%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{DropSingleStudyVariantsStepValidator.java => DropSingleStudyVariantsStepParametersValidator.java} (89%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{FileLoaderStepParametersValidator.java => LoadFileStepParametersValidator.java} (91%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{GeneLoaderStepParametersValidator.java => LoadGenesStepParametersValidator.java} (90%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{PopulationStatisticsLoaderStepParametersValidator.java => LoadStatisticsStepParametersValidator.java} (91%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{VariantLoaderStepParametersValidator.java => LoadVariantsStepParametersValidator.java} (93%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{AnnotationLoaderStepParametersValidator.java => LoadVepAnnotationStepParametersValidator.java} (85%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{PullFilesAndStatisticsByStudyStepValidator.java => PullFilesAndStatisticsByStudyStepParametersValidator.java} (89%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/AggregatedVcfJobTest.java (97%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/AnnotationJobTest.java (97%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => 
configuration}/jobs/DropStudyJobTest.java (95%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/GenotypedVcfJobTest.java (96%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/GenotypedVcfJobWorkflowTest.java (98%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/PopulationStatisticsJobTest.java (96%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/steps/AnnotationMetadataStepTest.java (97%) rename src/test/java/uk/ac/ebi/eva/pipeline/{jobs/steps/PopulationStatisticsGeneratorStepTest.java => configuration/jobs/steps/CalculateStatisticsStepTest.java} (94%) rename src/test/java/uk/ac/ebi/eva/pipeline/{jobs/steps/IndexesGeneratorStepTest.java => configuration/jobs/steps/CreateDatabaseIndexesStepTest.java} (92%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/steps/DropFilesByStudyStepTest.java (96%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/steps/DropSingleStudyVariantsStepTest.java (96%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/steps/GenerateVepAnnotationStepTest.java (96%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/steps/LoadFileStepTest.java (92%) rename src/test/java/uk/ac/ebi/eva/pipeline/{jobs/steps/PopulationStatisticsLoaderStepTest.java => configuration/jobs/steps/LoadStatisticsStepTest.java} (87%) rename src/test/java/uk/ac/ebi/eva/pipeline/{jobs/steps/VariantLoaderStepTest.java => configuration/jobs/steps/LoadVariantsStepTest.java} (92%) rename src/test/java/uk/ac/ebi/eva/pipeline/{jobs/steps/AnnotationLoaderStepTest.java => configuration/jobs/steps/LoadVepAnnotationStepTest.java} (95%) rename src/test/java/uk/ac/ebi/eva/pipeline/{ => configuration}/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java (97%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{PopulationStatisticsGeneratorStepParametersValidatorTest.java => 
CalculateStatisticsStepParametersValidatorTest.java} (90%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{IndexesGeneratorStepParametersValidatorTest.java => CreateDatabaseIndexesStepParametersValidatorTest.java} (87%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{DropFilesByStudyStepValidatorTest.java => DropFilesByStudyStepParametersValidatorTest.java} (89%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{DropSingleStudyVariantsStepValidatorTest.java => DropSingleStudyVariantsStepParametersValidatorTest.java} (89%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{FileLoaderStepParametersValidatorTest.java => LoadFileStepParametersValidatorTest.java} (93%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{GeneLoaderStepParametersValidatorTest.java => LoadGenesStepParametersValidatorTest.java} (91%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{PopulationStatisticsLoaderStepParametersValidatorTest.java => LoadStatisticsStepParametersValidatorTest.java} (92%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{VariantLoaderStepParametersValidatorTest.java => LoadVariantsStepParametersValidatorTest.java} (94%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{AnnotationLoaderStepParametersValidatorTest.java => LoadVepAnnotationStepParametersValidatorTest.java} (90%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{PullFilesAndStatisticsByStudyStepValidatorTest.java => PullFilesAndStatisticsByStudyStepParametersValidatorTest.java} (88%) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java index fee4beb7a..6fa82ac8d 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java @@ 
-44,7 +44,7 @@ public class BeanNames { public static final String LOAD_VEP_ANNOTATION_STEP = "load-vep-annotation-step"; public static final String CALCULATE_STATISTICS_STEP = "calculate-statistics-step"; public static final String CREATE_DATABASE_INDEXES_STEP = "create-database-indexes-step"; - public static final String GENES_LOAD_STEP = "genes-load-step"; + public static final String LOAD_GENES_STEP = "load-genes-step"; public static final String GENERATE_VEP_ANNOTATION_STEP = "generate-vep-annotation"; public static final String LOAD_STATISTICS_STEP = "load-statistics-step"; public static final String LOAD_VARIANTS_STEP = "load-variants-step"; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/AnnotationReaderConfiguration.java similarity index 92% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/AnnotationReaderConfiguration.java index a287fd597..4806bccf5 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/AnnotationReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/AnnotationReaderConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.readers; +package uk.ac.ebi.eva.pipeline.configuration.io.readers; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemStreamReader; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/GeneReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/GeneReaderConfiguration.java similarity index 91% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/GeneReaderConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/GeneReaderConfiguration.java index 63cc9fd2e..b496c8d5f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/GeneReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/GeneReaderConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.readers; + +package uk.ac.ebi.eva.pipeline.configuration.io.readers; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemStreamReader; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantsMongoReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java similarity index 94% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantsMongoReaderConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java index fe3e39ffe..6fa2dfb5a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantsMongoReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.readers; +package uk.ac.ebi.eva.pipeline.configuration.io.readers; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.context.annotation.Bean; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VcfReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VcfReaderConfiguration.java similarity index 95% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VcfReaderConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VcfReaderConfiguration.java index 98a403269..86baabc42 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VcfReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VcfReaderConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.readers; + +package uk.ac.ebi.eva.pipeline.configuration.io.readers; import org.opencb.biodata.models.variant.VariantSource; import org.springframework.batch.core.configuration.annotation.StepScope; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/AnnotationCompositeWriterConfiguration.java similarity index 94% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/AnnotationCompositeWriterConfiguration.java index d95888b82..8069dd77f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationCompositeWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/AnnotationCompositeWriterConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.writers; +package uk.ac.ebi.eva.pipeline.configuration.io.writers; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemWriter; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/AnnotationInVariantWriterConfiguration.java similarity index 94% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/AnnotationInVariantWriterConfiguration.java index b25667344..f9c7cfb59 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationInVariantWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/AnnotationInVariantWriterConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.writers; +package uk.ac.ebi.eva.pipeline.configuration.io.writers; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemWriter; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/AnnotationWriterConfiguration.java similarity index 93% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/AnnotationWriterConfiguration.java index 0f5ddf130..3437dc59a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/AnnotationWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/AnnotationWriterConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.writers; +package uk.ac.ebi.eva.pipeline.configuration.io.writers; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemWriter; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/GeneWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/GeneWriterConfiguration.java similarity index 92% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/GeneWriterConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/GeneWriterConfiguration.java index 2a687300b..d983e9fe3 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/GeneWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/GeneWriterConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.writers; + +package uk.ac.ebi.eva.pipeline.configuration.io.writers; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemWriter; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VariantWriterConfiguration.java similarity index 94% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantWriterConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VariantWriterConfiguration.java index 1c4b9cca6..70be86083 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VariantWriterConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.writers; + +package uk.ac.ebi.eva.pipeline.configuration.io.writers; import org.opencb.biodata.models.variant.VariantSource; import org.springframework.batch.core.configuration.annotation.StepScope; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VepAnnotationFileWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VepAnnotationFileWriterConfiguration.java similarity index 92% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VepAnnotationFileWriterConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VepAnnotationFileWriterConfiguration.java index 94ef2dc60..00ec38322 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VepAnnotationFileWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VepAnnotationFileWriterConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.configuration.writers; + +package uk.ac.ebi.eva.pipeline.configuration.io.writers; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemWriter; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJob.java similarity index 89% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJob.java index bdf0a0f1f..2afac40ad 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJob.java @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs; + +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +25,6 @@ import org.springframework.batch.core.job.builder.FlowJobBuilder; import org.springframework.batch.core.job.builder.JobBuilder; import org.springframework.batch.core.job.flow.Flow; -import org.springframework.batch.core.launch.support.RunIdIncrementer; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; @@ -32,9 +32,9 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.jobs.flows.AnnotationFlowOptional; -import uk.ac.ebi.eva.pipeline.jobs.steps.LoadFileStep; -import uk.ac.ebi.eva.pipeline.jobs.steps.VariantLoaderStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.AnnotationFlowOptional; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadFileStepConfiguration; +import 
uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVariantsStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; import uk.ac.ebi.eva.pipeline.parameters.validation.job.AggregatedVcfJobParametersValidator; @@ -53,7 +53,7 @@ */ @Configuration @EnableBatchProcessing -@Import({VariantLoaderStep.class, LoadFileStep.class, AnnotationFlowOptional.class}) +@Import({LoadVariantsStepConfiguration.class, LoadFileStepConfiguration.class, AnnotationFlowOptional.class}) public class AggregatedVcfJob { private static final Logger logger = LoggerFactory.getLogger(AggregatedVcfJob.class); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJob.java similarity index 93% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJob.java index 8926981b2..6afc4a89c 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJob.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs; +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,7 +30,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.jobs.flows.AnnotationFlow; +import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.AnnotationFlow; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; import uk.ac.ebi.eva.pipeline.parameters.validation.job.AnnotationJobParametersValidator; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/DatabaseInitializationJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DatabaseInitializationJob.java similarity index 83% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/DatabaseInitializationJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DatabaseInitializationJob.java index 7c23d22c5..d1eb6d0a7 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/DatabaseInitializationJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DatabaseInitializationJob.java @@ -1,5 +1,5 @@ /* - * Copyright 2015-2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs; +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,7 +22,6 @@ import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; import org.springframework.batch.core.job.builder.JobBuilder; -import org.springframework.batch.core.launch.support.RunIdIncrementer; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; @@ -30,13 +29,13 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.jobs.steps.CreateDatabaseIndexesStep; -import uk.ac.ebi.eva.pipeline.jobs.steps.GeneLoaderStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.CreateDatabaseIndexesStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadGenesStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CREATE_DATABASE_INDEXES_STEP; -import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENES_LOAD_STEP; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.INIT_DATABASE_JOB; +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_GENES_STEP; /** * Job to initialize the databases that will be used in later jobs. 
@@ -48,13 +47,13 @@ */ @Configuration @EnableBatchProcessing -@Import({GeneLoaderStep.class, CreateDatabaseIndexesStep.class}) +@Import({LoadGenesStepConfiguration.class, CreateDatabaseIndexesStepConfiguration.class}) public class DatabaseInitializationJob { private static final Logger logger = LoggerFactory.getLogger(DatabaseInitializationJob.class); @Autowired - @Qualifier(GENES_LOAD_STEP) + @Qualifier(LOAD_GENES_STEP) private Step genesLoadStep; @Autowired diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/DropStudyJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJob.java similarity index 86% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/DropStudyJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJob.java index c7a6b3ca9..3f6c62767 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/DropStudyJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJob.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs; +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +24,6 @@ import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; import org.springframework.batch.core.job.builder.JobBuilder; import org.springframework.batch.core.job.builder.SimpleJobBuilder; -import org.springframework.batch.core.launch.support.RunIdIncrementer; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; @@ -32,9 +31,9 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.jobs.steps.DropFilesByStudyStep; -import uk.ac.ebi.eva.pipeline.jobs.steps.DropSingleStudyVariantsStep; -import uk.ac.ebi.eva.pipeline.jobs.steps.PullFilesAndStatisticsByStudyStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropFilesByStudyStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropSingleStudyVariantsStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.PullFilesAndStatisticsByStudyStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; import uk.ac.ebi.eva.pipeline.parameters.validation.job.DropStudyJobParametersValidator; @@ -50,7 +49,7 @@ */ @Configuration @EnableBatchProcessing -@Import({DropSingleStudyVariantsStep.class, PullFilesAndStatisticsByStudyStep.class, DropFilesByStudyStep.class}) +@Import({DropSingleStudyVariantsStepConfiguration.class, PullFilesAndStatisticsByStudyStepConfiguration.class, DropFilesByStudyStepConfiguration.class}) public class DropStudyJob { private static final Logger logger = LoggerFactory.getLogger(DropStudyJob.class); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJob.java 
b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJob.java similarity index 88% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJob.java index f0fac98dc..f8f5019a1 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJob.java @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs; + +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +25,6 @@ import org.springframework.batch.core.job.builder.FlowJobBuilder; import org.springframework.batch.core.job.builder.JobBuilder; import org.springframework.batch.core.job.flow.Flow; -import org.springframework.batch.core.launch.support.RunIdIncrementer; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; @@ -32,9 +32,9 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.jobs.flows.ParallelStatisticsAndAnnotationFlow; -import uk.ac.ebi.eva.pipeline.jobs.steps.LoadFileStep; -import uk.ac.ebi.eva.pipeline.jobs.steps.VariantLoaderStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.ParallelStatisticsAndAnnotationFlow; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadFileStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVariantsStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; import uk.ac.ebi.eva.pipeline.parameters.validation.job.GenotypedVcfJobParametersValidator; @@ -54,7 +54,7 @@ */ @Configuration @EnableBatchProcessing -@Import({VariantLoaderStep.class, 
LoadFileStep.class, ParallelStatisticsAndAnnotationFlow.class}) +@Import({LoadVariantsStepConfiguration.class, LoadFileStepConfiguration.class, ParallelStatisticsAndAnnotationFlow.class}) public class GenotypedVcfJob { private static final Logger logger = LoggerFactory.getLogger(GenotypedVcfJob.class); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJob.java similarity index 91% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJob.java index e30f9cbf5..17ee37861 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJob.java @@ -1,5 +1,5 @@ /* - * Copyright 2015 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs; +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,7 +22,6 @@ import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; import org.springframework.batch.core.job.builder.JobBuilder; import org.springframework.batch.core.job.flow.Flow; -import org.springframework.batch.core.launch.support.RunIdIncrementer; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; @@ -30,7 +29,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.jobs.flows.PopulationStatisticsFlow; +import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.PopulationStatisticsFlow; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_FLOW; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlow.java similarity index 85% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlow.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlow.java index 74111cf63..93bbe9d24 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlow.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.flows; +package uk.ac.ebi.eva.pipeline.configuration.jobs.flows; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.Step; @@ -27,9 +27,9 @@ import org.springframework.context.annotation.Import; import uk.ac.ebi.eva.pipeline.jobs.deciders.EmptyVepOutputDecider; -import uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep; -import uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationMetadataStep; -import uk.ac.ebi.eva.pipeline.jobs.steps.GenerateVepAnnotationStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVepAnnotationStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.AnnotationMetadataStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.GenerateVepAnnotationStepConfiguration; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENERATE_VEP_ANNOTATION_STEP; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_ANNOTATION_METADATA_STEP; @@ -44,7 +44,7 @@ */ @Configuration @EnableBatchProcessing -@Import({GenerateVepAnnotationStep.class, AnnotationLoaderStep.class, AnnotationMetadataStep.class}) +@Import({GenerateVepAnnotationStepConfiguration.class, LoadVepAnnotationStepConfiguration.class, AnnotationMetadataStepConfiguration.class}) public class AnnotationFlow { @Autowired diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlowOptional.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowOptional.java similarity index 97% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlowOptional.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowOptional.java index 1877fd484..9ff502a14 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlowOptional.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowOptional.java @@ -13,7 +13,7 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.flows; +package uk.ac.ebi.eva.pipeline.configuration.jobs.flows; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/ParallelStatisticsAndAnnotationFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/ParallelStatisticsAndAnnotationFlow.java similarity index 97% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/ParallelStatisticsAndAnnotationFlow.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/ParallelStatisticsAndAnnotationFlow.java index 3a8e1c071..8bb3b9012 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/ParallelStatisticsAndAnnotationFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/ParallelStatisticsAndAnnotationFlow.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.flows; +package uk.ac.ebi.eva.pipeline.configuration.jobs.flows; import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.core.job.builder.FlowBuilder; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsFlow.java similarity index 86% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsFlow.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsFlow.java index f718b959f..79c97df2a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsFlow.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.flows; +package uk.ac.ebi.eva.pipeline.configuration.jobs.flows; import org.springframework.batch.core.Step; import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; @@ -25,8 +25,8 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; -import uk.ac.ebi.eva.pipeline.jobs.steps.CalculateStatisticsStep; -import uk.ac.ebi.eva.pipeline.jobs.steps.LoadStatisticsStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.CalculateStatisticsStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadStatisticsStepConfiguration; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_FLOW; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_STEP; @@ -38,7 +38,7 @@ */ @Configuration @EnableBatchProcessing -@Import({CalculateStatisticsStep.class, LoadStatisticsStep.class}) 
+@Import({CalculateStatisticsStepConfiguration.class, LoadStatisticsStepConfiguration.class}) public class PopulationStatisticsFlow { @Autowired diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsOptionalFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsOptionalFlow.java similarity index 97% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsOptionalFlow.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsOptionalFlow.java index d444be95a..a4f109968 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsOptionalFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsOptionalFlow.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.flows; +package uk.ac.ebi.eva.pipeline.configuration.jobs.flows; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationMetadataStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepConfiguration.java similarity index 93% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationMetadataStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepConfiguration.java index 896f7106f..cb483f13f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationMetadataStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepConfiguration.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,9 +36,9 @@ */ @Configuration @EnableBatchProcessing -public class AnnotationMetadataStep { +public class AnnotationMetadataStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(AnnotationMetadataStep.class); + private static final Logger logger = LoggerFactory.getLogger(AnnotationMetadataStepConfiguration.class); @Bean @StepScope diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CalculateStatisticsStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepConfiguration.java similarity index 76% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CalculateStatisticsStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepConfiguration.java index 094386d9f..70009cc9a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CalculateStatisticsStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,32 +24,32 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsGeneratorStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.CalculateStatisticsTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_STEP; /** - * Configuration class that inject a step created with the tasklet {@link PopulationStatisticsGeneratorStep} + * Configuration class that inject a step created with the tasklet {@link CalculateStatisticsTasklet} */ @Configuration @EnableBatchProcessing -public class CalculateStatisticsStep { +public class CalculateStatisticsStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(CalculateStatisticsStep.class); + private static final Logger logger = LoggerFactory.getLogger(CalculateStatisticsStepConfiguration.class); @Bean @StepScope - public PopulationStatisticsGeneratorStep populationStatisticsGeneratorStep() { - return new PopulationStatisticsGeneratorStep(); + public CalculateStatisticsTasklet calculateStatisticsTasklet() { + return new CalculateStatisticsTasklet(); } @Bean(CALCULATE_STATISTICS_STEP) public TaskletStep calculateStatisticsStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + CALCULATE_STATISTICS_STEP + "'"); return TaskletUtils.generateStep(stepBuilderFactory, CALCULATE_STATISTICS_STEP, - populationStatisticsGeneratorStep(), jobOptions.isAllowStartIfComplete()); + calculateStatisticsTasklet(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CreateDatabaseIndexesStep.java 
b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepConfiguration.java similarity index 74% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CreateDatabaseIndexesStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepConfiguration.java index 6f33328e0..333a6fa51 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CreateDatabaseIndexesStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,32 +24,32 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.IndexesGeneratorStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.CreateDatabaseIndexesTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CREATE_DATABASE_INDEXES_STEP; /** - * Configuration class that inject a step created with the tasklet {@link IndexesGeneratorStep} + * Configuration class that inject a step created with the tasklet {@link CreateDatabaseIndexesTasklet} */ @Configuration @EnableBatchProcessing -public class CreateDatabaseIndexesStep { +public class CreateDatabaseIndexesStepConfiguration { - private static final Logger logger = 
LoggerFactory.getLogger(CreateDatabaseIndexesStep.class); + private static final Logger logger = LoggerFactory.getLogger(CreateDatabaseIndexesStepConfiguration.class); @Bean @StepScope - public IndexesGeneratorStep indexesGeneratorStep() { - return new IndexesGeneratorStep(); + public CreateDatabaseIndexesTasklet createDatabaseIndexesTasklet() { + return new CreateDatabaseIndexesTasklet(); } @Bean(CREATE_DATABASE_INDEXES_STEP) public TaskletStep createDatabaseIndexesStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + CREATE_DATABASE_INDEXES_STEP + "'"); - return TaskletUtils.generateStep(stepBuilderFactory, CREATE_DATABASE_INDEXES_STEP, indexesGeneratorStep(), - jobOptions.isAllowStartIfComplete()); + return TaskletUtils.generateStep(stepBuilderFactory, CREATE_DATABASE_INDEXES_STEP, + createDatabaseIndexesTasklet(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropFilesByStudyStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepConfiguration.java similarity index 75% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropFilesByStudyStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepConfiguration.java index c7820c7a1..16d1786c6 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropFilesByStudyStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepConfiguration.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,32 +25,32 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.FilesByStudyDropperStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.DropFilesByStudyTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.DROP_FILES_BY_STUDY_STEP; /** - * Configuration class that inject a step created with the tasklet {@link FilesByStudyDropperStep} + * Configuration class that inject a step created with the tasklet {@link DropFilesByStudyTasklet} */ @Configuration @EnableBatchProcessing -public class DropFilesByStudyStep { +public class DropFilesByStudyStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(DropFilesByStudyStep.class); + private static final Logger logger = LoggerFactory.getLogger(DropFilesByStudyStepConfiguration.class); @Bean @StepScope - public FilesByStudyDropperStep fileDropperStep() { - return new FilesByStudyDropperStep(); + public DropFilesByStudyTasklet dropFilesByStudyTasklet() { + return new DropFilesByStudyTasklet(); } @Bean(DROP_FILES_BY_STUDY_STEP) - public TaskletStep dropSingleStudyVariantsStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { + public TaskletStep dropFilesByStudyStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + DROP_FILES_BY_STUDY_STEP + "'"); return TaskletUtils.generateStep(stepBuilderFactory, DROP_FILES_BY_STUDY_STEP, - fileDropperStep(), jobOptions.isAllowStartIfComplete()); + dropFilesByStudyTasklet(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropSingleStudyVariantsStep.java 
b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepConfiguration.java similarity index 79% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropSingleStudyVariantsStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepConfiguration.java index d29a83183..5f1acd536 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropSingleStudyVariantsStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepConfiguration.java @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; + +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,32 +25,32 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.SingleStudyVariantsDropperStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.DropSingleStudyVariantsTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.DROP_SINGLE_STUDY_VARIANTS_STEP; /** - * Configuration class that inject a step created with the tasklet {@link SingleStudyVariantsDropperStep} + * Configuration class that inject a step created with the tasklet {@link DropSingleStudyVariantsTasklet} */ @Configuration @EnableBatchProcessing -public class DropSingleStudyVariantsStep { +public class DropSingleStudyVariantsStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(DropSingleStudyVariantsStep.class); + private static final Logger logger = LoggerFactory.getLogger(DropSingleStudyVariantsStepConfiguration.class); @Bean @StepScope - public SingleStudyVariantsDropperStep 
singleStudyVariantsDropperStep() { - return new SingleStudyVariantsDropperStep(); + public DropSingleStudyVariantsTasklet dropSingleStudyVariantsTasklet() { + return new DropSingleStudyVariantsTasklet(); } @Bean(DROP_SINGLE_STUDY_VARIANTS_STEP) public TaskletStep dropSingleStudyVariantsStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + DROP_SINGLE_STUDY_VARIANTS_STEP + "'"); return TaskletUtils.generateStep(stepBuilderFactory, DROP_SINGLE_STUDY_VARIANTS_STEP, - singleStudyVariantsDropperStep(), jobOptions.isAllowStartIfComplete()); + dropSingleStudyVariantsTasklet(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepConfiguration.java similarity index 89% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepConfiguration.java index 8ff710658..50023d659 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,8 +30,8 @@ import org.springframework.context.annotation.Import; import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.readers.VariantsMongoReaderConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.writers.VepAnnotationFileWriterConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.io.readers.VariantsMongoReaderConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.io.writers.VepAnnotationFileWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.AnnotationFlatFileReader; import uk.ac.ebi.eva.pipeline.listeners.StepProgressListener; import uk.ac.ebi.eva.pipeline.model.VariantWrapper; @@ -53,9 +53,9 @@ @EnableBatchProcessing @Import({VariantsMongoReaderConfiguration.class, VepAnnotationFileWriterConfiguration.class, ChunkSizeCompletionPolicyConfiguration.class}) -public class GenerateVepAnnotationStep { +public class GenerateVepAnnotationStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(GenerateVepAnnotationStep.class); + private static final Logger logger = LoggerFactory.getLogger(GenerateVepAnnotationStepConfiguration.class); @Autowired @Qualifier(VARIANTS_READER) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepConfiguration.java similarity index 80% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepConfiguration.java index 63037c53c..cec766f4a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 
2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,31 +24,31 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.FileLoaderStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.LoadFileTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_FILE_STEP; /** - * Configuration class that inject a step created with the tasklet {@link FileLoaderStep} + * Configuration class that inject a step created with the tasklet {@link LoadFileTasklet} */ @Configuration @EnableBatchProcessing -public class LoadFileStep { +public class LoadFileStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(LoadFileStep.class); + private static final Logger logger = LoggerFactory.getLogger(LoadFileStepConfiguration.class); @Bean @StepScope - public FileLoaderStep fileLoaderStep() { - return new FileLoaderStep(); + public LoadFileTasklet loadFileTasklet() { + return new LoadFileTasklet(); } @Bean(LOAD_FILE_STEP) public TaskletStep loadFileStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + LOAD_FILE_STEP + "'"); - return TaskletUtils.generateStep(stepBuilderFactory, LOAD_FILE_STEP, fileLoaderStep(), + return TaskletUtils.generateStep(stepBuilderFactory, LOAD_FILE_STEP, loadFileTasklet(), 
jobOptions.isAllowStartIfComplete()); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GeneLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadGenesStepConfiguration.java similarity index 86% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GeneLoaderStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadGenesStepConfiguration.java index 4b645b23b..5dcb34a2f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GeneLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadGenesStepConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,8 +32,8 @@ import org.springframework.context.annotation.Import; import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.readers.GeneReaderConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.writers.GeneWriterConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.io.readers.GeneReaderConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.io.writers.GeneWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.GeneLineMapper; import uk.ac.ebi.eva.pipeline.io.readers.GeneReader; import uk.ac.ebi.eva.pipeline.io.writers.GeneWriter; @@ -42,7 +42,7 @@ import uk.ac.ebi.eva.pipeline.model.FeatureCoordinates; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENES_LOAD_STEP; +import static 
uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_GENES_STEP; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENE_READER; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENE_WRITER; @@ -62,9 +62,9 @@ @Configuration @EnableBatchProcessing @Import({GeneReaderConfiguration.class, GeneWriterConfiguration.class, ChunkSizeCompletionPolicyConfiguration.class}) -public class GeneLoaderStep { +public class LoadGenesStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(GeneLoaderStep.class); + private static final Logger logger = LoggerFactory.getLogger(LoadGenesStepConfiguration.class); @Autowired @Qualifier(GENE_READER) @@ -74,12 +74,12 @@ public class GeneLoaderStep { @Qualifier(GENE_WRITER) private ItemWriter writer; - @Bean(GENES_LOAD_STEP) + @Bean(LOAD_GENES_STEP) public Step genesLoadStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, SimpleCompletionPolicy chunkSizeCompletionPolicy) { - logger.debug("Building '" + GENES_LOAD_STEP + "'"); + logger.debug("Building '" + LOAD_GENES_STEP + "'"); - return stepBuilderFactory.get(GENES_LOAD_STEP) + return stepBuilderFactory.get(LOAD_GENES_STEP) .chunk(chunkSizeCompletionPolicy) .reader(reader) .processor(new GeneFilterProcessor()) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadStatisticsStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepConfiguration.java similarity index 77% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadStatisticsStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepConfiguration.java index bce43fa74..6c78880f8 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadStatisticsStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European 
Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,32 +24,32 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsLoaderStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.LoadStatisticsTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_STATISTICS_STEP; /** - * Configuration class that inject a step created with the tasklet {@link LoadStatisticsStep} + * Configuration class that inject a step created with the tasklet {@link LoadStatisticsTasklet} */ @Configuration @EnableBatchProcessing -public class LoadStatisticsStep { +public class LoadStatisticsStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(LoadStatisticsStep.class); + private static final Logger logger = LoggerFactory.getLogger(LoadStatisticsStepConfiguration.class); @Bean @StepScope - public PopulationStatisticsLoaderStep populationStatisticsLoaderStep() { - return new PopulationStatisticsLoaderStep(); + public LoadStatisticsTasklet loadStatisticsTasklet() { + return new LoadStatisticsTasklet(); } @Bean(LOAD_STATISTICS_STEP) public TaskletStep loadStatisticsStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + LOAD_STATISTICS_STEP + "'"); return TaskletUtils.generateStep(stepBuilderFactory, LOAD_STATISTICS_STEP, - populationStatisticsLoaderStep(),
jobOptions.isAllowStartIfComplete()); + loadStatisticsTasklet(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepConfiguration.java similarity index 90% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepConfiguration.java index 19d477098..90a515535 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2015-2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,8 +31,8 @@ import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.readers.VcfReaderConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.writers.VariantWriterConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.io.readers.VcfReaderConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.io.writers.VariantWriterConfiguration; import uk.ac.ebi.eva.pipeline.jobs.steps.processors.VariantNoAlternateFilterProcessor; import uk.ac.ebi.eva.pipeline.listeners.SkippedItemListener; import uk.ac.ebi.eva.pipeline.listeners.StepProgressListener; @@ -52,9 +52,9 @@ @Configuration @EnableBatchProcessing @Import({VcfReaderConfiguration.class, VariantWriterConfiguration.class, ChunkSizeCompletionPolicyConfiguration.class}) -public class VariantLoaderStep { +public class LoadVariantsStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(VariantLoaderStep.class); + private static final Logger logger = LoggerFactory.getLogger(LoadVariantsStepConfiguration.class); @Autowired @Qualifier(VARIANT_READER) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepConfiguration.java similarity index 86% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepConfiguration.java index 05733d429..6256f096b 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 
EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,11 +30,13 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; + import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.readers.AnnotationReaderConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.writers.AnnotationCompositeWriterConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.io.readers.AnnotationReaderConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.io.writers.AnnotationCompositeWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.AnnotationFlatFileReader; +import uk.ac.ebi.eva.pipeline.io.writers.AnnotationInVariantMongoWriter; import uk.ac.ebi.eva.pipeline.io.writers.AnnotationMongoWriter; import uk.ac.ebi.eva.pipeline.listeners.AnnotationLoaderStepStatisticsListener; import uk.ac.ebi.eva.pipeline.listeners.SkippedItemListener; @@ -57,14 +59,15 @@ * 20_60479_C/T 20:60479 T - - - intergenic_variant - - - - - rs149529999 GMAF=T:0.0018;AFR_MAF=T:0.01;AMR_MAF=T:0.0028 *

* each line of the file is loaded with {@link AnnotationFlatFileReader} into a {@link Annotation} and then sent - * to mongo with {@link AnnotationMongoWriter}. + * to mongo with {@link AnnotationMongoWriter} and {@link AnnotationInVariantMongoWriter}. */ @Configuration @EnableBatchProcessing -@Import({AnnotationReaderConfiguration.class, AnnotationCompositeWriterConfiguration.class, ChunkSizeCompletionPolicyConfiguration.class}) -public class AnnotationLoaderStep { - private static final Logger logger = LoggerFactory.getLogger(AnnotationLoaderStep.class); +@Import({AnnotationReaderConfiguration.class, AnnotationCompositeWriterConfiguration.class, + ChunkSizeCompletionPolicyConfiguration.class}) +public class LoadVepAnnotationStepConfiguration { + private static final Logger logger = LoggerFactory.getLogger(LoadVepAnnotationStepConfiguration.class); @Autowired @Qualifier(VARIANT_ANNOTATION_READER) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/PullFilesAndStatisticsByStudyStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepConfiguration.java similarity index 77% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/PullFilesAndStatisticsByStudyStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepConfiguration.java index b362aa6cf..22cccd326 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/PullFilesAndStatisticsByStudyStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepConfiguration.java @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; + +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +25,7 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PullFilesAndStatisticsFromVariantGivenStudyIdStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PullFilesAndStatisticsByStudyTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; @@ -35,21 +36,21 @@ */ @Configuration @EnableBatchProcessing -public class PullFilesAndStatisticsByStudyStep { +public class PullFilesAndStatisticsByStudyStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(PullFilesAndStatisticsByStudyStep.class); + private static final Logger logger = LoggerFactory.getLogger(PullFilesAndStatisticsByStudyStepConfiguration.class); @Bean @StepScope - public PullFilesAndStatisticsFromVariantGivenStudyIdStep variantsAndStatisticsDropperTasklet() { - return new PullFilesAndStatisticsFromVariantGivenStudyIdStep(); + public PullFilesAndStatisticsByStudyTasklet pullFilesAndStatisticsByStudyTasklet() { + return new PullFilesAndStatisticsByStudyTasklet(); } @Bean(PULL_FILES_AND_STATISTICS_BY_STUDY_STEP) - public TaskletStep dropVariantsAndStatisticsByStudioStep(StepBuilderFactory stepBuilderFactory, + public TaskletStep pullFilesAndStatisticsByStudyStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { return TaskletUtils.generateStep(stepBuilderFactory, PULL_FILES_AND_STATISTICS_BY_STUDY_STEP, - variantsAndStatisticsDropperTasklet(), jobOptions.isAllowStartIfComplete()); + pullFilesAndStatisticsByStudyTasklet(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsGeneratorStep.java 
b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/CalculateStatisticsTasklet.java similarity index 98% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsGeneratorStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/CalculateStatisticsTasklet.java index 06b20c493..040e796c8 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsGeneratorStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/CalculateStatisticsTasklet.java @@ -49,8 +49,8 @@ * Input: variants loaded into mongodb * Output: file containing statistics (.variants.stats.json.gz) */ -public class PopulationStatisticsGeneratorStep implements Tasklet { - private static final Logger logger = LoggerFactory.getLogger(PopulationStatisticsGeneratorStep.class); +public class CalculateStatisticsTasklet implements Tasklet { + private static final Logger logger = LoggerFactory.getLogger(CalculateStatisticsTasklet.class); @Autowired private InputParameters inputParameters; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/IndexesGeneratorStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/CreateDatabaseIndexesTasklet.java similarity index 96% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/IndexesGeneratorStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/CreateDatabaseIndexesTasklet.java index 0da8cfea2..0358b3f0a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/IndexesGeneratorStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/CreateDatabaseIndexesTasklet.java @@ -30,7 +30,7 @@ *

* Currently it only has indexes for the features collection. */ -public class IndexesGeneratorStep implements Tasklet { +public class CreateDatabaseIndexesTasklet implements Tasklet { @Autowired private MongoOperations mongoOperations; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FilesByStudyDropperStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropFilesByStudyTasklet.java similarity index 96% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FilesByStudyDropperStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropFilesByStudyTasklet.java index 76b439679..92e9a0e7e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FilesByStudyDropperStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropFilesByStudyTasklet.java @@ -40,9 +40,9 @@ *

* Output: all files that are in that study are removed */ -public class FilesByStudyDropperStep implements Tasklet { +public class DropFilesByStudyTasklet implements Tasklet { - private static final Logger logger = LoggerFactory.getLogger(FilesByStudyDropperStep.class); + private static final Logger logger = LoggerFactory.getLogger(DropFilesByStudyTasklet.class); @Autowired private MongoOperations mongoOperations; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/SingleStudyVariantsDropperStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropSingleStudyVariantsTasklet.java similarity index 96% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/SingleStudyVariantsDropperStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropSingleStudyVariantsTasklet.java index 29a03f39a..b9603742f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/SingleStudyVariantsDropperStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropSingleStudyVariantsTasklet.java @@ -42,9 +42,9 @@ *

* Output: those variants are removed */ -public class SingleStudyVariantsDropperStep implements Tasklet { +public class DropSingleStudyVariantsTasklet implements Tasklet { - private static final Logger logger = LoggerFactory.getLogger(SingleStudyVariantsDropperStep.class); + private static final Logger logger = LoggerFactory.getLogger(DropSingleStudyVariantsTasklet.class); @Autowired private MongoOperations mongoOperations; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FileLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/LoadFileTasklet.java similarity index 98% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FileLoaderStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/LoadFileTasklet.java index 492f2cbee..19e33357d 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FileLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/LoadFileTasklet.java @@ -39,7 +39,7 @@ *

* Output: the collection "files" contains the metadata of the VCF. */ -public class FileLoaderStep implements Tasklet { +public class LoadFileTasklet implements Tasklet { @Autowired private MongoOperations mongoOperations; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/LoadStatisticsTasklet.java similarity index 98% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsLoaderStep.java rename to src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/LoadStatisticsTasklet.java index 61bf69a66..7afb939fd 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/LoadStatisticsTasklet.java @@ -94,8 +94,8 @@ * } * } */ -public class PopulationStatisticsLoaderStep implements Tasklet { - private static final Logger logger = LoggerFactory.getLogger(PopulationStatisticsLoaderStep.class); +public class LoadStatisticsTasklet implements Tasklet { + private static final Logger logger = LoggerFactory.getLogger(LoadStatisticsTasklet.class); @Autowired private InputParameters inputParameters; @@ -110,7 +110,7 @@ public class PopulationStatisticsLoaderStep implements Tasklet { private ObjectMapper jsonObjectMapper; - public PopulationStatisticsLoaderStep() { + public LoadStatisticsTasklet() { jsonFactory = new JsonFactory(); jsonObjectMapper = new ObjectMapper(jsonFactory); jsonObjectMapper.addMixIn(VariantStats.class, VariantStatsJsonMixin.class); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsFromVariantGivenStudyIdStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsByStudyTasklet.java similarity index 95% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsFromVariantGivenStudyIdStep.java rename to 
src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsByStudyTasklet.java index e14ee3380..280fa43a3 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsFromVariantGivenStudyIdStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PullFilesAndStatisticsByStudyTasklet.java @@ -41,10 +41,10 @@ * Tasklet that removes the files and statistics in a variant given a studyId. The id is readed from the jobParameter * studyId. */ -public class PullFilesAndStatisticsFromVariantGivenStudyIdStep implements Tasklet { +public class PullFilesAndStatisticsByStudyTasklet implements Tasklet { private static final Logger logger = LoggerFactory.getLogger( - PullFilesAndStatisticsFromVariantGivenStudyIdStep.class); + PullFilesAndStatisticsByStudyTasklet.class); @Autowired private MongoOperations mongoOperations; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java index 1051f2c02..3dcb16d02 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java @@ -25,8 +25,8 @@ * Service that holds access to the values for annotatation steps like VEP etc. * * NOTE the @StepScope this is probably because the Step/Tasklet in this case the - * {@link uk.ac.ebi.eva.pipeline.jobs.flows.AnnotationFlow} is executed in parallel with statistics - * {@link uk.ac.ebi.eva.pipeline.jobs.flows.PopulationStatisticsFlow} and they are not sharing the same context. + * {@link uk.ac.ebi.eva.pipeline.configuration.jobs.flows.AnnotationFlow} is executed in parallel with statistics + * {@link uk.ac.ebi.eva.pipeline.configuration.jobs.flows.PopulationStatisticsFlow} and they are not sharing the same context. * With @JobScope will not work! 
*/ @Service diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidator.java index 4dfe0dc6f..c1cf71b18 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidator.java @@ -21,18 +21,19 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AggregatedVcfJob; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.AnnotationLoaderStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadVepAnnotationStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.AnnotationMetadataStepParametersValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.FileLoaderStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadFileStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.GenerateVepAnnotationStepParametersValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.VariantLoaderStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadVariantsStepParametersValidator; import java.util.ArrayList; import java.util.List; /** - * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.AggregatedVcfJob} + * Validates the job parameters necessary to execute an {@link AggregatedVcfJob} */ public class AggregatedVcfJobParametersValidator extends DefaultJobParametersValidator { @@ -44,15 +45,15 @@ public void validate(JobParameters parameters) throws 
JobParametersInvalidExcept private CompositeJobParametersValidator compositeJobParametersValidator(JobParameters jobParameters) { List jobParametersValidators = new ArrayList<>(); - jobParametersValidators.add(new VariantLoaderStepParametersValidator()); - jobParametersValidators.add(new FileLoaderStepParametersValidator()); + jobParametersValidators.add(new LoadVariantsStepParametersValidator()); + jobParametersValidators.add(new LoadFileStepParametersValidator()); Boolean skipAnnotation = Boolean.valueOf(jobParameters.getString(JobParametersNames.ANNOTATION_SKIP)); if (!skipAnnotation) { boolean studyIdRequired = true; jobParametersValidators.add(new GenerateVepAnnotationStepParametersValidator(studyIdRequired)); - jobParametersValidators.add(new AnnotationLoaderStepParametersValidator(studyIdRequired)); + jobParametersValidators.add(new LoadVepAnnotationStepParametersValidator(studyIdRequired)); jobParametersValidators.add(new AnnotationMetadataStepParametersValidator()); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java index c1ecf35f7..1d23bb482 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java @@ -21,15 +21,16 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.DropFilesByStudyStepValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.DropSingleStudyVariantsStepValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.PullFilesAndStatisticsByStudyStepValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; +import 
uk.ac.ebi.eva.pipeline.parameters.validation.step.DropFilesByStudyStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.DropSingleStudyVariantsStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.PullFilesAndStatisticsByStudyStepParametersValidator; import java.util.ArrayList; import java.util.List; /** - * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.DropStudyJob} + * Validates the job parameters necessary to execute an {@link DropStudyJob} */ public class DropStudyJobParametersValidator extends DefaultJobParametersValidator { @@ -41,9 +42,9 @@ public void validate(JobParameters parameters) throws JobParametersInvalidExcept private CompositeJobParametersValidator compositeJobParametersValidator() { List jobParametersValidators = new ArrayList<>(); - jobParametersValidators.add(new DropSingleStudyVariantsStepValidator()); - jobParametersValidators.add(new PullFilesAndStatisticsByStudyStepValidator()); - jobParametersValidators.add(new DropFilesByStudyStepValidator()); + jobParametersValidators.add(new DropSingleStudyVariantsStepParametersValidator()); + jobParametersValidators.add(new PullFilesAndStatisticsByStudyStepParametersValidator()); + jobParametersValidators.add(new DropFilesByStudyStepParametersValidator()); CompositeJobParametersValidator compositeJobParametersValidator = new CompositeJobParametersValidator(); compositeJobParametersValidator.setValidators(jobParametersValidators); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidator.java index 51a4c8fe1..d255d3bef 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidator.java @@ -21,20 
+21,21 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.AnnotationLoaderStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadVepAnnotationStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.AnnotationMetadataStepParametersValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.FileLoaderStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadFileStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.GenerateVepAnnotationStepParametersValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.PopulationStatisticsGeneratorStepParametersValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.PopulationStatisticsLoaderStepParametersValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.VariantLoaderStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.CalculateStatisticsStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadStatisticsStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadVariantsStepParametersValidator; import java.util.ArrayList; import java.util.List; /** - * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.GenotypedVcfJob} + * Validates the job parameters necessary to execute an {@link GenotypedVcfJob} */ public class GenotypedVcfJobParametersValidator extends DefaultJobParametersValidator { @@ -46,22 +47,22 @@ public void validate(JobParameters parameters) throws JobParametersInvalidExcept private CompositeJobParametersValidator 
compositeJobParametersValidator(JobParameters jobParameters) { List jobParametersValidators = new ArrayList<>(); - jobParametersValidators.add(new VariantLoaderStepParametersValidator()); - jobParametersValidators.add(new FileLoaderStepParametersValidator()); + jobParametersValidators.add(new LoadVariantsStepParametersValidator()); + jobParametersValidators.add(new LoadFileStepParametersValidator()); Boolean skipAnnotation = Boolean.valueOf(jobParameters.getString(JobParametersNames.ANNOTATION_SKIP)); if (!skipAnnotation) { boolean studyIdRequired = true; jobParametersValidators.add(new GenerateVepAnnotationStepParametersValidator(studyIdRequired)); - jobParametersValidators.add(new AnnotationLoaderStepParametersValidator(studyIdRequired)); + jobParametersValidators.add(new LoadVepAnnotationStepParametersValidator(studyIdRequired)); jobParametersValidators.add(new AnnotationMetadataStepParametersValidator()); } Boolean skipStats = Boolean.valueOf(jobParameters.getString(JobParametersNames.STATISTICS_SKIP)); if (!skipStats) { - jobParametersValidators.add(new PopulationStatisticsGeneratorStepParametersValidator()); - jobParametersValidators.add(new PopulationStatisticsLoaderStepParametersValidator()); + jobParametersValidators.add(new CalculateStatisticsStepParametersValidator()); + jobParametersValidators.add(new LoadStatisticsStepParametersValidator()); } CompositeJobParametersValidator compositeJobParametersValidator = new CompositeJobParametersValidator(); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationMetadataStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationMetadataStepParametersValidator.java index a8ec30c76..3b1ceef9d 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationMetadataStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationMetadataStepParametersValidator.java @@ -21,6 
+21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVepAnnotationStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsAnnotationMetadataNameValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.VepCacheVersionValidator; @@ -30,7 +31,7 @@ import java.util.List; /** - * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep} + * Validates the job parameters necessary to execute an {@link LoadVepAnnotationStepConfiguration} */ public class AnnotationMetadataStepParametersValidator extends DefaultJobParametersValidator { diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsGeneratorStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CalculateStatisticsStepParametersValidator.java similarity index 90% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsGeneratorStepParametersValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CalculateStatisticsStepParametersValidator.java index ba1666c73..d98b78d3a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsGeneratorStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CalculateStatisticsStepParametersValidator.java @@ -21,7 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsGeneratorStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.CalculateStatisticsTasklet; import 
uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.DbNameValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.InputStudyIdValidator; @@ -35,11 +35,11 @@ /** * Validates the job parameters necessary to execute a - * {@link PopulationStatisticsGeneratorStep} + * {@link CalculateStatisticsTasklet} */ -public class PopulationStatisticsGeneratorStepParametersValidator extends DefaultJobParametersValidator { +public class CalculateStatisticsStepParametersValidator extends DefaultJobParametersValidator { - public PopulationStatisticsGeneratorStepParametersValidator() { + public CalculateStatisticsStepParametersValidator() { super(new String[]{JobParametersNames.DB_NAME, JobParametersNames.INPUT_STUDY_ID, JobParametersNames.INPUT_VCF_ID, diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CreateDatabaseIndexesStepParametersValidator.java similarity index 88% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CreateDatabaseIndexesStepParametersValidator.java index 67083c488..96882d090 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CreateDatabaseIndexesStepParametersValidator.java @@ -21,6 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadGenesStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; import 
uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsFeaturesNameValidator; @@ -31,11 +32,11 @@ import java.util.List; /** - * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.steps.GeneLoaderStep} + * Validates the job parameters necessary to execute an {@link LoadGenesStepConfiguration} */ -public class IndexesGeneratorStepParametersValidator extends DefaultJobParametersValidator { +public class CreateDatabaseIndexesStepParametersValidator extends DefaultJobParametersValidator { - public IndexesGeneratorStepParametersValidator() { + public CreateDatabaseIndexesStepParametersValidator() { super(new String[]{JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, JobParametersNames.DB_NAME}, new String[]{}); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepParametersValidator.java similarity index 88% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepParametersValidator.java index 61702360b..d80571825 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepParametersValidator.java @@ -21,6 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropFilesByStudyStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsFilesNameValidator; @@ -32,11 +33,11 @@ 
import java.util.List; /** - * Validates the job parameters necessary to execute a {@link uk.ac.ebi.eva.pipeline.jobs.steps.DropFilesByStudyStep} + * Validates the job parameters necessary to execute a {@link DropFilesByStudyStepConfiguration} */ -public class DropFilesByStudyStepValidator extends DefaultJobParametersValidator { +public class DropFilesByStudyStepParametersValidator extends DefaultJobParametersValidator { - public DropFilesByStudyStepValidator() { + public DropFilesByStudyStepParametersValidator() { super(new String[]{JobParametersNames.DB_COLLECTIONS_FILES_NAME, JobParametersNames.DB_NAME, JobParametersNames.INPUT_STUDY_ID}, diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidator.java similarity index 89% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidator.java index 918643b28..84d1f89e7 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidator.java @@ -21,6 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropSingleStudyVariantsStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsVariantsNameValidator; @@ -33,11 +34,11 @@ /** * Validates the job parameters necessary to execute a 
- * {@link uk.ac.ebi.eva.pipeline.jobs.steps.DropSingleStudyVariantsStep} + * {@link DropSingleStudyVariantsStepConfiguration} */ -public class DropSingleStudyVariantsStepValidator extends DefaultJobParametersValidator { +public class DropSingleStudyVariantsStepParametersValidator extends DefaultJobParametersValidator { - public DropSingleStudyVariantsStepValidator() { + public DropSingleStudyVariantsStepParametersValidator() { super(new String[]{JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, JobParametersNames.DB_NAME, JobParametersNames.INPUT_STUDY_ID}, diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GenerateVepAnnotationStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GenerateVepAnnotationStepParametersValidator.java index 0743cb396..916b9348c 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GenerateVepAnnotationStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GenerateVepAnnotationStepParametersValidator.java @@ -21,7 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; -import uk.ac.ebi.eva.pipeline.jobs.steps.GenerateVepAnnotationStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.GenerateVepAnnotationStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.AnnotationOverwriteValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigChunkSizeValidator; @@ -44,7 +44,7 @@ import java.util.List; /** - * Validates the job parameters necessary to execute a {@link GenerateVepAnnotationStep} + * Validates the job parameters necessary to execute a {@link GenerateVepAnnotationStepConfiguration} *

* The parameters OUTPUT_DIR_ANNOTATION, INPUT_STUDY_ID and INPUT_VCF_ID are used to build the VEP input/output options * {@see uk.ac.ebi.eva.pipeline.configuration.JobOptions#loadPipelineOptions()} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/FileLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadFileStepParametersValidator.java similarity index 91% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/FileLoaderStepParametersValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadFileStepParametersValidator.java index 0a3795279..14667bc06 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/FileLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadFileStepParametersValidator.java @@ -21,6 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadFileStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsFilesNameValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.DbNameValidator; @@ -35,11 +36,11 @@ import java.util.List; /** - * Validates the job parameters necessary to execute a {@link uk.ac.ebi.eva.pipeline.jobs.steps.LoadFileStep} + * Validates the job parameters necessary to execute a {@link LoadFileStepConfiguration} */ -public class FileLoaderStepParametersValidator extends DefaultJobParametersValidator { +public class LoadFileStepParametersValidator extends DefaultJobParametersValidator { - public FileLoaderStepParametersValidator() { + public LoadFileStepParametersValidator() { super(new String[]{JobParametersNames.DB_NAME, JobParametersNames.DB_COLLECTIONS_FILES_NAME, 
JobParametersNames.INPUT_STUDY_ID, diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadGenesStepParametersValidator.java similarity index 90% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadGenesStepParametersValidator.java index b52a5ffdb..f693a5d4f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadGenesStepParametersValidator.java @@ -21,6 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadGenesStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigChunkSizeValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; @@ -33,11 +34,11 @@ import java.util.List; /** - * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.steps.GeneLoaderStep} + * Validates the job parameters necessary to execute an {@link LoadGenesStepConfiguration} */ -public class GeneLoaderStepParametersValidator extends DefaultJobParametersValidator { +public class LoadGenesStepParametersValidator extends DefaultJobParametersValidator { - public GeneLoaderStepParametersValidator() { + public LoadGenesStepParametersValidator() { super(new String[]{JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, JobParametersNames.DB_NAME, JobParametersNames.INPUT_GTF}, diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadStatisticsStepParametersValidator.java similarity index 91% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsLoaderStepParametersValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadStatisticsStepParametersValidator.java index db4d4c9ff..678c9c962 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadStatisticsStepParametersValidator.java @@ -21,7 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsLoaderStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.LoadStatisticsTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsFilesNameValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsVariantsNameValidator; @@ -37,11 +37,11 @@ /** * Validates the job parameters necessary to execute a - * {@link PopulationStatisticsLoaderStep} + * {@link LoadStatisticsTasklet} */ -public class PopulationStatisticsLoaderStepParametersValidator extends DefaultJobParametersValidator { +public class LoadStatisticsStepParametersValidator extends DefaultJobParametersValidator { - public PopulationStatisticsLoaderStepParametersValidator() { + public LoadStatisticsStepParametersValidator() { super(new String[]{JobParametersNames.INPUT_STUDY_ID, JobParametersNames.INPUT_VCF_ID, JobParametersNames.DB_COLLECTIONS_FILES_NAME, diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVariantsStepParametersValidator.java similarity index 93% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVariantsStepParametersValidator.java index 684a5132f..7235ba5e9 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVariantsStepParametersValidator.java @@ -21,6 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVariantsStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigChunkSizeValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; @@ -38,11 +39,11 @@ /** * Validates the job parameters necessary to execute a - * {@link uk.ac.ebi.eva.pipeline.jobs.steps.VariantLoaderStep} + * {@link LoadVariantsStepConfiguration} */ -public class VariantLoaderStepParametersValidator extends DefaultJobParametersValidator { +public class LoadVariantsStepParametersValidator extends DefaultJobParametersValidator { - public VariantLoaderStepParametersValidator() { + public LoadVariantsStepParametersValidator() { super(new String[]{JobParametersNames.DB_NAME, JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, JobParametersNames.INPUT_STUDY_ID, diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java 
b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVepAnnotationStepParametersValidator.java similarity index 85% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVepAnnotationStepParametersValidator.java index 41f9fd06a..edd771b30 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVepAnnotationStepParametersValidator.java @@ -21,6 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVepAnnotationStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigChunkSizeValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; @@ -37,17 +38,17 @@ import java.util.List; /** - * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep} + * Validates the job parameters necessary to execute an {@link LoadVepAnnotationStepConfiguration} */ -public class AnnotationLoaderStepParametersValidator extends DefaultJobParametersValidator { +public class LoadVepAnnotationStepParametersValidator extends DefaultJobParametersValidator { private boolean isStudyIdRequired; - public AnnotationLoaderStepParametersValidator(boolean isStudyIdRequired) { + public LoadVepAnnotationStepParametersValidator(boolean isStudyIdRequired) { super(new String[]{JobParametersNames.DB_COLLECTIONS_ANNOTATIONS_NAME, - JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, - JobParametersNames.DB_NAME, - JobParametersNames.OUTPUT_DIR_ANNOTATION}, + 
JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, + JobParametersNames.DB_NAME, + JobParametersNames.OUTPUT_DIR_ANNOTATION}, new String[]{}); this.isStudyIdRequired = isStudyIdRequired; } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepParametersValidator.java similarity index 89% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepParametersValidator.java index d57b6ac64..1d31a8101 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepParametersValidator.java @@ -21,6 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.PullFilesAndStatisticsByStudyStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsVariantsNameValidator; @@ -33,11 +34,11 @@ /** * Validates the job parameters necessary to execute a - * {@link uk.ac.ebi.eva.pipeline.jobs.steps.PullFilesAndStatisticsByStudyStep} + * {@link PullFilesAndStatisticsByStudyStepConfiguration} */ -public class PullFilesAndStatisticsByStudyStepValidator extends DefaultJobParametersValidator { +public class PullFilesAndStatisticsByStudyStepParametersValidator extends DefaultJobParametersValidator { - public PullFilesAndStatisticsByStudyStepValidator() { + public 
PullFilesAndStatisticsByStudyStepParametersValidator() { super(new String[]{JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, JobParametersNames.DB_NAME, JobParametersNames.INPUT_STUDY_ID}, diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunner.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunner.java index bbd05bea4..eb122eb26 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunner.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunner.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package uk.ac.ebi.eva.pipeline.runner; import org.slf4j.Logger; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobTest.java similarity index 97% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJobTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobTest.java index a73f57a08..c8a506c76 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2015-2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs; +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.junit.Assert; import org.junit.Before; @@ -42,6 +42,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AggregatedVcfJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobTest.java similarity index 97% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobTest.java index d16628b21..9c2455fa0 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs; +package uk.ac.ebi.eva.pipeline.configuration.jobs; import com.mongodb.BasicDBList; import com.mongodb.DBCursor; @@ -32,6 +32,7 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/DropStudyJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java similarity index 95% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/DropStudyJobTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java index 1f9921463..9f02a1ebc 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/DropStudyJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs; + +package uk.ac.ebi.eva.pipeline.configuration.jobs; import com.mongodb.DBCollection; import org.junit.Rule; @@ -29,6 +30,8 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -45,7 +48,7 @@ import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; /** - * Test for {@link PopulationStatisticsJob} + * Test for {@link DropStudyJob} */ @RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobTest.java similarity index 96% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobTest.java index 284fc6369..dc959d4ca 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobTest.java @@ -1,11 +1,11 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -14,7 +14,7 @@ * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs; +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.junit.Before; import org.junit.Rule; @@ -30,6 +30,7 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.Application; +import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobWorkflowTest.java similarity index 98% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobWorkflowTest.java index d1dc1883d..5d3be2bad 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobWorkflowTest.java @@ -1,11 +1,11 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -14,7 +14,7 @@ * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs; +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.junit.Before; import org.junit.Rule; @@ -32,6 +32,7 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; +import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobTest.java similarity index 96% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJobTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobTest.java index 9ee5c321d..51ab4f1a5 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2015-2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs; +package uk.ac.ebi.eva.pipeline.configuration.jobs; import org.junit.Before; import org.junit.Rule; @@ -33,6 +33,7 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; +import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationMetadataStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepTest.java similarity index 97% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationMetadataStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepTest.java index 285154355..9819ec8c3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationMetadataStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepTest.java @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; + +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.junit.Rule; import org.junit.Test; @@ -35,7 +36,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; -import uk.ac.ebi.eva.pipeline.jobs.AnnotationJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJob; import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -49,7 +50,7 @@ import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; /** - * TODO jmmut remove import AnnotationJob when we add the stepLauncherTestUtils + * Test for {@link AnnotationMetadataStepConfiguration} */ @RunWith(SpringRunner.class) @ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsGeneratorStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepTest.java similarity index 94% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsGeneratorStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepTest.java index 7c9bf8680..01d44236e 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsGeneratorStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.junit.Before; import org.junit.Rule; @@ -29,8 +29,7 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.PopulationStatisticsJob; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsGeneratorStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -49,12 +48,12 @@ import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; /** - * Test for {@link PopulationStatisticsGeneratorStep} + * Test for {@link CalculateStatisticsStepConfiguration} */ @RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {PopulationStatisticsJob.class, BatchTestConfiguration.class}) -public class PopulationStatisticsGeneratorStepTest { +public class CalculateStatisticsStepTest { private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/IndexesGeneratorStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepTest.java similarity index 92% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/IndexesGeneratorStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepTest.java index ec02d68ef..f7ec7e28c 100644 --- 
a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/IndexesGeneratorStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import com.mongodb.BasicDBObject; import com.mongodb.DBCollection; @@ -32,8 +32,7 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.DatabaseInitializationJob; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.IndexesGeneratorStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DatabaseInitializationJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; @@ -44,12 +43,12 @@ /** - * Test {@link IndexesGeneratorStep} + * Test {@link CreateDatabaseIndexesStepConfiguration} */ @RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {DatabaseInitializationJob.class, BatchTestConfiguration.class}) -public class IndexesGeneratorStepTest { +public class CreateDatabaseIndexesStepTest { private static final String COLLECTION_FEATURES_NAME = "features"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropFilesByStudyStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepTest.java similarity index 96% rename from 
src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropFilesByStudyStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepTest.java index 7e67c97c7..0f37e083b 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropFilesByStudyStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepTest.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import com.mongodb.DBCollection; import org.junit.Rule; @@ -33,7 +33,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.DropStudyJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; @@ -47,7 +47,7 @@ import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; /** - * Test for {@link DropFilesByStudyStep} + * Test for {@link DropFilesByStudyStepConfiguration} */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropSingleStudyVariantsStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java similarity index 96% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropSingleStudyVariantsStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java index 886ee017e..9bd16f6c4 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/DropSingleStudyVariantsStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java @@ -14,7 +14,7 @@ 
* limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import com.mongodb.DBCollection; import org.junit.Rule; @@ -33,7 +33,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.DropStudyJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -47,7 +47,7 @@ import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; /** - * Test for {@link DropSingleStudyVariantsStep} + * Test for {@link DropSingleStudyVariantsStepConfiguration} */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepTest.java similarity index 96% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepTest.java index f565bc1e1..a53a6b8c1 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import com.mongodb.BasicDBObject; import com.mongodb.DBCollection; @@ -31,7 +31,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.AnnotationJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -56,7 +56,7 @@ import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** - * Test for {@link GenerateVepAnnotationStep} + * Test for {@link GenerateVepAnnotationStepConfiguration} */ @RunWith(SpringRunner.class) @ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepTest.java similarity index 92% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepTest.java index 21785dbe8..0fea1cd7f 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepTest.java @@ -1,11 +1,11 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import com.mongodb.DBCollection; import com.mongodb.DBCursor; @@ -33,7 +33,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.GenotypedVcfJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; @@ -45,7 +45,7 @@ import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** - * Test for {@link LoadFileStep} + * Test for {@link LoadFileStepConfiguration} */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java similarity index 87% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsLoaderStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java index 6cfe074a0..a2760523b 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java @@ -1,4 +1,20 @@ -package uk.ac.ebi.eva.pipeline.jobs.steps; +/* + * Copyright 2015-2017 EMBL - 
European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import com.mongodb.DBCursor; import org.junit.Rule; @@ -24,9 +40,9 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.PopulationStatisticsJob; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsLoaderStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; +import uk.ac.ebi.eva.test.configuration.MongoOperationConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; @@ -45,13 +61,13 @@ import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** - * Test for {@link PopulationStatisticsLoaderStep} + * Test for {@link LoadStatisticsStepConfiguration} */ @RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {PopulationStatisticsJob.class, BatchTestConfiguration.class}) -public class PopulationStatisticsLoaderStepTest { - +@ContextConfiguration(classes = {PopulationStatisticsJob.class, BatchTestConfiguration.class, + MongoOperationConfiguration.class}) +public class 
LoadStatisticsStepTest { private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; private static final String SOURCE_FILE_NAME = "/input-files/statistics/1_1.source.stats.json.gz"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java similarity index 92% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java index 3ff5cf0c1..e921150f1 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java @@ -1,11 +1,11 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import org.junit.Before; import org.junit.Rule; @@ -36,7 +36,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.GenotypedVcfJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.GenotypedVcfJobTestUtils; @@ -48,13 +48,13 @@ import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** - * Test for {@link VariantLoaderStep} + * Test for {@link LoadVariantsStepConfiguration} */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) -public class VariantLoaderStepTest { +public class LoadVariantsStepTest { private static final int EXPECTED_VARIANTS = 300; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepTest.java similarity index 95% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepTest.java index 38a2428af..6cfe32f04 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 
(the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package uk.ac.ebi.eva.pipeline.jobs.steps; +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; @@ -34,7 +34,7 @@ import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.AnnotationJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VepOutputContent; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -55,7 +55,7 @@ import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; /** - * Test for {@link AnnotationLoaderStep}. In the context it is loaded {@link AnnotationJob} + * Test for {@link LoadVepAnnotationStepConfiguration}. In the context it is loaded {@link AnnotationJob} * because {@link JobLauncherTestUtils} require one {@link org.springframework.batch.core.Job} to be present in order * to run properly. 
*/ @@ -63,7 +63,7 @@ @ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) -public class AnnotationLoaderStepTest { +public class LoadVepAnnotationStepTest { private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; private static final String COLLECTION_ANNOTATIONS_NAME = "annotations"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java similarity index 97% rename from src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java index 89547ffda..18e1892e8 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java @@ -13,7 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package uk.ac.ebi.eva.pipeline.jobs.steps; + +package uk.ac.ebi.eva.pipeline.configuration.jobs.steps; import com.mongodb.DBCollection; import org.junit.Rule; @@ -32,7 +33,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.DropStudyJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -47,7 +48,7 @@ import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; /** - * Test for {@link PullFilesAndStatisticsByStudyStep} + * Test for {@link PullFilesAndStatisticsByStudyStepConfiguration} */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java index fe0e2d369..d1acf5507 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package uk.ac.ebi.eva.pipeline.io.readers; import org.junit.Rule; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/GeneWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/GeneWriterTest.java index de333b377..c20386c00 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/GeneWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/GeneWriterTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package uk.ac.ebi.eva.pipeline.io.writers; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/StatisticsMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/StatisticsMongoWriterTest.java index eac27b744..0de1f131c 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/StatisticsMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/StatisticsMongoWriterTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package uk.ac.ebi.eva.pipeline.io.writers; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java index 11ac06726..e4320134c 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package uk.ac.ebi.eva.pipeline.io.writers; import com.mongodb.BasicDBList; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantSourceEntityMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantSourceEntityMongoWriterTest.java index cb29a148b..8aa843982 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantSourceEntityMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantSourceEntityMongoWriterTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package uk.ac.ebi.eva.pipeline.io.writers; import com.mongodb.DBCollection; @@ -34,7 +35,7 @@ import uk.ac.ebi.eva.commons.models.data.VariantSourceEntity; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.VcfHeaderReader; -import uk.ac.ebi.eva.pipeline.jobs.steps.LoadFileStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadFileStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.configuration.BaseTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -63,7 +64,7 @@ */ @RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {BaseTestConfiguration.class, LoadFileStep.class}) +@ContextConfiguration(classes = {BaseTestConfiguration.class, LoadFileStepConfiguration.class}) public class VariantSourceEntityMongoWriterTest { private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java index 89f13f67c..7b6ce8f30 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java @@ -22,6 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AggregatedVcfJob; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -29,7 +30,7 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link 
uk.ac.ebi.eva.pipeline.jobs.AggregatedVcfJob} are + * Tests that the arguments necessary to run a {@link AggregatedVcfJob} are * correctly validated */ public class AggregatedVcfJobParametersValidatorTest { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidatorTest.java index a0b557900..04414ad54 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidatorTest.java @@ -22,6 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -29,7 +30,7 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.DropStudyJob} are + * Tests that the arguments necessary to run a {@link DropStudyJob} are * correctly validated */ public class DropStudyJobParametersValidatorTest { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java index 5ec32295a..fceaccb4c 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java @@ -22,6 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import 
uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -29,7 +30,7 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.GenotypedVcfJob} are + * Tests that the arguments necessary to run a {@link GenotypedVcfJob} are * correctly validated */ public class GenotypedVcfJobParametersValidatorTest { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationMetadataStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationMetadataStepParametersValidatorTest.java index df4cfb0cf..a8134768e 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationMetadataStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationMetadataStepParametersValidatorTest.java @@ -22,6 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVepAnnotationStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -30,7 +31,7 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep} are + * Tests that the arguments necessary to run a {@link LoadVepAnnotationStepConfiguration} are * correctly validated */ public class AnnotationMetadataStepParametersValidatorTest { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsGeneratorStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CalculateStatisticsStepParametersValidatorTest.java similarity index 90% rename from 
src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsGeneratorStepParametersValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CalculateStatisticsStepParametersValidatorTest.java index ed4c04751..1a73188ae 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsGeneratorStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CalculateStatisticsStepParametersValidatorTest.java @@ -22,7 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsGeneratorStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.CalculateStatisticsTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -31,12 +31,12 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link PopulationStatisticsGeneratorStep} + * Tests that the arguments necessary to run a {@link CalculateStatisticsTasklet} * are correctly validated */ -public class PopulationStatisticsGeneratorStepParametersValidatorTest { +public class CalculateStatisticsStepParametersValidatorTest { - private PopulationStatisticsGeneratorStepParametersValidator validator; + private CalculateStatisticsStepParametersValidator validator; @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -47,7 +47,7 @@ public class PopulationStatisticsGeneratorStepParametersValidatorTest { @Before public void setUp() throws IOException { - validator = new PopulationStatisticsGeneratorStepParametersValidator(); + validator = new CalculateStatisticsStepParametersValidator(); requiredParameters = new TreeMap<>(); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("dbName")); 
requiredParameters.put(JobParametersNames.OUTPUT_DIR_STATISTICS, diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CreateDatabaseIndexesStepParametersValidatorTest.java similarity index 87% rename from src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CreateDatabaseIndexesStepParametersValidatorTest.java index fe0ad7865..46b24c489 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/CreateDatabaseIndexesStepParametersValidatorTest.java @@ -21,6 +21,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVepAnnotationStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import java.io.IOException; @@ -28,11 +29,11 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep} are + * Tests that the arguments necessary to run a {@link LoadVepAnnotationStepConfiguration} are * correctly validated */ -public class IndexesGeneratorStepParametersValidatorTest { - private IndexesGeneratorStepParametersValidator validator; +public class CreateDatabaseIndexesStepParametersValidatorTest { + private CreateDatabaseIndexesStepParametersValidator validator; private Map requiredParameters; @@ -40,7 +41,7 @@ public class IndexesGeneratorStepParametersValidatorTest { @Before public void setUp() throws Exception { - validator = new IndexesGeneratorStepParametersValidator(); + validator = new 
CreateDatabaseIndexesStepParametersValidator(); requiredParameters = new TreeMap<>(); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("dbName")); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepParametersValidatorTest.java similarity index 89% rename from src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepParametersValidatorTest.java index bbc08f887..d621928a3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropFilesByStudyStepParametersValidatorTest.java @@ -23,6 +23,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropFilesByStudyStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -31,12 +32,12 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.DropFilesByStudyStep} + * Tests that the arguments necessary to run a {@link DropFilesByStudyStepConfiguration} * are correctly validated */ -public class DropFilesByStudyStepValidatorTest { +public class DropFilesByStudyStepParametersValidatorTest { - private DropFilesByStudyStepValidator validator; + private DropFilesByStudyStepParametersValidator validator; @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -47,7 +48,7 @@ public class DropFilesByStudyStepValidatorTest { @Before public void setUp() throws IOException { - validator 
= new DropFilesByStudyStepValidator(); + validator = new DropFilesByStudyStepParametersValidator(); requiredParameters = new TreeMap<>(); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("database")); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidatorTest.java similarity index 89% rename from src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidatorTest.java index 4c9287993..7a3c41c10 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidatorTest.java @@ -22,6 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropSingleStudyVariantsStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -30,12 +31,12 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.DropSingleStudyVariantsStep} + * Tests that the arguments necessary to run a {@link DropSingleStudyVariantsStepConfiguration} * are correctly validated */ -public class DropSingleStudyVariantsStepValidatorTest { +public class DropSingleStudyVariantsStepParametersValidatorTest { - private DropSingleStudyVariantsStepValidator validator; + private DropSingleStudyVariantsStepParametersValidator validator; @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new 
PipelineTemporaryFolderRule(); @@ -46,7 +47,7 @@ public class DropSingleStudyVariantsStepValidatorTest { @Before public void setUp() throws IOException { - validator = new DropSingleStudyVariantsStepValidator(); + validator = new DropSingleStudyVariantsStepParametersValidator(); requiredParameters = new TreeMap<>(); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("database")); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GenerateVepAnnotationStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GenerateVepAnnotationStepParametersValidatorTest.java index d53bc7d16..1c90c158b 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GenerateVepAnnotationStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GenerateVepAnnotationStepParametersValidatorTest.java @@ -22,7 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; -import uk.ac.ebi.eva.pipeline.jobs.steps.GenerateVepAnnotationStep; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.GenerateVepAnnotationStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -31,7 +31,7 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link GenerateVepAnnotationStep} are + * Tests that the arguments necessary to run a {@link GenerateVepAnnotationStepConfiguration} are * correctly validated */ public class GenerateVepAnnotationStepParametersValidatorTest { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/FileLoaderStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadFileStepParametersValidatorTest.java similarity index 93% rename from 
src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/FileLoaderStepParametersValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadFileStepParametersValidatorTest.java index 6e46f3946..461503fad 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/FileLoaderStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadFileStepParametersValidatorTest.java @@ -22,6 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadFileStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -30,12 +31,12 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.LoadFileStep} are + * Tests that the arguments necessary to run a {@link LoadFileStepConfiguration} are * correctly validated */ -public class FileLoaderStepParametersValidatorTest { +public class LoadFileStepParametersValidatorTest { - private FileLoaderStepParametersValidator validator; + private LoadFileStepParametersValidator validator; @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -44,7 +45,7 @@ public class FileLoaderStepParametersValidatorTest { @Before public void setUp() throws Exception { - validator = new FileLoaderStepParametersValidator(); + validator = new LoadFileStepParametersValidator(); requiredParameters = new TreeMap<>(); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("database")); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadGenesStepParametersValidatorTest.java similarity index 91% 
rename from src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadGenesStepParametersValidatorTest.java index be1318de9..853f4c1f7 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadGenesStepParametersValidatorTest.java @@ -22,6 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVepAnnotationStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -30,11 +31,11 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep} are + * Tests that the arguments necessary to run a {@link LoadVepAnnotationStepConfiguration} are * correctly validated */ -public class GeneLoaderStepParametersValidatorTest { - private GeneLoaderStepParametersValidator validator; +public class LoadGenesStepParametersValidatorTest { + private LoadGenesStepParametersValidator validator; @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -45,7 +46,7 @@ public class GeneLoaderStepParametersValidatorTest { @Before public void setUp() throws Exception { - validator = new GeneLoaderStepParametersValidator(); + validator = new LoadGenesStepParametersValidator(); requiredParameters = new TreeMap<>(); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("dbName")); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsLoaderStepParametersValidatorTest.java 
b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadStatisticsStepParametersValidatorTest.java similarity index 92% rename from src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsLoaderStepParametersValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadStatisticsStepParametersValidatorTest.java index 435828b2d..8d2f545ad 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PopulationStatisticsLoaderStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadStatisticsStepParametersValidatorTest.java @@ -22,7 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsLoaderStep; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.LoadStatisticsTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -31,12 +31,12 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link PopulationStatisticsLoaderStep} are + * Tests that the arguments necessary to run a {@link LoadStatisticsTasklet} are * correctly validated */ -public class PopulationStatisticsLoaderStepParametersValidatorTest { +public class LoadStatisticsStepParametersValidatorTest { - private PopulationStatisticsLoaderStepParametersValidator validator; + private LoadStatisticsStepParametersValidator validator; @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -47,7 +47,7 @@ public class PopulationStatisticsLoaderStepParametersValidatorTest { @Before public void setUp() throws IOException { - validator = new PopulationStatisticsLoaderStepParametersValidator(); + validator = new LoadStatisticsStepParametersValidator(); requiredParameters = new TreeMap<>(); 
requiredParameters.put(JobParametersNames.INPUT_STUDY_ID, new JobParameter("inputStudyId")); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVariantsStepParametersValidatorTest.java similarity index 94% rename from src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVariantsStepParametersValidatorTest.java index 1f9ba1db3..fad005d9c 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVariantsStepParametersValidatorTest.java @@ -23,6 +23,7 @@ import org.springframework.batch.core.JobParametersInvalidException; import org.springframework.batch.core.JobParametersValidator; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVariantsStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -31,10 +32,10 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.VariantLoaderStep} are correctly + * Tests that the arguments necessary to run a {@link LoadVariantsStepConfiguration} are correctly * validated */ -public class VariantLoaderStepParametersValidatorTest { +public class LoadVariantsStepParametersValidatorTest { private JobParametersValidator validator; @@ -47,7 +48,7 @@ public class VariantLoaderStepParametersValidatorTest { @Before public void setUp() throws IOException { - validator = new VariantLoaderStepParametersValidator(); + validator = new LoadVariantsStepParametersValidator(); requiredParameters = new TreeMap<>(); requiredParameters.put(JobParametersNames.DB_NAME, new 
JobParameter("database")); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVepAnnotationStepParametersValidatorTest.java similarity index 90% rename from src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVepAnnotationStepParametersValidatorTest.java index 922ee5e71..27c68a907 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/LoadVepAnnotationStepParametersValidatorTest.java @@ -22,6 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVepAnnotationStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -30,11 +31,11 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep} are + * Tests that the arguments necessary to run a {@link LoadVepAnnotationStepConfiguration} are * correctly validated */ -public class AnnotationLoaderStepParametersValidatorTest { - private AnnotationLoaderStepParametersValidator validator; +public class LoadVepAnnotationStepParametersValidatorTest { + private LoadVepAnnotationStepParametersValidator validator; @Rule public PipelineTemporaryFolderRule temporaryFolder = new PipelineTemporaryFolderRule(); @@ -46,7 +47,7 @@ public class AnnotationLoaderStepParametersValidatorTest { @Before public void setUp() throws Exception { boolean studyIdRequired = true; - validator = new 
AnnotationLoaderStepParametersValidator(studyIdRequired); + validator = new LoadVepAnnotationStepParametersValidator(studyIdRequired); final String dir = temporaryFolder.getRoot().getCanonicalPath(); requiredParameters = new TreeMap<>(); @@ -112,7 +113,7 @@ public void inputVcfIdIsNotRequired() throws JobParametersInvalidException, IOEx requiredParameters.remove(JobParametersNames.INPUT_VCF_ID); boolean studyIdNotRequired = false; - validator = new AnnotationLoaderStepParametersValidator(studyIdNotRequired); + validator = new LoadVepAnnotationStepParametersValidator(studyIdNotRequired); validator.validate(new JobParameters(requiredParameters)); } @@ -127,7 +128,7 @@ public void inputStudyIdIsNotRequired() throws JobParametersInvalidException, IO requiredParameters.remove(JobParametersNames.INPUT_STUDY_ID); boolean studyIdNotRequired = false; - validator = new AnnotationLoaderStepParametersValidator(studyIdNotRequired); + validator = new LoadVepAnnotationStepParametersValidator(studyIdNotRequired); validator.validate(new JobParameters(requiredParameters)); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepParametersValidatorTest.java similarity index 88% rename from src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepParametersValidatorTest.java index 0dae1f7f0..213e2da02 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/PullFilesAndStatisticsByStudyStepParametersValidatorTest.java @@ -23,6 +23,7 @@ import org.springframework.batch.core.JobParameters; import 
org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.PullFilesAndStatisticsByStudyStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -32,11 +33,11 @@ /** * Tests that the arguments necessary to run a - * {@link uk.ac.ebi.eva.pipeline.jobs.steps.PullFilesAndStatisticsByStudyStep} are correctly validated + * {@link PullFilesAndStatisticsByStudyStepConfiguration} are correctly validated */ -public class PullFilesAndStatisticsByStudyStepValidatorTest { +public class PullFilesAndStatisticsByStudyStepParametersValidatorTest { - private PullFilesAndStatisticsByStudyStepValidator validator; + private PullFilesAndStatisticsByStudyStepParametersValidator validator; @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -47,7 +48,7 @@ public class PullFilesAndStatisticsByStudyStepValidatorTest { @Before public void setUp() throws IOException { - validator = new PullFilesAndStatisticsByStudyStepValidator(); + validator = new PullFilesAndStatisticsByStudyStepParametersValidator(); requiredParameters = new TreeMap<>(); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("database")); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java index 757542621..22eda9d66 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package uk.ac.ebi.eva.pipeline.runner; import org.junit.Before; From c5c4611335293afa60fa66911832592041439d6a Mon Sep 17 00:00:00 2001 From: jorizci Date: Tue, 23 May 2017 15:51:29 +0100 Subject: [PATCH 31/48] Added documentation and licenses --- .../models/mongo/entity/VariantDocument.java | 18 ++++++++++++++++++ .../entity/projections/SimplifiedVariant.java | 18 ++++++++++++++++++ .../mongo/entity/subdocuments/HgvsMongo.java | 18 ++++++++++++++++++ .../mongo/entity/subdocuments/VariantAt.java | 18 ++++++++++++++++++ .../subdocuments/VariantSourceEntryMongo.java | 18 ++++++++++++++++++ .../subdocuments/VariantStatsMongo.java | 19 ++++++++++++++++++- 6 files changed, 108 insertions(+), 1 deletion(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java index d17e88250..e67033b0f 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java @@ -1,3 +1,18 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package uk.ac.ebi.eva.commons.models.mongo.entity; import org.opencb.commons.utils.CryptoUtils; @@ -16,6 +31,9 @@ import java.util.Map; import java.util.Set; +/** + * Mongo database representation of a Variant. 
+ */ @Document public class VariantDocument { diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java index 7f080ffb6..f1175c996 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java @@ -1,3 +1,18 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package uk.ac.ebi.eva.commons.models.mongo.entity.projections; import org.springframework.data.annotation.Id; @@ -23,6 +38,9 @@ import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.createHgvsMongo; import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.generateAtField; +/** + * Simplified representation of variant to be used when inserting or updating a variant + */ public class SimplifiedVariant { @Id diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java index 88fefaa53..34b5ec163 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java @@ -1,7 +1,25 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; import org.springframework.data.mongodb.core.mapping.Field; +/** + * Mongo database representation of HGVS field. 
+ */ public class HgvsMongo { private static final String TYPE_FIELD = "type"; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAt.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAt.java index 11364c811..d07cdad51 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAt.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantAt.java @@ -1,3 +1,18 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; import org.springframework.data.mongodb.core.mapping.Field; @@ -5,6 +20,9 @@ import java.util.HashSet; import java.util.Set; +/** + * Mongo database representation of a Variant AT field. 
+ */ public class VariantAt { private static final String CHUNK_IDS_FIELD = "chunkIds"; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java index 0e0d67e22..be8cebdf5 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java @@ -1,3 +1,18 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; import com.mongodb.BasicDBObject; @@ -13,6 +28,9 @@ import java.util.logging.Level; import java.util.logging.Logger; +/** + * Mongo database representation of Variant Source entry. 
+ */ public class VariantSourceEntryMongo { public final static char CHARACTER_TO_REPLACE_DOTS = (char) 163; // <-- £ diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java index 35cfa0df2..30099ce36 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java @@ -1,6 +1,20 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments; -import com.mongodb.BasicDBObject; import org.opencb.biodata.models.feature.Genotype; import org.springframework.data.mongodb.core.mapping.Field; import uk.ac.ebi.eva.commons.models.data.VariantStats; @@ -8,6 +22,9 @@ import java.util.HashMap; import java.util.Map; +/** + * Mongo database representation of Variant Stats. 
+ */ public class VariantStatsMongo { public final static String COHORT_ID = "cid"; From 289fb2a1bae758602988e3217139e3d8a7dd37ed Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Wed, 5 Apr 2017 10:57:16 +0100 Subject: [PATCH 32/48] rename flows and jobs, adding "Configuration" --- .../uk/ac/ebi/eva/pipeline/configuration/BeanNames.java | 3 ++- ...atedVcfJob.java => AggregatedVcfJobConfiguration.java} | 8 ++++---- ...AnnotationJob.java => AnnotationJobConfiguration.java} | 8 ++++---- ...b.java => DatabaseInitializationJobConfiguration.java} | 4 ++-- .../{DropStudyJob.java => DropStudyJobConfiguration.java} | 4 ++-- ...typedVcfJob.java => GenotypedVcfJobConfiguration.java} | 8 ++++---- ...Job.java => PopulationStatisticsJobConfiguration.java} | 8 ++++---- ...notationFlow.java => AnnotationFlowConfiguration.java} | 2 +- ...onal.java => AnnotationFlowOptionalConfiguration.java} | 6 +++--- ...ParallelStatisticsAndAnnotationFlowConfiguration.java} | 4 ++-- ...ow.java => PopulationStatisticsFlowConfiguration.java} | 2 +- ...=> PopulationStatisticsOptionalFlowConfiguration.java} | 6 +++--- .../ebi/eva/pipeline/parameters/AnnotationParameters.java | 4 ++-- .../job/AggregatedVcfJobParametersValidator.java | 4 ++-- .../validation/job/AnnotationJobParametersValidator.java | 7 ++++--- .../validation/job/DropStudyJobParametersValidator.java | 4 ++-- .../job/GenotypedVcfJobParametersValidator.java | 4 ++-- .../pipeline/configuration/jobs/AggregatedVcfJobTest.java | 5 ++--- .../pipeline/configuration/jobs/AnnotationJobTest.java | 5 ++--- .../eva/pipeline/configuration/jobs/DropStudyJobTest.java | 6 ++---- .../pipeline/configuration/jobs/GenotypedVcfJobTest.java | 5 ++--- .../configuration/jobs/GenotypedVcfJobWorkflowTest.java | 5 ++--- .../configuration/jobs/PopulationStatisticsJobTest.java | 5 ++--- .../jobs/steps/AnnotationMetadataStepTest.java | 4 ++-- .../jobs/steps/CalculateStatisticsStepTest.java | 4 ++-- .../jobs/steps/CreateDatabaseIndexesStepTest.java | 4 ++-- 
.../jobs/steps/DropFilesByStudyStepTest.java | 4 ++-- .../jobs/steps/DropSingleStudyVariantsStepTest.java | 4 ++-- .../jobs/steps/GenerateVepAnnotationStepTest.java | 4 ++-- .../configuration/jobs/steps/LoadFileStepTest.java | 4 ++-- .../configuration/jobs/steps/LoadStatisticsStepTest.java | 4 ++-- .../configuration/jobs/steps/LoadVariantsStepTest.java | 4 ++-- .../jobs/steps/LoadVepAnnotationStepTest.java | 6 +++--- .../jobs/steps/PullFilesAndStatisticsByStudyStepTest.java | 4 ++-- .../job/AggregatedVcfJobParametersValidatorTest.java | 4 ++-- .../job/DropStudyJobParametersValidatorTest.java | 4 ++-- .../job/GenotypedVcfJobParametersValidatorTest.java | 4 ++-- .../EvaPipelineJobLauncherCommandLineRunnerTest.java | 2 +- 38 files changed, 86 insertions(+), 91 deletions(-) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/{AggregatedVcfJob.java => AggregatedVcfJobConfiguration.java} (95%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/{AnnotationJob.java => AnnotationJobConfiguration.java} (95%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/{DatabaseInitializationJob.java => DatabaseInitializationJobConfiguration.java} (96%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/{DropStudyJob.java => DropStudyJobConfiguration.java} (97%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/{GenotypedVcfJob.java => GenotypedVcfJobConfiguration.java} (94%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/{PopulationStatisticsJob.java => PopulationStatisticsJobConfiguration.java} (93%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/{AnnotationFlow.java => AnnotationFlowConfiguration.java} (98%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/{AnnotationFlowOptional.java => AnnotationFlowOptionalConfiguration.java} (92%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/{ParallelStatisticsAndAnnotationFlow.java => 
ParallelStatisticsAndAnnotationFlowConfiguration.java} (92%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/{PopulationStatisticsFlow.java => PopulationStatisticsFlowConfiguration.java} (97%) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/{PopulationStatisticsOptionalFlow.java => PopulationStatisticsOptionalFlowConfiguration.java} (91%) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java index 6fa82ac8d..716dd5458 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package uk.ac.ebi.eva.pipeline.configuration; /** diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobConfiguration.java similarity index 95% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobConfiguration.java index 2afac40ad..51469407e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobConfiguration.java @@ -32,7 +32,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.AnnotationFlowOptional; +import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.AnnotationFlowOptionalConfiguration; import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadFileStepConfiguration; import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVariantsStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; @@ -53,10 +53,10 @@ */ @Configuration @EnableBatchProcessing -@Import({LoadVariantsStepConfiguration.class, LoadFileStepConfiguration.class, AnnotationFlowOptional.class}) -public class AggregatedVcfJob { +@Import({LoadVariantsStepConfiguration.class, LoadFileStepConfiguration.class, AnnotationFlowOptionalConfiguration.class}) +public class AggregatedVcfJobConfiguration { - private static final Logger logger = LoggerFactory.getLogger(AggregatedVcfJob.class); + private static final Logger logger = LoggerFactory.getLogger(AggregatedVcfJobConfiguration.class); @Autowired @Qualifier(VEP_ANNOTATION_OPTIONAL_FLOW) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobConfiguration.java similarity 
index 95% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobConfiguration.java index 6afc4a89c..20353dba5 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobConfiguration.java @@ -30,7 +30,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.AnnotationFlow; +import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.AnnotationFlowConfiguration; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; import uk.ac.ebi.eva.pipeline.parameters.validation.job.AnnotationJobParametersValidator; @@ -51,10 +51,10 @@ @Configuration @EnableBatchProcessing -@Import({AnnotationFlow.class}) -public class AnnotationJob { +@Import({AnnotationFlowConfiguration.class}) +public class AnnotationJobConfiguration { - private static final Logger logger = LoggerFactory.getLogger(AnnotationJob.class); + private static final Logger logger = LoggerFactory.getLogger(AnnotationJobConfiguration.class); @Autowired @Qualifier(VEP_ANNOTATION_FLOW) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DatabaseInitializationJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DatabaseInitializationJobConfiguration.java similarity index 96% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DatabaseInitializationJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DatabaseInitializationJobConfiguration.java index d1eb6d0a7..495d9a2a7 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DatabaseInitializationJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DatabaseInitializationJobConfiguration.java @@ -48,9 +48,9 @@ @Configuration @EnableBatchProcessing 
@Import({LoadGenesStepConfiguration.class, CreateDatabaseIndexesStepConfiguration.class}) -public class DatabaseInitializationJob { +public class DatabaseInitializationJobConfiguration { - private static final Logger logger = LoggerFactory.getLogger(DatabaseInitializationJob.class); + private static final Logger logger = LoggerFactory.getLogger(DatabaseInitializationJobConfiguration.class); @Autowired @Qualifier(LOAD_GENES_STEP) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobConfiguration.java similarity index 97% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobConfiguration.java index 3f6c62767..6a055c053 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobConfiguration.java @@ -50,9 +50,9 @@ @Configuration @EnableBatchProcessing @Import({DropSingleStudyVariantsStepConfiguration.class, PullFilesAndStatisticsByStudyStepConfiguration.class, DropFilesByStudyStepConfiguration.class}) -public class DropStudyJob { +public class DropStudyJobConfiguration { - private static final Logger logger = LoggerFactory.getLogger(DropStudyJob.class); + private static final Logger logger = LoggerFactory.getLogger(DropStudyJobConfiguration.class); @Autowired @Qualifier(DROP_SINGLE_STUDY_VARIANTS_STEP) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobConfiguration.java similarity index 94% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobConfiguration.java index f8f5019a1..11b820919 100644 --- 
a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobConfiguration.java @@ -32,7 +32,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.ParallelStatisticsAndAnnotationFlow; +import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.ParallelStatisticsAndAnnotationFlowConfiguration; import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadFileStepConfiguration; import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.LoadVariantsStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; @@ -54,10 +54,10 @@ */ @Configuration @EnableBatchProcessing -@Import({LoadVariantsStepConfiguration.class, LoadFileStepConfiguration.class, ParallelStatisticsAndAnnotationFlow.class}) -public class GenotypedVcfJob { +@Import({LoadVariantsStepConfiguration.class, LoadFileStepConfiguration.class, ParallelStatisticsAndAnnotationFlowConfiguration.class}) +public class GenotypedVcfJobConfiguration { - private static final Logger logger = LoggerFactory.getLogger(GenotypedVcfJob.class); + private static final Logger logger = LoggerFactory.getLogger(GenotypedVcfJobConfiguration.class); @Autowired @Qualifier(PARALLEL_STATISTICS_AND_ANNOTATION) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobConfiguration.java similarity index 93% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJob.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobConfiguration.java index 17ee37861..4af251c9f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJob.java +++ 
b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobConfiguration.java @@ -29,7 +29,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; -import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.PopulationStatisticsFlow; +import uk.ac.ebi.eva.pipeline.configuration.jobs.flows.PopulationStatisticsFlowConfiguration; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_FLOW; @@ -42,10 +42,10 @@ */ @Configuration @EnableBatchProcessing -@Import({PopulationStatisticsFlow.class}) -public class PopulationStatisticsJob { +@Import({PopulationStatisticsFlowConfiguration.class}) +public class PopulationStatisticsJobConfiguration { - private static final Logger logger = LoggerFactory.getLogger(PopulationStatisticsJob.class); + private static final Logger logger = LoggerFactory.getLogger(PopulationStatisticsJobConfiguration.class); @Autowired @Qualifier(CALCULATE_STATISTICS_FLOW) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowConfiguration.java similarity index 98% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlow.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowConfiguration.java index 93bbe9d24..baa8b107e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowConfiguration.java @@ -45,7 +45,7 @@ @Configuration @EnableBatchProcessing @Import({GenerateVepAnnotationStepConfiguration.class, LoadVepAnnotationStepConfiguration.class, AnnotationMetadataStepConfiguration.class}) -public class AnnotationFlow { +public class AnnotationFlowConfiguration { @Autowired 
@Qualifier(GENERATE_VEP_ANNOTATION_STEP) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowOptional.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowOptionalConfiguration.java similarity index 92% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowOptional.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowOptionalConfiguration.java index 9ff502a14..576ca5772 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowOptional.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/AnnotationFlowOptionalConfiguration.java @@ -36,12 +36,12 @@ * Configuration class that defines an annotation process that can be skipped. *

* The flow uses the skipStepDecider to execute or not the pipeline depending 'annotation.skip' flag. In the case - * that the annotation flag is enabled, then the annotation flow proceeds as described in {@link AnnotationFlow} + * that the annotation flag is enabled, then the annotation flow proceeds as described in {@link AnnotationFlowConfiguration} */ @Configuration @EnableBatchProcessing -@Import({AnnotationFlow.class, JobExecutionDeciderConfiguration.class}) -public class AnnotationFlowOptional { +@Import({AnnotationFlowConfiguration.class, JobExecutionDeciderConfiguration.class}) +public class AnnotationFlowOptionalConfiguration { @Bean(VEP_ANNOTATION_OPTIONAL_FLOW) public Flow vepAnnotationOptionalFlow(@Qualifier(VEP_ANNOTATION_FLOW) Flow vepAnnotationFlow, diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/ParallelStatisticsAndAnnotationFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/ParallelStatisticsAndAnnotationFlowConfiguration.java similarity index 92% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/ParallelStatisticsAndAnnotationFlow.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/ParallelStatisticsAndAnnotationFlowConfiguration.java index 8bb3b9012..467abd9bd 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/ParallelStatisticsAndAnnotationFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/ParallelStatisticsAndAnnotationFlowConfiguration.java @@ -34,8 +34,8 @@ */ @Configuration @EnableBatchProcessing -@Import({AnnotationFlowOptional.class, PopulationStatisticsOptionalFlow.class}) -public class ParallelStatisticsAndAnnotationFlow { +@Import({AnnotationFlowOptionalConfiguration.class, PopulationStatisticsOptionalFlowConfiguration.class}) +public class ParallelStatisticsAndAnnotationFlowConfiguration { @Autowired @Qualifier(VEP_ANNOTATION_OPTIONAL_FLOW) diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsFlowConfiguration.java similarity index 97% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsFlow.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsFlowConfiguration.java index 79c97df2a..4cd1a6d8d 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsFlowConfiguration.java @@ -39,7 +39,7 @@ @Configuration @EnableBatchProcessing @Import({CalculateStatisticsStepConfiguration.class, LoadStatisticsStepConfiguration.class}) -public class PopulationStatisticsFlow { +public class PopulationStatisticsFlowConfiguration { @Autowired @Qualifier(CALCULATE_STATISTICS_STEP) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsOptionalFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsOptionalFlowConfiguration.java similarity index 91% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsOptionalFlow.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsOptionalFlowConfiguration.java index a4f109968..028a04300 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsOptionalFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/flows/PopulationStatisticsOptionalFlowConfiguration.java @@ -34,12 +34,12 @@ /** * Configuration that defines a calculate statistics flow that can be skipped depending on property 'statistics.skip' - * In the case that the property is set to false, then the process executes the flow at {@link PopulationStatisticsFlow} + * In the case 
that the property is set to false, then the process executes the flow at {@link PopulationStatisticsFlowConfiguration} */ @Configuration @EnableBatchProcessing -@Import({PopulationStatisticsFlow.class, JobExecutionDeciderConfiguration.class}) -public class PopulationStatisticsOptionalFlow { +@Import({PopulationStatisticsFlowConfiguration.class, JobExecutionDeciderConfiguration.class}) +public class PopulationStatisticsOptionalFlowConfiguration { @Bean(CALCULATE_STATISTICS_OPTIONAL_FLOW) public Flow calculateStatisticsOptionalFlow(@Qualifier(CALCULATE_STATISTICS_FLOW) Flow calculateStatisticsflow, diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java index 3dcb16d02..26cfa9b4e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java @@ -25,8 +25,8 @@ * Service that holds access to the values for annotatation steps like VEP etc. * * NOTE the @StepScope this is probably because the Step/Tasklet in this case the - * {@link uk.ac.ebi.eva.pipeline.configuration.jobs.flows.AnnotationFlow} is executed in parallel with statistics - * {@link uk.ac.ebi.eva.pipeline.configuration.jobs.flows.PopulationStatisticsFlow} and they are not sharing the same context. + * {@link uk.ac.ebi.eva.pipeline.configuration.jobs.flows.AnnotationFlowConfiguration} is executed in parallel with statistics + * {@link uk.ac.ebi.eva.pipeline.configuration.jobs.flows.PopulationStatisticsFlowConfiguration} and they are not sharing the same context. * With @JobScope will not work! 
*/ @Service diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidator.java index c1cf71b18..0afc8ee9c 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidator.java @@ -21,7 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; -import uk.ac.ebi.eva.pipeline.configuration.jobs.AggregatedVcfJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AggregatedVcfJobConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadVepAnnotationStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.AnnotationMetadataStepParametersValidator; @@ -33,7 +33,7 @@ import java.util.List; /** - * Validates the job parameters necessary to execute an {@link AggregatedVcfJob} + * Validates the job parameters necessary to execute an {@link AggregatedVcfJobConfiguration} */ public class AggregatedVcfJobParametersValidator extends DefaultJobParametersValidator { diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java index 85246e1d5..47d727155 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java @@ -21,15 +21,16 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import 
org.springframework.batch.core.job.DefaultJobParametersValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.AnnotationLoaderStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.AnnotationMetadataStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.GenerateVepAnnotationStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadVepAnnotationStepParametersValidator; import java.util.ArrayList; import java.util.List; /** - * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.AnnotationJob} + * Validates the job parameters necessary to execute an + * {@link uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJobConfiguration} */ public class AnnotationJobParametersValidator extends DefaultJobParametersValidator { @@ -44,7 +45,7 @@ private CompositeJobParametersValidator compositeJobParametersValidator(JobParam boolean studyIdRequired = false; jobParametersValidators.add(new GenerateVepAnnotationStepParametersValidator(studyIdRequired)); - jobParametersValidators.add(new AnnotationLoaderStepParametersValidator(studyIdRequired)); + jobParametersValidators.add(new LoadVepAnnotationStepParametersValidator(studyIdRequired)); jobParametersValidators.add(new AnnotationMetadataStepParametersValidator()); CompositeJobParametersValidator compositeJobParametersValidator = new CompositeJobParametersValidator(); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java index 1d23bb482..c0be47289 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java @@ -21,7 +21,7 @@ import 
org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; -import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJobConfiguration; import uk.ac.ebi.eva.pipeline.parameters.validation.step.DropFilesByStudyStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.DropSingleStudyVariantsStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.PullFilesAndStatisticsByStudyStepParametersValidator; @@ -30,7 +30,7 @@ import java.util.List; /** - * Validates the job parameters necessary to execute an {@link DropStudyJob} + * Validates the job parameters necessary to execute an {@link DropStudyJobConfiguration} */ public class DropStudyJobParametersValidator extends DefaultJobParametersValidator { diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidator.java index d255d3bef..f6a84c6f8 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidator.java @@ -21,7 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; -import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJobConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.step.LoadVepAnnotationStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.AnnotationMetadataStepParametersValidator; @@ -35,7 +35,7 @@ import java.util.List; /** - * 
Validates the job parameters necessary to execute an {@link GenotypedVcfJob} + * Validates the job parameters necessary to execute an {@link GenotypedVcfJobConfiguration} */ public class GenotypedVcfJobParametersValidator extends DefaultJobParametersValidator { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobTest.java index c8a506c76..e11aa67b5 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AggregatedVcfJobTest.java @@ -42,7 +42,6 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.AggregatedVcfJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -67,13 +66,13 @@ import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** - * Test for {@link AggregatedVcfJob} + * Test for {@link AggregatedVcfJobConfiguration} */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:variant-aggregated.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {AggregatedVcfJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {AggregatedVcfJobConfiguration.class, BatchTestConfiguration.class}) public class AggregatedVcfJobTest { public static final String INPUT = "/input-files/vcf/aggregated.vcf.gz"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobTest.java index 9c2455fa0..6ee820369 100644 --- 
a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/AnnotationJobTest.java @@ -32,7 +32,6 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -52,14 +51,14 @@ import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** - * Test for {@link AnnotationJob} + * Test for {@link AnnotationJobConfiguration} *

* TODO The test should fail when we will integrate the JobParameter validation since there are empty parameters for VEP */ @RunWith(SpringRunner.class) @SpringBootTest @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {AnnotationJobConfiguration.class, BatchTestConfiguration.class}) public class AnnotationJobTest { private static final String MOCK_VEP = "/mockvep.pl"; private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java index 9f02a1ebc..c6e777fd1 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java @@ -30,8 +30,6 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; -import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -48,11 +46,11 @@ import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; /** - * Test for {@link DropStudyJob} + * Test for {@link DropStudyJobConfiguration} */ @RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {DropStudyJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {DropStudyJobConfiguration.class, BatchTestConfiguration.class}) public class DropStudyJobTest { private static final String 
COLLECTION_VARIANTS_NAME = "variants"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobTest.java index dc959d4ca..8c6faf415 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobTest.java @@ -30,7 +30,6 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.Application; -import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -44,14 +43,14 @@ import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertFailed; /** - * Test for {@link GenotypedVcfJob} + * Test for {@link GenotypedVcfJobConfiguration} *

* TODO: FILE_WRONG_NO_ALT should be renamed because the alt allele is not missing but is the same as the reference */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {GenotypedVcfJobConfiguration.class, BatchTestConfiguration.class}) public class GenotypedVcfJobTest { @Rule diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobWorkflowTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobWorkflowTest.java index 5d3be2bad..7519db550 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobWorkflowTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/GenotypedVcfJobWorkflowTest.java @@ -32,7 +32,6 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -58,14 +57,14 @@ import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** - * Workflow test for {@link GenotypedVcfJob} + * Workflow test for {@link GenotypedVcfJobConfiguration} *

* TODO The test should fail when we will integrate the JobParameter validation since there are empty parameters for VEP */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {GenotypedVcfJobConfiguration.class, BatchTestConfiguration.class}) public class GenotypedVcfJobWorkflowTest { private static final String MOCK_VEP = "/mockvep.pl"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobTest.java index 51ab4f1a5..50e38ac07 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/PopulationStatisticsJobTest.java @@ -33,7 +33,6 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJob; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -50,11 +49,11 @@ import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** - * Test for {@link PopulationStatisticsJob} + * Test for {@link PopulationStatisticsJobConfiguration} */ @RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {PopulationStatisticsJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {PopulationStatisticsJobConfiguration.class, BatchTestConfiguration.class}) public class 
PopulationStatisticsJobTest { private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepTest.java index 9819ec8c3..f78e92f78 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/AnnotationMetadataStepTest.java @@ -36,7 +36,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJobConfiguration; import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -55,7 +55,7 @@ @RunWith(SpringRunner.class) @ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {AnnotationJobConfiguration.class, BatchTestConfiguration.class}) public class AnnotationMetadataStepTest { @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepTest.java index 01d44236e..1121ec466 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepTest.java +++ 
b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CalculateStatisticsStepTest.java @@ -29,7 +29,7 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -52,7 +52,7 @@ */ @RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {PopulationStatisticsJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {PopulationStatisticsJobConfiguration.class, BatchTestConfiguration.class}) public class CalculateStatisticsStepTest { private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepTest.java index f7ec7e28c..f066337ce 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/CreateDatabaseIndexesStepTest.java @@ -32,7 +32,7 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.DatabaseInitializationJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DatabaseInitializationJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; @@ -47,7 +47,7 @@ */ 
@RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {DatabaseInitializationJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {DatabaseInitializationJobConfiguration.class, BatchTestConfiguration.class}) public class CreateDatabaseIndexesStepTest { private static final String COLLECTION_FEATURES_NAME = "features"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepTest.java index 0f37e083b..53acb9109 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropFilesByStudyStepTest.java @@ -33,7 +33,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; @@ -52,7 +52,7 @@ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {DropStudyJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {DropStudyJobConfiguration.class, BatchTestConfiguration.class}) public class DropFilesByStudyStepTest { private static final String COLLECTION_FILES_NAME = "files"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java 
b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java index 9bd16f6c4..ca6c25a0f 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java @@ -33,7 +33,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -52,7 +52,7 @@ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {DropStudyJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {DropStudyJobConfiguration.class, BatchTestConfiguration.class}) public class DropSingleStudyVariantsStepTest { private static final String COLLECTION_VARIANTS_NAME = "variants"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepTest.java index a53a6b8c1..8ad0d68f9 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepTest.java @@ -31,7 +31,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJob; +import 
uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -61,7 +61,7 @@ @RunWith(SpringRunner.class) @ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {AnnotationJobConfiguration.class, BatchTestConfiguration.class}) public class GenerateVepAnnotationStepTest { private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepTest.java index 0fea1cd7f..a328df557 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadFileStepTest.java @@ -33,7 +33,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; @@ -50,7 +50,7 @@ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = 
{GenotypedVcfJobConfiguration.class, BatchTestConfiguration.class}) public class LoadFileStepTest { private static final int EXPECTED_FILES = 1; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java index a2760523b..cdec11e67 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java @@ -40,7 +40,7 @@ import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.PopulationStatisticsJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.configuration.MongoOperationConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -65,7 +65,7 @@ */ @RunWith(SpringRunner.class) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {PopulationStatisticsJob.class, BatchTestConfiguration.class, +@ContextConfiguration(classes = {PopulationStatisticsJobConfiguration.class, BatchTestConfiguration.class, MongoOperationConfiguration.class}) public class LoadStatisticsStepTest { private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java index e921150f1..c7229ed22 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java @@ -36,7 +36,7 @@ 
import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.GenotypedVcfJobTestUtils; @@ -53,7 +53,7 @@ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {GenotypedVcfJobConfiguration.class, BatchTestConfiguration.class}) public class LoadVariantsStepTest { private static final int EXPECTED_VARIANTS = 300; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepTest.java index 6cfe32f04..288e8f569 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVepAnnotationStepTest.java @@ -34,7 +34,7 @@ import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VepOutputContent; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -55,14 +55,14 @@ import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; /** - * Test for {@link 
LoadVepAnnotationStepConfiguration}. In the context it is loaded {@link AnnotationJob} + * Test for {@link LoadVepAnnotationStepConfiguration}. In the context it is loaded {@link AnnotationJobConfiguration} * because {@link JobLauncherTestUtils} require one {@link org.springframework.batch.core.Job} to be present in order * to run properly. */ @RunWith(SpringRunner.class) @ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {AnnotationJobConfiguration.class, BatchTestConfiguration.class}) public class LoadVepAnnotationStepTest { private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java index 18e1892e8..03f88bfd4 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/PullFilesAndStatisticsByStudyStepTest.java @@ -33,7 +33,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJobConfiguration; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -53,7 +53,7 @@ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", 
"classpath:test-mongo.properties"}) -@ContextConfiguration(classes = {DropStudyJob.class, BatchTestConfiguration.class}) +@ContextConfiguration(classes = {DropStudyJobConfiguration.class, BatchTestConfiguration.class}) public class PullFilesAndStatisticsByStudyStepTest { private static final String COLLECTION_VARIANTS_NAME = "variants"; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java index 7b6ce8f30..f4784cbd6 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AggregatedVcfJobParametersValidatorTest.java @@ -22,7 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; -import uk.ac.ebi.eva.pipeline.configuration.jobs.AggregatedVcfJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.AggregatedVcfJobConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -30,7 +30,7 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link AggregatedVcfJob} are + * Tests that the arguments necessary to run a {@link AggregatedVcfJobConfiguration} are * correctly validated */ public class AggregatedVcfJobParametersValidatorTest { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidatorTest.java index 04414ad54..14fa6c8b5 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidatorTest.java +++ 
b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidatorTest.java @@ -22,7 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; -import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJobConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -30,7 +30,7 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link DropStudyJob} are + * Tests that the arguments necessary to run a {@link DropStudyJobConfiguration} are * correctly validated */ public class DropStudyJobParametersValidatorTest { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java index fceaccb4c..ca2995e55 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java @@ -22,7 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; -import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJob; +import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJobConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -30,7 +30,7 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link GenotypedVcfJob} are + * Tests that the arguments necessary to run a {@link GenotypedVcfJobConfiguration} are * correctly validated */ public class GenotypedVcfJobParametersValidatorTest { diff --git 
a/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java index 22eda9d66..5a25b0713 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java @@ -56,7 +56,7 @@ import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** - * This suit of tests checks the behaviour of the EvaPipelineJobLauncherCommandLineRunner and launches a full execution of the + * This suit of tests checks the behaviour of the {@link EvaPipelineJobLauncherCommandLineRunner} and launches a full execution of the * genotype vcf test. */ @RunWith(SpringRunner.class) From 2b17c40caad25ed6ecc48560f3faefa9cd4db7ad Mon Sep 17 00:00:00 2001 From: jorizci Date: Wed, 24 May 2017 12:12:23 +0100 Subject: [PATCH 33/48] Added missing package protected empty constructors to let spring generate new instances. 
--- .../models/data/VariantSourceEntity.java | 18 ++++++++++++++++++ .../models/mongo/entity/VariantDocument.java | 4 ++++ .../projections/SimplifiedAnnotation.java | 4 ++++ .../entity/projections/SimplifiedVariant.java | 4 ++++ .../mongo/entity/subdocuments/HgvsMongo.java | 8 ++++++-- .../mongo/entity/subdocuments/Score.java | 4 ++++ .../subdocuments/VariantSourceEntryMongo.java | 1 - .../entity/subdocuments/VariantStatsMongo.java | 4 ++++ .../models/mongo/entity/subdocuments/Xref.java | 4 ++++ 9 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantSourceEntity.java b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantSourceEntity.java index 0632143a6..2b2f407e2 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantSourceEntity.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/data/VariantSourceEntity.java @@ -34,27 +34,45 @@ public class VariantSourceEntity { public final static String FILEID_FIELD = "fid"; + public final static String FILENAME_FIELD = "fname"; + public final static String STUDYID_FIELD = "sid"; + public final static String STUDYNAME_FIELD = "sname"; + public final static String STUDYTYPE_FIELD = "stype"; + public final static String AGGREGATION_FIELD = "aggregation"; + public final static String DATE_FIELD = "date"; + public final static String SAMPLES_FIELD = "samp"; public final static String STATISTICS_FIELD = "st"; + public final static String STATISTICS_NUMSAMPLES_FIELD = "nSamp"; + public final static String STATISTICS_NUMVARIANTS_FIELD = "nVar"; + public final static String STATISTICS_NUMSNPS_FIELD = "nSnp"; + public final static String STATISTICS_NUMINDELS_FIELD = "nIndel"; + public final static String STATISTICS_NUMSTRUCTURAL_FIELD = "nSv"; + public final static String STATISTICS_NUMPASSFILTERS_FIELD = "nPass"; + public final static String STATISTICS_NUMTRANSITIONS_FIELD = "nTi"; + public final static String STATISTICS_NUMTRANSVERSIONS_FIELD = "nTv"; + 
public final static String STATISTICS_MEANQUALITY_FIELD = "meanQ"; public final static String METADATA_FIELD = "meta"; + public final static String METADATA_FILEFORMAT_FIELD = "fileformat"; + public final static String METADATA_HEADER_FIELD = "header"; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java index e67033b0f..f25a533ba 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java @@ -109,6 +109,10 @@ public class VariantDocument { @Field(ANNOTATION_FIELD) private Set annotations; + VariantDocument(){ + //Empty constructor for spring + } + public VariantDocument(Variant.VariantType variantType, String chromosome, int start, int end, int length, String reference, String alternate, Map> hgvs, Set ids, Set variantSources) { diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedAnnotation.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedAnnotation.java index 5825acba5..63b3a1ef0 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedAnnotation.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedAnnotation.java @@ -48,6 +48,10 @@ public class SimplifiedAnnotation { @Field(value = VEP_CACHE_VERSION_FIELD) private String vepCacheVersion; + SimplifiedAnnotation(){ + //Empty constructor for spring + } + public SimplifiedAnnotation(Annotation annotation) { this.id = annotation.getId(); this.chromosome = annotation.getChromosome(); diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java index f1175c996..5c597e34b 100644 --- 
a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java @@ -73,6 +73,10 @@ public class SimplifiedVariant { @Field(HGVS_FIELD) private Set hgvs; + SimplifiedVariant(){ + //Empty constructor for spring + } + public SimplifiedVariant(Variant.VariantType variantType, String chromosome, int start, int end, int length, String reference, String alternate, Map> hgvs) { this.id = buildVariantId(chromosome, start, reference, alternate); diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java index 34b5ec163..2841fed5c 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/HgvsMongo.java @@ -27,10 +27,14 @@ public class HgvsMongo { private static final String NAME_FIELD = "name"; @Field(TYPE_FIELD) - private final String type; + private String type; @Field(NAME_FIELD) - private final String name; + private String name; + + HgvsMongo(){ + //Empty constructor for spring + } public HgvsMongo(String type, String name) { this.type = type; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Score.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Score.java index 7853131fd..c8b021743 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Score.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Score.java @@ -33,6 +33,10 @@ public class Score { @Field(value = SCORE_DESCRIPTION_FIELD) private String description; + Score(){ + //Empty constructor for spring + } + public Score(Double score, String description) { this.score = score; this.description = description; diff --git 
a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java index be8cebdf5..b7e3c9f78 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantSourceEntryMongo.java @@ -65,7 +65,6 @@ public class VariantSourceEntryMongo { @Field(SAMPLES_FIELD) private BasicDBObject samp; - VariantSourceEntryMongo() { // Spring empty constructor } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java index 30099ce36..4bea94431 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java @@ -77,6 +77,10 @@ public class VariantStatsMongo { @Field(NUMGT_FIELD) private Map numGt; + VariantStatsMongo(){ + //Empty constructor for spring + } + public VariantStatsMongo(String studyId, String fileId, String cohortId, VariantStats stats) { this.studyId = studyId; this.fileId = fileId; diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Xref.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Xref.java index c9949915d..00b245dd0 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Xref.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/Xref.java @@ -32,6 +32,10 @@ public class Xref { @Field(value = XREF_SOURCE_FIELD) private String src; + Xref(){ + //Empty constructor for spring + } + public Xref(String id, String src) { this.id = id; this.src = src; From f3f445e795e34d85049729b50a518bd633675bad Mon Sep 17 00:00:00 2001 From: jose 
miguel mut Date: Tue, 13 Jun 2017 10:01:01 +0100 Subject: [PATCH 34/48] rename dropSingleStudyVariantsStep to dropVariantsByStudyStep --- .../eva/pipeline/configuration/BeanNames.java | 2 +- .../jobs/DropStudyJobConfiguration.java | 12 +++++----- ...DropVariantsByStudyStepConfiguration.java} | 24 +++++++++---------- .../steps/LoadGenesStepConfiguration.java | 2 +- ...t.java => DropVariantsByStudyTasklet.java} | 4 ++-- .../job/DropStudyJobParametersValidator.java | 4 ++-- ...riantsByStudyStepParametersValidator.java} | 8 +++---- .../configuration/jobs/DropStudyJobTest.java | 6 ++--- ....java => DropVariantsByStudyStepTest.java} | 14 ++++------- ...tsByStudyStepParametersValidatorTest.java} | 10 ++++---- .../eva/test/utils/DropStudyJobTestUtils.java | 4 ++-- 11 files changed, 42 insertions(+), 48 deletions(-) rename src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/{DropSingleStudyVariantsStepConfiguration.java => DropVariantsByStudyStepConfiguration.java} (68%) rename src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/{DropSingleStudyVariantsTasklet.java => DropVariantsByStudyTasklet.java} (96%) rename src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{DropSingleStudyVariantsStepParametersValidator.java => DropVariantsByStudyStepParametersValidator.java} (89%) rename src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/{DropSingleStudyVariantsStepTest.java => DropVariantsByStudyStepTest.java} (90%) rename src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/{DropSingleStudyVariantsStepParametersValidatorTest.java => DropVariantsByStudyStepParametersValidatorTest.java} (89%) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java index 716dd5458..e1becc9ad 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/BeanNames.java @@ -50,7 +50,7 @@ 
public class BeanNames { public static final String LOAD_STATISTICS_STEP = "load-statistics-step"; public static final String LOAD_VARIANTS_STEP = "load-variants-step"; public static final String LOAD_FILE_STEP = "load-file-step"; - public static final String DROP_SINGLE_STUDY_VARIANTS_STEP = "drop-single-study-variants-step"; + public static final String DROP_VARIANTS_BY_STUDY_STEP = "drop-variants-by-study-step"; public static final String PULL_FILES_AND_STATISTICS_BY_STUDY_STEP = "pull-files-and-statistics-by-study-step"; public static final String DROP_FILES_BY_STUDY_STEP = "drop-files-by-study-step"; public static final String LOAD_ANNOTATION_METADATA_STEP = "annotation-metadata-step"; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobConfiguration.java index 6a055c053..867152c3a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobConfiguration.java @@ -32,13 +32,13 @@ import org.springframework.context.annotation.Scope; import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropFilesByStudyStepConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropSingleStudyVariantsStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropVariantsByStudyStepConfiguration; import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.PullFilesAndStatisticsByStudyStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer; import uk.ac.ebi.eva.pipeline.parameters.validation.job.DropStudyJobParametersValidator; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.DROP_FILES_BY_STUDY_STEP; -import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.DROP_SINGLE_STUDY_VARIANTS_STEP; +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.DROP_VARIANTS_BY_STUDY_STEP; import 
static uk.ac.ebi.eva.pipeline.configuration.BeanNames.DROP_STUDY_JOB; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.PULL_FILES_AND_STATISTICS_BY_STUDY_STEP; @@ -49,14 +49,14 @@ */ @Configuration @EnableBatchProcessing -@Import({DropSingleStudyVariantsStepConfiguration.class, PullFilesAndStatisticsByStudyStepConfiguration.class, DropFilesByStudyStepConfiguration.class}) +@Import({DropVariantsByStudyStepConfiguration.class, PullFilesAndStatisticsByStudyStepConfiguration.class, DropFilesByStudyStepConfiguration.class}) public class DropStudyJobConfiguration { private static final Logger logger = LoggerFactory.getLogger(DropStudyJobConfiguration.class); @Autowired - @Qualifier(DROP_SINGLE_STUDY_VARIANTS_STEP) - private Step dropSingleStudyVariantsStep; + @Qualifier(DROP_VARIANTS_BY_STUDY_STEP) + private Step dropVariantsByStudyStep; @Autowired @Qualifier(PULL_FILES_AND_STATISTICS_BY_STUDY_STEP) @@ -77,7 +77,7 @@ public Job dropStudyJob(JobBuilderFactory jobBuilderFactory) { .validator(new DropStudyJobParametersValidator()); SimpleJobBuilder builder = jobBuilder - .start(dropSingleStudyVariantsStep) + .start(dropVariantsByStudyStep) .next(dropVariantsAndStatisticsByStudyStep) .next(dropFileStep); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropVariantsByStudyStepConfiguration.java similarity index 68% rename from src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepConfiguration.java rename to src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropVariantsByStudyStepConfiguration.java index 5f1acd536..0cee3632a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropVariantsByStudyStepConfiguration.java @@ -25,32 +25,32 @@ import 
org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.DropSingleStudyVariantsTasklet; +import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.DropVariantsByStudyTasklet; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; -import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.DROP_SINGLE_STUDY_VARIANTS_STEP; +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.DROP_VARIANTS_BY_STUDY_STEP; /** - * Configuration class that inject a step created with the tasklet {@link DropSingleStudyVariantsTasklet} + * Configuration class that inject a step created with the tasklet {@link DropVariantsByStudyTasklet} */ @Configuration @EnableBatchProcessing -public class DropSingleStudyVariantsStepConfiguration { +public class DropVariantsByStudyStepConfiguration { - private static final Logger logger = LoggerFactory.getLogger(DropSingleStudyVariantsStepConfiguration.class); + private static final Logger logger = LoggerFactory.getLogger(DropVariantsByStudyStepConfiguration.class); @Bean @StepScope - public DropSingleStudyVariantsTasklet dropSingleStudyVariantsTasklet() { - return new DropSingleStudyVariantsTasklet(); + public DropVariantsByStudyTasklet dropVariantsByStudyTasklet() { + return new DropVariantsByStudyTasklet(); } - @Bean(DROP_SINGLE_STUDY_VARIANTS_STEP) - public TaskletStep dropSingleStudyVariantsStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { - logger.debug("Building '" + DROP_SINGLE_STUDY_VARIANTS_STEP + "'"); - return TaskletUtils.generateStep(stepBuilderFactory, DROP_SINGLE_STUDY_VARIANTS_STEP, - dropSingleStudyVariantsTasklet(), jobOptions.isAllowStartIfComplete()); + @Bean(DROP_VARIANTS_BY_STUDY_STEP) + public TaskletStep dropVariantsByStudyStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { + logger.debug("Building '" + DROP_VARIANTS_BY_STUDY_STEP + "'"); + return 
TaskletUtils.generateStep(stepBuilderFactory, DROP_VARIANTS_BY_STUDY_STEP, + dropVariantsByStudyTasklet(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadGenesStepConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadGenesStepConfiguration.java index 5dcb34a2f..55fcba474 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadGenesStepConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadGenesStepConfiguration.java @@ -75,7 +75,7 @@ public class LoadGenesStepConfiguration { private ItemWriter writer; @Bean(LOAD_GENES_STEP) - public Step genesLoadStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, + public Step loadGenesStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, SimpleCompletionPolicy chunkSizeCompletionPolicy) { logger.debug("Building '" + LOAD_GENES_STEP + "'"); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropSingleStudyVariantsTasklet.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropVariantsByStudyTasklet.java similarity index 96% rename from src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropSingleStudyVariantsTasklet.java rename to src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropVariantsByStudyTasklet.java index b9603742f..9be3a66d3 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropSingleStudyVariantsTasklet.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/DropVariantsByStudyTasklet.java @@ -42,9 +42,9 @@ *

* Output: those variants are removed */ -public class DropSingleStudyVariantsTasklet implements Tasklet { +public class DropVariantsByStudyTasklet implements Tasklet { - private static final Logger logger = LoggerFactory.getLogger(DropSingleStudyVariantsTasklet.class); + private static final Logger logger = LoggerFactory.getLogger(DropVariantsByStudyTasklet.class); @Autowired private MongoOperations mongoOperations; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java index c0be47289..9add1b12b 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/DropStudyJobParametersValidator.java @@ -23,7 +23,7 @@ import uk.ac.ebi.eva.pipeline.configuration.jobs.DropStudyJobConfiguration; import uk.ac.ebi.eva.pipeline.parameters.validation.step.DropFilesByStudyStepParametersValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.step.DropSingleStudyVariantsStepParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.step.DropVariantsByStudyStepParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.step.PullFilesAndStatisticsByStudyStepParametersValidator; import java.util.ArrayList; @@ -42,7 +42,7 @@ public void validate(JobParameters parameters) throws JobParametersInvalidExcept private CompositeJobParametersValidator compositeJobParametersValidator() { List jobParametersValidators = new ArrayList<>(); - jobParametersValidators.add(new DropSingleStudyVariantsStepParametersValidator()); + jobParametersValidators.add(new DropVariantsByStudyStepParametersValidator()); jobParametersValidators.add(new PullFilesAndStatisticsByStudyStepParametersValidator()); jobParametersValidators.add(new DropFilesByStudyStepParametersValidator()); diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropVariantsByStudyStepParametersValidator.java similarity index 89% rename from src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidator.java rename to src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropVariantsByStudyStepParametersValidator.java index 84d1f89e7..4e2d8193a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropVariantsByStudyStepParametersValidator.java @@ -21,7 +21,7 @@ import org.springframework.batch.core.job.CompositeJobParametersValidator; import org.springframework.batch.core.job.DefaultJobParametersValidator; -import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropSingleStudyVariantsStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropVariantsByStudyStepConfiguration; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsVariantsNameValidator; @@ -34,11 +34,11 @@ /** * Validates the job parameters necessary to execute a - * {@link DropSingleStudyVariantsStepConfiguration} + * {@link DropVariantsByStudyStepConfiguration} */ -public class DropSingleStudyVariantsStepParametersValidator extends DefaultJobParametersValidator { +public class DropVariantsByStudyStepParametersValidator extends DefaultJobParametersValidator { - public DropSingleStudyVariantsStepParametersValidator() { + public DropVariantsByStudyStepParametersValidator() { super(new String[]{JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, JobParametersNames.DB_NAME, 
JobParametersNames.INPUT_STUDY_ID}, diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java index c6e777fd1..09be3a69a 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/DropStudyJobTest.java @@ -20,8 +20,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.springframework.batch.core.BatchStatus; -import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; @@ -41,7 +39,7 @@ import static org.junit.Assert.assertEquals; import static uk.ac.ebi.eva.test.utils.DropStudyJobTestUtils.assertDropFiles; -import static uk.ac.ebi.eva.test.utils.DropStudyJobTestUtils.assertDropSingleStudy; +import static uk.ac.ebi.eva.test.utils.DropStudyJobTestUtils.assertDropVariantsByStudy; import static uk.ac.ebi.eva.test.utils.DropStudyJobTestUtils.assertPullStudy; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; @@ -103,7 +101,7 @@ public void fullDropStudyJob() throws Exception { JobExecution jobExecution = jobLauncherTestUtils.launchJob(jobParameters); assertCompleted(jobExecution); - assertDropSingleStudy(variantsCollection, STUDY_ID_TO_DROP, EXPECTED_VARIANTS_AFTER_DROP_STUDY); + assertDropVariantsByStudy(variantsCollection, STUDY_ID_TO_DROP, EXPECTED_VARIANTS_AFTER_DROP_STUDY); assertPullStudy(variantsCollection, STUDY_ID_TO_DROP, EXPECTED_FILE_COUNT, EXPECTED_STATS_COUNT); assertDropFiles(filesCollection, STUDY_ID_TO_DROP, EXPECTED_FILES_AFTER_DROP_STUDY); } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java 
b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropVariantsByStudyStepTest.java similarity index 90% rename from src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropVariantsByStudyStepTest.java index ca6c25a0f..40a19ea11 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropSingleStudyVariantsStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/DropVariantsByStudyStepTest.java @@ -20,8 +20,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.springframework.batch.core.BatchStatus; -import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; @@ -37,23 +35,21 @@ import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; -import uk.ac.ebi.eva.test.utils.JobTestUtils; import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; import java.util.Arrays; -import static org.junit.Assert.assertEquals; -import static uk.ac.ebi.eva.test.utils.DropStudyJobTestUtils.assertDropSingleStudy; +import static uk.ac.ebi.eva.test.utils.DropStudyJobTestUtils.assertDropVariantsByStudy; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; /** - * Test for {@link DropSingleStudyVariantsStepConfiguration} + * Test for {@link DropVariantsByStudyStepConfiguration} */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) @TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {DropStudyJobConfiguration.class, BatchTestConfiguration.class}) -public 
class DropSingleStudyVariantsStepTest { +public class DropVariantsByStudyStepTest { private static final String COLLECTION_VARIANTS_NAME = "variants"; @@ -104,13 +100,13 @@ private void checkDrop(String databaseName, long expectedVariantsAfterDropStudy) .inputStudyId(STUDY_ID_TO_DROP) .toJobParameters(); - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.DROP_SINGLE_STUDY_VARIANTS_STEP, + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.DROP_VARIANTS_BY_STUDY_STEP, jobParameters); assertCompleted(jobExecution); DBCollection variantsCollection = mongoRule.getCollection(databaseName, COLLECTION_VARIANTS_NAME); - assertDropSingleStudy(variantsCollection, STUDY_ID_TO_DROP, expectedVariantsAfterDropStudy); + assertDropVariantsByStudy(variantsCollection, STUDY_ID_TO_DROP, expectedVariantsAfterDropStudy); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropVariantsByStudyStepParametersValidatorTest.java similarity index 89% rename from src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidatorTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropVariantsByStudyStepParametersValidatorTest.java index 7a3c41c10..4c27a19b1 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropSingleStudyVariantsStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/DropVariantsByStudyStepParametersValidatorTest.java @@ -22,7 +22,7 @@ import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersInvalidException; -import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropSingleStudyVariantsStepConfiguration; +import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.DropVariantsByStudyStepConfiguration; 
import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -31,12 +31,12 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link DropSingleStudyVariantsStepConfiguration} + * Tests that the arguments necessary to run a {@link DropVariantsByStudyStepConfiguration} * are correctly validated */ -public class DropSingleStudyVariantsStepParametersValidatorTest { +public class DropVariantsByStudyStepParametersValidatorTest { - private DropSingleStudyVariantsStepParametersValidator validator; + private DropVariantsByStudyStepParametersValidator validator; @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -47,7 +47,7 @@ public class DropSingleStudyVariantsStepParametersValidatorTest { @Before public void setUp() throws IOException { - validator = new DropSingleStudyVariantsStepParametersValidator(); + validator = new DropVariantsByStudyStepParametersValidator(); requiredParameters = new TreeMap<>(); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("database")); diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/DropStudyJobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/DropStudyJobTestUtils.java index 37e16899d..99c29fb27 100644 --- a/src/test/java/uk/ac/ebi/eva/test/utils/DropStudyJobTestUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/DropStudyJobTestUtils.java @@ -31,8 +31,8 @@ public class DropStudyJobTestUtils { private static final String STATS_STUDY_ID_FIELD = String.format("%s.%s", STATS_FIELD, STUDYID_FIELD); - public static void assertDropSingleStudy(DBCollection variantsCollection, String studyId, - long expectedVariantsAfterDropStudy) { + public static void assertDropVariantsByStudy(DBCollection variantsCollection, String studyId, + long expectedVariantsAfterDropStudy) { assertEquals(expectedVariantsAfterDropStudy, variantsCollection.count()); From 
e94259d78fc6e933390f55c29e518a44eedb2785 Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Fri, 26 May 2017 16:25:48 +0100 Subject: [PATCH 35/48] removed some usages of custom converters and DBAdaptor usages in tests --- .../models/mongo/entity/VariantDocument.java | 55 +++++++++++++++++++ .../entity/projections/SimplifiedVariant.java | 39 +++++++++++++ .../subdocuments/VariantStatsMongo.java | 40 ++++++++++++++ .../io/readers/VariantsMongoReader.java | 18 ++++-- .../eva/pipeline/model/VariantWrapper.java | 6 +- .../jobs/steps/LoadStatisticsStepTest.java | 32 ++++------- .../jobs/steps/LoadVariantsStepTest.java | 18 ++---- .../ac/ebi/eva/test/utils/JobTestUtils.java | 37 +++++++++++++ 8 files changed, 200 insertions(+), 45 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java index f25a533ba..cce17112f 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/VariantDocument.java @@ -204,4 +204,59 @@ public static Set createHgvsMongo(Map> hgvs) { } + public String getId() { + return id; + } + + public Variant.VariantType getVariantType() { + return variantType; + } + + public String getChromosome() { + return chromosome; + } + + public int getStart() { + return start; + } + + public int getEnd() { + return end; + } + + public int getLength() { + return length; + } + + public String getReference() { + return reference; + } + + public String getAlternate() { + return alternate; + } + + public VariantAt getAt() { + return at; + } + + public Set getHgvs() { + return hgvs; + } + + public Set getIds() { + return ids; + } + + public Set getVariantSources() { + return variantSources; + } + + public Set getVariantStatsMongo() { + return variantStatsMongo; + } + + public Set getAnnotations() { + return annotations; + } } diff --git 
a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java index 5c597e34b..9b8583ed4 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/projections/SimplifiedVariant.java @@ -91,4 +91,43 @@ public SimplifiedVariant(Variant.VariantType variantType, String chromosome, int this.hgvs = createHgvsMongo(hgvs); } + public String getId() { + return id; + } + + public Variant.VariantType getVariantType() { + return variantType; + } + + public String getChromosome() { + return chromosome; + } + + public int getStart() { + return start; + } + + public int getEnd() { + return end; + } + + public int getLength() { + return length; + } + + public String getReference() { + return reference; + } + + public String getAlternate() { + return alternate; + } + + public VariantAt getAt() { + return at; + } + + public Set getHgvs() { + return hgvs; + } } diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java index 4bea94431..e66a3071b 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/mongo/entity/subdocuments/VariantStatsMongo.java @@ -103,4 +103,44 @@ private Map buildGenotypes(Map genotypesCoun } return genotypes; } + + public String getStudyId() { + return studyId; + } + + public String getFileId() { + return fileId; + } + + public String getCohortId() { + return cohortId; + } + + public float getMaf() { + return maf; + } + + public float getMgf() { + return mgf; + } + + public String getMafAllele() { + return mafAllele; + } + + public String getMgfGenotype() { + return mgfGenotype; + } + + public int 
getMissingAlleles() { + return missingAlleles; + } + + public int getMissingGenotypes() { + return missingGenotypes; + } + + public Map getNumGt() { + return numGt; + } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java index 1b6d9be13..ba88823f7 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java @@ -23,8 +23,10 @@ import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader; import org.springframework.beans.factory.InitializingBean; import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.data.mongodb.core.convert.MongoConverter; import org.springframework.util.ClassUtils; import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; +import uk.ac.ebi.eva.commons.models.mongo.entity.projections.SimplifiedVariant; import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo; import uk.ac.ebi.eva.pipeline.model.VariantWrapper; @@ -41,7 +43,7 @@ public class VariantsMongoReader private MongoDbCursorItemReader delegateReader; - private DBObjectToVariantConverter converter; + private MongoConverter converter; private static final String STUDY_KEY = VariantDocument.FILES_FIELD + "." 
+ VariantSourceEntryMongo.STUDYID_FIELD; @@ -69,7 +71,7 @@ public VariantsMongoReader(MongoOperations template, String collectionsVariantsN String[] fields = {"chr", "start", "end", "ref", "alt"}; delegateReader.setFields(fields); - converter = new DBObjectToVariantConverter(); + converter = template.getConverter(); } @PostConstruct @@ -87,13 +89,21 @@ protected void doOpen() throws Exception { protected VariantWrapper doRead() throws Exception { DBObject dbObject = delegateReader.doRead(); if (dbObject != null) { - Variant variant = converter.convertToDataModelType(dbObject); - return new VariantWrapper(variant); + SimplifiedVariant variant = converter.read(SimplifiedVariant.class, dbObject); + return buildVariantWrapper(variant); } else { return null; } } + private VariantWrapper buildVariantWrapper(SimplifiedVariant variant) { + return new VariantWrapper(variant.getChromosome(), + variant.getStart(), + variant.getEnd(), + variant.getReference(), + variant.getAlternate()); + } + @Override protected void doClose() throws Exception { delegateReader.doClose(); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/model/VariantWrapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/model/VariantWrapper.java index edfcabc12..12aeb3bd0 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/model/VariantWrapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/model/VariantWrapper.java @@ -26,11 +26,7 @@ public class VariantWrapper { private String strand = "+"; public VariantWrapper(String chromosome, int start, int end, String reference, String alternate) { - this(new Variant(chromosome, start, end, reference, alternate)); - } - - public VariantWrapper(Variant variant) { - this.variant = variant; + this.variant = new Variant(chromosome, start, end, reference, alternate); transformToEnsemblFormat(); } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java 
b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java index cdec11e67..653072abd 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadStatisticsStepTest.java @@ -20,21 +20,14 @@ import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.Variant; -import org.opencb.biodata.models.variant.stats.VariantStats; import org.opencb.opencga.storage.core.StorageManagerException; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; -import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantConverter; -import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantSourceEntryConverter; -import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantStatsConverter; -import org.springframework.batch.core.BatchStatus; -import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobExecutionException; import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.rule.OutputCapture; +import org.springframework.data.mongodb.core.MongoOperations; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; @@ -49,7 +42,6 @@ import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; import java.io.IOException; -import java.util.Map; import static org.hamcrest.Matchers.containsString; import static org.junit.Assert.assertEquals; @@ -69,12 +61,17 @@ MongoOperationConfiguration.class}) public class LoadStatisticsStepTest { private static final String SMALL_VCF_FILE = 
"/input-files/vcf/genotyped.vcf.gz"; + private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; + private static final String SOURCE_FILE_NAME = "/input-files/statistics/1_1.source.stats.json.gz"; + private static final String VARIANTS_FILE_NAME = "/input-files/statistics/1_1.variants.stats.json.gz"; + private static final String FILE_NOT_FOUND_EXCEPTION = "java.io.FileNotFoundException:"; private static final String COLLECTION_FILES_NAME = "files"; + private static final String COLLECTION_VARIANTS_NAME = "variants"; @Rule @@ -86,6 +83,9 @@ public class LoadStatisticsStepTest { @Autowired private JobLauncherTestUtils jobLauncherTestUtils; + @Autowired + private MongoOperations mongoOperations; + //Capture error output @Rule public OutputCapture capture = new OutputCapture(); @@ -121,19 +121,7 @@ public void statisticsLoaderStepShouldLoadStatsIntoDb() throws StorageManagerExc // The DB docs should have the field "st" DBCursor cursor = mongoRule.getCollection(dbName, COLLECTION_VARIANTS_NAME).find(); - assertEquals(1, getCohortStatsFromFirstVariant(cursor).size()); - } - - private Map getCohortStatsFromFirstVariant(DBCursor cursor) { - DBObjectToVariantConverter variantConverter = getVariantConverter(); - Variant variant = variantConverter.convertToDataModelType(cursor.iterator().next()); - return variant.getSourceEntries().values().iterator().next().getCohortStats(); - } - - private DBObjectToVariantConverter getVariantConverter() { - return new DBObjectToVariantConverter( - new DBObjectToVariantSourceEntryConverter(VariantStorageManager.IncludeSrc.FIRST_8_COLUMNS), - new DBObjectToVariantStatsConverter()); + assertEquals(1, JobTestUtils.getCohortStatsFromFirstVariant(cursor, mongoOperations).size()); } private void copyFilesToOutpurDir(String outputDir) throws IOException { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java 
b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java index c7229ed22..f97dbc376 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepTest.java @@ -19,12 +19,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.datastore.core.QueryOptions; -import org.opencb.opencga.lib.common.Config; -import org.opencb.opencga.storage.core.StorageManagerFactory; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; -import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; -import org.opencb.opencga.storage.core.variant.adaptors.VariantDBIterator; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; @@ -44,7 +38,6 @@ import static org.junit.Assert.assertEquals; import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted; -import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** @@ -60,6 +53,8 @@ public class LoadVariantsStepTest { private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; + private static final String COLLECTION_VARIANTS_NAME = "variants"; + @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); @@ -71,7 +66,6 @@ public class LoadVariantsStepTest { @Before public void setUp() throws Exception { input = getResource(SMALL_VCF_FILE).getAbsolutePath(); - Config.setOpenCGAHome(GenotypedVcfJobTestUtils.getDefaultOpencgaHome()); } @Test @@ -80,7 +74,7 @@ public void loaderStepShouldLoadAllVariants() throws Exception { // When the execute method in variantsLoad is executed JobParameters jobParameters = new EvaJobParameterBuilder() - .collectionVariantsName("variants") + 
.collectionVariantsName(COLLECTION_VARIANTS_NAME) .databaseName(databaseName) .inputStudyId("1") .inputVcf(input) @@ -94,10 +88,6 @@ public void loaderStepShouldLoadAllVariants() throws Exception { assertCompleted(jobExecution); // And the number of documents in the DB should be equals to the number of lines in the VCF file - VariantStorageManager variantStorageManager = StorageManagerFactory.getVariantStorageManager(); - VariantDBAdaptor variantDBAdaptor = variantStorageManager.getDBAdaptor(databaseName, null); - VariantDBIterator iterator = variantDBAdaptor.iterator(new QueryOptions()); - - assertEquals(EXPECTED_VARIANTS, count(iterator)); + assertEquals(EXPECTED_VARIANTS, mongoRule.getCollection(databaseName, COLLECTION_VARIANTS_NAME).count()); } } diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java index 70260cdd1..1d838045c 100644 --- a/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java @@ -17,8 +17,11 @@ import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; +import com.mongodb.DBCursor; import com.mongodb.DBObject; import com.mongodb.util.JSON; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.stats.VariantStats; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.BatchStatus; @@ -26,6 +29,10 @@ import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.data.mongodb.core.MongoOperations; + +import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantStatsMongo; import java.io.BufferedReader; import java.io.File; @@ -41,6 +48,7 @@ import java.util.TreeSet; import java.util.zip.GZIPInputStream; +import static 
java.util.stream.Collectors.toList; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -165,4 +173,33 @@ public static void assertFailed(JobExecution jobExecution) { assertEquals(ExitStatus.FAILED.getExitCode(), jobExecution.getExitStatus().getExitCode()); assertEquals(BatchStatus.FAILED, jobExecution.getStatus()); } + + public static VariantStats buildVariantStats(VariantStatsMongo variantStatsMongo) { + return new VariantStats("", + 0, + "", + "", + Variant.VariantType.SNV, + variantStatsMongo.getMaf(), + variantStatsMongo.getMgf(), + variantStatsMongo.getMafAllele(), + variantStatsMongo.getMgfGenotype(), + variantStatsMongo.getMissingAlleles(), + variantStatsMongo.getMissingGenotypes(), + 0, + 0, + 0, + 0, + 0); + } + + public static List getCohortStatsFromFirstVariant(DBCursor cursor, MongoOperations mongoOperations) { + assertTrue(cursor.hasNext()); + + DBObject dbObject = cursor.iterator().next(); + VariantDocument variantDocument = mongoOperations.getConverter().read(VariantDocument.class, dbObject); + return variantDocument.getVariantStatsMongo().stream() + .map(JobTestUtils::buildVariantStats) + .collect(toList()); + } } From 9af00fbd6651764d394c30b6f6baee11dbb84ac7 Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Fri, 2 Jun 2017 11:26:01 +0100 Subject: [PATCH 36/48] rename VariantWrapper to EnsemblVariant --- .../VepAnnotationFileWriterConfiguration.java | 6 +- ...enerateVepAnnotationStepConfiguration.java | 8 +- .../io/readers/VariantsMongoReader.java | 12 +-- .../io/writers/VepAnnotationFileWriter.java | 26 +++--- .../eva/pipeline/model/EnsemblVariant.java | 91 +++++++++++++++++++ .../eva/pipeline/model/VariantWrapper.java | 64 ------------- .../job/AnnotationJobParametersValidator.java | 2 +- .../ebi/eva/pipeline/io/VepProcessTest.java | 16 ++-- .../io/readers/VariantsMongoReaderTest.java | 26 +++--- .../writers/VepAnnotationFileWriterTest.java | 48 +++++----- 
...apperTest.java => EnsemblVariantTest.java} | 20 ++-- 11 files changed, 172 insertions(+), 147 deletions(-) create mode 100644 src/main/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariant.java delete mode 100644 src/main/java/uk/ac/ebi/eva/pipeline/model/VariantWrapper.java rename src/test/java/uk/ac/ebi/eva/pipeline/model/{VariantWrapperTest.java => EnsemblVariantTest.java} (87%) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VepAnnotationFileWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VepAnnotationFileWriterConfiguration.java index 00ec38322..e5e3bb0e9 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VepAnnotationFileWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/writers/VepAnnotationFileWriterConfiguration.java @@ -22,7 +22,7 @@ import org.springframework.context.annotation.Configuration; import uk.ac.ebi.eva.pipeline.io.writers.VepAnnotationFileWriter; -import uk.ac.ebi.eva.pipeline.model.VariantWrapper; +import uk.ac.ebi.eva.pipeline.model.EnsemblVariant; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import uk.ac.ebi.eva.pipeline.parameters.ChunkSizeParameters; @@ -33,8 +33,8 @@ public class VepAnnotationFileWriterConfiguration { @Bean(VEP_ANNOTATION_WRITER) @StepScope - public ItemWriter vepAnnotationFileWriter(AnnotationParameters annotationParameters, - ChunkSizeParameters chunkSizeParameters) { + public ItemWriter vepAnnotationFileWriter(AnnotationParameters annotationParameters, + ChunkSizeParameters chunkSizeParameters) { return new VepAnnotationFileWriter(annotationParameters, chunkSizeParameters.getChunkSize(), annotationParameters.getTimeout()); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepConfiguration.java index 50023d659..7f48ae5ca 
100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/GenerateVepAnnotationStepConfiguration.java @@ -34,7 +34,7 @@ import uk.ac.ebi.eva.pipeline.configuration.io.writers.VepAnnotationFileWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.AnnotationFlatFileReader; import uk.ac.ebi.eva.pipeline.listeners.StepProgressListener; -import uk.ac.ebi.eva.pipeline.model.VariantWrapper; +import uk.ac.ebi.eva.pipeline.model.EnsemblVariant; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENERATE_VEP_ANNOTATION_STEP; @@ -59,11 +59,11 @@ public class GenerateVepAnnotationStepConfiguration { @Autowired @Qualifier(VARIANTS_READER) - private ItemStreamReader nonAnnotatedVariantsReader; + private ItemStreamReader nonAnnotatedVariantsReader; @Autowired @Qualifier(VEP_ANNOTATION_WRITER) - private ItemWriter vepAnnotationWriter; + private ItemWriter vepAnnotationWriter; @Bean(GENERATE_VEP_ANNOTATION_STEP) public Step generateVepAnnotationStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, @@ -71,7 +71,7 @@ public Step generateVepAnnotationStep(StepBuilderFactory stepBuilderFactory, Job logger.debug("Building '" + GENERATE_VEP_ANNOTATION_STEP + "'"); return stepBuilderFactory.get(GENERATE_VEP_ANNOTATION_STEP) - .chunk(chunkSizeCompletionPolicy) + .chunk(chunkSizeCompletionPolicy) .reader(nonAnnotatedVariantsReader) .writer(vepAnnotationWriter) .allowStartIfComplete(jobOptions.isAllowStartIfComplete()) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java index ba88823f7..0e50e12f4 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java @@ -18,8 +18,6 @@ 
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObjectBuilder; import com.mongodb.DBObject; -import org.opencb.biodata.models.variant.Variant; -import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantConverter; import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader; import org.springframework.beans.factory.InitializingBean; import org.springframework.data.mongodb.core.MongoOperations; @@ -28,7 +26,7 @@ import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; import uk.ac.ebi.eva.commons.models.mongo.entity.projections.SimplifiedVariant; import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo; -import uk.ac.ebi.eva.pipeline.model.VariantWrapper; +import uk.ac.ebi.eva.pipeline.model.EnsemblVariant; import javax.annotation.PostConstruct; @@ -39,7 +37,7 @@ * pagination and it is slow with large collections */ public class VariantsMongoReader - extends AbstractItemCountingItemStreamItemReader implements InitializingBean { + extends AbstractItemCountingItemStreamItemReader implements InitializingBean { private MongoDbCursorItemReader delegateReader; @@ -86,7 +84,7 @@ protected void doOpen() throws Exception { } @Override - protected VariantWrapper doRead() throws Exception { + protected EnsemblVariant doRead() throws Exception { DBObject dbObject = delegateReader.doRead(); if (dbObject != null) { SimplifiedVariant variant = converter.read(SimplifiedVariant.class, dbObject); @@ -96,8 +94,8 @@ protected VariantWrapper doRead() throws Exception { } } - private VariantWrapper buildVariantWrapper(SimplifiedVariant variant) { - return new VariantWrapper(variant.getChromosome(), + private EnsemblVariant buildVariantWrapper(SimplifiedVariant variant) { + return new EnsemblVariant(variant.getChromosome(), variant.getStart(), variant.getEnd(), variant.getReference(), diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriter.java 
b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriter.java index cf74d0b81..29052dcb7 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriter.java @@ -20,7 +20,7 @@ import org.springframework.batch.item.ItemWriter; import uk.ac.ebi.eva.pipeline.io.VepProcess; -import uk.ac.ebi.eva.pipeline.model.VariantWrapper; +import uk.ac.ebi.eva.pipeline.model.EnsemblVariant; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import java.util.List; @@ -29,7 +29,7 @@ * ItemStreamWriter that takes VariantWrappers and serialize them into a {@link VepProcess}, which will be responsible * for annotating the variants and writing them to a file. */ -public class VepAnnotationFileWriter implements ItemWriter { +public class VepAnnotationFileWriter implements ItemWriter { private static final Logger logger = LoggerFactory.getLogger(VepAnnotationFileWriter.class); @@ -46,19 +46,19 @@ public VepAnnotationFileWriter(AnnotationParameters annotationParameters, Intege } @Override - public void write(List variantWrappers) throws Exception { + public void write(List variantWrappers) throws Exception { VepProcess vepProcess = new VepProcess(annotationParameters, chunkSize, timeoutInSeconds); vepProcess.open(); - for (VariantWrapper variantWrapper : variantWrappers) { - String line = getVariantInVepInputFormat(variantWrapper); + for (EnsemblVariant ensemblVariant : variantWrappers) { + String line = getVariantInVepInputFormat(ensemblVariant); vepProcess.write(line.getBytes()); vepProcess.write(System.lineSeparator().getBytes()); } if (variantWrappers.size() > 0) { - VariantWrapper first = variantWrappers.get(0); - VariantWrapper last = variantWrappers.get(variantWrappers.size() - 1); + EnsemblVariant first = variantWrappers.get(0); + EnsemblVariant last = variantWrappers.get(variantWrappers.size() - 1); logger.trace("VEP has received {} variants from 
{}:{} to {}:{}", variantWrappers.size(), first.getChr(), first.getStart(), last.getChr(), last.getStart()); } @@ -67,13 +67,13 @@ public void write(List variantWrappers) throws Excepti vepProcess.close(); } - private String getVariantInVepInputFormat(VariantWrapper variantWrapper) { + private String getVariantInVepInputFormat(EnsemblVariant ensemblVariant) { return String.join("\t", - variantWrapper.getChr(), - Integer.toString(variantWrapper.getStart()), - Integer.toString(variantWrapper.getEnd()), - variantWrapper.getRefAlt(), - variantWrapper.getStrand()); + ensemblVariant.getChr(), + Integer.toString(ensemblVariant.getStart()), + Integer.toString(ensemblVariant.getEnd()), + ensemblVariant.getRefAlt(), + ensemblVariant.getStrand()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariant.java b/src/main/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariant.java new file mode 100644 index 000000000..282f08aca --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariant.java @@ -0,0 +1,91 @@ +/* + * Copyright 2016 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.model; + +import org.springframework.util.Assert; + + +/** + * Container for the ensembl coordinates of a variant including strand. By default strand in VCF is always '+' + * + * The definition of the coordinates is: + * + * Start: position after the last unmodified base before the variant. 
+ * + * End: position before the first unmodified base after the variant. That is, end = start + reference.length -1. + * + * Reference and alternate: the alleles, but if anyone is empty, substitute by "-". + * + * @see VEP default format + */ +public class EnsemblVariant { + + private String chromosome; + + private int start; + + private int end; + + private String reference; + + private String alternate; + + private String strand = "+"; + + public EnsemblVariant(String chromosome, int start, int end, String reference, String alternate) { + Assert.hasText(chromosome); + Assert.notNull(reference); + Assert.notNull(alternate); + this.chromosome = chromosome; + this.start = start; + this.end = end; + this.reference = reference; + this.alternate = alternate; + transformToEnsemblFormat(); + } + + public String getChr() { + return chromosome; + } + + public int getStart() { + return start; + } + + public int getEnd() { + return end; + } + + public String getRefAlt() { + return String.format("%s/%s", reference, alternate); + } + + public String getStrand() { + return strand; + } + + private void transformToEnsemblFormat() { + end = start + reference.length() - 1; + + if (reference.isEmpty()) { + reference = "-"; + } + + if (alternate.isEmpty()) { + alternate = "-"; + } + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/model/VariantWrapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/model/VariantWrapper.java deleted file mode 100644 index 12aeb3bd0..000000000 --- a/src/main/java/uk/ac/ebi/eva/pipeline/model/VariantWrapper.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2016 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.pipeline.model; - -import org.opencb.biodata.models.variant.Variant; - -/** - * Container for {@link Variant} including strand. By default strand in VCF is always '+' - */ -public class VariantWrapper { - - private Variant variant; - private String strand = "+"; - - public VariantWrapper(String chromosome, int start, int end, String reference, String alternate) { - this.variant = new Variant(chromosome, start, end, reference, alternate); - transformToEnsemblFormat(); - } - - public String getChr() { - return variant.getChromosome(); - } - - public int getStart() { - return variant.getStart(); - } - - public int getEnd() { - return variant.getEnd(); - } - - public String getRefAlt() { - return String.format("%s/%s", variant.getReference(), variant.getAlternate()); - } - - public String getStrand() { - return strand; - } - - private void transformToEnsemblFormat() { - variant.setEnd(variant.getStart() + variant.getReference().length() - 1); - - if (variant.getReference().equals("")) { - variant.setReference("-"); - } - - if (variant.getAlternate().equals("")) { - variant.setAlternate("-"); - } - } -} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java index 47d727155..b454115c1 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java +++ 
b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java @@ -32,7 +32,7 @@ * Validates the job parameters necessary to execute an * {@link uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJobConfiguration} */ -public class AnnotationJobParametersValidator extends DefaultJobParametersValidator { +public class AnnotationJobParametersValidator extends DefaultJobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/VepProcessTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/VepProcessTest.java index 308566ff7..d68cf7d12 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/VepProcessTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/VepProcessTest.java @@ -21,7 +21,7 @@ import org.junit.Test; import org.junit.rules.ExpectedException; -import uk.ac.ebi.eva.pipeline.model.VariantWrapper; +import uk.ac.ebi.eva.pipeline.model.EnsemblVariant; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -43,7 +43,7 @@ public class VepProcessTest { private static final long VEP_TIMEOUT = 1; - private final VariantWrapper VARIANT_WRAPPER = new VariantWrapper("1", 100, 105, "A", "T"); + private final EnsemblVariant VARIANT_WRAPPER = new EnsemblVariant("1", 100, 105, "A", "T"); @Before public void setUp() throws Exception { @@ -69,13 +69,13 @@ public void testWorkflowWriteWithoutOpening() throws Exception { vepAnnotationFileWriter.write(getVariantInVepInputFormat(VARIANT_WRAPPER).getBytes()); } - private String getVariantInVepInputFormat(VariantWrapper variantWrapper) { + private String getVariantInVepInputFormat(EnsemblVariant ensemblVariant) { return String.join("\t", - variantWrapper.getChr(), - Integer.toString(variantWrapper.getStart()), - Integer.toString(variantWrapper.getEnd()), - variantWrapper.getRefAlt(), - 
variantWrapper.getStrand()); + ensemblVariant.getChr(), + Integer.toString(ensemblVariant.getStart()), + Integer.toString(ensemblVariant.getEnd()), + ensemblVariant.getRefAlt(), + ensemblVariant.getStrand()); } @Test diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java index d1acf5507..ab44c7ac7 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java @@ -33,7 +33,7 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; -import uk.ac.ebi.eva.pipeline.model.VariantWrapper; +import uk.ac.ebi.eva.pipeline.model.EnsemblVariant; import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -111,23 +111,23 @@ private void checkNonAnnotatedVariantsRead(int expectedNonAnnotatedVariants, Str mongoItemReader.open(executionContext); int itemCount = 0; - VariantWrapper variantWrapper; - while ((variantWrapper = mongoItemReader.read()) != null) { + EnsemblVariant ensemblVariant; + while ((ensemblVariant = mongoItemReader.read()) != null) { itemCount++; - assertFalse(variantWrapper.getChr().isEmpty()); - assertNotEquals(0, variantWrapper.getStart()); + assertFalse(ensemblVariant.getChr().isEmpty()); + assertNotEquals(0, ensemblVariant.getStart()); - assertDoesNotHaveVariantAnnotation(variantWrapper); + assertDoesNotHaveVariantAnnotation(ensemblVariant); } assertEquals(expectedNonAnnotatedVariants, itemCount); mongoItemReader.close(); } - private void assertDoesNotHaveVariantAnnotation(VariantWrapper variantWrapper) + private void assertDoesNotHaveVariantAnnotation(EnsemblVariant ensemblVariant) throws NoSuchFieldException, IllegalAccessException { - Field privateVariantField = 
VariantWrapper.class.getDeclaredField("variant"); + Field privateVariantField = EnsemblVariant.class.getDeclaredField("variant"); privateVariantField.setAccessible(true); - VariantAnnotation annotation = ((Variant) privateVariantField.get(variantWrapper)).getAnnotation(); + VariantAnnotation annotation = ((Variant) privateVariantField.get(ensemblVariant)).getAnnotation(); assertNull(annotation.getConsequenceTypes()); } @@ -162,11 +162,11 @@ private void checkAllVariantsRead(int expectedVariants, String study) throws Exc mongoItemReader.open(executionContext); int itemCount = 0; - VariantWrapper variantWrapper; - while ((variantWrapper = mongoItemReader.read()) != null) { + EnsemblVariant ensemblVariant; + while ((ensemblVariant = mongoItemReader.read()) != null) { itemCount++; - assertFalse(variantWrapper.getChr().isEmpty()); - assertNotEquals(0, variantWrapper.getStart()); + assertFalse(ensemblVariant.getChr().isEmpty()); + assertNotEquals(0, ensemblVariant.getStart()); } assertEquals(expectedVariants, itemCount); mongoItemReader.close(); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriterTest.java index e416df9e3..345ced824 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationFileWriterTest.java @@ -22,7 +22,7 @@ import org.junit.rules.ExpectedException; import org.springframework.batch.item.ItemStreamException; -import uk.ac.ebi.eva.pipeline.model.VariantWrapper; +import uk.ac.ebi.eva.pipeline.model.EnsemblVariant; import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; @@ -54,7 +54,7 @@ public class VepAnnotationFileWriterTest { private static final int HEADER_LINES = 3; - private final VariantWrapper VARIANT_WRAPPER = new VariantWrapper("1", 100, 105, "A", "T"); + private 
final EnsemblVariant VARIANT_WRAPPER = new EnsemblVariant("1", 100, 105, "A", "T"); private AnnotationParameters annotationParameters; @@ -83,71 +83,71 @@ public void setUp() throws Exception { @Test public void testMockVep() throws Exception { - List variantWrappers = Collections.singletonList(VARIANT_WRAPPER); - int chunkSize = variantWrappers.size(); + List ensemblVariants = Collections.singletonList(VARIANT_WRAPPER); + int chunkSize = ensemblVariants.size(); VepAnnotationFileWriter vepAnnotationFileWriter = new VepAnnotationFileWriter(annotationParameters, chunkSize, TIMEOUT_IN_SECONDS); - vepAnnotationFileWriter.write(variantWrappers); + vepAnnotationFileWriter.write(ensemblVariants); File vepOutputFile = new File(annotationParameters.getVepOutput()); assertTrue(vepOutputFile.exists()); - assertEquals(variantWrappers.size() + EXTRA_ANNOTATIONS, - getLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); + assertEquals(ensemblVariants.size() + EXTRA_ANNOTATIONS, + getLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); } @Test public void testMockVepSeveralChunks() throws Exception { - List variantWrappers = new ArrayList<>(); + List ensemblVariants = new ArrayList<>(); for (int i = 0; i < 20; i++) { - variantWrappers.add(VARIANT_WRAPPER); + ensemblVariants.add(VARIANT_WRAPPER); } int chunkSize = 5; VepAnnotationFileWriter vepAnnotationFileWriter = new VepAnnotationFileWriter(annotationParameters, chunkSize, TIMEOUT_IN_SECONDS); - vepAnnotationFileWriter.write(variantWrappers); + vepAnnotationFileWriter.write(ensemblVariants); File vepOutputFile = new File(annotationParameters.getVepOutput()); assertTrue(vepOutputFile.exists()); - assertEquals(variantWrappers.size() + EXTRA_ANNOTATIONS, - getLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); + assertEquals(ensemblVariants.size() + EXTRA_ANNOTATIONS, + getLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); } @Test public void testVepWriterWritesLastSmallerChunk() 
throws Exception { - List variantWrappers = Collections.singletonList(VARIANT_WRAPPER); - int chunkSizeGreaterThanActualVariants = variantWrappers.size() * 10; + List ensemblVariants = Collections.singletonList(VARIANT_WRAPPER); + int chunkSizeGreaterThanActualVariants = ensemblVariants.size() * 10; VepAnnotationFileWriter vepAnnotationFileWriter = new VepAnnotationFileWriter(annotationParameters, chunkSizeGreaterThanActualVariants, TIMEOUT_IN_SECONDS); - vepAnnotationFileWriter.write(variantWrappers); + vepAnnotationFileWriter.write(ensemblVariants); File vepOutputFile = new File(annotationParameters.getVepOutput()); assertTrue(vepOutputFile.exists()); - assertEquals(variantWrappers.size() + EXTRA_ANNOTATIONS, - getLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); + assertEquals(ensemblVariants.size() + EXTRA_ANNOTATIONS, + getLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); } @Test public void testHeaderIsWrittenOnlyOnce() throws Exception { - List variantWrappers = Collections.singletonList(VARIANT_WRAPPER); - int chunkSize = variantWrappers.size(); + List ensemblVariants = Collections.singletonList(VARIANT_WRAPPER); + int chunkSize = ensemblVariants.size(); VepAnnotationFileWriter vepAnnotationFileWriter = new VepAnnotationFileWriter(annotationParameters, chunkSize, TIMEOUT_IN_SECONDS); long chunks = 3; for (int i = 0; i < chunks; i++) { - vepAnnotationFileWriter.write(variantWrappers); + vepAnnotationFileWriter.write(ensemblVariants); } File vepOutputFile = new File(annotationParameters.getVepOutput()); assertTrue(vepOutputFile.exists()); - assertEquals((variantWrappers.size() + EXTRA_ANNOTATIONS)*chunks, + assertEquals((ensemblVariants.size() + EXTRA_ANNOTATIONS)*chunks, getLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); assertEquals(HEADER_LINES, getCommentLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); @@ -176,8 +176,8 @@ public static long getCommentLines(InputStream in) throws IOException { 
@Test public void testVepTimeouts() throws Exception { - List variantWrappers = Collections.singletonList(VARIANT_WRAPPER); - int chunkSizeGreaterThanActualVariants = variantWrappers.size() * 10; + List ensemblVariants = Collections.singletonList(VARIANT_WRAPPER); + int chunkSizeGreaterThanActualVariants = ensemblVariants.size() * 10; annotationParameters.setVepPath(getResource("/mockvep_writeToFile_delayed.pl").getAbsolutePath()); long vepTimeouts = 1; @@ -185,7 +185,7 @@ public void testVepTimeouts() throws Exception { chunkSizeGreaterThanActualVariants, vepTimeouts); exception.expect(ItemStreamException.class); - vepAnnotationFileWriter.write(variantWrappers); + vepAnnotationFileWriter.write(ensemblVariants); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/model/VariantWrapperTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariantTest.java similarity index 87% rename from src/test/java/uk/ac/ebi/eva/pipeline/model/VariantWrapperTest.java rename to src/test/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariantTest.java index 58c648040..7f4c2a269 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/model/VariantWrapperTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariantTest.java @@ -29,7 +29,7 @@ * VEP input format uses a different end than we. 
These tests check that we comply with the examples in the VEP * documentation at: www.ensembl.org/info/docs/tools/vep/vep_formats.html ("Default" and "VCF" sections) */ -public class VariantWrapperTest { +public class EnsemblVariantTest { private static final String FILE_ID = "fid"; @@ -39,13 +39,13 @@ public class VariantWrapperTest { @Test public void transformInsertionsToEnsembleCoordinates() throws Exception { - VariantWrapper insertion; - insertion = new VariantWrapper("1", 12601, 12601, "", "C"); + EnsemblVariant insertion; + insertion = new EnsemblVariant("1", 12601, 12601, "", "C"); assertEquals(12601, insertion.getStart()); assertEquals(12600, insertion.getEnd()); assertEquals("-/C", insertion.getRefAlt()); - insertion = new VariantWrapper("20", 4, 5, "", "A"); + insertion = new EnsemblVariant("20", 4, 5, "", "A"); assertEquals(4, insertion.getStart()); assertEquals(3, insertion.getEnd()); assertEquals("-/A", insertion.getRefAlt()); @@ -53,13 +53,13 @@ public void transformInsertionsToEnsembleCoordinates() throws Exception { @Test public void transformDeletionToEnsembleCoordinates() throws Exception { - VariantWrapper deletion; - deletion = new VariantWrapper("1", 12600, 12602, "CGT", ""); + EnsemblVariant deletion; + deletion = new EnsemblVariant("1", 12600, 12602, "CGT", ""); assertEquals(12600, deletion.getStart()); assertEquals(12602, deletion.getEnd()); assertEquals("CGT/-", deletion.getRefAlt()); - deletion = new VariantWrapper("20", 3, 3, "C", ""); + deletion = new EnsemblVariant("20", 3, 3, "C", ""); assertEquals(3, deletion.getStart()); assertEquals(3, deletion.getEnd()); assertEquals("C/-", deletion.getRefAlt()); @@ -67,7 +67,7 @@ public void transformDeletionToEnsembleCoordinates() throws Exception { @Test public void transformSnvToEnsembleCoordinates() throws Exception { - VariantWrapper insertion = new VariantWrapper("20", 3, 3, "C", "G"); + EnsemblVariant insertion = new EnsemblVariant("20", 3, 3, "C", "G"); assertEquals(3, 
insertion.getStart()); assertEquals(3, insertion.getEnd()); assertEquals("C/G", insertion.getRefAlt()); @@ -87,7 +87,7 @@ public void transformInsertionFromVcfToEnsemblCoordinates() throws Exception { assertEquals("", reference); assertEquals("A", alternate); - VariantWrapper insertion = new VariantWrapper(result.get(0).getChromosome(), start, result.get(0).getEnd(), + EnsemblVariant insertion = new EnsemblVariant(result.get(0).getChromosome(), start, result.get(0).getEnd(), reference, alternate); assertEquals(4, insertion.getStart()); assertEquals(3, insertion.getEnd()); @@ -108,7 +108,7 @@ public void transformDeletionFromVcfToEnsemblCoordinates() throws Exception { assertEquals("C", reference); assertEquals("", alternate); - VariantWrapper insertion = new VariantWrapper(result.get(0).getChromosome(), start, result.get(0).getEnd(), + EnsemblVariant insertion = new EnsemblVariant(result.get(0).getChromosome(), start, result.get(0).getEnd(), reference, alternate); assertEquals(3, insertion.getStart()); assertEquals(3, insertion.getEnd()); From c5105299993bb669f7a72ecd953bd581e911ebd3 Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Fri, 2 Jun 2017 11:32:57 +0100 Subject: [PATCH 37/48] fix tests --- .../pipeline/io/readers/VariantsMongoReaderTest.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java index ab44c7ac7..3e3195942 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java @@ -116,21 +116,11 @@ private void checkNonAnnotatedVariantsRead(int expectedNonAnnotatedVariants, Str itemCount++; assertFalse(ensemblVariant.getChr().isEmpty()); assertNotEquals(0, ensemblVariant.getStart()); - - assertDoesNotHaveVariantAnnotation(ensemblVariant); } 
assertEquals(expectedNonAnnotatedVariants, itemCount); mongoItemReader.close(); } - private void assertDoesNotHaveVariantAnnotation(EnsemblVariant ensemblVariant) - throws NoSuchFieldException, IllegalAccessException { - Field privateVariantField = EnsemblVariant.class.getDeclaredField("variant"); - privateVariantField.setAccessible(true); - VariantAnnotation annotation = ((Variant) privateVariantField.get(ensemblVariant)).getAnnotation(); - assertNull(annotation.getConsequenceTypes()); - } - @Test public void shouldReadVariantsInAStudy() throws Exception { checkAllVariantsRead(EXPECTED_VARIANTS_IN_STUDY, STUDY_ID); From 6bd39b66c6bb806be6aa5ceb184115ed73aafab7 Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Mon, 5 Jun 2017 10:00:00 +0100 Subject: [PATCH 38/48] update VepProcess to be also compatible with ensembl-vep; previously, it was only compatible with the VEP version from ensembl-tools --- src/main/java/uk/ac/ebi/eva/pipeline/io/VepProcess.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/VepProcess.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/VepProcess.java index 005f76786..1f1531a9d 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/VepProcess.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/VepProcess.java @@ -110,7 +110,8 @@ public void open() throws ItemStreamException { "-o", "STDOUT", "--force_overwrite", "--offline", - "--everything" + "--everything", + "--format", "ensembl" ); logger.trace("Starting VEP annotation with parameters = {}", Arrays.toString(processBuilder.command().toArray())); From c20e83b0b49afea108010ec34860172cbf26d86f Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Tue, 6 Jun 2017 15:41:42 +0100 Subject: [PATCH 39/48] using vepv and cachev for AnnotationMetadata --- .../ebi/eva/commons/models/metadata/AnnotationMetadata.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/uk/ac/ebi/eva/commons/models/metadata/AnnotationMetadata.java 
b/src/main/java/uk/ac/ebi/eva/commons/models/metadata/AnnotationMetadata.java index 52e95cf9c..d15c2c311 100644 --- a/src/main/java/uk/ac/ebi/eva/commons/models/metadata/AnnotationMetadata.java +++ b/src/main/java/uk/ac/ebi/eva/commons/models/metadata/AnnotationMetadata.java @@ -17,15 +17,20 @@ package uk.ac.ebi.eva.commons.models.metadata; import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; import org.springframework.util.Assert; +import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; + @Document public class AnnotationMetadata { private String id; + @Field(Annotation.VEP_VERSION_FIELD) private String vepVersion; + @Field(Annotation.VEP_CACHE_VERSION_FIELD) private String cacheVersion; AnnotationMetadata() { From caf76558c7cc8610e356b26685134a29846c3c6f Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Thu, 29 Jun 2017 16:47:21 +0100 Subject: [PATCH 40/48] annot writer: test that annotations are added to existing ones --- .../io/writers/AnnotationMongoWriterTest.java | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index 206664342..8f019f5ad 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -62,6 +62,7 @@ import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Score.SCORE_DESCRIPTION_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.Score.SCORE_SCORE_FIELD; import static uk.ac.ebi.eva.test.data.VepOutputContent.vepOutputContent; +import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; /** * {@link AnnotationMongoWriter} @@ -245,4 +246,53 @@ public void indexesShouldBeCreatedInBackground() throws UnknownHostException { 
indexInfo.stream().filter(index -> !("_id_".equals(index.get("name").toString()))).forEach(index -> assertEquals("true", index.get(MongoDBHelper.BACKGROUND_INDEX).toString())); } + @Test + public void shouldAddToSetIfThereAreOtherAnnotationsInSameVersion() throws Exception { + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); + + List annotations = new ArrayList<>(); + for (String annotLine : vepOutputContent.split("\n")) { + annotations.add(AnnotationLineMapper.mapLine(annotLine, 0)); + } + + // load the annotation + MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME); + annotationWriter.write(annotations.subList(1, 2)); + + // check that 1 consequence type and 3 xrefs were written in a single annotation document + DBCollection annotCollection = mongoRule.getCollection(databaseName, COLLECTION_ANNOTATIONS_NAME); + assertEquals(1, count(annotCollection.find())); + assertEquals(1, countConsequenceType(annotCollection.find())); + assertEquals(3, countXref(annotCollection.find())); + + // check that consequence types were added to that document + annotationWriter.write(annotations.subList(2, 3)); + assertEquals(1, count(annotCollection.find())); + assertEquals(2, countConsequenceType(annotCollection.find())); + assertEquals(4, countXref(annotCollection.find())); + } + + private int countConsequenceType(DBCursor cursor) { + int consequenceTypeCount = 0; + while (cursor.hasNext()) { + DBObject annotation = cursor.next(); + BasicDBList consequenceTypes = (BasicDBList) annotation.get(CONSEQUENCE_TYPE_FIELD); + assertNotNull(consequenceTypes); + consequenceTypeCount += consequenceTypes.size(); + } + return consequenceTypeCount; + } + + private int countXref(DBCursor cursor) { + int xrefCount = 0; + while (cursor.hasNext()) { + DBObject annotation = cursor.next(); + BasicDBList xrefs = (BasicDBList) 
annotation.get(XREFS_FIELD); + assertNotNull(xrefs); + xrefCount += xrefs.size(); + } + return xrefCount; + } } From 2f858bdebe2f6104a15e08ada91c500d0ce8a611 Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Thu, 29 Jun 2017 22:11:09 +0100 Subject: [PATCH 41/48] add test for annotation writer in variant collection --- .../AnnotationInVariantMongoWriterTest.java | 112 +++++++++++++----- .../io/writers/AnnotationMongoWriterTest.java | 23 ++-- 2 files changed, 91 insertions(+), 44 deletions(-) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index e9f59a8c4..bcaad9ef0 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -30,6 +30,7 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; + import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; import uk.ac.ebi.eva.pipeline.Application; @@ -40,9 +41,13 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Set; +import java.util.TreeSet; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.POLYPHEN_FIELD; @@ -131,46 +136,91 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { BasicDBObject annotationField = (BasicDBObject) ((BasicDBList) (variant).get( VariantDocument.ANNOTATION_FIELD)).get(0); - BasicDBList sifts = (BasicDBList) 
annotationField.get(SIFT_FIELD); - assertNotNull(sifts); - assertTrue(sifts.containsAll(Arrays.asList(0.1, 0.2))); - - BasicDBList so = (BasicDBList) annotationField.get(SO_ACCESSION_FIELD); - assertNotNull(so); - assertTrue(so.contains(1631)); - - BasicDBList polyphen = (BasicDBList) annotationField.get(POLYPHEN_FIELD); - assertNotNull(polyphen); - assertTrue(polyphen.containsAll(Arrays.asList(0.1, 0.2))); - - BasicDBList geneNames = (BasicDBList) annotationField.get(XREFS_FIELD); - assertNotNull(geneNames); - assertTrue(geneNames.containsAll( - Arrays.asList("ENST00000382410", "DEFB125", "ENST00000608838", "ENSG00000178591"))); + checkAnnotationFields(annotationField, + Arrays.asList(0.1, 0.2), + Arrays.asList(0.1, 0.2), + new TreeSet<>(Arrays.asList(1631)), + new TreeSet<>(Arrays.asList("DEFB125", "ENSG00000178591", "ENST00000382410", + "ENST00000608838"))); } if (id.equals("20_63399_G_A")) { BasicDBObject annotationField = (BasicDBObject) ((BasicDBList) (variant).get( VariantDocument.ANNOTATION_FIELD)).get(0); - BasicDBList sifts = (BasicDBList) annotationField.get(SIFT_FIELD); - assertNotNull(sifts); - assertEquals(2, sifts.size()); - - BasicDBList so = (BasicDBList) annotationField.get(SO_ACCESSION_FIELD); - assertNotNull(so); - assertEquals(1, so.size()); - - BasicDBList polyphen = (BasicDBList) annotationField.get(POLYPHEN_FIELD); - assertNotNull(polyphen); - assertEquals(2, polyphen.size()); - - BasicDBList geneNames = (BasicDBList) annotationField.get(XREFS_FIELD); - assertNotNull(geneNames); - assertEquals(4, geneNames.size()); + checkAnnotationFields(annotationField, + Arrays.asList(0.07, 0.07), + Arrays.asList(0.859, 0.859), + new TreeSet<>(Arrays.asList(1631)), + new TreeSet<>(Arrays.asList("DEFB125", "ENSG00000178591", "ENST00000382410", + "ENST00000608838"))); } } cursor.close(); } + private void checkAnnotationFields(BasicDBObject annotationField, + List expectedSifts, List expectedPolyphens, + Set expectedSos, Set expectedXrefs) { + BasicDBList 
sifts = (BasicDBList) annotationField.get(SIFT_FIELD); + assertEquals(expectedSifts, sifts); + + BasicDBList polyphen = (BasicDBList) annotationField.get(POLYPHEN_FIELD); + assertEquals(expectedPolyphens, polyphen); + + BasicDBList so = (BasicDBList) annotationField.get(SO_ACCESSION_FIELD); + assertEquals(expectedSos, new TreeSet<>(so)); + + BasicDBList geneNames = (BasicDBList) annotationField.get(XREFS_FIELD); + assertEquals(expectedXrefs, new TreeSet<>(geneNames)); + } + + @Test + public void shouldAddToSetIfThereAreOtherAnnotationsInSameVersion() throws Exception { + String databaseName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + + String[] vepOutputLines = vepOutputContentWithExtraFields.split("\n"); + + List annotations = new ArrayList<>(); + annotations.add(AnnotationLineMapper.mapLine(vepOutputLines[1], 0)); + annotations.add(AnnotationLineMapper.mapLine(vepOutputLines[2], 0)); + + // load the first annotation + MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + annotationInVariantMongoWriter = new AnnotationInVariantMongoWriter(operations, COLLECTION_VARIANTS_NAME, + VEP_VERSION, VEP_CACHE_VERSION); + + BasicDBObject annotationField = writeAndGetAnnotation(databaseName, annotations.get(0)); + + checkAnnotationFields(annotationField, + Arrays.asList(0.1, 0.1), + Arrays.asList(0.1, 0.1), + new TreeSet<>(Arrays.asList(1631)), + new TreeSet<>(Arrays.asList("DEFB125", "ENSG00000178591", "ENST00000382410"))); + + // load the second annotation and check the information is updated (not overwritten) + BasicDBObject annotationFieldAfter = writeAndGetAnnotation(databaseName, annotations.get(1)); + + checkAnnotationFields(annotationFieldAfter, + Arrays.asList(0.1, 0.2), + Arrays.asList(0.1, 0.2), + new TreeSet<>(Arrays.asList(1631)), + new TreeSet<>(Arrays.asList("DEFB125", "ENSG00000178591", "ENST00000382410", + "ENST00000608838"))); + } + + private BasicDBObject 
writeAndGetAnnotation(String databaseName, Annotation annotation) throws Exception { + annotationInVariantMongoWriter.write(Collections.singletonList(annotation)); + + BasicDBObject query = new BasicDBObject(Annotation.START_FIELD, annotation.getStart()); + DBCursor cursor = mongoRule.getCollection(databaseName, COLLECTION_VARIANTS_NAME).find(query); + + assertTrue(cursor.hasNext()); + DBObject variant = cursor.next(); + assertFalse(cursor.hasNext()); + + return (BasicDBObject) ((BasicDBList) variant.get(VariantDocument.ANNOTATION_FIELD)).get(0); + } + } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index 8f019f5ad..e4b610b5f 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -275,24 +275,21 @@ public void shouldAddToSetIfThereAreOtherAnnotationsInSameVersion() throws Excep } private int countConsequenceType(DBCursor cursor) { - int consequenceTypeCount = 0; + return getArrayCount(cursor, CONSEQUENCE_TYPE_FIELD); + } + + private int getArrayCount(DBCursor cursor, String field) { + int count = 0; while (cursor.hasNext()) { DBObject annotation = cursor.next(); - BasicDBList consequenceTypes = (BasicDBList) annotation.get(CONSEQUENCE_TYPE_FIELD); - assertNotNull(consequenceTypes); - consequenceTypeCount += consequenceTypes.size(); + BasicDBList elements = (BasicDBList) annotation.get(field); + assertNotNull(elements); + count += elements.size(); } - return consequenceTypeCount; + return count; } private int countXref(DBCursor cursor) { - int xrefCount = 0; - while (cursor.hasNext()) { - DBObject annotation = cursor.next(); - BasicDBList xrefs = (BasicDBList) annotation.get(XREFS_FIELD); - assertNotNull(xrefs); - xrefCount += xrefs.size(); - } - return xrefCount; + return getArrayCount(cursor, XREFS_FIELD); } } From 
4619b7921cb69d5a5e02d35139c6e1d2758ae1af Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Tue, 4 Jul 2017 14:09:06 +0100 Subject: [PATCH 42/48] add example properties for job annotate-variants-job --- README.md | 2 +- examples/annotate-variants-job.properties | 61 +++++++++++++++++++++++ examples/drop-study-job.properties | 1 + examples/load-aggregated-vcf.properties | 3 +- examples/load-genotyped-vcf.properties | 3 +- 5 files changed, 67 insertions(+), 3 deletions(-) create mode 100755 examples/annotate-variants-job.properties diff --git a/README.md b/README.md index 6271d2a14..4b612cd93 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ Database name and collection names can be specified with these parameters. To se * `db.collections.variants.name`: Main collection. Has variant coordinates, sample information, and some statistics and annotation. * `db.collections.files.name`: File (and study) metadata information. * `db.collections.stats.name`: Main collection for statistics. The variants collection might contain a subset of this. -* `db.collections.annotation.metadata.name`: Main collection for annotation. The variants collection might contain a subset of this. +* `db.collections.annotation-metadata.name`: Main collection for annotation. The variants collection might contain a subset of this. #### Configuration of third party applications diff --git a/examples/annotate-variants-job.properties b/examples/annotate-variants-job.properties new file mode 100755 index 000000000..0d7919018 --- /dev/null +++ b/examples/annotate-variants-job.properties @@ -0,0 +1,61 @@ +# +# Copyright 2015-2017 EMBL - European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# JOB +spring.batch.job.names=annotate-variants-job + +# SUBMISSION FIELDS +input.vcf=/path/to/input.vcf +input.vcf.id=1234 +input.vcf.aggregation=NONE + +input.study.name=Study name +input.study.id=2345 +input.study.type=COLLECTION + +input.pedigree= +input.fasta=/path/to/homo_sapiens/sequence.fa + +output.dir=/path/to/output_files +output.dir.annotation=/path/to/annotation-files/ +output.dir.statistics=/path/to/statistics-files/ + + +# VEP +app.vep.path=/path/to/variant_effect_predictor.pl +app.vep.cache.path=/path/to/vep/cache_folder +app.vep.cache.version=82 +app.vep.version=82 +app.vep.cache.species=homo_sapiens +app.vep.num-forks=4 +app.vep.timeout=600 + +# STEPS MANAGEMENT +config.chunk.size=1000 + +## Skip steps +statistics.skip=false +annotation.skip=false +annotation.overwrite=false + +# Database configuration +spring.data.mongodb.database= +# TODO The following 2 properties will be used exclusive after removing readers and writers dependency +# on OpenCGA. At the moment they need to be specified in both. 
+db.collections.files.name=files +db.collections.variants.name=variants +db.collections.annotation-metadata.name=annotationMetadata +db.collections.annotations.name=annotations diff --git a/examples/drop-study-job.properties b/examples/drop-study-job.properties index 5045a7b9c..aee29acd4 100644 --- a/examples/drop-study-job.properties +++ b/examples/drop-study-job.properties @@ -9,4 +9,5 @@ spring.data.mongodb.database=your_db db.collections.variants.name=variants db.collections.files.name=files +db.collections.annotations.name=annotations diff --git a/examples/load-aggregated-vcf.properties b/examples/load-aggregated-vcf.properties index 14bae65de..e0301b663 100755 --- a/examples/load-aggregated-vcf.properties +++ b/examples/load-aggregated-vcf.properties @@ -38,4 +38,5 @@ spring.data.mongodb.database= # on OpenCGA. At the moment they need to be specified in both. db.collections.files.name=files db.collections.variants.name=variants -db.collections.annotation.metadata.name=annotationMetadata +db.collections.annotation-metadata.name=annotationMetadata +db.collections.annotations.name=annotations diff --git a/examples/load-genotyped-vcf.properties b/examples/load-genotyped-vcf.properties index ccf317d27..bd3c4ea88 100755 --- a/examples/load-genotyped-vcf.properties +++ b/examples/load-genotyped-vcf.properties @@ -40,4 +40,5 @@ spring.data.mongodb.database= # on OpenCGA. At the moment they need to be specified in both. 
db.collections.files.name=files db.collections.variants.name=variants -db.collections.annotation.metadata.name=annotationMetadata +db.collections.annotation-metadata.name=annotationMetadata +db.collections.annotations.name=annotations From 5cfb90bb1b22c816e8340ba5670b6c5092650276 Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Tue, 11 Jul 2017 16:03:54 +0100 Subject: [PATCH 43/48] minor style and docs fixes --- examples/annotate-variants-job.properties | 16 ---------------- .../ebi/eva/pipeline/model/EnsemblVariant.java | 2 +- .../job/AnnotationJobParametersValidator.java | 2 +- .../AnnotationInVariantMongoWriterTest.java | 2 +- .../io/writers/AnnotationMongoWriterTest.java | 4 ++-- 5 files changed, 5 insertions(+), 21 deletions(-) diff --git a/examples/annotate-variants-job.properties b/examples/annotate-variants-job.properties index 0d7919018..d800224dd 100755 --- a/examples/annotate-variants-job.properties +++ b/examples/annotate-variants-job.properties @@ -1,19 +1,3 @@ -# -# Copyright 2015-2017 EMBL - European Bioinformatics Institute -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - # JOB spring.batch.job.names=annotate-variants-job diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariant.java b/src/main/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariant.java index 282f08aca..e1f2f1f02 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariant.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/model/EnsemblVariant.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2016-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java index b454115c1..47d727155 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/AnnotationJobParametersValidator.java @@ -32,7 +32,7 @@ * Validates the job parameters necessary to execute an * {@link uk.ac.ebi.eva.pipeline.configuration.jobs.AnnotationJobConfiguration} */ -public class AnnotationJobParametersValidator extends DefaultJobParametersValidator { +public class AnnotationJobParametersValidator extends DefaultJobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index bcaad9ef0..953c11c48 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -176,7 +176,7 @@ private void 
checkAnnotationFields(BasicDBObject annotationField, } @Test - public void shouldAddToSetIfThereAreOtherAnnotationsInSameVersion() throws Exception { + public void shouldUpdateFieldsOfExistingAnnotationVersion() throws Exception { String databaseName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); String[] vepOutputLines = vepOutputContentWithExtraFields.split("\n"); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index e4b610b5f..868393049 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -247,7 +247,7 @@ public void indexesShouldBeCreatedInBackground() throws UnknownHostException { } @Test - public void shouldAddToSetIfThereAreOtherAnnotationsInSameVersion() throws Exception { + public void shouldUpdateFieldsOfExistingAnnotationVersion() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); List annotations = new ArrayList<>(); @@ -261,7 +261,7 @@ public void shouldAddToSetIfThereAreOtherAnnotationsInSameVersion() throws Excep annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME); annotationWriter.write(annotations.subList(1, 2)); - // check that 3 consequence types were written in the annotation document + // check that consequence type was written in the annotation document DBCollection annotCollection = mongoRule.getCollection(databaseName, COLLECTION_ANNOTATIONS_NAME); assertEquals(1, count(annotCollection.find())); assertEquals(1, countConsequenceType(annotCollection.find())); From ca40e12e140c1bf90da4dde5ad05056fc2102330 Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Wed, 12 Jul 2017 12:09:05 +0100 Subject: [PATCH 44/48] fix query in VariantsMongoReader for non-annotated variants --- 
.../VariantsMongoReaderConfiguration.java | 2 ++ .../io/readers/VariantsMongoReader.java | 29 ++++++++++++++----- .../io/readers/VariantsMongoReaderTest.java | 12 ++++---- .../annotation/VariantWithAnnotation.json | 21 +++++++------- .../annotation/VariantWithOutAnnotation.json | 2 +- .../VariantWithOutAnnotationOtherStudy.json | 2 +- .../OtherVariantWithOneStudyToDrop.json | 2 +- .../variant/VariantWithOneStudy.json | 2 +- .../variant/VariantWithOneStudyToDrop.json | 2 +- .../variant/VariantWithTwoStudies.json | 4 +-- 10 files changed, 48 insertions(+), 30 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java index 6fa2dfb5a..289574ab4 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java @@ -45,6 +45,8 @@ public VariantsMongoReader variantsMongoReader(MongoOperations mongoOperations, VariantsMongoReader variantsMongoReader = new VariantsMongoReader( mongoOperations, databaseParameters.getCollectionVariantsName(), + annotationParameters.getVepVersion(), + annotationParameters.getVepCacheVersion(), inputParameters.getStudyId(), excludeAnnotated); variantsMongoReader.setSaveState(false); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java index 0e50e12f4..69f567c5b 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java @@ -23,13 +23,22 @@ import org.springframework.data.mongodb.core.MongoOperations; import org.springframework.data.mongodb.core.convert.MongoConverter; import org.springframework.util.ClassUtils; + 
+import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument; import uk.ac.ebi.eva.commons.models.mongo.entity.projections.SimplifiedVariant; +import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation; import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantSourceEntryMongo; import uk.ac.ebi.eva.pipeline.model.EnsemblVariant; import javax.annotation.PostConstruct; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.ALTERNATE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.CHROMOSOME_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.END_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.REFERENCE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.START_FIELD; + /** * Mongo variant reader using an ItemReader cursor based. This is speeding up * the reading of the variant in big collections. The @@ -50,26 +59,32 @@ public class VariantsMongoReader * If the studyId string is not empty, bring only non-annotated variants from that study. * @param excludeAnnotated bring only non-annotated variants. 
*/ - public VariantsMongoReader(MongoOperations template, String collectionsVariantsName, String studyId, - boolean excludeAnnotated) { + public VariantsMongoReader(MongoOperations mongoOperations, String collectionVariantsName, String vepVersion, + String vepCacheVersion, String studyId, boolean excludeAnnotated) { setName(ClassUtils.getShortName(VariantsMongoReader.class)); delegateReader = new MongoDbCursorItemReader(); - delegateReader.setTemplate(template); - delegateReader.setCollection(collectionsVariantsName); + delegateReader.setTemplate(mongoOperations); + delegateReader.setCollection(collectionVariantsName); BasicDBObjectBuilder queryBuilder = BasicDBObjectBuilder.start(); if (studyId != null && !studyId.isEmpty()) { queryBuilder.add(STUDY_KEY, studyId); } if (excludeAnnotated) { - queryBuilder.add("annot.ct.so", new BasicDBObject("$exists", false)); + BasicDBObject exists = new BasicDBObject("$exists", 1); + BasicDBObject annotationSubdocument = new BasicDBObject(VariantAnnotation.SO_ACCESSION_FIELD, exists) + .append(Annotation.VEP_VERSION_FIELD, vepVersion) + .append(Annotation.VEP_CACHE_VERSION_FIELD, vepCacheVersion); + BasicDBObject noElementMatchesOurVersion = + new BasicDBObject("$not", new BasicDBObject("$elemMatch", annotationSubdocument)); + queryBuilder.add(VariantDocument.ANNOTATION_FIELD, noElementMatchesOurVersion); } delegateReader.setQuery(queryBuilder.get()); - String[] fields = {"chr", "start", "end", "ref", "alt"}; + String[] fields = {CHROMOSOME_FIELD, START_FIELD, END_FIELD, REFERENCE_FIELD, ALTERNATE_FIELD}; delegateReader.setFields(fields); - converter = template.getConverter(); + converter = mongoOperations.getConverter(); } @PostConstruct diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java index 3e3195942..cd8291cba 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java +++ 
b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java @@ -19,8 +19,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.Variant; -import org.opencb.biodata.models.variant.annotation.VariantAnnotation; import org.springframework.batch.item.ExecutionContext; import org.springframework.batch.test.MetaDataInstanceFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -38,13 +36,11 @@ import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; -import java.lang.reflect.Field; import java.util.Arrays; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertNull; /** * {@link VariantsMongoReader} @@ -71,6 +67,10 @@ public class VariantsMongoReaderTest { private static final String ALL_STUDIES = ""; + private static final String VEP_VERSION = "78"; + + private static final String VEP_CACHE_VERSION = "78"; + @Autowired private MongoConnection mongoConnection; @@ -107,7 +107,7 @@ private void checkNonAnnotatedVariantsRead(int expectedNonAnnotatedVariants, Str boolean excludeAnnotated = true; VariantsMongoReader mongoItemReader = new VariantsMongoReader( - mongoOperations, COLLECTION_VARIANTS_NAME, study, excludeAnnotated); + mongoOperations, COLLECTION_VARIANTS_NAME, VEP_VERSION, VEP_CACHE_VERSION, study, excludeAnnotated); mongoItemReader.open(executionContext); int itemCount = 0; @@ -148,7 +148,7 @@ private void checkAllVariantsRead(int expectedVariants, String study) throws Exc boolean excludeAnnotated = false; VariantsMongoReader mongoItemReader = new VariantsMongoReader( - mongoOperations, COLLECTION_VARIANTS_NAME, study, excludeAnnotated); + mongoOperations, COLLECTION_VARIANTS_NAME, VEP_VERSION, VEP_CACHE_VERSION, study, excludeAnnotated); mongoItemReader.open(executionContext); int itemCount = 0; 
diff --git a/src/test/resources/input-files/annotation/VariantWithAnnotation.json b/src/test/resources/input-files/annotation/VariantWithAnnotation.json index 8d58cf471..3e36affa7 100644 --- a/src/test/resources/input-files/annotation/VariantWithAnnotation.json +++ b/src/test/resources/input-files/annotation/VariantWithAnnotation.json @@ -19,7 +19,7 @@ "AFR_AF" : "0.0000", "EUR_AF" : "0.0000", "SAN_AF" : "0.0000", - "ssID" : "ss1363765667", + "ssID" : "ss1363765667" } } ], @@ -59,13 +59,14 @@ "fid" : "5" } ], - "annot" : { - "ct" : [ - { - "so" : [ - 1628 - ] - } - ] - } + "annot" : [ + { + "vepv" : "78", + "cachev" : "78", + "so" : [ + 1628 + ], + "xrefs" : [ ] + } + ] } \ No newline at end of file diff --git a/src/test/resources/input-files/annotation/VariantWithOutAnnotation.json b/src/test/resources/input-files/annotation/VariantWithOutAnnotation.json index 4c8ac68ab..6270c21ef 100644 --- a/src/test/resources/input-files/annotation/VariantWithOutAnnotation.json +++ b/src/test/resources/input-files/annotation/VariantWithOutAnnotation.json @@ -19,7 +19,7 @@ "AFR_AF" : "0.0000", "EUR_AF" : "0.0000", "SAN_AF" : "0.0000", - "ssID" : "ss1363765667", + "ssID" : "ss1363765667" } } ], diff --git a/src/test/resources/input-files/annotation/VariantWithOutAnnotationOtherStudy.json b/src/test/resources/input-files/annotation/VariantWithOutAnnotationOtherStudy.json index bcf72bb3d..15ec67800 100644 --- a/src/test/resources/input-files/annotation/VariantWithOutAnnotationOtherStudy.json +++ b/src/test/resources/input-files/annotation/VariantWithOutAnnotationOtherStudy.json @@ -19,7 +19,7 @@ "AFR_AF" : "0.0000", "EUR_AF" : "0.0000", "SAN_AF" : "0.0000", - "ssID" : "ss1363765667", + "ssID" : "ss1363765667" } } ], diff --git a/src/test/resources/input-files/variant/OtherVariantWithOneStudyToDrop.json b/src/test/resources/input-files/variant/OtherVariantWithOneStudyToDrop.json index 0cf488168..a5ad78e89 100644 --- 
a/src/test/resources/input-files/variant/OtherVariantWithOneStudyToDrop.json +++ b/src/test/resources/input-files/variant/OtherVariantWithOneStudyToDrop.json @@ -19,7 +19,7 @@ "AFR_AF" : "0.0000", "EUR_AF" : "0.0000", "SAN_AF" : "0.0000", - "ssID" : "ss1363765667", + "ssID" : "ss1363765667" } } ], diff --git a/src/test/resources/input-files/variant/VariantWithOneStudy.json b/src/test/resources/input-files/variant/VariantWithOneStudy.json index b8fa03239..6d7aee4d6 100644 --- a/src/test/resources/input-files/variant/VariantWithOneStudy.json +++ b/src/test/resources/input-files/variant/VariantWithOneStudy.json @@ -19,7 +19,7 @@ "AFR_AF" : "0.0000", "EUR_AF" : "0.0000", "SAN_AF" : "0.0000", - "ssID" : "ss1363765667", + "ssID" : "ss1363765667" } } ], diff --git a/src/test/resources/input-files/variant/VariantWithOneStudyToDrop.json b/src/test/resources/input-files/variant/VariantWithOneStudyToDrop.json index 3a9cceb04..3f5e0b989 100644 --- a/src/test/resources/input-files/variant/VariantWithOneStudyToDrop.json +++ b/src/test/resources/input-files/variant/VariantWithOneStudyToDrop.json @@ -19,7 +19,7 @@ "AFR_AF" : "0.0000", "EUR_AF" : "0.0000", "SAN_AF" : "0.0000", - "ssID" : "ss1363765667", + "ssID" : "ss1363765667" } } ], diff --git a/src/test/resources/input-files/variant/VariantWithTwoStudies.json b/src/test/resources/input-files/variant/VariantWithTwoStudies.json index ca54ed9e7..7ea0fa7f4 100644 --- a/src/test/resources/input-files/variant/VariantWithTwoStudies.json +++ b/src/test/resources/input-files/variant/VariantWithTwoStudies.json @@ -19,7 +19,7 @@ "AFR_AF" : "0.0000", "EUR_AF" : "0.0000", "SAN_AF" : "0.0000", - "ssID" : "ss1363765667", + "ssID" : "ss1363765667" } }, { @@ -38,7 +38,7 @@ "AFR_AF" : "0.0000", "EUR_AF" : "0.0000", "SAN_AF" : "0.0000", - "ssID" : "ss1363765667", + "ssID" : "ss1363765667" } } ], From a143ae285ad39661e70ad4580d9e89519e8063d8 Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Wed, 12 Jul 2017 15:06:09 +0100 Subject: [PATCH 45/48] 
add test for AnnotInVariantWriter (add different version annot) --- .../AnnotationInVariantMongoWriter.java | 116 ++++++++++-------- .../AnnotationInVariantMongoWriterTest.java | 83 ++++++++++--- 2 files changed, 134 insertions(+), 65 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java index 8600bec0d..87e0344df 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriter.java @@ -17,6 +17,7 @@ import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; +import com.mongodb.DBCursor; import com.mongodb.DBObject; import org.springframework.batch.item.ItemWriter; import org.springframework.data.mongodb.core.BulkOperations; @@ -27,12 +28,18 @@ import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation; import uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import static java.util.stream.Collectors.toList; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.ALTERNATE_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.ANNOTATION_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.CHROMOSOME_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.REFERENCE_FIELD; +import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.START_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.VEP_CACHE_VERSION_FIELD; import static uk.ac.ebi.eva.commons.models.mongo.entity.subdocuments.VariantAnnotation.VEP_VERSION_FIELD; @@ -77,67 +84,81 @@ public AnnotationInVariantMongoWriter(MongoOperations mongoOperations, 
this.vepCacheVersion = vepCacheVersion; } + @Override + public void write(List annotations) throws Exception { + Map variantAnnotations = generateVariantAnnotations(annotations); + + BulkOperations bulkOperations = mongoOperations.bulkOps(BulkOperations.BulkMode.UNORDERED, collection); + bulkPrepare(bulkOperations, variantAnnotations); + bulkOperations.execute(); + } + private Map generateVariantAnnotations(List annotations) { HashMap variantAnnotations = new HashMap<>(); for (Annotation annotation : annotations) { - String id = annotation.buildVariantId(); + String id = annotation.getId(); variantAnnotations.putIfAbsent(id, new VariantAnnotation(annotation)); variantAnnotations.computeIfPresent(id, (key, oldVar) -> oldVar.concatenate(annotation)); } return variantAnnotations; } - @Override - public void write(List annotations) throws Exception { - Map variantAnnotations = generateVariantAnnotations(annotations); - - BulkOperations bulkOperations = mongoOperations.bulkOps(BulkOperations.BulkMode.UNORDERED, collection); - bulkPrepare(bulkOperations, variantAnnotations); - bulkOperations.execute(); - } - private void bulkPrepare(BulkOperations bulkOperations, Map variantAnnotations) { Map storedVariantAnnotations = getStoredVariantAnnotations(variantAnnotations); for (Map.Entry entry : variantAnnotations.entrySet()) { - final String variantId = entry.getKey(); - if (storedVariantAnnotations.containsKey(variantId)) { - bulkUpdate(bulkOperations, variantId, storedVariantAnnotations.get(variantId).concatenate(entry.getValue())); + final String annotationId = entry.getKey(); + if (storedVariantAnnotations.containsKey(annotationId)) { + bulkUpdate(bulkOperations, annotationId, + storedVariantAnnotations.get(annotationId).concatenate(entry.getValue())); } else { - bulkAddToSet(bulkOperations, variantId, entry.getValue()); + bulkAddToSet(bulkOperations, annotationId, entry.getValue()); } } } - private void bulkAddToSet(BulkOperations bulkOperations, String variantId, 
VariantAnnotation value) { - DBObject id = new BasicDBObject(ID, variantId); - DBObject variantAnnotation = convertToMongo(value); - BasicDBObject addToSet = new BasicDBObject(ADD_TO_SET, new BasicDBObject(ANNOTATION_FIELD, variantAnnotation)); - bulkOperations.updateOne(new BasicQuery(id), new BasicUpdate(addToSet)); + private Map getStoredVariantAnnotations( + Map variantAnnotations) { + Map storedVariantAnnotations = new HashMap<>(); + BasicDBObject query = generateQueryForAnnotationInVariant(variantAnnotations.keySet().toArray(new String[]{})); + BasicDBObject projection = new BasicDBObject(ANNOTATION_FIELD, 1); + for (DBObject variantDocument : mongoOperations.getCollection(collection).find(query, projection)) { + final BasicDBList dbAnnotations = (BasicDBList) variantDocument.get(ANNOTATION_FIELD); + if (dbAnnotations != null && !dbAnnotations.isEmpty()) { + for (Object storedAnnotationDocument : dbAnnotations) { + VariantAnnotation storedAnnotation = convertToVariantAnnotation( + (DBObject) storedAnnotationDocument); + final String annotationId = getAnnotationId(variantDocument, storedAnnotation); + storedVariantAnnotations.put(annotationId, storedAnnotation); + } + } + } + return storedVariantAnnotations; } - private void bulkUpdate(BulkOperations bulkOperations, String variantId, VariantAnnotation value) { - BasicDBObject query = generateQueryForAnnotationInVariant(variantId); - - DBObject variantAnnotation = convertToMongo(value); - final BasicDBObject annotation = new BasicDBObject(ANNOTATION_IN_LIST, variantAnnotation); - BasicDBObject setAnnotation = new BasicDBObject(SET, annotation); - - bulkOperations.updateOne(new BasicQuery(query), new BasicUpdate(setAnnotation)); + private String getAnnotationId(DBObject object, VariantAnnotation storedAnnotation) { + return String.join("_", (String) object.get(ID), + storedAnnotation.getVepVersion(), + storedAnnotation.getVepCacheVersion()); } - private BasicDBObject generateQueryForAnnotationInVariant(String... 
variantIds) { + private BasicDBObject generateQueryForAnnotationInVariant(String... annotationIds) { BasicDBObject query = new BasicDBObject(); - if (variantIds.length == 1) { - query.append(ID, variantIds[0]); + if (annotationIds.length == 1) { + query.append(ID, getVariantId(annotationIds[0])); } else { - query.append(ID, new BasicDBObject(IN, variantIds)); + List ids = Arrays.stream(annotationIds).map(this::getVariantId).collect(toList()); + query.append(ID, new BasicDBObject(IN, ids)); } query.append(ANNOTATION_FIELD, createQueryMatchForVepAndCacheVersion()); return query; } + private String getVariantId(String annotationId) { + return annotationId.substring(0, annotationId.length() - vepVersion.length() - vepCacheVersion.length() - 2); + } + private BasicDBObject createQueryMatchForVepAndCacheVersion() { BasicDBObject annotationQuery = new BasicDBObject(); annotationQuery.append(VEP_VERSION_FIELD, vepVersion); @@ -145,26 +166,25 @@ private BasicDBObject createQueryMatchForVepAndCacheVersion() { return new BasicDBObject(ELEM_MATCH, annotationQuery); } - private Map getStoredVariantAnnotations(Map variantAnnotations) { - Map storedVariantAnnotations = new HashMap<>(); - BasicDBObject query = generateQueryForAnnotationInVariant(variantAnnotations.keySet().toArray(new String[]{})); - BasicDBObject projection = new BasicDBObject(ANNOTATION_FIELD, 1); + private VariantAnnotation convertToVariantAnnotation(DBObject dbAnnotation) { + return mongoOperations.getConverter().read(VariantAnnotation.class, dbAnnotation); + } - Iterator iterator = mongoOperations.getCollection(collection).find(query, projection).iterator(); - while (iterator.hasNext()) { - final DBObject object = iterator.next(); - final String variantId = (String) object.get(ID); - final BasicDBList dbAnnotations = (BasicDBList) object.get(ANNOTATION_FIELD); - if (dbAnnotations != null && !dbAnnotations.isEmpty()) { - final DBObject dbAnnotation = (DBObject) dbAnnotations.get(0); - 
storedVariantAnnotations.put(variantId, convertToVariantAnnotation(dbAnnotation)); - } - } - return storedVariantAnnotations; + private void bulkUpdate(BulkOperations bulkOperations, String annotationId, VariantAnnotation value) { + BasicDBObject query = generateQueryForAnnotationInVariant(annotationId); + + DBObject variantAnnotation = convertToMongo(value); + final BasicDBObject annotation = new BasicDBObject(ANNOTATION_IN_LIST, variantAnnotation); + BasicDBObject setAnnotation = new BasicDBObject(SET, annotation); + + bulkOperations.updateOne(new BasicQuery(query), new BasicUpdate(setAnnotation)); } - private VariantAnnotation convertToVariantAnnotation(DBObject dbAnnotation) { - return mongoOperations.getConverter().read(VariantAnnotation.class, dbAnnotation); + private void bulkAddToSet(BulkOperations bulkOperations, String annotationId, VariantAnnotation value) { + DBObject id = new BasicDBObject(ID, getVariantId(annotationId)); + DBObject variantAnnotation = convertToMongo(value); + BasicDBObject addToSet = new BasicDBObject(ADD_TO_SET, new BasicDBObject(ANNOTATION_FIELD, variantAnnotation)); + bulkOperations.updateOne(new BasicQuery(id), new BasicUpdate(addToSet)); } private DBObject convertToMongo(VariantAnnotation value) { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index 953c11c48..18e27095a 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -85,11 +85,11 @@ public class AnnotationInVariantMongoWriterTest { private AnnotationInVariantMongoWriter annotationInVariantMongoWriter; - private AnnotationLineMapper AnnotationLineMapper; + private AnnotationLineMapper annotationLineMapper; @Before public void setUp() throws Exception { - AnnotationLineMapper = new 
AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); + annotationLineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); } @Test @@ -104,15 +104,15 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { String[] vepOutputLines = vepOutputContentWithExtraFields.split("\n"); for (String annotLine : Arrays.copyOfRange(vepOutputLines, 0, 2)) { - annotationSet1.add(AnnotationLineMapper.mapLine(annotLine, 0)); + annotationSet1.add(annotationLineMapper.mapLine(annotLine, 0)); } for (String annotLine : Arrays.copyOfRange(vepOutputLines, 2, 4)) { - annotationSet2.add(AnnotationLineMapper.mapLine(annotLine, 0)); + annotationSet2.add(annotationLineMapper.mapLine(annotLine, 0)); } for (String annotLine : Arrays.copyOfRange(vepOutputLines, 4, 7)) { - annotationSet3.add(AnnotationLineMapper.mapLine(annotLine, 0)); + annotationSet3.add(annotationLineMapper.mapLine(annotLine, 0)); } // load the annotation @@ -132,8 +132,8 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { DBObject variant = cursor.next(); String id = (String) variant.get("_id"); - if (id.equals("20_63360_C_T")) { - BasicDBObject annotationField = (BasicDBObject) ((BasicDBList) (variant).get( + if (id.equals("20_63360_C_T_" + VEP_VERSION + "_" + VEP_CACHE_VERSION)) { + BasicDBObject annotationField = (BasicDBObject) ((BasicDBList) variant.get( VariantDocument.ANNOTATION_FIELD)).get(0); checkAnnotationFields(annotationField, @@ -144,8 +144,8 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { "ENST00000608838"))); } - if (id.equals("20_63399_G_A")) { - BasicDBObject annotationField = (BasicDBObject) ((BasicDBList) (variant).get( + if (id.equals("20_63399_G_A_" + VEP_VERSION + "_" + VEP_CACHE_VERSION)) { + BasicDBObject annotationField = (BasicDBObject) ((BasicDBList) variant.get( VariantDocument.ANNOTATION_FIELD)).get(0); checkAnnotationFields(annotationField, @@ -182,8 +182,8 @@ public void shouldUpdateFieldsOfExistingAnnotationVersion() throws Exception 
{ String[] vepOutputLines = vepOutputContentWithExtraFields.split("\n"); List annotations = new ArrayList<>(); - annotations.add(AnnotationLineMapper.mapLine(vepOutputLines[1], 0)); - annotations.add(AnnotationLineMapper.mapLine(vepOutputLines[2], 0)); + annotations.add(annotationLineMapper.mapLine(vepOutputLines[1], 0)); + annotations.add(annotationLineMapper.mapLine(vepOutputLines[2], 0)); // load the first annotation MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, @@ -191,18 +191,18 @@ public void shouldUpdateFieldsOfExistingAnnotationVersion() throws Exception { annotationInVariantMongoWriter = new AnnotationInVariantMongoWriter(operations, COLLECTION_VARIANTS_NAME, VEP_VERSION, VEP_CACHE_VERSION); - BasicDBObject annotationField = writeAndGetAnnotation(databaseName, annotations.get(0)); + BasicDBList annotationField = writeAndGetAnnotation(databaseName, annotations.get(0)); - checkAnnotationFields(annotationField, + checkAnnotationFields((BasicDBObject) annotationField.get(0), Arrays.asList(0.1, 0.1), Arrays.asList(0.1, 0.1), new TreeSet<>(Arrays.asList(1631)), new TreeSet<>(Arrays.asList("DEFB125", "ENSG00000178591", "ENST00000382410"))); // load the second annotation and check the information is updated (not overwritten) - BasicDBObject annotationFieldAfter = writeAndGetAnnotation(databaseName, annotations.get(1)); + BasicDBList annotationFieldAfter = writeAndGetAnnotation(databaseName, annotations.get(1)); - checkAnnotationFields(annotationFieldAfter, + checkAnnotationFields((BasicDBObject) annotationFieldAfter.get(0), Arrays.asList(0.1, 0.2), Arrays.asList(0.1, 0.2), new TreeSet<>(Arrays.asList(1631)), @@ -210,7 +210,7 @@ public void shouldUpdateFieldsOfExistingAnnotationVersion() throws Exception { "ENST00000608838"))); } - private BasicDBObject writeAndGetAnnotation(String databaseName, Annotation annotation) throws Exception { + private BasicDBList writeAndGetAnnotation(String databaseName, Annotation 
annotation) throws Exception { annotationInVariantMongoWriter.write(Collections.singletonList(annotation)); BasicDBObject query = new BasicDBObject(Annotation.START_FIELD, annotation.getStart()); @@ -220,7 +220,56 @@ private BasicDBObject writeAndGetAnnotation(String databaseName, Annotation anno DBObject variant = cursor.next(); assertFalse(cursor.hasNext()); - return (BasicDBObject) ((BasicDBList) variant.get(VariantDocument.ANNOTATION_FIELD)).get(0); + return ((BasicDBList) variant.get(VariantDocument.ANNOTATION_FIELD)); } + @Test + public void shouldAddAnnotationIfVersionIsNotPresent() throws Exception { + String differentVepVersion = "different_" + VEP_VERSION; + String differentVepCacheVersion = "different_" + VEP_CACHE_VERSION; + AnnotationLineMapper differentVersionAnnotationLineMapper = new AnnotationLineMapper( + differentVepVersion, differentVepCacheVersion); + + String databaseName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + + String[] vepOutputLines = vepOutputContentWithExtraFields.split("\n"); + + Annotation firstAnnotation = annotationLineMapper.mapLine(vepOutputLines[1], 0); + Annotation differentVersionAnnotation = differentVersionAnnotationLineMapper.mapLine(vepOutputLines[2], 0); + + // load the first annotation + MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + annotationInVariantMongoWriter = new AnnotationInVariantMongoWriter(operations, COLLECTION_VARIANTS_NAME, + VEP_VERSION, VEP_CACHE_VERSION); + + BasicDBList annotationField = writeAndGetAnnotation(databaseName, firstAnnotation); + + assertEquals(1, annotationField.size()); + checkAnnotationFields((BasicDBObject) annotationField.get(0), + Arrays.asList(0.1, 0.1), + Arrays.asList(0.1, 0.1), + new TreeSet<>(Arrays.asList(1631)), + new TreeSet<>(Arrays.asList("DEFB125", "ENSG00000178591", "ENST00000382410"))); + + // load the second annotation and check the information is added to the 
annotation array + annotationInVariantMongoWriter = new AnnotationInVariantMongoWriter(operations, COLLECTION_VARIANTS_NAME, + differentVepVersion, + differentVepCacheVersion); + BasicDBList annotationFieldAfter = writeAndGetAnnotation(databaseName, differentVersionAnnotation); + + assertEquals(2, annotationFieldAfter.size()); + checkAnnotationFields((BasicDBObject) annotationFieldAfter.get(0), + Arrays.asList(0.1, 0.1), + Arrays.asList(0.1, 0.1), + new TreeSet<>(Arrays.asList(1631)), + new TreeSet<>(Arrays.asList("DEFB125", "ENSG00000178591", "ENST00000382410"))); + + checkAnnotationFields((BasicDBObject) annotationFieldAfter.get(1), + Arrays.asList(0.2, 0.2), + Arrays.asList(0.2, 0.2), + new TreeSet<>(Arrays.asList(1631)), + new TreeSet<>(Arrays.asList("DEFB125", "ENSG00000178591", "ENST00000608838"))); + } } + From b34278f537bf506126f32d17c953fe05e2314e1f Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Wed, 12 Jul 2017 15:46:40 +0100 Subject: [PATCH 46/48] add test in AnnotMongoWriter with different annot version --- .../AnnotationInVariantMongoWriterTest.java | 2 +- .../io/writers/AnnotationMongoWriterTest.java | 47 +++++++++++++++---- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java index 18e27095a..644a06366 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationInVariantMongoWriterTest.java @@ -72,7 +72,7 @@ public class AnnotationInVariantMongoWriterTest { private static final String VEP_VERSION = "1"; - private static final String VEP_CACHE_VERSION = "1"; + private static final String VEP_CACHE_VERSION = "2"; @Autowired private MongoConnection mongoConnection; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java
b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java index 868393049..e7690471e 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/AnnotationMongoWriterTest.java @@ -80,7 +80,7 @@ public class AnnotationMongoWriterTest { private static final String VEP_VERSION = "1"; - private static final String VEP_CACHE_VERSION = "1"; + private static final String VEP_CACHE_VERSION = "2"; @Autowired private MongoConnection mongoConnection; @@ -93,11 +93,11 @@ public class AnnotationMongoWriterTest { private AnnotationMongoWriter annotationWriter; - private AnnotationLineMapper AnnotationLineMapper; + private AnnotationLineMapper annotationLineMapper; @Before public void setUp() throws Exception { - AnnotationLineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); + annotationLineMapper = new AnnotationLineMapper(VEP_VERSION, VEP_CACHE_VERSION); } @Test @@ -106,7 +106,7 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { List annotations = new ArrayList<>(); for (String annotLine : vepOutputContent.split("\n")) { - annotations.add(AnnotationLineMapper.mapLine(annotLine, 0)); + annotations.add(annotationLineMapper.mapLine(annotLine, 0)); } // load the annotation @@ -150,15 +150,15 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce String[] vepOutputLines = vepOutputContent.split("\n"); for (String annotLine : Arrays.copyOfRange(vepOutputLines, 0, 2)) { - annotationSet1.add(AnnotationLineMapper.mapLine(annotLine, 0)); + annotationSet1.add(annotationLineMapper.mapLine(annotLine, 0)); } for (String annotLine : Arrays.copyOfRange(vepOutputLines, 2, 4)) { - annotationSet2.add(AnnotationLineMapper.mapLine(annotLine, 0)); + annotationSet2.add(annotationLineMapper.mapLine(annotLine, 0)); } for (String annotLine : Arrays.copyOfRange(vepOutputLines, 4, 7)) { - 
annotationSet3.add(AnnotationLineMapper.mapLine(annotLine, 0)); + annotationSet3.add(annotationLineMapper.mapLine(annotLine, 0)); } // load the annotation @@ -252,7 +252,7 @@ public void shouldUpdateFieldsOfExistingAnnotationVersion() throws Exception { List annotations = new ArrayList<>(); for (String annotLine : vepOutputContent.split("\n")) { - annotations.add(AnnotationLineMapper.mapLine(annotLine, 0)); + annotations.add(annotationLineMapper.mapLine(annotLine, 0)); } // load the annotation @@ -274,6 +274,37 @@ public void shouldUpdateFieldsOfExistingAnnotationVersion() throws Exception { assertEquals(4, countXref(annotCollection.find())); } + @Test + public void shouldAddAnnotationIfAnnotationVersionIsNotPresent() throws Exception { + String differentVepVersion = "different_" + VEP_VERSION; + String differentVepCacheVersion = "different_" + VEP_CACHE_VERSION; + AnnotationLineMapper differentVersionAnnotationLineMapper = new AnnotationLineMapper(differentVepVersion, + differentVepCacheVersion); + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); + + String annotLine = vepOutputContent.split("\n")[1]; + Annotation firstVersionAnnotation = annotationLineMapper.mapLine(annotLine, 0); + Annotation secondVersionAnnotation = differentVersionAnnotationLineMapper.mapLine(annotLine, 0); + + // load the annotation + MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + annotationWriter = new AnnotationMongoWriter(operations, COLLECTION_ANNOTATIONS_NAME); + annotationWriter.write(Collections.singletonList(firstVersionAnnotation)); + + // check that consequence type was written in the annotation document + DBCollection annotCollection = mongoRule.getCollection(databaseName, COLLECTION_ANNOTATIONS_NAME); + assertEquals(1, annotCollection.count()); + assertEquals(1, countConsequenceType(annotCollection.find())); + assertEquals(3, countXref(annotCollection.find())); + + // check that 
a second annotation document was added for the different VEP version + annotationWriter.write(Collections.singletonList(secondVersionAnnotation)); + assertEquals(2, annotCollection.count()); + assertEquals(2, countConsequenceType(annotCollection.find())); + assertEquals(6, countXref(annotCollection.find())); + } + private int countConsequenceType(DBCursor cursor) { return getArrayCount(cursor, CONSEQUENCE_TYPE_FIELD); } From f1add617595f920ae06966c84f68e8a543dec95c Mon Sep 17 00:00:00 2001 From: jose miguel mut Date: Wed, 12 Jul 2017 15:53:08 +0100 Subject: [PATCH 47/48] update example .properties for annotate-variants-job --- examples/annotate-variants-job.properties | 10 ---------- examples/application.properties | 2 +- examples/load-aggregated-vcf.properties | 1 + examples/load-genotyped-vcf.properties | 1 + 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/examples/annotate-variants-job.properties b/examples/annotate-variants-job.properties index d800224dd..97906aa2d 100755 --- a/examples/annotate-variants-job.properties +++ b/examples/annotate-variants-job.properties @@ -2,20 +2,12 @@ spring.batch.job.names=annotate-variants-job # SUBMISSION FIELDS -input.vcf=/path/to/input.vcf input.vcf.id=1234 -input.vcf.aggregation=NONE - -input.study.name=Study name input.study.id=2345 -input.study.type=COLLECTION -input.pedigree= input.fasta=/path/to/homo_sapiens/sequence.fa -output.dir=/path/to/output_files output.dir.annotation=/path/to/annotation-files/ -output.dir.statistics=/path/to/statistics-files/ # VEP @@ -31,8 +23,6 @@ app.vep.timeout=600 config.chunk.size=1000 ## Skip steps -statistics.skip=false -annotation.skip=false annotation.overwrite=false # Database configuration diff --git a/examples/application.properties b/examples/application.properties index 48be2c657..b0745c35e 100644 --- a/examples/application.properties +++ b/examples/application.properties @@ -1,7 +1,7 @@ # EXECUTION PROFILE ## "production" to keep track of half-executed jobs using a job repository
database ## "test" to use an in-memory database that will record a single run -spring.profiles.active=production +spring.profiles.active=production,mongo # OpenCGA diff --git a/examples/load-aggregated-vcf.properties b/examples/load-aggregated-vcf.properties index e0301b663..4e571139b 100755 --- a/examples/load-aggregated-vcf.properties +++ b/examples/load-aggregated-vcf.properties @@ -31,6 +31,7 @@ config.chunk.size=1000 ## Skip steps statistics.skip=false annotation.skip=false +annotation.overwrite=false # Database configuration spring.data.mongodb.database= diff --git a/examples/load-genotyped-vcf.properties b/examples/load-genotyped-vcf.properties index bd3c4ea88..201cc8628 100755 --- a/examples/load-genotyped-vcf.properties +++ b/examples/load-genotyped-vcf.properties @@ -33,6 +33,7 @@ config.chunk.size=1000 ## Skip steps statistics.skip=false annotation.skip=false +annotation.overwrite=false # Database configuration spring.data.mongodb.database= From 2ab10e64618c3a8a4cb072228bc65a57ef26adb7 Mon Sep 17 00:00:00 2001 From: Cristina Yenyxe Gonzalez Garcia Date: Tue, 15 Aug 2017 10:32:43 +0100 Subject: [PATCH 48/48] Filter non-annotated variants by file ID in addition to study ID --- .../VariantsMongoReaderConfiguration.java | 1 + .../io/readers/VariantsMongoReader.java | 16 ++++- .../io/readers/VariantsMongoReaderTest.java | 69 +++++++++---------- 3 files changed, 48 insertions(+), 38 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java index 289574ab4..2d3159b02 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/io/readers/VariantsMongoReaderConfiguration.java @@ -48,6 +48,7 @@ public VariantsMongoReader variantsMongoReader(MongoOperations mongoOperations, 
annotationParameters.getVepVersion(), annotationParameters.getVepCacheVersion(), inputParameters.getStudyId(), + inputParameters.getVcfId(), excludeAnnotated); variantsMongoReader.setSaveState(false); return variantsMongoReader; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java index 69f567c5b..7f6fa17d7 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReader.java @@ -54,22 +54,34 @@ public class VariantsMongoReader private static final String STUDY_KEY = VariantDocument.FILES_FIELD + "." + VariantSourceEntryMongo.STUDYID_FIELD; + private static final String FILE_KEY = VariantDocument.FILES_FIELD + "." + VariantSourceEntryMongo.FILEID_FIELD; + /** * @param studyId Can be the empty string or null, meaning to bring all non-annotated variants in the collection. - * If the studyId string is not empty, bring only non-annotated variants from that study. + * If the studyId string is not empty, bring only non-annotated variants from that study. + * @param fileId File identifier that is only checked inside a study. If the study identifier is not defined, the + * file is ignored. This is mainly due to performance reasons. + * Can be the empty string or null, meaning to bring all non-annotated variants in a study. + * If the fileId string is not empty, bring only non-annotated variants from that study and file. * @param excludeAnnotated bring only non-annotated variants.
*/ public VariantsMongoReader(MongoOperations mongoOperations, String collectionVariantsName, String vepVersion, - String vepCacheVersion, String studyId, boolean excludeAnnotated) { + String vepCacheVersion, String studyId, String fileId, boolean excludeAnnotated) { setName(ClassUtils.getShortName(VariantsMongoReader.class)); delegateReader = new MongoDbCursorItemReader(); delegateReader.setTemplate(mongoOperations); delegateReader.setCollection(collectionVariantsName); BasicDBObjectBuilder queryBuilder = BasicDBObjectBuilder.start(); + if (studyId != null && !studyId.isEmpty()) { queryBuilder.add(STUDY_KEY, studyId); + + if (fileId != null && !fileId.isEmpty()) { + queryBuilder.add(FILE_KEY, fileId); + } } + if (excludeAnnotated) { BasicDBObject exists = new BasicDBObject("$exists", 1); BasicDBObject annotationSubdocument = new BasicDBObject(VariantAnnotation.SO_ACCESSION_FIELD, exists) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java index cd8291cba..9cc6909b3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VariantsMongoReaderTest.java @@ -55,6 +55,8 @@ public class VariantsMongoReaderTest { private static final String COLLECTION_VARIANTS_NAME = "variants"; + private static final int EXPECTED_NO_VARIANTS = 0; + private static final int EXPECTED_NON_ANNOTATED_VARIANTS_IN_STUDY = 1; private static final int EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB = 2; @@ -65,7 +67,9 @@ public class VariantsMongoReaderTest { private static final String STUDY_ID = "7"; - private static final String ALL_STUDIES = ""; + private static final String FILE_ID = "5"; + + private static final String ALL_IDS = ""; private static final String VEP_VERSION = "78"; @@ -82,61 +86,53 @@ public class VariantsMongoReaderTest { @Test public void 
shouldReadVariantsWithoutAnnotationFieldInAStudy() throws Exception { - checkNonAnnotatedVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_STUDY, STUDY_ID); + checkVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_STUDY, STUDY_ID, FILE_ID, true); } @Test public void shouldReadVariantsWithoutAnnotationFieldInAllStudies() throws Exception { - checkNonAnnotatedVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB, ALL_STUDIES); + checkVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB, ALL_IDS, ALL_IDS, true); } @Test public void shouldReadVariantsWithoutAnnotationFieldInAllStudiesWhenNoStudySpecified() throws Exception { - checkNonAnnotatedVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB, null); + checkVariantsRead(EXPECTED_NON_ANNOTATED_VARIANTS_IN_DB, null, null, true); } - private void checkNonAnnotatedVariantsRead(int expectedNonAnnotatedVariants, String study) throws Exception { - ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); - String databaseName = mongoRule.createDBAndInsertDocuments(COLLECTION_VARIANTS_NAME, Arrays.asList( - VariantData.getVariantWithAnnotation(), - VariantData.getVariantWithoutAnnotation(), - VariantData.getVariantWithoutAnnotationOtherStudy())); - - MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, - mongoMappingContext); - - boolean excludeAnnotated = true; - VariantsMongoReader mongoItemReader = new VariantsMongoReader( - mongoOperations, COLLECTION_VARIANTS_NAME, VEP_VERSION, VEP_CACHE_VERSION, study, excludeAnnotated); - mongoItemReader.open(executionContext); - - int itemCount = 0; - EnsemblVariant ensemblVariant; - while ((ensemblVariant = mongoItemReader.read()) != null) { - itemCount++; - assertFalse(ensemblVariant.getChr().isEmpty()); - assertNotEquals(0, ensemblVariant.getStart()); - } - assertEquals(expectedNonAnnotatedVariants, itemCount); - mongoItemReader.close(); + @Test + public void shouldReadVariantsInAStudy() throws 
Exception { + checkVariantsRead(EXPECTED_VARIANTS_IN_STUDY, STUDY_ID, FILE_ID, false); } @Test - public void shouldReadVariantsInAStudy() throws Exception { - checkAllVariantsRead(EXPECTED_VARIANTS_IN_STUDY, STUDY_ID); + public void shouldReadVariantsInAStudyWhenNoFileSpecified() throws Exception { + checkVariantsRead(EXPECTED_VARIANTS_IN_STUDY, STUDY_ID, null, false); } @Test public void shouldReadVariantsInAllStudies() throws Exception { - checkAllVariantsRead(EXPECTED_VARIANTS_IN_DB, ALL_STUDIES); + checkVariantsRead(EXPECTED_VARIANTS_IN_DB, ALL_IDS, ALL_IDS, false); } @Test public void shouldReadVariantsInAllStudiesWhenNoStudySpecified() throws Exception { - checkAllVariantsRead(EXPECTED_VARIANTS_IN_DB, null); + checkVariantsRead(EXPECTED_VARIANTS_IN_DB, null, null, false); + checkVariantsRead(EXPECTED_VARIANTS_IN_DB, null, FILE_ID, false); } - private void checkAllVariantsRead(int expectedVariants, String study) throws Exception { + @Test + public void shouldNotReadVariantsWhenStudyDoesNotExist() throws Exception { + checkVariantsRead(EXPECTED_NO_VARIANTS, "nonExistingStudy", null, false); + } + + @Test + public void shouldNotReadVariantsInAStudyWhenFileDoesNotExist() throws Exception { + checkVariantsRead(EXPECTED_NO_VARIANTS, STUDY_ID, "nonExistingFile", false); + } + + + private void checkVariantsRead(int expectedVariants, String study, String file, boolean excludeAnnotated) + throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); String databaseName = mongoRule.createDBAndInsertDocuments(COLLECTION_VARIANTS_NAME, Arrays.asList( VariantData.getVariantWithAnnotation(), @@ -144,11 +140,11 @@ private void checkAllVariantsRead(int expectedVariants, String study) throws Exc VariantData.getVariantWithoutAnnotationOtherStudy())); MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, - mongoMappingContext); + mongoMappingContext); - boolean 
excludeAnnotated = false; VariantsMongoReader mongoItemReader = new VariantsMongoReader( - mongoOperations, COLLECTION_VARIANTS_NAME, VEP_VERSION, VEP_CACHE_VERSION, study, excludeAnnotated); + mongoOperations, COLLECTION_VARIANTS_NAME, VEP_VERSION, VEP_CACHE_VERSION, study, file, + excludeAnnotated); mongoItemReader.open(executionContext); int itemCount = 0; @@ -161,4 +157,5 @@ private void checkAllVariantsRead(int expectedVariants, String study) throws Exc assertEquals(expectedVariants, itemCount); mongoItemReader.close(); } + }