From b106b29c6a5ad3c44290089a09ffe379f6ebeb75 Mon Sep 17 00:00:00 2001 From: sundarvenkata-ebi Date: Thu, 8 Jun 2023 16:51:19 +0100 Subject: [PATCH 1/2] Partial commits --- .../active/AccessionedVariantMongoReader.java | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReader.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReader.java index 3151ac166..ddce52cd1 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReader.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReader.java @@ -27,6 +27,11 @@ import org.slf4j.LoggerFactory; import org.springframework.batch.item.ExecutionContext; import org.springframework.batch.item.ItemStreamException; +import org.springframework.batch.item.ItemStreamReader; +import org.springframework.batch.item.NonTransientResourceException; +import org.springframework.batch.item.ParseException; +import org.springframework.batch.item.UnexpectedInputException; +import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.data.mongodb.core.aggregation.Aggregation; import uk.ac.ebi.eva.accession.release.batch.io.VariantMongoAggregationReader; @@ -56,17 +61,18 @@ import static uk.ac.ebi.eva.accession.core.model.ISubmittedVariant.DEFAULT_SUPPORTED_BY_EVIDENCE; import static uk.ac.ebi.eva.accession.core.model.ISubmittedVariant.DEFAULT_VALIDATED; -public class AccessionedVariantMongoReader extends VariantMongoAggregationReader { +public class AccessionedVariantMongoReader implements ItemStreamReader> { private static final Logger logger = LoggerFactory.getLogger(AccessionedVariantMongoReader.class); private static final List allSubmittedVariantCollectionNames = Arrays.asList("submittedVariantEntity", "dbsnpSubmittedVariantEntity"); + private VariantMongoAggregationReader reader; public AccessionedVariantMongoReader(String assemblyAccession, int taxonomyAccession, - MongoClient mongoClient, String database, int chunkSize, - CollectionNames names) { - super(assemblyAccession, taxonomyAccession, mongoClient, database, chunkSize, names); + MongoClient mongoClient, MongoTemplate mongoTemplate, String database, + int chunkSize, CollectionNames names) { + EVAO } @Override @@ -74,11 +80,21 @@ public void open(ExecutionContext executionContext) throws ItemStreamException { aggregate(names.getClusteredVariantEntity()); } + @Override + public void update(ExecutionContext executionContext) throws ItemStreamException { + + } + + @Override + public void close() throws ItemStreamException { + + } + protected List buildAggregation() { Bson match = Aggregates.match(eq(REFERENCE_ASSEMBLY_FIELD, assemblyAccession)); - Bson sort = Aggregates.sort(orderBy(ascending(CONTIG_FIELD, START_FIELD))); + //Bson sort = Aggregates.sort(orderBy(ascending(CONTIG_FIELD, START_FIELD))); Bson singlemap = Aggregates.match(Filters.not(exists(MAPPING_WEIGHT_FIELD))); - List aggregation = new ArrayList<>(Arrays.asList(match, sort, singlemap)); + List aggregation = new ArrayList<>(Arrays.asList(match, singlemap)); for (String submittedVariantCollectionName : allSubmittedVariantCollectionNames) { String lookupQuery = "{ $lookup: { " + @@ -147,4 +163,9 @@ protected List getVariants(Document clusteredVariant) { } return new ArrayList<>(variants.values()); } + + @Override + public List read() throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException { + return null; + } } From 7e7720c5ffcd855c0156fb56e6c12a1d2e9f16df Mon Sep 17 00:00:00 2001 From: sundarvenkata-ebi Date: Sun, 11 Jun 2023 06:55:13 +0100 Subject: [PATCH 2/2] Initial commit [skip ci] --- .../io/VariantMongoAggregationReader.java | 6 +- .../active/AccessionedVariantMongoReader.java | 203 +++++++++-------- .../batch/io/contig/ContigMongoReader.java | 19 -- .../release/batch/io/contig/ContigWriter.java | 10 - .../MultimapVariantContextWriter.java | 47 ---- .../multimap/MultimapVariantMongoReader.java | 70 ------ .../release/configuration/BeanNames.java | 24 -- ...sionedVariantMongoReaderConfiguration.java | 7 +- .../batch/io/ContigReaderConfiguration.java | 22 -- .../batch/io/ContigWriterConfiguration.java | 18 -- ...ltimapVariantMongoReaderConfiguration.java | 69 ------ .../io/VariantContextWriterConfiguration.java | 21 -- .../AccessionReleaseJobConfiguration.java | 20 +- ...reateMultimapReleaseStepConfiguration.java | 99 --------- .../steps/ListContigsStepConfiguration.java | 32 --- .../parameters/ReportPathResolver.java | 10 - .../AccessionedVariantMongoReaderTest.java | 86 +++---- .../io/contig/ContigMongoReaderTest.java | 13 -- .../DeprecatedVariantMongoReaderTest.java | 4 +- .../AccessionReleaseJobConfigurationTest.java | 30 +-- ...eMultimapReleaseStepConfigurationTest.java | 209 ------------------ ...tMultimapContigsStepConfigurationTest.java | 131 ----------- .../dbsnpClusteredVariantEntity.json | 2 +- 23 files changed, 167 insertions(+), 985 deletions(-) delete mode 100644 eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/multimap/MultimapVariantContextWriter.java delete mode 100644 eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/multimap/MultimapVariantMongoReader.java delete mode 100644 eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/MultimapVariantMongoReaderConfiguration.java delete mode 100644 eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/CreateMultimapReleaseStepConfiguration.java delete mode 100644 eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/CreateMultimapReleaseStepConfigurationTest.java delete mode 100644 eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/ListMultimapContigsStepConfigurationTest.java diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/VariantMongoAggregationReader.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/VariantMongoAggregationReader.java index d1f739cb3..d2fb9f1b7 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/VariantMongoAggregationReader.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/VariantMongoAggregationReader.java @@ -140,7 +140,7 @@ protected void aggregate(String collectionName) { abstract protected List buildAggregation(); @Override - public List read() throws UnexpectedInputException, ParseException, NonTransientResourceException { + public List read() throws Exception { return cursor.hasNext() ? getVariants(cursor.next()) : null; } @@ -241,4 +241,8 @@ private boolean isSameStart(long clusteredVariantStart, long submittedVariantSta private boolean isIndel(String type) { return type.equals(VariantType.INS.toString()) || type.equals(VariantType.DEL.toString()); } + + public int getChunkSize() { + return chunkSize; + } } diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReader.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReader.java index ddce52cd1..141cd81d7 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReader.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReader.java @@ -17,23 +17,23 @@ package uk.ac.ebi.eva.accession.release.batch.io.active; import com.mongodb.MongoClient; -import com.mongodb.client.model.Aggregates; -import com.mongodb.client.model.Field; -import com.mongodb.client.model.Filters; import org.bson.Document; import org.bson.conversions.Bson; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.batch.item.ExecutionContext; import org.springframework.batch.item.ItemStreamException; import org.springframework.batch.item.ItemStreamReader; -import org.springframework.batch.item.NonTransientResourceException; -import org.springframework.batch.item.ParseException; -import org.springframework.batch.item.UnexpectedInputException; import org.springframework.data.mongodb.core.MongoTemplate; -import org.springframework.data.mongodb.core.aggregation.Aggregation; - +import org.springframework.data.mongodb.core.query.Meta; +import org.springframework.data.mongodb.core.query.Query; +import uk.ac.ebi.ampt2d.commons.accession.persistence.mongodb.document.AccessionedDocument; + +import uk.ac.ebi.eva.accession.core.EVAObjectModelUtils; +import uk.ac.ebi.eva.accession.core.batch.io.MongoDbCursorItemReader; +import uk.ac.ebi.eva.accession.core.model.dbsnp.DbsnpClusteredVariantEntity; +import uk.ac.ebi.eva.accession.core.model.dbsnp.DbsnpSubmittedVariantEntity; +import uk.ac.ebi.eva.accession.core.model.eva.ClusteredVariantEntity; +import uk.ac.ebi.eva.accession.core.model.eva.SubmittedVariantEntity; import uk.ac.ebi.eva.accession.release.batch.io.VariantMongoAggregationReader; import uk.ac.ebi.eva.accession.release.collectionNames.CollectionNames; import uk.ac.ebi.eva.commons.core.models.VariantType; @@ -43,41 +43,47 @@ import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.stream.Collectors; -import static com.mongodb.client.model.Filters.eq; -import static com.mongodb.client.model.Filters.exists; -import static com.mongodb.client.model.Sorts.ascending; -import static com.mongodb.client.model.Sorts.orderBy; +import static org.springframework.data.mongodb.core.query.Criteria.where; +import static org.springframework.data.mongodb.core.query.Query.query; -import static uk.ac.ebi.eva.accession.core.model.ISubmittedVariant.DEFAULT_ALLELES_MATCH; -import static uk.ac.ebi.eva.accession.core.model.ISubmittedVariant.DEFAULT_ASSEMBLY_MATCH; -import static uk.ac.ebi.eva.accession.core.model.ISubmittedVariant.DEFAULT_SUPPORTED_BY_EVIDENCE; -import static uk.ac.ebi.eva.accession.core.model.ISubmittedVariant.DEFAULT_VALIDATED; -public class AccessionedVariantMongoReader implements ItemStreamReader> { +public class AccessionedVariantMongoReader extends VariantMongoAggregationReader + implements ItemStreamReader> { - private static final Logger logger = LoggerFactory.getLogger(AccessionedVariantMongoReader.class); + private MongoDbCursorItemReader cursorItemReader; - private static final List allSubmittedVariantCollectionNames = Arrays.asList("submittedVariantEntity", - "dbsnpSubmittedVariantEntity"); - private VariantMongoAggregationReader reader; + private MongoTemplate mongoTemplate; public AccessionedVariantMongoReader(String assemblyAccession, int taxonomyAccession, MongoClient mongoClient, MongoTemplate mongoTemplate, String database, int chunkSize, CollectionNames names) { - EVAO + super(assemblyAccession, taxonomyAccession, mongoClient, database, chunkSize, names); + this.mongoTemplate = mongoTemplate; + this.cursorItemReader = new MongoDbCursorItemReader<>(); + Class className = + names.getClusteredVariantEntity().equals("clusteredVariantEntity")? + ClusteredVariantEntity.class: DbsnpClusteredVariantEntity.class; + this.cursorItemReader.setTargetType(className); + this.cursorItemReader.setTemplate(mongoTemplate); + Query queryToGetClusteredVariants = query(where(REFERENCE_ASSEMBLY_FIELD) + .is(this.assemblyAccession) + .and(MAPPING_WEIGHT_FIELD).exists(false)); + Meta meta = new Meta(); + meta.addFlag(Meta.CursorOption.NO_TIMEOUT); + queryToGetClusteredVariants.setMeta(meta); + this.cursorItemReader.setQuery(queryToGetClusteredVariants); } @Override public void open(ExecutionContext executionContext) throws ItemStreamException { - aggregate(names.getClusteredVariantEntity()); + this.cursorItemReader.open(executionContext); } @Override @@ -87,73 +93,33 @@ public void update(ExecutionContext executionContext) throws ItemStreamException @Override public void close() throws ItemStreamException { - + this.cursorItemReader.close(); } - protected List buildAggregation() { - Bson match = Aggregates.match(eq(REFERENCE_ASSEMBLY_FIELD, assemblyAccession)); - //Bson sort = Aggregates.sort(orderBy(ascending(CONTIG_FIELD, START_FIELD))); - Bson singlemap = Aggregates.match(Filters.not(exists(MAPPING_WEIGHT_FIELD))); - List aggregation = new ArrayList<>(Arrays.asList(match, singlemap)); - - for (String submittedVariantCollectionName : allSubmittedVariantCollectionNames) { - String lookupQuery = "{ $lookup: { " + - String.format("from: \"%s\",", submittedVariantCollectionName) + - String.format("let: { rsAccession: \"$%s\" },", ACCESSION_FIELD) + - "pipeline: [{" + - "$match: {$expr: {$and: [" + - String.format("{ $eq: ['$%s', \"$$rsAccession\"]},", - CLUSTERED_VARIANT_ACCESSION_FIELD) + - String.format("{ $eq: [\"$%s\", \"%s\"]},", - REFERENCE_ASSEMBLY_FIELD_IN_SUBMITTED_COLLECTIONS, - this.assemblyAccession) + - String.format("{ $eq: [\"$%s\", %d]}]}}}],", - TAXONOMY_FIELD, this.taxonomyAccession) + - String.format("as: \"%s\"}}", submittedVariantCollectionName); - logger.info(lookupQuery); - Bson lookup = Aggregation.DEFAULT_CONTEXT.getMappedObject(Document.parse(lookupQuery)); - aggregation.add(lookup); - } - // Concat ss entries from all submitted variant collections - Bson concat = Aggregates.addFields(new Field<>(SS_INFO_FIELD, - new Document("$concatArrays", allSubmittedVariantCollectionNames - .stream().map(v -> "$" + v) - .collect(Collectors.toList())))); - // We only need the SS info field - aggregation.add(concat); - - Bson matchOnlyNonEmptySSInfo = Aggregates.match(Filters.ne(SS_INFO_FIELD, Collections.emptyList())); - aggregation.add(matchOnlyNonEmptySSInfo); - logger.info("Issuing aggregation: {}", aggregation); - return aggregation; - } - - protected List getVariants(Document clusteredVariant) { - String contig = clusteredVariant.getString(CONTIG_FIELD); - long start = clusteredVariant.getLong(START_FIELD); - long rs = clusteredVariant.getLong(ACCESSION_FIELD); - String type = clusteredVariant.getString(TYPE_FIELD); + protected List getVariants(ClusteredVariantEntity clusteredVariant, + List submittedVariants) { + String contig = clusteredVariant.getContig(); + long start = clusteredVariant.getStart(); + long rs = clusteredVariant.getAccession(); + String type = clusteredVariant.getType().toString(); String sequenceOntology = VariantTypeToSOAccessionMap.getSequenceOntologyAccession(VariantType.valueOf(type)); - boolean validated = clusteredVariant.getBoolean(VALIDATED_FIELD, DEFAULT_VALIDATED); + boolean validated = clusteredVariant.getModel().isValidated(); + boolean remappedRS = submittedVariants.stream().allMatch(sve -> Objects.nonNull(sve.getRemappedFrom())); Map variants = new HashMap<>(); - Collection submittedVariants = (Collection)clusteredVariant.get(SS_INFO_FIELD); - boolean remappedRS = submittedVariants.stream() - .allMatch(sve -> Objects.nonNull(sve.getString("remappedFrom"))); - - for (Document submittedVariant : submittedVariants) { - long submittedVariantStart = submittedVariant.getLong(START_FIELD); - String submittedVariantContig = submittedVariant.getString(CONTIG_FIELD); + for (SubmittedVariantEntity submittedVariant : submittedVariants) { + long submittedVariantStart = submittedVariant.getStart(); + String submittedVariantContig = submittedVariant.getContig(); if (!isSameLocation(contig, start, submittedVariantContig, submittedVariantStart, type)) { continue; } - String reference = submittedVariant.getString(REFERENCE_ALLELE_FIELD); - String alternate = submittedVariant.getString(ALTERNATE_ALLELE_FIELD); - String study = submittedVariant.getString(STUDY_FIELD); - boolean submittedVariantValidated = submittedVariant.getBoolean(VALIDATED_FIELD, DEFAULT_VALIDATED); - boolean allelesMatch = submittedVariant.getBoolean(ALLELES_MATCH_FIELD, DEFAULT_ALLELES_MATCH); - boolean assemblyMatch = submittedVariant.getBoolean(ASSEMBLY_MATCH_FIELD, DEFAULT_ASSEMBLY_MATCH); - boolean evidence = submittedVariant.getBoolean(SUPPORTED_BY_EVIDENCE_FIELD, DEFAULT_SUPPORTED_BY_EVIDENCE); + String reference = submittedVariant.getReferenceAllele(); + String alternate = submittedVariant.getAlternateAllele(); + String study = submittedVariant.getProjectAccession(); + boolean submittedVariantValidated = submittedVariant.getModel().isValidated(); + boolean allelesMatch = submittedVariant.getModel().isAllelesMatch(); + boolean assemblyMatch = submittedVariant.getModel().isAssemblyMatch(); + boolean evidence = submittedVariant.getModel().isSupportedByEvidence(); VariantSourceEntry sourceEntry = buildVariantSourceEntry(study, sequenceOntology, validated, submittedVariantValidated, allelesMatch, @@ -163,9 +129,72 @@ protected List getVariants(Document clusteredVariant) { } return new ArrayList<>(variants.values()); } + public Map> getAccessionKeyedCVERecords() throws Exception { + Map> rsHashKeyedCVE = new HashMap<>(); + for (int i = 0; i < this.getChunkSize(); i++) { + ClusteredVariantEntity cve = this.cursorItemReader.read(); + if (Objects.isNull(cve)) return rsHashKeyedCVE; + Long rsAccession = cve.getAccession(); + if (!rsHashKeyedCVE.containsKey(rsAccession)) { + rsHashKeyedCVE.put(rsAccession, new ArrayList<>()); + } + rsHashKeyedCVE.get(rsAccession).add(cve); + } + return rsHashKeyedCVE; + } + public Map> getCorrespondingSS( + Map> accessionKeyedCVEs) { + List correspondingSS = new ArrayList<>(); + Map> correspondingRSMap = new HashMap<>(); + Set rsAccessionsToFind = accessionKeyedCVEs.keySet(); + + for (Class className: Arrays.asList(DbsnpSubmittedVariantEntity.class, + SubmittedVariantEntity.class)) { + correspondingSS.addAll( + this.mongoTemplate.find(query(where(REFERENCE_ASSEMBLY_FIELD_IN_SUBMITTED_COLLECTIONS) + .is(this.assemblyAccession) + .and(TAXONOMY_FIELD).is(this.taxonomyAccession) + .and(CLUSTERED_VARIANT_ACCESSION_FIELD).in(rsAccessionsToFind) + .and(MAPPING_WEIGHT_FIELD).exists(false)), + SubmittedVariantEntity.class, this.mongoTemplate.getCollectionName(className))); + } + for (SubmittedVariantEntity sve: correspondingSS) { + Long rsAccession = sve.getClusteredVariantAccession(); + if (accessionKeyedCVEs.containsKey(rsAccession)) { + for (ClusteredVariantEntity correspondingCVE: accessionKeyedCVEs.get(rsAccession)) { + if (!correspondingRSMap.containsKey(correspondingCVE)) { + correspondingRSMap.put(correspondingCVE, new ArrayList<>()); + } + correspondingRSMap.get(correspondingCVE).add(sve); + } + } + } + return correspondingRSMap; + } + + @Override + public List read() throws Exception { + List variantList = new ArrayList<>(); + Map> accessionKeyedCVEs = getAccessionKeyedCVERecords(); + if (accessionKeyedCVEs.size() > 0) { + Map> results = getCorrespondingSS(accessionKeyedCVEs); + for (Map.Entry> entry : results.entrySet()) { + variantList.addAll(getVariants(entry.getKey(), entry.getValue())); + } + } + return variantList.size() > 0? variantList: null; + } + + // The following two overrides are necessary evils to minimize code changes + // since we still haven't gotten rid of the dependency on VariantAggregationMongoReader @Override - public List read() throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException { + protected List buildAggregation() { + return null; + } + + @Override + protected List getVariants(Document clusteredVariant) { return null; } } diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigMongoReader.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigMongoReader.java index 51dc2ccab..0ff37882a 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigMongoReader.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigMongoReader.java @@ -40,9 +40,6 @@ import java.util.Arrays; import java.util.List; -import static uk.ac.ebi.eva.accession.release.batch.io.multimap.MultimapVariantMongoReader.NON_SINGLE_LOCATION_MAPPING; -import static uk.ac.ebi.eva.accession.release.batch.io.multimap.MultimapVariantMongoReader.MAPPING_WEIGHT_FIELD; - public class ContigMongoReader implements ItemStreamReader { private static final Logger logger = LoggerFactory.getLogger(ContigMongoReader.class); @@ -89,13 +86,6 @@ public static ContigMongoReader mergedContigReader(String assemblyAccession, Mon buildAggregationForMergedContigs(assemblyAccession)); } - public static ContigMongoReader multimapContigReader(String assemblyAccession, MongoClient mongoClient, - String database, CollectionNames names) { - return new ContigMongoReader(assemblyAccession, mongoClient, database, - names.getClusteredVariantEntity(), - buildAggregationForMultimapContigs(assemblyAccession)); - } - private ContigMongoReader(String assemblyAccession, MongoClient mongoClient, String database, String collection, List aggregation) { this.assemblyAccession = assemblyAccession; @@ -143,15 +133,6 @@ private static List buildAggregationForMergedContigs(String assemblyAccess return aggregation; } - private static List buildAggregationForMultimapContigs(String assemblyAccession) { - Bson match = Aggregates.match(Filters.and(Filters.eq(ACTIVE_REFERENCE_ASSEMBLY_FIELD, assemblyAccession), - Filters.gte(MAPPING_WEIGHT_FIELD, NON_SINGLE_LOCATION_MAPPING))); - Bson uniqueContigs = Aggregates.group(ACTIVE_CONTIG_KEY); - List aggregation = Arrays.asList(match, uniqueContigs); - logger.info("Issuing aggregation: {}", aggregation); - return aggregation; - } - @Override public void open(ExecutionContext executionContext) throws ItemStreamException { aggregate(); diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigWriter.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigWriter.java index 853cee24d..b79575cf2 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigWriter.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigWriter.java @@ -41,8 +41,6 @@ public class ContigWriter implements ItemStreamWriter { private static final String MERGED_FILE_PREFIX = "merged_contigs_"; - private static final String MULTIMAP_FILE_PREFIX = "multimap_contigs_"; - private static final String EVA_PREFIX = "/eva_"; private static final String DBSNP_PREFIX = "/dbsnp_"; @@ -103,10 +101,6 @@ public static String getDbsnpMergedContigsFilePath(String outputFolder, String r return Paths.get(outputFolder) + (DBSNP_PREFIX + MERGED_FILE_PREFIX + referenceAssembly + FILE_EXTENSION); } - public static String getDbsnpMultimapContigsFilePath(String outputFolder, String referenceAssembly) { - return Paths.get(outputFolder) + (DBSNP_PREFIX + MULTIMAP_FILE_PREFIX + referenceAssembly + FILE_EXTENSION); - } - public static String getEvaActiveContigsFilePath(String outputFolder, String referenceAssembly) { return Paths.get(outputFolder) + (EVA_PREFIX + ACTIVE_FILE_PREFIX + referenceAssembly + FILE_EXTENSION); } @@ -114,8 +108,4 @@ public static String getEvaActiveContigsFilePath(String outputFolder, String ref public static String getEvaMergedContigsFilePath(String outputFolder, String referenceAssembly) { return Paths.get(outputFolder) + (EVA_PREFIX + MERGED_FILE_PREFIX + referenceAssembly + FILE_EXTENSION); } - - public static String getEvaMultimapContigsFilePath(String outputFolder, String referenceAssembly) { - return Paths.get(outputFolder) + (EVA_PREFIX + MULTIMAP_FILE_PREFIX + referenceAssembly + FILE_EXTENSION); - } } diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/multimap/MultimapVariantContextWriter.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/multimap/MultimapVariantContextWriter.java deleted file mode 100644 index 9693ab369..000000000 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/multimap/MultimapVariantContextWriter.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2020 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.accession.release.batch.io.multimap; - -import htsjdk.variant.vcf.VCFHeaderLine; -import htsjdk.variant.vcf.VCFHeaderLineType; -import htsjdk.variant.vcf.VCFInfoHeaderLine; - -import uk.ac.ebi.eva.accession.release.batch.io.active.VariantContextWriter; - -import java.nio.file.Path; -import java.util.Set; - -import static uk.ac.ebi.eva.accession.release.batch.io.VariantMongoAggregationReader.MAPPING_WEIGHT_KEY; - -public class MultimapVariantContextWriter extends VariantContextWriter { - - public static final String DBSNP_MAP_WEIGHT_DEFINITION_URL = - "https://www.ncbi.nlm.nih.gov/books/NBK44455/#Build.your_descriptions_of_mapweight_in"; - - public MultimapVariantContextWriter(Path outputPath, String referenceAssembly, String multimapContigsFilePath) { - super(outputPath, referenceAssembly, multimapContigsFilePath); - } - - @Override - protected Set buildHeaderLines() { - Set vcfHeaderLines = super.buildHeaderLines(); - vcfHeaderLines.add(new VCFInfoHeaderLine(MAPPING_WEIGHT_KEY, 1, VCFHeaderLineType.Integer, - "mapping weight as defined by dbSNP for database tables at " + - DBSNP_MAP_WEIGHT_DEFINITION_URL)); - return vcfHeaderLines; - } - -} diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/multimap/MultimapVariantMongoReader.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/multimap/MultimapVariantMongoReader.java deleted file mode 100644 index a7f7446f1..000000000 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/multimap/MultimapVariantMongoReader.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2019 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.accession.release.batch.io.multimap; - -import com.mongodb.MongoClient; -import com.mongodb.client.model.Aggregates; -import com.mongodb.client.model.Filters; -import org.bson.Document; -import org.bson.conversions.Bson; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import uk.ac.ebi.eva.accession.release.batch.io.active.AccessionedVariantMongoReader; -import uk.ac.ebi.eva.accession.release.collectionNames.CollectionNames; -import uk.ac.ebi.eva.commons.core.models.pipeline.Variant; - -import java.util.Arrays; -import java.util.List; - -import static com.mongodb.client.model.Sorts.ascending; -import static com.mongodb.client.model.Sorts.orderBy; - -public class MultimapVariantMongoReader extends AccessionedVariantMongoReader { - - private static final Logger logger = LoggerFactory.getLogger(MultimapVariantMongoReader.class); - - // see https://www.ncbi.nlm.nih.gov/books/NBK44455/#Build.your_descriptions_of_mapweight_in - public static final int NON_SINGLE_LOCATION_MAPPING = 2; - - public MultimapVariantMongoReader(String assemblyAccession, int taxonomyAccession, MongoClient mongoClient, - String database, int chunkSize, CollectionNames names) { - super(assemblyAccession, taxonomyAccession, mongoClient, database, chunkSize, names); - } - - @Override - protected List buildAggregation() { - Bson match = Aggregates.match(Filters.and(Filters.eq(REFERENCE_ASSEMBLY_FIELD, assemblyAccession), - Filters.eq(TAXONOMY_FIELD, taxonomyAccession), - Filters.gte(MAPPING_WEIGHT_FIELD, NON_SINGLE_LOCATION_MAPPING))); - Bson sort = Aggregates.sort(orderBy(ascending(CONTIG_FIELD, START_FIELD))); - Bson lookup = Aggregates.lookup(names.getSubmittedVariantEntity(), ACCESSION_FIELD, - CLUSTERED_VARIANT_ACCESSION_FIELD, SS_INFO_FIELD); - List aggregation = Arrays.asList(match, sort, lookup); - logger.info("Issuing aggregation: {}", aggregation); - return aggregation; - } - - @Override - protected List getVariants(Document clusteredVariant) { - List variants = super.getVariants(clusteredVariant); - for (Variant variant : variants) { - variant.getSourceEntries().iterator().next().addAttribute(MAPPING_WEIGHT_KEY, - clusteredVariant.get(MAPPING_WEIGHT_FIELD).toString()); - } - return variants; - } -} diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/BeanNames.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/BeanNames.java index 112cbec18..e063371ef 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/BeanNames.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/BeanNames.java @@ -42,8 +42,6 @@ public class BeanNames { public static final String RELEASE_DBSNP_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP = "RELEASE_DBSNP_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP"; - public static final String RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP = "RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP"; - public static final String RELEASE_DBSNP_MAPPED_DEPRECATED_VARIANTS_STEP = "RELEASE_DBSNP_MAPPED_DEPRECATED_VARIANTS_STEP"; @@ -55,14 +53,10 @@ public class BeanNames { public static final String DBSNP_MERGED_DEPRECATED_VARIANT_READER = "DBSNP_MERGED_DEPRECATED_VARIANT_READER"; - public static final String DBSNP_MULTIMAP_VARIANT_READER = "DBSNP_MULTIMAP_VARIANT_READER"; - public static final String DBSNP_RELEASE_WRITER = "DBSNP_RELEASE_WRITER"; public static final String DBSNP_MERGED_RELEASE_WRITER = "DBSNP_MERGED_RELEASE_WRITER"; - public static final String DBSNP_MULTIMAP_RELEASE_WRITER = "DBSNP_MULTIMAP_RELEASE_WRITER"; - public static final String DBSNP_MERGED_DEPRECATED_RELEASE_WRITER = "DBSNP_MERGED_DEPRECATED_RELEASE_WRITER"; public static final String DBSNP_DEPRECATED_RELEASE_WRITER = "DBSNP_DEPRECATED_RELEASE_WRITER"; @@ -71,20 +65,14 @@ public class BeanNames { public static final String LIST_DBSNP_MERGED_CONTIGS_STEP = "LIST_DBSNP_MERGED_CONTIGS_STEP"; - public static final String LIST_DBSNP_MULTIMAP_CONTIGS_STEP = "LIST_DBSNP_MULTIMAP_CONTIGS_STEP"; - public static final String DBSNP_ACTIVE_CONTIG_READER = "DBSNP_ACTIVE_CONTIG_READER"; public static final String DBSNP_MERGED_CONTIG_READER = "DBSNP_MERGED_CONTIG_READER"; - public static final String DBSNP_MULTIMAP_CONTIG_READER = "DBSNP_MULTIMAP_CONTIG_READER"; - public static final String DBSNP_ACTIVE_CONTIG_WRITER = "DBSNP_ACTIVE_CONTIG_WRITER"; public static final String DBSNP_MERGED_CONTIG_WRITER = "DBSNP_MERGED_CONTIG_WRITER"; - public static final String DBSNP_MULTIMAP_CONTIG_WRITER = "DBSNP_MULTIMAP_CONTIG_WRITER"; - // eva beans public static final String EVA_FLOW = "EVA_FLOW"; @@ -95,8 +83,6 @@ public class BeanNames { public static final String RELEASE_EVA_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP = "RELEASE_EVA_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP"; - public static final String RELEASE_EVA_MULTIMAP_VARIANTS_STEP = "RELEASE_EVA_MULTIMAP_VARIANTS_STEP"; - public static final String RELEASE_EVA_MAPPED_DEPRECATED_VARIANTS_STEP = "RELEASE_EVA_MAPPED_DEPRECATED_VARIANTS_STEP"; @@ -108,14 +94,10 @@ public class BeanNames { public static final String EVA_MERGED_DEPRECATED_VARIANT_READER = "EVA_MERGED_DEPRECATED_VARIANT_READER"; - public static final String EVA_MULTIMAP_VARIANT_READER = "EVA_MULTIMAP_VARIANT_READER"; - public static final String EVA_RELEASE_WRITER = "EVA_RELEASE_WRITER"; public static final String EVA_MERGED_RELEASE_WRITER = "EVA_MERGED_RELEASE_WRITER"; - public static final String EVA_MULTIMAP_RELEASE_WRITER = "EVA_MULTIMAP_RELEASE_WRITER"; - public static final String EVA_MERGED_DEPRECATED_RELEASE_WRITER = "EVA_MERGED_DEPRECATED_RELEASE_WRITER"; public static final String EVA_DEPRECATED_RELEASE_WRITER = "EVA_DEPRECATED_RELEASE_WRITER"; @@ -124,17 +106,11 @@ public class BeanNames { public static final String LIST_EVA_MERGED_CONTIGS_STEP = "LIST_EVA_MERGED_CONTIGS_STEP"; - public static final String LIST_EVA_MULTIMAP_CONTIGS_STEP = "LIST_EVA_MULTIMAP_CONTIGS_STEP"; - public static final String EVA_ACTIVE_CONTIG_READER = "EVA_ACTIVE_CONTIG_READER"; public static final String EVA_MERGED_CONTIG_READER = "EVA_MERGED_CONTIG_READER"; - public static final String EVA_MULTIMAP_CONTIG_READER = "EVA_MULTIMAP_CONTIG_READER"; - public static final String EVA_ACTIVE_CONTIG_WRITER = "EVA_ACTIVE_CONTIG_WRITER"; public static final String EVA_MERGED_CONTIG_WRITER = "EVA_MERGED_CONTIG_WRITER"; - - public static final String EVA_MULTIMAP_CONTIG_WRITER = "EVA_MULTIMAP_CONTIG_WRITER"; } diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/AccessionedVariantMongoReaderConfiguration.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/AccessionedVariantMongoReaderConfiguration.java index 91f805db0..d462ebdb6 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/AccessionedVariantMongoReaderConfiguration.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/AccessionedVariantMongoReaderConfiguration.java @@ -25,6 +25,7 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; +import org.springframework.data.mongodb.core.MongoTemplate; import uk.ac.ebi.eva.accession.core.configuration.nonhuman.MongoConfiguration; import uk.ac.ebi.eva.accession.release.batch.io.active.AccessionedVariantMongoReader; @@ -46,22 +47,24 @@ public class AccessionedVariantMongoReaderConfiguration { @Bean(DBSNP_ACCESSIONED_VARIANT_READER) @StepScope public ItemStreamReader unwindingReaderDbsnp(InputParameters parameters, MongoClient mongoClient, + MongoTemplate mongoTemplate, MongoProperties mongoProperties) { logger.info("Injecting Dbsnp AccessionedVariantMongoReader with parameters: {}", parameters.toJobParameters()); return new UnwindingItemStreamReader<>( new AccessionedVariantMongoReader(parameters.getAssemblyAccession(), parameters.getTaxonomyAccession(), - mongoClient, mongoProperties.getDatabase(), parameters.getChunkSize(), + mongoClient, mongoTemplate, mongoProperties.getDatabase(), parameters.getChunkSize(), new DbsnpCollectionNames())); } @Bean(EVA_ACCESSIONED_VARIANT_READER) @StepScope public ItemStreamReader unwindingReaderEva(InputParameters parameters, MongoClient mongoClient, + MongoTemplate mongoTemplate, MongoProperties mongoProperties) { logger.info("Injecting Eva AccessionedVariantMongoReader with parameters: {}", parameters.toJobParameters()); return new UnwindingItemStreamReader<>( new AccessionedVariantMongoReader(parameters.getAssemblyAccession(), parameters.getTaxonomyAccession(), - mongoClient, mongoProperties.getDatabase(), parameters.getChunkSize(), + mongoClient, mongoTemplate, mongoProperties.getDatabase(), parameters.getChunkSize(), new EvaCollectionNames())); } } diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/ContigReaderConfiguration.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/ContigReaderConfiguration.java index 3727f86b7..54fc4509e 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/ContigReaderConfiguration.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/ContigReaderConfiguration.java @@ -33,10 +33,8 @@ import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_ACTIVE_CONTIG_READER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MERGED_CONTIG_READER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MULTIMAP_CONTIG_READER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_ACTIVE_CONTIG_READER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MERGED_CONTIG_READER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MULTIMAP_CONTIG_READER; @Configuration @EnableConfigurationProperties({DbsnpDataSource.class}) @@ -64,16 +62,6 @@ ItemStreamReader mergedContigReaderDbsnp(InputParameters parameters, Mon mongoProperties.getDatabase(), new DbsnpCollectionNames()); } - @Bean(DBSNP_MULTIMAP_CONTIG_READER) - @StepScope - ItemStreamReader multimapContigReaderDbsnp(InputParameters parameters, MongoClient mongoClient, - MongoProperties mongoProperties) throws Exception { - logger.info("Injecting {} with parameters: {}, {}", Thread.currentThread().getStackTrace()[1].getMethodName(), - parameters.getAssemblyAccession(), mongoProperties.getDatabase()); - return ContigMongoReader.multimapContigReader(parameters.getAssemblyAccession(), mongoClient, - mongoProperties.getDatabase(), new DbsnpCollectionNames()); - } - @Bean(EVA_ACTIVE_CONTIG_READER) @StepScope ItemStreamReader activeContigReaderEva(InputParameters parameters, MongoClient mongoClient, @@ -93,14 +81,4 @@ ItemStreamReader mergedContigReaderEva(InputParameters parameters, Mongo return ContigMongoReader.mergedContigReader(parameters.getAssemblyAccession(), mongoClient, mongoProperties.getDatabase(), new EvaCollectionNames()); } - - @Bean(EVA_MULTIMAP_CONTIG_READER) - @StepScope - ItemStreamReader multimapContigReaderEva(InputParameters parameters, MongoClient mongoClient, - MongoProperties mongoProperties) throws Exception { - logger.info("Injecting {} with parameters: {}, {}", Thread.currentThread().getStackTrace()[1].getMethodName(), - parameters.getAssemblyAccession(), mongoProperties.getDatabase()); - return ContigMongoReader.multimapContigReader(parameters.getAssemblyAccession(), mongoClient, - mongoProperties.getDatabase(), new EvaCollectionNames()); - } } diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/ContigWriterConfiguration.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/ContigWriterConfiguration.java index aa2bb2315..03dd36994 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/ContigWriterConfiguration.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/ContigWriterConfiguration.java @@ -27,16 +27,12 @@ import static uk.ac.ebi.eva.accession.release.batch.io.contig.ContigWriter.getDbsnpActiveContigsFilePath; import static uk.ac.ebi.eva.accession.release.batch.io.contig.ContigWriter.getDbsnpMergedContigsFilePath; -import static uk.ac.ebi.eva.accession.release.batch.io.contig.ContigWriter.getDbsnpMultimapContigsFilePath; import static uk.ac.ebi.eva.accession.release.batch.io.contig.ContigWriter.getEvaActiveContigsFilePath; import static uk.ac.ebi.eva.accession.release.batch.io.contig.ContigWriter.getEvaMergedContigsFilePath; -import static uk.ac.ebi.eva.accession.release.batch.io.contig.ContigWriter.getEvaMultimapContigsFilePath; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_ACTIVE_CONTIG_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MERGED_CONTIG_WRITER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MULTIMAP_CONTIG_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_ACTIVE_CONTIG_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MERGED_CONTIG_WRITER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MULTIMAP_CONTIG_WRITER; @Configuration public class ContigWriterConfiguration { @@ -58,13 +54,6 @@ public ContigWriter mergedContigWriterDbsnp(InputParameters inputParameters) { contigMapping); } - @Bean(DBSNP_MULTIMAP_CONTIG_WRITER) - public ContigWriter multimapContigWriterDbsnp(InputParameters inputParameters) { - return new ContigWriter(new File(getDbsnpMultimapContigsFilePath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession())), - contigMapping); - } - @Bean(EVA_ACTIVE_CONTIG_WRITER) public ContigWriter activeContigWriterEva(InputParameters inputParameters) { return new ContigWriter(new File(getEvaActiveContigsFilePath(inputParameters.getOutputFolder(), @@ -78,11 +67,4 @@ public ContigWriter mergedContigWriterEva(InputParameters inputParameters) { inputParameters.getAssemblyAccession())), contigMapping); } - - @Bean(EVA_MULTIMAP_CONTIG_WRITER) - public ContigWriter multimapContigWriterEva(InputParameters inputParameters) { - return new ContigWriter(new File(getEvaMultimapContigsFilePath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession())), - contigMapping); - } } diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/MultimapVariantMongoReaderConfiguration.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/MultimapVariantMongoReaderConfiguration.java deleted file mode 100644 index b5540ff11..000000000 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/MultimapVariantMongoReaderConfiguration.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright 2020 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package uk.ac.ebi.eva.accession.release.configuration.batch.io; - -import com.mongodb.MongoClient; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.batch.core.configuration.annotation.StepScope; -import org.springframework.batch.item.ItemStreamReader; -import org.springframework.boot.autoconfigure.mongo.MongoProperties; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Import; - -import uk.ac.ebi.eva.accession.core.configuration.nonhuman.MongoConfiguration; -import uk.ac.ebi.eva.accession.release.batch.io.multimap.MultimapVariantMongoReader; -import uk.ac.ebi.eva.accession.release.collectionNames.DbsnpCollectionNames; -import uk.ac.ebi.eva.accession.release.collectionNames.EvaCollectionNames; -import uk.ac.ebi.eva.accession.release.parameters.InputParameters; -import uk.ac.ebi.eva.commons.batch.io.UnwindingItemStreamReader; -import uk.ac.ebi.eva.commons.core.models.pipeline.Variant; - -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MULTIMAP_VARIANT_READER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MULTIMAP_VARIANT_READER; - -@Configuration -@Import({MongoConfiguration.class}) -public class MultimapVariantMongoReaderConfiguration { - - private static final Logger logger = LoggerFactory.getLogger(AccessionedVariantMongoReaderConfiguration.class); - - @Bean(DBSNP_MULTIMAP_VARIANT_READER) - @StepScope - public ItemStreamReader unwindingReaderDbsnp(InputParameters parameters, MongoClient mongoClient, - MongoProperties mongoProperties) { - logger.info("Injecting Dbsnp MultimapVariantMongoReader with parameters: {}", parameters); - return new UnwindingItemStreamReader<>( - new MultimapVariantMongoReader(parameters.getAssemblyAccession(), - parameters.getTaxonomyAccession(), mongoClient, - mongoProperties.getDatabase(), parameters.getChunkSize(), - new DbsnpCollectionNames())); - } - - @Bean(EVA_MULTIMAP_VARIANT_READER) - @StepScope - public ItemStreamReader unwindingReaderEva(InputParameters parameters, MongoClient mongoClient, - MongoProperties mongoProperties) { - logger.info("Injecting Eva MultimapVariantMongoReader with parameters: {}", parameters); - return new UnwindingItemStreamReader<>( - new MultimapVariantMongoReader(parameters.getAssemblyAccession(), - parameters.getTaxonomyAccession(), mongoClient, - mongoProperties.getDatabase(), parameters.getChunkSize(), - new EvaCollectionNames())); - } -} diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/VariantContextWriterConfiguration.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/VariantContextWriterConfiguration.java index 6437e0119..56f11a819 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/VariantContextWriterConfiguration.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/io/VariantContextWriterConfiguration.java @@ -27,7 +27,6 @@ import uk.ac.ebi.eva.accession.release.batch.io.ReleaseRecordWriter; import uk.ac.ebi.eva.accession.release.batch.io.contig.ContigWriter; import uk.ac.ebi.eva.accession.release.batch.io.merged.MergedVariantContextWriter; -import uk.ac.ebi.eva.accession.release.batch.io.multimap.MultimapVariantContextWriter; import uk.ac.ebi.eva.accession.release.batch.io.active.VariantContextWriter; import uk.ac.ebi.eva.accession.release.batch.processors.ContextNucleotideAdditionProcessor; import uk.ac.ebi.eva.accession.release.parameters.InputParameters; @@ -36,10 +35,8 @@ import java.nio.file.Path; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MERGED_RELEASE_WRITER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MULTIMAP_RELEASE_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_RELEASE_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MERGED_RELEASE_WRITER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MULTIMAP_RELEASE_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_RELEASE_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.INCREMENTAL_RELEASE_WRITER; @@ -64,15 +61,6 @@ public MergedVariantContextWriter mergedVariantContextWriter(InputParameters par return new MergedVariantContextWriter(reportPath, parameters.getAssemblyAccession(), mergedContigsFilePath); } - @Bean(DBSNP_MULTIMAP_RELEASE_WRITER) - public MultimapVariantContextWriter multimapVariantContextWriter(InputParameters parameters) { - Path reportPath = ReportPathResolver.getDbsnpMultimapIdsReportPath(parameters.getOutputFolder(), - parameters.getAssemblyAccession()); - String activeContigsFilePath = ContigWriter.getDbsnpMultimapContigsFilePath(reportPath.toFile().getParent(), - parameters.getAssemblyAccession()); - return new MultimapVariantContextWriter(reportPath, parameters.getAssemblyAccession(), activeContigsFilePath); - } - @Bean(EVA_RELEASE_WRITER) public VariantContextWriter variantContextWriterEva(InputParameters parameters) { Path reportPath = ReportPathResolver.getEvaCurrentIdsReportPath(parameters.getOutputFolder(), @@ -91,15 +79,6 @@ public MergedVariantContextWriter mergedVariantContextWriterEva(InputParameters return new MergedVariantContextWriter(reportPath, parameters.getAssemblyAccession(), mergedContigsFilePath); } - @Bean(EVA_MULTIMAP_RELEASE_WRITER) - public MultimapVariantContextWriter multimapVariantContextWriterEva(InputParameters parameters) { - Path reportPath = ReportPathResolver.getEvaMultimapIdsReportPath(parameters.getOutputFolder(), - parameters.getAssemblyAccession()); - String activeContigsFilePath = ContigWriter.getEvaMultimapContigsFilePath(reportPath.toFile().getParent(), - parameters.getAssemblyAccession()); - return new MultimapVariantContextWriter(reportPath, parameters.getAssemblyAccession(), activeContigsFilePath); - } - @Bean(INCREMENTAL_RELEASE_WRITER) public ReleaseRecordWriter incrementalReleaseWriter(MongoOperations mongoOperations, SubmittedVariantAccessioningRepository submittedVariantAccessioningRepository, diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/jobs/AccessionReleaseJobConfiguration.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/jobs/AccessionReleaseJobConfiguration.java index c795a8cb6..55cddd23d 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/jobs/AccessionReleaseJobConfiguration.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/jobs/AccessionReleaseJobConfiguration.java @@ -34,7 +34,6 @@ import uk.ac.ebi.eva.accession.release.configuration.batch.steps.CreateDeprecatedReleaseStepConfiguration; import uk.ac.ebi.eva.accession.release.configuration.batch.steps.CreateMergedDeprecatedReleaseStepConfiguration; import uk.ac.ebi.eva.accession.release.configuration.batch.steps.CreateMergedReleaseStepConfiguration; -import uk.ac.ebi.eva.accession.release.configuration.batch.steps.CreateMultimapReleaseStepConfiguration; import uk.ac.ebi.eva.accession.release.configuration.batch.steps.CreateReleaseStepConfiguration; import uk.ac.ebi.eva.accession.release.configuration.batch.steps.ListContigsStepConfiguration; @@ -43,20 +42,16 @@ import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_FLOW; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_ACTIVE_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_MERGED_CONTIGS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_MULTIMAP_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_ACTIVE_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_MERGED_CONTIGS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_MULTIMAP_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MAPPED_ACTIVE_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MAPPED_DEPRECATED_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MAPPED_MERGED_VARIANTS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MAPPED_ACTIVE_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MAPPED_DEPRECATED_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MAPPED_MERGED_VARIANTS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MULTIMAP_VARIANTS_STEP; @Configuration @EnableBatchProcessing @@ -64,8 +59,7 @@ CreateReleaseStepConfiguration.class, CreateDeprecatedReleaseStepConfiguration.class, CreateMergedDeprecatedReleaseStepConfiguration.class, - CreateMergedReleaseStepConfiguration.class, - CreateMultimapReleaseStepConfiguration.class}) + CreateMergedReleaseStepConfiguration.class}) public class AccessionReleaseJobConfiguration { /** @@ -89,21 +83,17 @@ public Job accessionReleaseJob(JobBuilderFactory jobBuilderFactory, public Flow dbsnpFlow( @Qualifier(LIST_DBSNP_ACTIVE_CONTIGS_STEP) Step listActiveContigsStep, @Qualifier(LIST_DBSNP_MERGED_CONTIGS_STEP) Step listMergedContigsStep, - @Qualifier(LIST_DBSNP_MULTIMAP_CONTIGS_STEP) Step listMultimapContigsStep, @Qualifier(RELEASE_DBSNP_MAPPED_ACTIVE_VARIANTS_STEP) Step createReleaseStep, @Qualifier(RELEASE_DBSNP_MAPPED_MERGED_VARIANTS_STEP) Step createMergedReleaseStep, @Qualifier(RELEASE_DBSNP_MAPPED_DEPRECATED_VARIANTS_STEP) Step createDeprecatedReleaseStep, - @Qualifier(RELEASE_DBSNP_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP) Step createMergedDeprecatedReleaseStep, - @Qualifier(RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP) Step createMultimapReleaseStep) { + @Qualifier(RELEASE_DBSNP_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP) Step createMergedDeprecatedReleaseStep) { return new FlowBuilder(DBSNP_FLOW) .start(listActiveContigsStep) .next(listMergedContigsStep) - .next(listMultimapContigsStep) .next(createReleaseStep) .next(createMergedReleaseStep) .next(createDeprecatedReleaseStep) .next(createMergedDeprecatedReleaseStep) - .next(createMultimapReleaseStep) .build(); } @@ -111,21 +101,17 @@ public Flow dbsnpFlow( public Flow evaFlow( @Qualifier(LIST_EVA_ACTIVE_CONTIGS_STEP) Step listActiveContigsStep, @Qualifier(LIST_EVA_MERGED_CONTIGS_STEP) Step listMergedContigsStep, - @Qualifier(LIST_EVA_MULTIMAP_CONTIGS_STEP) Step listMultimapContigsStep, @Qualifier(RELEASE_EVA_MAPPED_ACTIVE_VARIANTS_STEP) Step createReleaseStep, @Qualifier(RELEASE_EVA_MAPPED_MERGED_VARIANTS_STEP) Step createMergedReleaseStep, @Qualifier(RELEASE_EVA_MAPPED_DEPRECATED_VARIANTS_STEP) Step createDeprecatedReleaseStep, - @Qualifier(RELEASE_EVA_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP) Step createMergedDeprecatedReleaseStep, - @Qualifier(RELEASE_EVA_MULTIMAP_VARIANTS_STEP) Step createMultimapReleaseStep) { + @Qualifier(RELEASE_EVA_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP) Step createMergedDeprecatedReleaseStep) { return new FlowBuilder(EVA_FLOW) .start(listActiveContigsStep) .next(listMergedContigsStep) - .next(listMultimapContigsStep) .next(createReleaseStep) .next(createMergedReleaseStep) .next(createDeprecatedReleaseStep) .next(createMergedDeprecatedReleaseStep) - .next(createMultimapReleaseStep) .build(); } } diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/CreateMultimapReleaseStepConfiguration.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/CreateMultimapReleaseStepConfiguration.java deleted file mode 100644 index 060c814fa..000000000 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/CreateMultimapReleaseStepConfiguration.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright 2019 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.accession.release.configuration.batch.steps; - -import htsjdk.variant.variantcontext.VariantContext; -import org.springframework.batch.core.Step; -import org.springframework.batch.core.StepExecutionListener; -import org.springframework.batch.core.configuration.annotation.StepBuilderFactory; -import org.springframework.batch.core.step.tasklet.TaskletStep; -import org.springframework.batch.item.ItemProcessor; -import org.springframework.batch.item.ItemReader; -import org.springframework.batch.item.ItemStreamWriter; -import org.springframework.batch.repeat.policy.SimpleCompletionPolicy; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Import; - -import uk.ac.ebi.eva.accession.release.configuration.batch.io.MultimapVariantMongoReaderConfiguration; -import uk.ac.ebi.eva.accession.release.configuration.batch.io.VariantContextWriterConfiguration; -import uk.ac.ebi.eva.accession.release.configuration.batch.listeners.ListenersConfiguration; -import uk.ac.ebi.eva.accession.release.configuration.batch.processors.ReleaseProcessorConfiguration; -import uk.ac.ebi.eva.commons.core.models.pipeline.Variant; - -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MULTIMAP_RELEASE_WRITER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MULTIMAP_VARIANT_READER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EXCLUDE_VARIANTS_LISTENER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MULTIMAP_RELEASE_WRITER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MULTIMAP_VARIANT_READER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.PROGRESS_LISTENER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MULTIMAP_VARIANTS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_PROCESSOR; - -@Configuration -@Import({MultimapVariantMongoReaderConfiguration.class, - ReleaseProcessorConfiguration.class, - VariantContextWriterConfiguration.class, - ListenersConfiguration.class}) -public class CreateMultimapReleaseStepConfiguration { - - @Autowired - @Qualifier(PROGRESS_LISTENER) - private StepExecutionListener progressListener; - - @Autowired - @Qualifier(EXCLUDE_VARIANTS_LISTENER) - private StepExecutionListener excludeVariantsListener; - - @Bean(RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP) - public Step createMultimapReleaseStepDbsnp( - StepBuilderFactory stepBuilderFactory, - SimpleCompletionPolicy chunkSizeCompletionPolicy, - @Qualifier(DBSNP_MULTIMAP_VARIANT_READER) ItemReader variantReader, - @Qualifier(RELEASE_PROCESSOR) ItemProcessor variantProcessor, - @Qualifier(DBSNP_MULTIMAP_RELEASE_WRITER) ItemStreamWriter accessionWriter) { - TaskletStep step = stepBuilderFactory.get(RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP) - .chunk(chunkSizeCompletionPolicy) - .reader(variantReader) - .processor(variantProcessor) - .writer(accessionWriter) - .listener(excludeVariantsListener) - .listener(progressListener) - .build(); - return step; - } - - @Bean(RELEASE_EVA_MULTIMAP_VARIANTS_STEP) - public Step createMultimapReleaseStepEva( - StepBuilderFactory stepBuilderFactory, - SimpleCompletionPolicy chunkSizeCompletionPolicy, - @Qualifier(EVA_MULTIMAP_VARIANT_READER) ItemReader variantReader, - @Qualifier(RELEASE_PROCESSOR) ItemProcessor variantProcessor, - @Qualifier(EVA_MULTIMAP_RELEASE_WRITER) ItemStreamWriter accessionWriter) { - TaskletStep step = stepBuilderFactory.get(RELEASE_EVA_MULTIMAP_VARIANTS_STEP) - .chunk(chunkSizeCompletionPolicy) - .reader(variantReader) - .processor(variantProcessor) - .writer(accessionWriter) - .listener(excludeVariantsListener) - .listener(progressListener) - .build(); - return step; - } -} diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/ListContigsStepConfiguration.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/ListContigsStepConfiguration.java index 689289df7..29f15caa1 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/ListContigsStepConfiguration.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/ListContigsStepConfiguration.java @@ -37,18 +37,12 @@ import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_ACTIVE_CONTIG_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MERGED_CONTIG_READER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MERGED_CONTIG_WRITER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MULTIMAP_CONTIG_READER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.EVA_MULTIMAP_CONTIG_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_ACTIVE_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_MERGED_CONTIGS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_MULTIMAP_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MERGED_CONTIG_READER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MERGED_CONTIG_WRITER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MULTIMAP_CONTIG_READER; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.DBSNP_MULTIMAP_CONTIG_WRITER; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_ACTIVE_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_MERGED_CONTIGS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_MULTIMAP_CONTIGS_STEP; /** * Creates a file with the contigs in INSDC (GenBank) when possible. The file will be used in @@ -86,19 +80,6 @@ public Step mergedContigsStepDbsnp( return step; } - @Bean(LIST_DBSNP_MULTIMAP_CONTIGS_STEP) - public Step multimapContigsStepDbsnp( - StepBuilderFactory stepBuilderFactory, SimpleCompletionPolicy chunkSizeCompletionPolicy, - @Qualifier(DBSNP_MULTIMAP_CONTIG_READER) ItemStreamReader multimapContigReader, - @Qualifier(DBSNP_MULTIMAP_CONTIG_WRITER) ItemStreamWriter multimapContigWriter) { - TaskletStep step = stepBuilderFactory.get(LIST_DBSNP_MULTIMAP_CONTIGS_STEP) - .chunk(chunkSizeCompletionPolicy) - .reader(multimapContigReader) - .writer(multimapContigWriter) - .build(); - return step; - } - @Bean(LIST_EVA_ACTIVE_CONTIGS_STEP) public Step activeContigsStepEva( StepBuilderFactory stepBuilderFactory, SimpleCompletionPolicy chunkSizeCompletionPolicy, @@ -124,17 +105,4 @@ public Step mergedContigsStepEva( .build(); return step; } - - @Bean(LIST_EVA_MULTIMAP_CONTIGS_STEP) - public Step multimapContigsStepEva( - StepBuilderFactory stepBuilderFactory, SimpleCompletionPolicy chunkSizeCompletionPolicy, - @Qualifier(EVA_MULTIMAP_CONTIG_READER) ItemStreamReader multimapContigReader, - @Qualifier(EVA_MULTIMAP_CONTIG_WRITER) ItemStreamWriter multimapContigWriter) { - TaskletStep step = stepBuilderFactory.get(LIST_EVA_MULTIMAP_CONTIGS_STEP) - .chunk(chunkSizeCompletionPolicy) - .reader(multimapContigReader) - .writer(multimapContigWriter) - .build(); - return step; - } } diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/parameters/ReportPathResolver.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/parameters/ReportPathResolver.java index a3825665d..ad34f9521 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/parameters/ReportPathResolver.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/parameters/ReportPathResolver.java @@ -32,8 +32,6 @@ public class ReportPathResolver { public static final String MERGED_DEPRECATED_FILE_SUFFIX = "_merged_deprecated_ids.unsorted.txt"; - public static final String MULTIMAP_FILE_SUFFIX = "_multimap_ids.vcf"; - public static final String DBSNP_PREFIX = "dbsnp_"; public static final String EVA_PREFIX = "eva_"; @@ -55,10 +53,6 @@ public static Path getDbsnpMergedDeprecatedIdsReportPath(String outputFolder, St return Paths.get(outputFolder).resolve(DBSNP_PREFIX + referenceAssembly + MERGED_DEPRECATED_FILE_SUFFIX); } - public static Path getDbsnpMultimapIdsReportPath(String outputFolder, String referenceAssembly) { - return Paths.get(outputFolder).resolve(DBSNP_PREFIX + referenceAssembly + MULTIMAP_FILE_SUFFIX); - } - public static Path getEvaCurrentIdsReportPath(String outputFolder, String referenceAssembly) { return Paths.get(outputFolder).resolve(EVA_PREFIX + referenceAssembly + CURRENT_FILE_SUFFIX); } @@ -74,8 +68,4 @@ public static Path getEvaDeprecatedIdsReportPath(String outputFolder, String ref public static Path getEvaMergedDeprecatedIdsReportPath(String outputFolder, String referenceAssembly) { return Paths.get(outputFolder).resolve(EVA_PREFIX + referenceAssembly + MERGED_DEPRECATED_FILE_SUFFIX); } - - public static Path getEvaMultimapIdsReportPath(String outputFolder, String referenceAssembly) { - return Paths.get(outputFolder).resolve(EVA_PREFIX + referenceAssembly + MULTIMAP_FILE_SUFFIX); - } } diff --git a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReaderTest.java b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReaderTest.java index fce15442d..e0bfe2e1c 100644 --- a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReaderTest.java +++ b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/active/AccessionedVariantMongoReaderTest.java @@ -19,11 +19,7 @@ import com.lordofthejars.nosqlunit.mongodb.MongoDbConfigurationBuilder; import com.lordofthejars.nosqlunit.mongodb.MongoDbRule; import com.mongodb.MongoClient; -import com.mongodb.client.AggregateIterable; -import com.mongodb.client.MongoCollection; -import com.mongodb.client.MongoCursor; import com.mongodb.client.MongoDatabase; -import org.bson.Document; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -31,12 +27,14 @@ import org.springframework.batch.item.ExecutionContext; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; +import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; import uk.ac.ebi.eva.accession.core.configuration.nonhuman.MongoConfiguration; import uk.ac.ebi.eva.accession.release.collectionNames.DbsnpCollectionNames; +import uk.ac.ebi.eva.accession.release.collectionNames.EvaCollectionNames; import uk.ac.ebi.eva.accession.release.test.configuration.MongoTestConfiguration; import uk.ac.ebi.eva.accession.release.test.rule.FixSpringMongoDbRule; import uk.ac.ebi.eva.commons.core.models.pipeline.Variant; @@ -119,13 +117,16 @@ public class AccessionedVariantMongoReaderTest { private static final int CHUNK_SIZE = 5; - private AccessionedVariantMongoReader reader; + private AccessionedVariantMongoReader reader1, reader2; private ExecutionContext executionContext; @Autowired private MongoClient mongoClient; + @Autowired + private MongoTemplate mongoTemplate; + //Required by nosql-unit @Autowired private ApplicationContext applicationContext; @@ -137,29 +138,10 @@ public class AccessionedVariantMongoReaderTest { @Before public void setUp() throws Exception { executionContext = new ExecutionContext(); - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_1, TAXONOMY_1, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_1, TAXONOMY_1, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); } - @Test - public void readTestDataMongo() { - MongoDatabase db = mongoClient.getDatabase(TEST_DB); - MongoCollection collection = db.getCollection(DBSNP_CLUSTERED_VARIANT_ENTITY); - - AggregateIterable result = collection.aggregate(reader.buildAggregation()) - .allowDiskUse(true) - .useCursor(true); - - MongoCursor cursor = result.iterator(); - - List variants = new ArrayList<>(); - while (cursor.hasNext()) { - Document clusteredVariant = cursor.next(); - variants.addAll(reader.getVariants(clusteredVariant)); - } - assertEquals(EXPECTED_LINES, variants.size()); - } - @Test public void reader() throws Exception { List variants = readIntoList(); @@ -167,13 +149,13 @@ public void reader() throws Exception { } private List readIntoList() throws Exception { - reader.open(executionContext); + reader1.open(executionContext); List allVariants = new ArrayList<>(); List variants; - while ((variants = reader.read()) != null) { + while ((variants = reader1.read()) != null) { allVariants.addAll(variants); } - reader.close(); + reader1.close(); return allVariants; } @@ -189,15 +171,15 @@ public void linkedSubmittedVariants() throws Exception { } private Map readIntoMap() throws Exception { - reader.open(executionContext); + reader1.open(executionContext); Map allVariants = new HashMap<>(); List variants; - while ((variants = reader.read()) != null) { + while ((variants = reader1.read()) != null) { for (Variant variant : variants) { allVariants.put(getStringId(variant), variant); } } - reader.close(); + reader1.close(); return allVariants; } @@ -208,8 +190,8 @@ private String getStringId(Variant variant) { @Test public void queryOtherAssembly() throws Exception { - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_2, TAXONOMY_2, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_2, TAXONOMY_2, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); Map variants = readIntoMap(); assertEquals(3, variants.size()); @@ -243,8 +225,8 @@ public void snpVariantClassAttribute() throws Exception { @Test public void insertionVariantClassAttribute() throws Exception { - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_4, TAXONOMY_4, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_4, TAXONOMY_4, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); List variants = readIntoList(); assertEquals(1, variants.size()); String insertionSequenceOntology = "SO:0000667"; @@ -256,8 +238,8 @@ public void insertionVariantClassAttribute() throws Exception { @Test public void otherVariantClasses() throws Exception { - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_5, TAXONOMY_5, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_5, TAXONOMY_5, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); List variants = readIntoList(); assertEquals(4, variants.size()); String indelSequenceOntology = "SO:1000032"; @@ -288,8 +270,8 @@ public void studyIdAttribute() throws Exception { @Test public void clusteredVariantWithoutSubmittedVariants() throws Exception { - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_3, TAXONOMY_3, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_3, TAXONOMY_3, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); List variants = readIntoList(); assertEquals(0, variants.size()); } @@ -349,8 +331,8 @@ public void includeEvidenceFlag() throws Exception { @Test public void includeValidatedNonDefaultFlag() throws Exception { - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_5, TAXONOMY_5, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_5, TAXONOMY_5, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); assertFlagEqualsInAllVariants(SUBMITTED_VARIANT_VALIDATED_KEY, true); assertFlagEqualsInRS(CLUSTERED_VARIANT_VALIDATED_KEY, false, RS_4); assertFlagEqualsInRS(CLUSTERED_VARIANT_VALIDATED_KEY, true, RS_5); @@ -370,22 +352,22 @@ private void assertFlagEqualsInRS(String key, boolean value, String clusteredVar @Test public void includeAssemblyMatchNonDefaultFlag() throws Exception { - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_4, TAXONOMY_4, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_4, TAXONOMY_4, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); assertFlagEqualsInAllVariants(ASSEMBLY_MATCH_KEY, false); } @Test public void includeAllelesMatchNonDefaultFlag() throws Exception { - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_4, TAXONOMY_4, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_4, TAXONOMY_4, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); assertFlagEqualsInAllVariants(ALLELES_MATCH_KEY, false); } @Test public void includeEvidenceNonDefaultFlag() throws Exception { - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_5, TAXONOMY_5, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_5, TAXONOMY_5, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); assertFlagEqualsInAllVariants(SUPPORTED_BY_EVIDENCE_KEY, false); } @@ -398,8 +380,8 @@ public void includeEvidenceNonDefaultFlag() throws Exception { */ @Test public void includeOnlyVariantsWithTheSameChromosomeAndStartInRsAndSs() throws Exception { - reader = new AccessionedVariantMongoReader("GCA_000002775.1", 3694, mongoClient, - TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader("GCA_000002775.1", 3694, mongoClient, + mongoTemplate, TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); List allVariants = readIntoList(); assertEquals(3, allVariants.size()); @@ -440,8 +422,8 @@ private boolean isVariantPresent(List variants, String chromosome, long */ @Test public void includeAmbiguousVariantsWithDifferentStartInSsAndRs() throws Exception { - reader = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_4, TAXONOMY_4, mongoClient, TEST_DB, CHUNK_SIZE, - new DbsnpCollectionNames()); + reader1 = new AccessionedVariantMongoReader(ASSEMBLY_ACCESSION_4, TAXONOMY_4, mongoClient, mongoTemplate, + TEST_DB, CHUNK_SIZE, new DbsnpCollectionNames()); List allVariants = readIntoList(); assertEquals(1, allVariants.size()); diff --git a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigMongoReaderTest.java b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigMongoReaderTest.java index b4351357e..b64dd7931 100644 --- a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigMongoReaderTest.java +++ b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/contig/ContigMongoReaderTest.java @@ -91,17 +91,4 @@ public void basicMergedContigsRead() { } assertEquals(new HashSet<>(Arrays.asList("CM001954.1", "CM001941.2")), new HashSet<>(contigs)); } - - @Test - public void basicMultimapContigsRead() { - ContigMongoReader reader = ContigMongoReader.multimapContigReader(ASSEMBLY_ACCESSION, mongoClient, TEST_DB, - new DbsnpCollectionNames()); - reader.open(new ExecutionContext()); - String contig; - List contigs = new ArrayList<>(); - while ((contig = reader.read()) != null) { - contigs.add(contig); - } - assertEquals(new HashSet<>(Arrays.asList("CM001954.1")), new HashSet<>(contigs)); - } } diff --git a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReaderTest.java b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReaderTest.java index 2fb3ff4e8..23529e424 100644 --- a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReaderTest.java +++ b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReaderTest.java @@ -105,7 +105,7 @@ public void tearDown() { this.reader.close(); } - private List readIntoList() { + private List readIntoList() throws Exception { List variants = new ArrayList<>(); List variantsInBatch; @@ -116,7 +116,7 @@ private List readIntoList() { } @Test - public void testOnlySpecifiedTaxVariantsRead() { + public void testOnlySpecifiedTaxVariantsRead() throws Exception { // See scenario here: https://docs.google.com/spreadsheets/d/12QJT4N0-UJGTv3BtVq_gyyrVzweXd5ev2WFlnP-4MW4/edit#rangeid=1202280213 DbsnpSubmittedVariantEntity ss1 = createSS(ASSEMBLY, TAXONOMY_1, 1L, 1L, 100L, "C", "A"); DbsnpSubmittedVariantEntity ss2 = createSS(ASSEMBLY, TAXONOMY_2, 2L, 2L, 101L, "A", "T"); diff --git a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/jobs/AccessionReleaseJobConfigurationTest.java b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/jobs/AccessionReleaseJobConfigurationTest.java index 685e55e9d..51c9a63b5 100644 --- a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/jobs/AccessionReleaseJobConfigurationTest.java +++ b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/jobs/AccessionReleaseJobConfigurationTest.java @@ -48,20 +48,16 @@ import static org.junit.Assert.assertEquals; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_ACTIVE_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_MERGED_CONTIGS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_MULTIMAP_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_ACTIVE_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_MERGED_CONTIGS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_MULTIMAP_CONTIGS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MAPPED_ACTIVE_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MAPPED_DEPRECATED_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MAPPED_MERGED_VARIANTS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MAPPED_ACTIVE_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MAPPED_DEPRECATED_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP; import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MAPPED_MERGED_VARIANTS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MULTIMAP_VARIANTS_STEP; @RunWith(SpringRunner.class) @ContextConfiguration(classes = {BatchTestConfiguration.class, MongoTestConfiguration.class}) @@ -96,10 +92,6 @@ public class AccessionReleaseJobConfigurationTest { private static final long EXPECTED_EVA_LINES_MERGED_DEPRECATED = 1; - private static final long EXPECTED_LINES_MULTIMAP = 2; - - private static final long EXPECTED_EVA_LINES_MULTIMAP = 1; - @Autowired private JobLauncherTestUtils jobLauncherTestUtils; @@ -124,19 +116,15 @@ public void basicJobCompletion() throws Exception { JobExecution jobExecution = jobLauncherTestUtils.launchJob(); List expectedSteps = Arrays.asList(LIST_DBSNP_ACTIVE_CONTIGS_STEP, LIST_DBSNP_MERGED_CONTIGS_STEP, - LIST_DBSNP_MULTIMAP_CONTIGS_STEP, RELEASE_DBSNP_MAPPED_ACTIVE_VARIANTS_STEP, RELEASE_DBSNP_MAPPED_MERGED_VARIANTS_STEP, RELEASE_DBSNP_MAPPED_DEPRECATED_VARIANTS_STEP, RELEASE_DBSNP_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP, - RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP, LIST_EVA_ACTIVE_CONTIGS_STEP, LIST_EVA_MERGED_CONTIGS_STEP, - LIST_EVA_MULTIMAP_CONTIGS_STEP, RELEASE_EVA_MAPPED_ACTIVE_VARIANTS_STEP, RELEASE_EVA_MAPPED_MERGED_VARIANTS_STEP, RELEASE_EVA_MAPPED_DEPRECATED_VARIANTS_STEP, - RELEASE_EVA_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP, - RELEASE_EVA_MULTIMAP_VARIANTS_STEP); + RELEASE_EVA_MAPPED_MERGED_DEPRECATED_VARIANTS_STEP); assertStepsExecuted(expectedSteps, jobExecution); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); @@ -154,8 +142,6 @@ public void variantsWritten() throws Exception { assertEquals(EXPECTED_LINES_DEPRECATED, numVariantsInDeprecatedRelease); long numVariantsInMergedDeprecatedRelease = FileUtils.countNonCommentLines(getMergedDeprecatedRelease()); assertEquals(EXPECTED_LINES_MERGED_DEPRECATED, numVariantsInMergedDeprecatedRelease); - long numVariantsInMultimapRelease = FileUtils.countNonCommentLines(getMultimapRelease()); - assertEquals(EXPECTED_LINES_MULTIMAP, numVariantsInMultimapRelease); long numVariantsInEvaRelease = FileUtils.countNonCommentLines(getEvaRelease()); assertEquals(EXPECTED_EVA_LINES, numVariantsInEvaRelease); @@ -165,8 +151,6 @@ public void variantsWritten() throws Exception { assertEquals(EXPECTED_EVA_LINES_DEPRECATED, numVariantsInEvaDeprecatedRelease); long numVariantsInEvaMergedDeprecatedRelease = FileUtils.countNonCommentLines(getEvaMergedDeprecatedRelease()); assertEquals(EXPECTED_EVA_LINES_MERGED_DEPRECATED, numVariantsInEvaMergedDeprecatedRelease); - long numVariantsInEvaMultimapRelease = FileUtils.countNonCommentLines(getEvaMultimapRelease()); - assertEquals(EXPECTED_EVA_LINES_MULTIMAP, numVariantsInEvaMultimapRelease); } private FileInputStream getRelease() throws FileNotFoundException { @@ -219,18 +203,6 @@ private FileInputStream getEvaMergedDeprecatedRelease() throws FileNotFoundExcep inputParameters.getAssemblyAccession()).toFile()); } - private FileInputStream getMultimapRelease() throws FileNotFoundException { - return new FileInputStream( - ReportPathResolver.getDbsnpMultimapIdsReportPath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession()).toFile()); - } - - private FileInputStream getEvaMultimapRelease() throws FileNotFoundException { - return new FileInputStream( - ReportPathResolver.getEvaMultimapIdsReportPath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession()).toFile()); - } - private void assertStepsExecuted(List expectedSteps, JobExecution jobExecution) { Collection stepExecutions = jobExecution.getStepExecutions(); List steps = stepExecutions.stream().map(StepExecution::getStepName).collect(Collectors.toList()); diff --git a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/CreateMultimapReleaseStepConfigurationTest.java b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/CreateMultimapReleaseStepConfigurationTest.java deleted file mode 100644 index 50d419589..000000000 --- a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/CreateMultimapReleaseStepConfigurationTest.java +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright 2018 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package uk.ac.ebi.eva.accession.release.configuration.batch.steps; - -import com.lordofthejars.nosqlunit.annotation.UsingDataSet; -import com.lordofthejars.nosqlunit.mongodb.MongoDbConfigurationBuilder; -import com.lordofthejars.nosqlunit.mongodb.MongoDbRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.springframework.batch.core.BatchStatus; -import org.springframework.batch.core.JobExecution; -import org.springframework.batch.test.JobLauncherTestUtils; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.ApplicationContext; -import org.springframework.test.context.ContextConfiguration; -import org.springframework.test.context.TestPropertySource; -import org.springframework.test.context.junit4.SpringRunner; - -import uk.ac.ebi.eva.accession.release.parameters.InputParameters; -import uk.ac.ebi.eva.accession.release.parameters.ReportPathResolver; -import uk.ac.ebi.eva.accession.release.test.configuration.BatchTestConfiguration; -import uk.ac.ebi.eva.accession.release.test.configuration.MongoTestConfiguration; -import uk.ac.ebi.eva.accession.release.test.rule.FixSpringMongoDbRule; -import uk.ac.ebi.eva.commons.core.utils.FileUtils; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static uk.ac.ebi.eva.accession.release.batch.io.active.AccessionedVariantMongoReader.STUDY_ID_KEY; -import static uk.ac.ebi.eva.accession.release.batch.io.active.AccessionedVariantMongoReader.VARIANT_CLASS_KEY; -import static uk.ac.ebi.eva.accession.release.batch.io.VariantMongoAggregationReader.MAPPING_WEIGHT_KEY; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.RELEASE_EVA_MULTIMAP_VARIANTS_STEP; - -@RunWith(SpringRunner.class) -@ContextConfiguration(classes = {BatchTestConfiguration.class, MongoTestConfiguration.class}) -@UsingDataSet(locations = { - "/test-data/dbsnpClusteredVariantEntity.json", - "/test-data/dbsnpSubmittedVariantEntity.json", - "/test-data/clusteredVariantEntity.json", - "/test-data/submittedVariantEntity.json"}) -@TestPropertySource("classpath:application.properties") -public class CreateMultimapReleaseStepConfigurationTest { - - private static final String TEST_DB = "test-db"; - - private static final long EXPECTED_LINES = 2; - - private static final long EXPECTED_EVA_LINES = 1; - - private static final Map assemblyAccessionToName = - Collections.singletonMap("GCA_000409795.2", "Chlorocebus_sabeus 1.1"); - - @Autowired - private JobLauncherTestUtils jobLauncherTestUtils; - - @Autowired - private InputParameters inputParameters; - - //Required by nosql-unit - @Autowired - private ApplicationContext applicationContext; - - @Rule - public MongoDbRule mongoDbRule = new FixSpringMongoDbRule( - MongoDbConfigurationBuilder.mongoDb().databaseName(TEST_DB).build()); - - @Test - public void contextLoads() { - - } - - @Test - public void basicStepCompletion() { - assertStepExecutesAndCompletes(); - } - - private void assertStepExecutesAndCompletes() { - JobExecution jobExecution = jobLauncherTestUtils.launchStep(RELEASE_DBSNP_MULTIMAP_VARIANTS_STEP); - assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); - } - - @Test - public void variantsWritten() throws Exception { - assertStepExecutesAndCompletes(); - long numVariantsInRelease = FileUtils.countNonCommentLines(new FileInputStream(getReleaseFile())); - assertEquals(EXPECTED_LINES, numVariantsInRelease); - } - - private File getReleaseFile() { - return ReportPathResolver.getDbsnpMultimapIdsReportPath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession()).toFile(); - } - - @Test - public void metadataIsPresent() throws Exception { - assertStepExecutesAndCompletes(); - - String assemblyName = assemblyAccessionToName.get(inputParameters.getAssemblyAccession()); - assertNotNull(assemblyName); - List referenceLines = grepFile(getReleaseFile(), "^##reference= metadataVariantClassLines = grepFile(getReleaseFile(), - "^##INFO= metadataStudyIdLines = grepFile(getReleaseFile(), - "^##INFO= metadataWeightLines = grepFile(getReleaseFile(), - "^##INFO= headerLines = grepFile(getReleaseFile(), - "^#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO$"); - assertEquals(1, headerLines.size()); - } - - private List grepFile(File file, String regex) throws IOException { - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file))); - List lines = new ArrayList<>(); - String line; - while ((line = reader.readLine()) != null) { - if (line.matches(regex)) { - lines.add(line); - } - } - reader.close(); - return lines; - } - - @Test - public void rsAccessionsWritten() throws Exception { - assertStepExecutesAndCompletes(); - long numVariantsInRelease = FileUtils.countNonCommentLines(new FileInputStream(getReleaseFile())); - assertEquals(EXPECTED_LINES, numVariantsInRelease); - List dataLinesWithRs = grepFile(getReleaseFile(), "^.*\trs[0-9]+\t.*$"); - assertEquals(EXPECTED_LINES, dataLinesWithRs.size()); - } - - @Test - public void infoWritten() throws Exception { - assertStepExecutesAndCompletes(); - File outputFile = getReleaseFile(); - long numVariantsInRelease = FileUtils.countNonCommentLines(new FileInputStream(outputFile)); - assertEquals(EXPECTED_LINES, numVariantsInRelease); - String dataLinesDoNotStartWithHash = "^[^#]"; - String variantClass = VARIANT_CLASS_KEY + "=SO:[0-9]+"; - String studyId = STUDY_ID_KEY + "=[a-zA-Z0-9,]+"; - String mapWeight = MAPPING_WEIGHT_KEY + "=[0-9]+"; - - List dataLines; - dataLines = grepFile(outputFile, dataLinesDoNotStartWithHash + ".*" + variantClass + ".*"); - assertEquals(EXPECTED_LINES, dataLines.size()); - dataLines = grepFile(outputFile, dataLinesDoNotStartWithHash + ".*" + studyId + ".*"); - assertEquals(EXPECTED_LINES, dataLines.size()); - dataLines = grepFile(outputFile, dataLinesDoNotStartWithHash + ".*" + mapWeight + ".*"); - assertEquals(EXPECTED_LINES, dataLines.size()); - } - - /** - * Variant rs8182 is an insertion and when retrieving the context nucleotide from the FASTA it brings a Y which is - * invalid in VCF. We have to make sure variants like that one are excluded before we write the VCF file. - */ - @Test - public void excludeInvalidVariants() throws IOException { - assertStepExecutesAndCompletes(); - File outputFile = getReleaseFile(); - long numVariantsInRelease = FileUtils.countNonCommentLines(new FileInputStream(outputFile)); - assertEquals(EXPECTED_LINES, numVariantsInRelease); - assertEquals(0, grepFile(outputFile, ".*rs8182.*").size()); - } - - @Test - public void evaVariantsWritten() throws Exception { - JobExecution jobExecution = jobLauncherTestUtils.launchStep(RELEASE_EVA_MULTIMAP_VARIANTS_STEP); - assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); - long numVariantsInRelease = FileUtils.countNonCommentLines(new FileInputStream( - ReportPathResolver.getEvaMultimapIdsReportPath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession()).toFile())); - assertEquals(EXPECTED_EVA_LINES, numVariantsInRelease); - } -} diff --git a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/ListMultimapContigsStepConfigurationTest.java b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/ListMultimapContigsStepConfigurationTest.java deleted file mode 100644 index e1fac8775..000000000 --- a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/configuration/batch/steps/ListMultimapContigsStepConfigurationTest.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2020 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.accession.release.configuration.batch.steps; - -import com.lordofthejars.nosqlunit.annotation.UsingDataSet; -import com.lordofthejars.nosqlunit.mongodb.MongoDbConfigurationBuilder; -import com.lordofthejars.nosqlunit.mongodb.MongoDbRule; -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.springframework.batch.core.BatchStatus; -import org.springframework.batch.core.JobExecution; -import org.springframework.batch.test.JobLauncherTestUtils; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.ApplicationContext; -import org.springframework.data.mongodb.core.MongoTemplate; -import org.springframework.test.annotation.DirtiesContext; -import org.springframework.test.context.ContextConfiguration; -import org.springframework.test.context.TestPropertySource; -import org.springframework.test.context.junit4.SpringRunner; - -import uk.ac.ebi.eva.accession.release.parameters.InputParameters; -import uk.ac.ebi.eva.accession.release.test.configuration.BatchTestConfiguration; -import uk.ac.ebi.eva.accession.release.test.rule.FixSpringMongoDbRule; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; -import java.util.stream.Collectors; - -import static org.junit.Assert.assertEquals; -import static uk.ac.ebi.eva.accession.release.batch.io.contig.ContigWriter.getDbsnpMultimapContigsFilePath; -import static uk.ac.ebi.eva.accession.release.batch.io.contig.ContigWriter.getEvaMultimapContigsFilePath; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_DBSNP_MULTIMAP_CONTIGS_STEP; -import static uk.ac.ebi.eva.accession.release.configuration.BeanNames.LIST_EVA_MULTIMAP_CONTIGS_STEP; - -@RunWith(SpringRunner.class) -@ContextConfiguration(classes = {BatchTestConfiguration.class}) -@UsingDataSet(locations = {"/test-data/dbsnpClusteredVariantEntity.json", - "/test-data/clusteredVariantEntity.json"}) -@TestPropertySource("classpath:application.properties") -public class ListMultimapContigsStepConfigurationTest { - - private static final String TEST_DB = "test-db"; - - @Autowired - private JobLauncherTestUtils jobLauncherTestUtils; - - @Autowired - private InputParameters inputParameters; - - @Autowired - private ApplicationContext applicationContext; - - @Rule - public MongoDbRule mongoDbRule = new FixSpringMongoDbRule( - MongoDbConfigurationBuilder.mongoDb().databaseName(TEST_DB).build()); - - @Autowired - MongoTemplate mongoTemplate; - - @Before - public void setUp() throws Exception { - new File(getDbsnpMultimapContigsFilePath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession())) - .delete(); - new File(getEvaMultimapContigsFilePath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession())) - .delete(); - } - - @After - public void tearDown() throws Exception { - new File(getDbsnpMultimapContigsFilePath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession())) - .delete(); - new File(getEvaMultimapContigsFilePath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession())) - .delete(); - } - @Test - @DirtiesContext - public void assertStepExecutesAndCompletes() { - JobExecution jobExecution = jobLauncherTestUtils.launchStep(LIST_DBSNP_MULTIMAP_CONTIGS_STEP); - assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); - } - - @Test - @DirtiesContext - public void contigsWritten() throws Exception { - assertStepExecutesAndCompletes(); - - assertEquals(new HashSet<>(Arrays.asList("CM001954.1,CAE13")), - setOfLines(getDbsnpMultimapContigsFilePath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession()))); - } - - private Set setOfLines(String path) throws IOException { - BufferedReader bufferedReader = new BufferedReader(new FileReader(path)); - return bufferedReader.lines().collect(Collectors.toSet()); - } - @Test - @DirtiesContext - public void evaContigsWritten() throws Exception { - JobExecution jobExecution = jobLauncherTestUtils.launchStep(LIST_EVA_MULTIMAP_CONTIGS_STEP); - assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); - - assertEquals(new HashSet<>(Arrays.asList("CM001941.2,CAE1")), - setOfLines(getEvaMultimapContigsFilePath(inputParameters.getOutputFolder(), - inputParameters.getAssemblyAccession()))); - } -} diff --git a/eva-accession-release/src/test/resources/test-data/dbsnpClusteredVariantEntity.json b/eva-accession-release/src/test/resources/test-data/dbsnpClusteredVariantEntity.json index 8f8d65216..3be425b81 100644 --- a/eva-accession-release/src/test/resources/test-data/dbsnpClusteredVariantEntity.json +++ b/eva-accession-release/src/test/resources/test-data/dbsnpClusteredVariantEntity.json @@ -27,7 +27,7 @@ ) }, { - "_id" : "3D0AFF1E1CD40ED24279EF031D3E54B36D2A1111", + "_id" : "AB1080D025DC5D80D844E26F1F6FB3998FF95B0D", "asm" : "GCA_000409795.2", "tax" : 60711, "contig" : "CM001954.1",